{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9995035578355123, "eval_steps": 500, "global_step": 6042, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004964421644878371, "grad_norm": 4.641703128814697, "learning_rate": 1.652892561983471e-08, "loss": 0.7633, "step": 1 }, { "epoch": 0.0009928843289756743, "grad_norm": 5.435983180999756, "learning_rate": 3.305785123966942e-08, "loss": 0.8699, "step": 2 }, { "epoch": 0.0014893264934635116, "grad_norm": 4.833365440368652, "learning_rate": 4.958677685950414e-08, "loss": 0.7715, "step": 3 }, { "epoch": 0.0019857686579513485, "grad_norm": 5.130953311920166, "learning_rate": 6.611570247933884e-08, "loss": 0.8326, "step": 4 }, { "epoch": 0.0024822108224391857, "grad_norm": 5.6530442237854, "learning_rate": 8.264462809917357e-08, "loss": 0.8506, "step": 5 }, { "epoch": 0.002978652986927023, "grad_norm": 5.506429672241211, "learning_rate": 9.917355371900828e-08, "loss": 0.8442, "step": 6 }, { "epoch": 0.0034750951514148603, "grad_norm": 5.1894707679748535, "learning_rate": 1.1570247933884297e-07, "loss": 0.8372, "step": 7 }, { "epoch": 0.003971537315902697, "grad_norm": 5.670557022094727, "learning_rate": 1.3223140495867768e-07, "loss": 0.8527, "step": 8 }, { "epoch": 0.004467979480390534, "grad_norm": 5.063024997711182, "learning_rate": 1.487603305785124e-07, "loss": 0.805, "step": 9 }, { "epoch": 0.004964421644878371, "grad_norm": 4.876114368438721, "learning_rate": 1.6528925619834713e-07, "loss": 0.7779, "step": 10 }, { "epoch": 0.005460863809366208, "grad_norm": 5.469033718109131, "learning_rate": 1.8181818181818183e-07, "loss": 0.8353, "step": 11 }, { "epoch": 0.005957305973854046, "grad_norm": 5.417829513549805, "learning_rate": 1.9834710743801655e-07, "loss": 0.8261, "step": 12 }, { "epoch": 0.0064537481383418836, "grad_norm": 5.452813625335693, "learning_rate": 2.1487603305785125e-07, "loss": 0.8502, "step": 13 }, { "epoch": 0.006950190302829721, "grad_norm": 5.273932456970215, "learning_rate": 2.3140495867768595e-07, "loss": 0.8572, "step": 14 }, { "epoch": 0.007446632467317558, "grad_norm": 5.208057403564453, "learning_rate": 2.4793388429752067e-07, "loss": 0.8291, "step": 15 }, { "epoch": 0.007943074631805394, "grad_norm": 5.346608638763428, "learning_rate": 2.6446280991735537e-07, "loss": 0.8143, "step": 16 }, { "epoch": 0.008439516796293232, "grad_norm": 5.233694076538086, "learning_rate": 2.809917355371901e-07, "loss": 0.7995, "step": 17 }, { "epoch": 0.008935958960781068, "grad_norm": 4.612279415130615, "learning_rate": 2.975206611570248e-07, "loss": 0.757, "step": 18 }, { "epoch": 0.009432401125268906, "grad_norm": 4.838689804077148, "learning_rate": 3.1404958677685957e-07, "loss": 0.808, "step": 19 }, { "epoch": 0.009928843289756743, "grad_norm": 4.81422758102417, "learning_rate": 3.3057851239669426e-07, "loss": 0.7948, "step": 20 }, { "epoch": 0.01042528545424458, "grad_norm": 4.6831841468811035, "learning_rate": 3.4710743801652896e-07, "loss": 0.7704, "step": 21 }, { "epoch": 0.010921727618732417, "grad_norm": 4.6157755851745605, "learning_rate": 3.6363636363636366e-07, "loss": 0.7658, "step": 22 }, { "epoch": 0.011418169783220255, "grad_norm": 4.983789443969727, "learning_rate": 3.8016528925619836e-07, "loss": 0.8396, "step": 23 }, { "epoch": 0.011914611947708093, "grad_norm": 4.088686466217041, "learning_rate": 3.966942148760331e-07, "loss": 0.7753, "step": 24 }, { "epoch": 0.012411054112195929, "grad_norm": 4.050948619842529, "learning_rate": 4.132231404958678e-07, "loss": 0.7845, "step": 25 }, { "epoch": 0.012907496276683767, "grad_norm": 4.1860456466674805, "learning_rate": 4.297520661157025e-07, "loss": 0.8276, "step": 26 }, { "epoch": 0.013403938441171603, "grad_norm": 4.136765003204346, "learning_rate": 4.462809917355372e-07, "loss": 0.8023, "step": 27 }, { "epoch": 0.013900380605659441, "grad_norm": 3.8967840671539307, "learning_rate": 4.628099173553719e-07, "loss": 0.7783, "step": 28 }, { "epoch": 0.014396822770147278, "grad_norm": 3.760824680328369, "learning_rate": 4.793388429752067e-07, "loss": 0.7628, "step": 29 }, { "epoch": 0.014893264934635116, "grad_norm": 3.5190658569335938, "learning_rate": 4.958677685950413e-07, "loss": 0.7412, "step": 30 }, { "epoch": 0.015389707099122952, "grad_norm": 3.702692985534668, "learning_rate": 5.123966942148761e-07, "loss": 0.7531, "step": 31 }, { "epoch": 0.015886149263610788, "grad_norm": 3.121241331100464, "learning_rate": 5.289256198347107e-07, "loss": 0.7727, "step": 32 }, { "epoch": 0.016382591428098628, "grad_norm": 2.288313865661621, "learning_rate": 5.454545454545455e-07, "loss": 0.7194, "step": 33 }, { "epoch": 0.016879033592586464, "grad_norm": 2.206282138824463, "learning_rate": 5.619834710743802e-07, "loss": 0.7333, "step": 34 }, { "epoch": 0.0173754757570743, "grad_norm": 2.2005562782287598, "learning_rate": 5.78512396694215e-07, "loss": 0.7198, "step": 35 }, { "epoch": 0.017871917921562137, "grad_norm": 2.2379443645477295, "learning_rate": 5.950413223140496e-07, "loss": 0.721, "step": 36 }, { "epoch": 0.018368360086049976, "grad_norm": 2.057058334350586, "learning_rate": 6.115702479338844e-07, "loss": 0.7458, "step": 37 }, { "epoch": 0.018864802250537813, "grad_norm": 1.9792146682739258, "learning_rate": 6.280991735537191e-07, "loss": 0.6996, "step": 38 }, { "epoch": 0.01936124441502565, "grad_norm": 1.8951163291931152, "learning_rate": 6.446280991735538e-07, "loss": 0.7276, "step": 39 }, { "epoch": 0.019857686579513485, "grad_norm": 1.7890640497207642, "learning_rate": 6.611570247933885e-07, "loss": 0.7238, "step": 40 }, { "epoch": 0.020354128744001325, "grad_norm": 1.7812288999557495, "learning_rate": 6.776859504132232e-07, "loss": 0.7172, "step": 41 }, { "epoch": 0.02085057090848916, "grad_norm": 1.6969952583312988, "learning_rate": 6.942148760330579e-07, "loss": 0.7271, "step": 42 }, { "epoch": 0.021347013072976997, "grad_norm": 1.5126817226409912, "learning_rate": 7.107438016528927e-07, "loss": 0.7211, "step": 43 }, { "epoch": 0.021843455237464834, "grad_norm": 1.272711157798767, "learning_rate": 7.272727272727273e-07, "loss": 0.6766, "step": 44 }, { "epoch": 0.022339897401952673, "grad_norm": 1.4133570194244385, "learning_rate": 7.438016528925621e-07, "loss": 0.6827, "step": 45 }, { "epoch": 0.02283633956644051, "grad_norm": 1.5802109241485596, "learning_rate": 7.603305785123967e-07, "loss": 0.6804, "step": 46 }, { "epoch": 0.023332781730928346, "grad_norm": 1.5968403816223145, "learning_rate": 7.768595041322315e-07, "loss": 0.6402, "step": 47 }, { "epoch": 0.023829223895416186, "grad_norm": 1.7273311614990234, "learning_rate": 7.933884297520662e-07, "loss": 0.6858, "step": 48 }, { "epoch": 0.024325666059904022, "grad_norm": 1.7977975606918335, "learning_rate": 8.099173553719009e-07, "loss": 0.7199, "step": 49 }, { "epoch": 0.024822108224391858, "grad_norm": 1.5149571895599365, "learning_rate": 8.264462809917356e-07, "loss": 0.6594, "step": 50 }, { "epoch": 0.025318550388879695, "grad_norm": 1.4670385122299194, "learning_rate": 8.429752066115703e-07, "loss": 0.6505, "step": 51 }, { "epoch": 0.025814992553367534, "grad_norm": 1.272861361503601, "learning_rate": 8.59504132231405e-07, "loss": 0.6294, "step": 52 }, { "epoch": 0.02631143471785537, "grad_norm": 1.2909882068634033, "learning_rate": 8.760330578512398e-07, "loss": 0.6814, "step": 53 }, { "epoch": 0.026807876882343207, "grad_norm": 1.1765750646591187, "learning_rate": 8.925619834710744e-07, "loss": 0.6383, "step": 54 }, { "epoch": 0.027304319046831043, "grad_norm": 1.1350483894348145, "learning_rate": 9.090909090909091e-07, "loss": 0.6652, "step": 55 }, { "epoch": 0.027800761211318883, "grad_norm": 1.048755168914795, "learning_rate": 9.256198347107438e-07, "loss": 0.6936, "step": 56 }, { "epoch": 0.02829720337580672, "grad_norm": 0.8574875593185425, "learning_rate": 9.421487603305785e-07, "loss": 0.6617, "step": 57 }, { "epoch": 0.028793645540294555, "grad_norm": 1.0410317182540894, "learning_rate": 9.586776859504134e-07, "loss": 0.6745, "step": 58 }, { "epoch": 0.02929008770478239, "grad_norm": 0.9109283089637756, "learning_rate": 9.75206611570248e-07, "loss": 0.6499, "step": 59 }, { "epoch": 0.02978652986927023, "grad_norm": 0.9413292407989502, "learning_rate": 9.917355371900827e-07, "loss": 0.6555, "step": 60 }, { "epoch": 0.030282972033758068, "grad_norm": 0.9726828336715698, "learning_rate": 1.0082644628099174e-06, "loss": 0.6507, "step": 61 }, { "epoch": 0.030779414198245904, "grad_norm": 0.9933249354362488, "learning_rate": 1.0247933884297522e-06, "loss": 0.5744, "step": 62 }, { "epoch": 0.031275856362733744, "grad_norm": 0.8879541158676147, "learning_rate": 1.041322314049587e-06, "loss": 0.6265, "step": 63 }, { "epoch": 0.031772298527221576, "grad_norm": 0.8349500298500061, "learning_rate": 1.0578512396694215e-06, "loss": 0.5986, "step": 64 }, { "epoch": 0.032268740691709416, "grad_norm": 0.7973727583885193, "learning_rate": 1.0743801652892562e-06, "loss": 0.6641, "step": 65 }, { "epoch": 0.032765182856197256, "grad_norm": 0.7868319153785706, "learning_rate": 1.090909090909091e-06, "loss": 0.6376, "step": 66 }, { "epoch": 0.03326162502068509, "grad_norm": 0.7246975898742676, "learning_rate": 1.1074380165289257e-06, "loss": 0.6284, "step": 67 }, { "epoch": 0.03375806718517293, "grad_norm": 0.8299874067306519, "learning_rate": 1.1239669421487605e-06, "loss": 0.6195, "step": 68 }, { "epoch": 0.03425450934966076, "grad_norm": 0.7683900594711304, "learning_rate": 1.140495867768595e-06, "loss": 0.6557, "step": 69 }, { "epoch": 0.0347509515141486, "grad_norm": 0.7100197672843933, "learning_rate": 1.15702479338843e-06, "loss": 0.6009, "step": 70 }, { "epoch": 0.03524739367863644, "grad_norm": 0.7492334246635437, "learning_rate": 1.1735537190082645e-06, "loss": 0.565, "step": 71 }, { "epoch": 0.03574383584312427, "grad_norm": 0.6323891282081604, "learning_rate": 1.1900826446280993e-06, "loss": 0.5786, "step": 72 }, { "epoch": 0.03624027800761211, "grad_norm": 0.6078855395317078, "learning_rate": 1.206611570247934e-06, "loss": 0.5699, "step": 73 }, { "epoch": 0.03673672017209995, "grad_norm": 0.6171720623970032, "learning_rate": 1.2231404958677688e-06, "loss": 0.6095, "step": 74 }, { "epoch": 0.037233162336587786, "grad_norm": 0.6188567280769348, "learning_rate": 1.2396694214876035e-06, "loss": 0.5939, "step": 75 }, { "epoch": 0.037729604501075625, "grad_norm": 0.798400342464447, "learning_rate": 1.2561983471074383e-06, "loss": 0.5935, "step": 76 }, { "epoch": 0.038226046665563465, "grad_norm": 0.7004590034484863, "learning_rate": 1.2727272727272728e-06, "loss": 0.6437, "step": 77 }, { "epoch": 0.0387224888300513, "grad_norm": 0.6432421207427979, "learning_rate": 1.2892561983471076e-06, "loss": 0.5909, "step": 78 }, { "epoch": 0.03921893099453914, "grad_norm": 0.5755748748779297, "learning_rate": 1.3057851239669423e-06, "loss": 0.6126, "step": 79 }, { "epoch": 0.03971537315902697, "grad_norm": 0.5382779836654663, "learning_rate": 1.322314049586777e-06, "loss": 0.5882, "step": 80 }, { "epoch": 0.04021181532351481, "grad_norm": 0.5153941512107849, "learning_rate": 1.3388429752066118e-06, "loss": 0.5558, "step": 81 }, { "epoch": 0.04070825748800265, "grad_norm": 0.5379814505577087, "learning_rate": 1.3553719008264463e-06, "loss": 0.6007, "step": 82 }, { "epoch": 0.04120469965249048, "grad_norm": 0.5509174466133118, "learning_rate": 1.371900826446281e-06, "loss": 0.5919, "step": 83 }, { "epoch": 0.04170114181697832, "grad_norm": 0.5955458283424377, "learning_rate": 1.3884297520661158e-06, "loss": 0.6109, "step": 84 }, { "epoch": 0.04219758398146616, "grad_norm": 0.5423709750175476, "learning_rate": 1.4049586776859506e-06, "loss": 0.5677, "step": 85 }, { "epoch": 0.042694026145953995, "grad_norm": 0.5979585647583008, "learning_rate": 1.4214876033057853e-06, "loss": 0.6119, "step": 86 }, { "epoch": 0.043190468310441835, "grad_norm": 0.5663396716117859, "learning_rate": 1.4380165289256199e-06, "loss": 0.6038, "step": 87 }, { "epoch": 0.04368691047492967, "grad_norm": 0.5848954916000366, "learning_rate": 1.4545454545454546e-06, "loss": 0.5971, "step": 88 }, { "epoch": 0.04418335263941751, "grad_norm": 0.6055182218551636, "learning_rate": 1.4710743801652894e-06, "loss": 0.566, "step": 89 }, { "epoch": 0.04467979480390535, "grad_norm": 0.5869228839874268, "learning_rate": 1.4876033057851241e-06, "loss": 0.6177, "step": 90 }, { "epoch": 0.04517623696839318, "grad_norm": 0.558254063129425, "learning_rate": 1.5041322314049589e-06, "loss": 0.5787, "step": 91 }, { "epoch": 0.04567267913288102, "grad_norm": 0.5229103565216064, "learning_rate": 1.5206611570247934e-06, "loss": 0.552, "step": 92 }, { "epoch": 0.04616912129736886, "grad_norm": 0.56169593334198, "learning_rate": 1.5371900826446282e-06, "loss": 0.5564, "step": 93 }, { "epoch": 0.04666556346185669, "grad_norm": 0.5381916165351868, "learning_rate": 1.553719008264463e-06, "loss": 0.5905, "step": 94 }, { "epoch": 0.04716200562634453, "grad_norm": 0.5516647696495056, "learning_rate": 1.5702479338842977e-06, "loss": 0.5882, "step": 95 }, { "epoch": 0.04765844779083237, "grad_norm": 0.5744457244873047, "learning_rate": 1.5867768595041324e-06, "loss": 0.5768, "step": 96 }, { "epoch": 0.048154889955320204, "grad_norm": 0.5509123206138611, "learning_rate": 1.603305785123967e-06, "loss": 0.6051, "step": 97 }, { "epoch": 0.048651332119808044, "grad_norm": 0.5232115387916565, "learning_rate": 1.6198347107438017e-06, "loss": 0.5531, "step": 98 }, { "epoch": 0.04914777428429588, "grad_norm": 0.6030935049057007, "learning_rate": 1.6363636363636365e-06, "loss": 0.5346, "step": 99 }, { "epoch": 0.049644216448783716, "grad_norm": 0.5373480916023254, "learning_rate": 1.6528925619834712e-06, "loss": 0.5343, "step": 100 }, { "epoch": 0.050140658613271556, "grad_norm": 0.5755754709243774, "learning_rate": 1.669421487603306e-06, "loss": 0.5724, "step": 101 }, { "epoch": 0.05063710077775939, "grad_norm": 0.579372227191925, "learning_rate": 1.6859504132231405e-06, "loss": 0.5457, "step": 102 }, { "epoch": 0.05113354294224723, "grad_norm": 0.5560076236724854, "learning_rate": 1.7024793388429753e-06, "loss": 0.5435, "step": 103 }, { "epoch": 0.05162998510673507, "grad_norm": 0.5907414555549622, "learning_rate": 1.71900826446281e-06, "loss": 0.5707, "step": 104 }, { "epoch": 0.0521264272712229, "grad_norm": 0.5198373198509216, "learning_rate": 1.7355371900826448e-06, "loss": 0.5489, "step": 105 }, { "epoch": 0.05262286943571074, "grad_norm": 0.6103171110153198, "learning_rate": 1.7520661157024795e-06, "loss": 0.5822, "step": 106 }, { "epoch": 0.053119311600198574, "grad_norm": 0.49033641815185547, "learning_rate": 1.768595041322314e-06, "loss": 0.5539, "step": 107 }, { "epoch": 0.053615753764686414, "grad_norm": 0.5989443063735962, "learning_rate": 1.7851239669421488e-06, "loss": 0.5957, "step": 108 }, { "epoch": 0.05411219592917425, "grad_norm": 0.5596713423728943, "learning_rate": 1.8016528925619835e-06, "loss": 0.5885, "step": 109 }, { "epoch": 0.054608638093662086, "grad_norm": 0.5094192028045654, "learning_rate": 1.8181818181818183e-06, "loss": 0.5564, "step": 110 }, { "epoch": 0.055105080258149926, "grad_norm": 0.5491378903388977, "learning_rate": 1.8347107438016533e-06, "loss": 0.541, "step": 111 }, { "epoch": 0.055601522422637766, "grad_norm": 0.559869647026062, "learning_rate": 1.8512396694214876e-06, "loss": 0.5818, "step": 112 }, { "epoch": 0.0560979645871256, "grad_norm": 0.46932703256607056, "learning_rate": 1.8677685950413223e-06, "loss": 0.5191, "step": 113 }, { "epoch": 0.05659440675161344, "grad_norm": 0.47639697790145874, "learning_rate": 1.884297520661157e-06, "loss": 0.5492, "step": 114 }, { "epoch": 0.05709084891610127, "grad_norm": 0.5606554746627808, "learning_rate": 1.900826446280992e-06, "loss": 0.5563, "step": 115 }, { "epoch": 0.05758729108058911, "grad_norm": 0.4998980760574341, "learning_rate": 1.917355371900827e-06, "loss": 0.5452, "step": 116 }, { "epoch": 0.05808373324507695, "grad_norm": 0.5718991756439209, "learning_rate": 1.9338842975206613e-06, "loss": 0.509, "step": 117 }, { "epoch": 0.05858017540956478, "grad_norm": 0.5617508292198181, "learning_rate": 1.950413223140496e-06, "loss": 0.5634, "step": 118 }, { "epoch": 0.05907661757405262, "grad_norm": 0.48212793469429016, "learning_rate": 1.966942148760331e-06, "loss": 0.5502, "step": 119 }, { "epoch": 0.05957305973854046, "grad_norm": 0.5278294682502747, "learning_rate": 1.9834710743801654e-06, "loss": 0.5751, "step": 120 }, { "epoch": 0.060069501903028295, "grad_norm": 0.4711643159389496, "learning_rate": 2.0000000000000003e-06, "loss": 0.6008, "step": 121 }, { "epoch": 0.060565944067516135, "grad_norm": 0.5343295335769653, "learning_rate": 2.016528925619835e-06, "loss": 0.5548, "step": 122 }, { "epoch": 0.061062386232003975, "grad_norm": 0.5161747336387634, "learning_rate": 2.0330578512396694e-06, "loss": 0.5494, "step": 123 }, { "epoch": 0.06155882839649181, "grad_norm": 0.47624602913856506, "learning_rate": 2.0495867768595044e-06, "loss": 0.5367, "step": 124 }, { "epoch": 0.06205527056097965, "grad_norm": 0.4843749701976776, "learning_rate": 2.066115702479339e-06, "loss": 0.5414, "step": 125 }, { "epoch": 0.06255171272546749, "grad_norm": 0.552000105381012, "learning_rate": 2.082644628099174e-06, "loss": 0.5691, "step": 126 }, { "epoch": 0.06304815488995533, "grad_norm": 0.5709593296051025, "learning_rate": 2.0991735537190084e-06, "loss": 0.504, "step": 127 }, { "epoch": 0.06354459705444315, "grad_norm": 0.5239576697349548, "learning_rate": 2.115702479338843e-06, "loss": 0.5309, "step": 128 }, { "epoch": 0.06404103921893099, "grad_norm": 0.5772541165351868, "learning_rate": 2.132231404958678e-06, "loss": 0.5397, "step": 129 }, { "epoch": 0.06453748138341883, "grad_norm": 0.5566551089286804, "learning_rate": 2.1487603305785124e-06, "loss": 0.552, "step": 130 }, { "epoch": 0.06503392354790667, "grad_norm": 0.5205637812614441, "learning_rate": 2.1652892561983474e-06, "loss": 0.5703, "step": 131 }, { "epoch": 0.06553036571239451, "grad_norm": 0.47055235505104065, "learning_rate": 2.181818181818182e-06, "loss": 0.5373, "step": 132 }, { "epoch": 0.06602680787688234, "grad_norm": 0.4778951406478882, "learning_rate": 2.1983471074380165e-06, "loss": 0.5347, "step": 133 }, { "epoch": 0.06652325004137018, "grad_norm": 0.6027733683586121, "learning_rate": 2.2148760330578515e-06, "loss": 0.5805, "step": 134 }, { "epoch": 0.06701969220585802, "grad_norm": 0.49311044812202454, "learning_rate": 2.231404958677686e-06, "loss": 0.5292, "step": 135 }, { "epoch": 0.06751613437034586, "grad_norm": 0.5185397863388062, "learning_rate": 2.247933884297521e-06, "loss": 0.5187, "step": 136 }, { "epoch": 0.0680125765348337, "grad_norm": 0.5504679083824158, "learning_rate": 2.2644628099173555e-06, "loss": 0.5761, "step": 137 }, { "epoch": 0.06850901869932152, "grad_norm": 0.530012845993042, "learning_rate": 2.28099173553719e-06, "loss": 0.5063, "step": 138 }, { "epoch": 0.06900546086380936, "grad_norm": 0.47322148084640503, "learning_rate": 2.297520661157025e-06, "loss": 0.5383, "step": 139 }, { "epoch": 0.0695019030282972, "grad_norm": 0.48709139227867126, "learning_rate": 2.31404958677686e-06, "loss": 0.5325, "step": 140 }, { "epoch": 0.06999834519278504, "grad_norm": 0.5769919157028198, "learning_rate": 2.3305785123966945e-06, "loss": 0.5257, "step": 141 }, { "epoch": 0.07049478735727288, "grad_norm": 0.5300930738449097, "learning_rate": 2.347107438016529e-06, "loss": 0.5214, "step": 142 }, { "epoch": 0.07099122952176072, "grad_norm": 0.5447989106178284, "learning_rate": 2.363636363636364e-06, "loss": 0.5599, "step": 143 }, { "epoch": 0.07148767168624855, "grad_norm": 0.5334503054618835, "learning_rate": 2.3801652892561985e-06, "loss": 0.53, "step": 144 }, { "epoch": 0.07198411385073639, "grad_norm": 0.4876675307750702, "learning_rate": 2.3966942148760335e-06, "loss": 0.5511, "step": 145 }, { "epoch": 0.07248055601522423, "grad_norm": 0.6594673991203308, "learning_rate": 2.413223140495868e-06, "loss": 0.5586, "step": 146 }, { "epoch": 0.07297699817971207, "grad_norm": 0.5208327174186707, "learning_rate": 2.4297520661157026e-06, "loss": 0.54, "step": 147 }, { "epoch": 0.0734734403441999, "grad_norm": 0.4774026572704315, "learning_rate": 2.4462809917355375e-06, "loss": 0.5198, "step": 148 }, { "epoch": 0.07396988250868773, "grad_norm": 0.5696403980255127, "learning_rate": 2.462809917355372e-06, "loss": 0.5426, "step": 149 }, { "epoch": 0.07446632467317557, "grad_norm": 0.5467244386672974, "learning_rate": 2.479338842975207e-06, "loss": 0.5553, "step": 150 }, { "epoch": 0.07496276683766341, "grad_norm": 0.5126383304595947, "learning_rate": 2.4958677685950416e-06, "loss": 0.5431, "step": 151 }, { "epoch": 0.07545920900215125, "grad_norm": 0.604655921459198, "learning_rate": 2.5123966942148765e-06, "loss": 0.5429, "step": 152 }, { "epoch": 0.07595565116663909, "grad_norm": 0.6104307174682617, "learning_rate": 2.528925619834711e-06, "loss": 0.5285, "step": 153 }, { "epoch": 0.07645209333112693, "grad_norm": 0.5322166085243225, "learning_rate": 2.5454545454545456e-06, "loss": 0.5284, "step": 154 }, { "epoch": 0.07694853549561476, "grad_norm": 0.5798048377037048, "learning_rate": 2.56198347107438e-06, "loss": 0.5706, "step": 155 }, { "epoch": 0.0774449776601026, "grad_norm": 0.5618263483047485, "learning_rate": 2.578512396694215e-06, "loss": 0.5357, "step": 156 }, { "epoch": 0.07794141982459044, "grad_norm": 0.5919040441513062, "learning_rate": 2.5950413223140496e-06, "loss": 0.5627, "step": 157 }, { "epoch": 0.07843786198907828, "grad_norm": 0.5078716278076172, "learning_rate": 2.6115702479338846e-06, "loss": 0.568, "step": 158 }, { "epoch": 0.07893430415356611, "grad_norm": 0.5610663294792175, "learning_rate": 2.628099173553719e-06, "loss": 0.5358, "step": 159 }, { "epoch": 0.07943074631805394, "grad_norm": 0.5360249876976013, "learning_rate": 2.644628099173554e-06, "loss": 0.5488, "step": 160 }, { "epoch": 0.07992718848254178, "grad_norm": 0.5024826526641846, "learning_rate": 2.6611570247933886e-06, "loss": 0.5324, "step": 161 }, { "epoch": 0.08042363064702962, "grad_norm": 0.5808975100517273, "learning_rate": 2.6776859504132236e-06, "loss": 0.516, "step": 162 }, { "epoch": 0.08092007281151746, "grad_norm": 0.5274789333343506, "learning_rate": 2.694214876033058e-06, "loss": 0.5444, "step": 163 }, { "epoch": 0.0814165149760053, "grad_norm": 0.4939458966255188, "learning_rate": 2.7107438016528927e-06, "loss": 0.5632, "step": 164 }, { "epoch": 0.08191295714049314, "grad_norm": 0.5250017046928406, "learning_rate": 2.7272727272727272e-06, "loss": 0.5587, "step": 165 }, { "epoch": 0.08240939930498097, "grad_norm": 0.5311417579650879, "learning_rate": 2.743801652892562e-06, "loss": 0.4894, "step": 166 }, { "epoch": 0.0829058414694688, "grad_norm": 0.5104638338088989, "learning_rate": 2.7603305785123967e-06, "loss": 0.5327, "step": 167 }, { "epoch": 0.08340228363395664, "grad_norm": 0.5202773213386536, "learning_rate": 2.7768595041322317e-06, "loss": 0.5253, "step": 168 }, { "epoch": 0.08389872579844448, "grad_norm": 0.5843322277069092, "learning_rate": 2.7933884297520662e-06, "loss": 0.5152, "step": 169 }, { "epoch": 0.08439516796293232, "grad_norm": 0.5095930695533752, "learning_rate": 2.809917355371901e-06, "loss": 0.5536, "step": 170 }, { "epoch": 0.08489161012742015, "grad_norm": 0.5945034027099609, "learning_rate": 2.8264462809917357e-06, "loss": 0.5141, "step": 171 }, { "epoch": 0.08538805229190799, "grad_norm": 0.5296297669410706, "learning_rate": 2.8429752066115707e-06, "loss": 0.5116, "step": 172 }, { "epoch": 0.08588449445639583, "grad_norm": 0.508744478225708, "learning_rate": 2.8595041322314052e-06, "loss": 0.5323, "step": 173 }, { "epoch": 0.08638093662088367, "grad_norm": 0.538629412651062, "learning_rate": 2.8760330578512398e-06, "loss": 0.5113, "step": 174 }, { "epoch": 0.08687737878537151, "grad_norm": 0.5750528573989868, "learning_rate": 2.8925619834710743e-06, "loss": 0.5146, "step": 175 }, { "epoch": 0.08737382094985933, "grad_norm": 0.4978677034378052, "learning_rate": 2.9090909090909093e-06, "loss": 0.5187, "step": 176 }, { "epoch": 0.08787026311434717, "grad_norm": 0.5219413638114929, "learning_rate": 2.925619834710744e-06, "loss": 0.5332, "step": 177 }, { "epoch": 0.08836670527883501, "grad_norm": 0.5327111482620239, "learning_rate": 2.9421487603305788e-06, "loss": 0.5124, "step": 178 }, { "epoch": 0.08886314744332285, "grad_norm": 0.4921635389328003, "learning_rate": 2.9586776859504133e-06, "loss": 0.5521, "step": 179 }, { "epoch": 0.0893595896078107, "grad_norm": 0.4596026837825775, "learning_rate": 2.9752066115702483e-06, "loss": 0.5062, "step": 180 }, { "epoch": 0.08985603177229853, "grad_norm": 0.5748628973960876, "learning_rate": 2.9917355371900832e-06, "loss": 0.5196, "step": 181 }, { "epoch": 0.09035247393678636, "grad_norm": 0.5219917893409729, "learning_rate": 3.0082644628099178e-06, "loss": 0.5027, "step": 182 }, { "epoch": 0.0908489161012742, "grad_norm": 0.4786379039287567, "learning_rate": 3.0247933884297527e-06, "loss": 0.5195, "step": 183 }, { "epoch": 0.09134535826576204, "grad_norm": 0.5411040782928467, "learning_rate": 3.041322314049587e-06, "loss": 0.5639, "step": 184 }, { "epoch": 0.09184180043024988, "grad_norm": 0.4433193504810333, "learning_rate": 3.0578512396694214e-06, "loss": 0.4926, "step": 185 }, { "epoch": 0.09233824259473772, "grad_norm": 0.5317041873931885, "learning_rate": 3.0743801652892563e-06, "loss": 0.5123, "step": 186 }, { "epoch": 0.09283468475922554, "grad_norm": 0.550360918045044, "learning_rate": 3.090909090909091e-06, "loss": 0.5364, "step": 187 }, { "epoch": 0.09333112692371338, "grad_norm": 0.5248762965202332, "learning_rate": 3.107438016528926e-06, "loss": 0.52, "step": 188 }, { "epoch": 0.09382756908820122, "grad_norm": 0.4953632056713104, "learning_rate": 3.123966942148761e-06, "loss": 0.4778, "step": 189 }, { "epoch": 0.09432401125268906, "grad_norm": 0.5225823521614075, "learning_rate": 3.1404958677685953e-06, "loss": 0.5205, "step": 190 }, { "epoch": 0.0948204534171769, "grad_norm": 0.557985246181488, "learning_rate": 3.1570247933884303e-06, "loss": 0.5318, "step": 191 }, { "epoch": 0.09531689558166474, "grad_norm": 0.5372998714447021, "learning_rate": 3.173553719008265e-06, "loss": 0.5347, "step": 192 }, { "epoch": 0.09581333774615257, "grad_norm": 0.5660268068313599, "learning_rate": 3.1900826446281e-06, "loss": 0.5644, "step": 193 }, { "epoch": 0.09630977991064041, "grad_norm": 0.5440750122070312, "learning_rate": 3.206611570247934e-06, "loss": 0.5175, "step": 194 }, { "epoch": 0.09680622207512825, "grad_norm": 0.4573107063770294, "learning_rate": 3.2231404958677685e-06, "loss": 0.4856, "step": 195 }, { "epoch": 0.09730266423961609, "grad_norm": 0.5014454126358032, "learning_rate": 3.2396694214876034e-06, "loss": 0.5201, "step": 196 }, { "epoch": 0.09779910640410393, "grad_norm": 0.514004647731781, "learning_rate": 3.2561983471074384e-06, "loss": 0.5251, "step": 197 }, { "epoch": 0.09829554856859175, "grad_norm": 0.5062541365623474, "learning_rate": 3.272727272727273e-06, "loss": 0.4988, "step": 198 }, { "epoch": 0.0987919907330796, "grad_norm": 0.5111320614814758, "learning_rate": 3.289256198347108e-06, "loss": 0.514, "step": 199 }, { "epoch": 0.09928843289756743, "grad_norm": 0.49871939420700073, "learning_rate": 3.3057851239669424e-06, "loss": 0.5333, "step": 200 }, { "epoch": 0.09978487506205527, "grad_norm": 0.5291739106178284, "learning_rate": 3.3223140495867774e-06, "loss": 0.5069, "step": 201 }, { "epoch": 0.10028131722654311, "grad_norm": 0.4781259298324585, "learning_rate": 3.338842975206612e-06, "loss": 0.4951, "step": 202 }, { "epoch": 0.10077775939103094, "grad_norm": 0.5816799998283386, "learning_rate": 3.355371900826447e-06, "loss": 0.5422, "step": 203 }, { "epoch": 0.10127420155551878, "grad_norm": 0.5272269248962402, "learning_rate": 3.371900826446281e-06, "loss": 0.5136, "step": 204 }, { "epoch": 0.10177064372000662, "grad_norm": 0.5056141018867493, "learning_rate": 3.388429752066116e-06, "loss": 0.5355, "step": 205 }, { "epoch": 0.10226708588449446, "grad_norm": 0.5148705840110779, "learning_rate": 3.4049586776859505e-06, "loss": 0.4895, "step": 206 }, { "epoch": 0.1027635280489823, "grad_norm": 0.4652334451675415, "learning_rate": 3.4214876033057855e-06, "loss": 0.4982, "step": 207 }, { "epoch": 0.10325997021347014, "grad_norm": 0.5229787230491638, "learning_rate": 3.43801652892562e-06, "loss": 0.4913, "step": 208 }, { "epoch": 0.10375641237795796, "grad_norm": 0.5417152047157288, "learning_rate": 3.454545454545455e-06, "loss": 0.5541, "step": 209 }, { "epoch": 0.1042528545424458, "grad_norm": 0.548160970211029, "learning_rate": 3.4710743801652895e-06, "loss": 0.5292, "step": 210 }, { "epoch": 0.10474929670693364, "grad_norm": 0.6501349210739136, "learning_rate": 3.4876033057851245e-06, "loss": 0.5478, "step": 211 }, { "epoch": 0.10524573887142148, "grad_norm": 0.5593235492706299, "learning_rate": 3.504132231404959e-06, "loss": 0.5018, "step": 212 }, { "epoch": 0.10574218103590932, "grad_norm": 0.564315140247345, "learning_rate": 3.520661157024794e-06, "loss": 0.5271, "step": 213 }, { "epoch": 0.10623862320039715, "grad_norm": 0.5129203200340271, "learning_rate": 3.537190082644628e-06, "loss": 0.5305, "step": 214 }, { "epoch": 0.10673506536488499, "grad_norm": 0.5357585549354553, "learning_rate": 3.553719008264463e-06, "loss": 0.5053, "step": 215 }, { "epoch": 0.10723150752937283, "grad_norm": 0.5717136263847351, "learning_rate": 3.5702479338842976e-06, "loss": 0.5303, "step": 216 }, { "epoch": 0.10772794969386067, "grad_norm": 0.5253100991249084, "learning_rate": 3.5867768595041325e-06, "loss": 0.5394, "step": 217 }, { "epoch": 0.1082243918583485, "grad_norm": 0.5367187857627869, "learning_rate": 3.603305785123967e-06, "loss": 0.5087, "step": 218 }, { "epoch": 0.10872083402283635, "grad_norm": 0.5969569683074951, "learning_rate": 3.619834710743802e-06, "loss": 0.504, "step": 219 }, { "epoch": 0.10921727618732417, "grad_norm": 0.47943976521492004, "learning_rate": 3.6363636363636366e-06, "loss": 0.5102, "step": 220 }, { "epoch": 0.10971371835181201, "grad_norm": 0.6411086916923523, "learning_rate": 3.6528925619834715e-06, "loss": 0.5639, "step": 221 }, { "epoch": 0.11021016051629985, "grad_norm": 0.6451939940452576, "learning_rate": 3.6694214876033065e-06, "loss": 0.5131, "step": 222 }, { "epoch": 0.11070660268078769, "grad_norm": 0.49339333176612854, "learning_rate": 3.685950413223141e-06, "loss": 0.4841, "step": 223 }, { "epoch": 0.11120304484527553, "grad_norm": 0.5598424077033997, "learning_rate": 3.702479338842975e-06, "loss": 0.5511, "step": 224 }, { "epoch": 0.11169948700976336, "grad_norm": 0.5323877930641174, "learning_rate": 3.71900826446281e-06, "loss": 0.4963, "step": 225 }, { "epoch": 0.1121959291742512, "grad_norm": 0.57188481092453, "learning_rate": 3.7355371900826447e-06, "loss": 0.5181, "step": 226 }, { "epoch": 0.11269237133873904, "grad_norm": 0.517224907875061, "learning_rate": 3.7520661157024796e-06, "loss": 0.5182, "step": 227 }, { "epoch": 0.11318881350322688, "grad_norm": 0.4921627640724182, "learning_rate": 3.768595041322314e-06, "loss": 0.5371, "step": 228 }, { "epoch": 0.11368525566771472, "grad_norm": 0.5161948800086975, "learning_rate": 3.785123966942149e-06, "loss": 0.5327, "step": 229 }, { "epoch": 0.11418169783220254, "grad_norm": 0.5548872351646423, "learning_rate": 3.801652892561984e-06, "loss": 0.4681, "step": 230 }, { "epoch": 0.11467813999669038, "grad_norm": 0.5513456463813782, "learning_rate": 3.818181818181819e-06, "loss": 0.5363, "step": 231 }, { "epoch": 0.11517458216117822, "grad_norm": 0.5224780440330505, "learning_rate": 3.834710743801654e-06, "loss": 0.4869, "step": 232 }, { "epoch": 0.11567102432566606, "grad_norm": 0.5323349833488464, "learning_rate": 3.851239669421488e-06, "loss": 0.5323, "step": 233 }, { "epoch": 0.1161674664901539, "grad_norm": 0.5009315609931946, "learning_rate": 3.867768595041323e-06, "loss": 0.5137, "step": 234 }, { "epoch": 0.11666390865464174, "grad_norm": 0.5568164587020874, "learning_rate": 3.884297520661157e-06, "loss": 0.5049, "step": 235 }, { "epoch": 0.11716035081912957, "grad_norm": 0.5673249363899231, "learning_rate": 3.900826446280992e-06, "loss": 0.5181, "step": 236 }, { "epoch": 0.1176567929836174, "grad_norm": 0.5338874459266663, "learning_rate": 3.917355371900827e-06, "loss": 0.499, "step": 237 }, { "epoch": 0.11815323514810525, "grad_norm": 0.601824939250946, "learning_rate": 3.933884297520662e-06, "loss": 0.505, "step": 238 }, { "epoch": 0.11864967731259309, "grad_norm": 0.5520954132080078, "learning_rate": 3.950413223140496e-06, "loss": 0.4692, "step": 239 }, { "epoch": 0.11914611947708093, "grad_norm": 0.6975885033607483, "learning_rate": 3.966942148760331e-06, "loss": 0.4976, "step": 240 }, { "epoch": 0.11964256164156875, "grad_norm": 0.5662869811058044, "learning_rate": 3.983471074380166e-06, "loss": 0.5155, "step": 241 }, { "epoch": 0.12013900380605659, "grad_norm": 0.5451173186302185, "learning_rate": 4.000000000000001e-06, "loss": 0.5047, "step": 242 }, { "epoch": 0.12063544597054443, "grad_norm": 0.5246925354003906, "learning_rate": 4.016528925619834e-06, "loss": 0.521, "step": 243 }, { "epoch": 0.12113188813503227, "grad_norm": 0.5060029029846191, "learning_rate": 4.03305785123967e-06, "loss": 0.4919, "step": 244 }, { "epoch": 0.12162833029952011, "grad_norm": 0.5549389719963074, "learning_rate": 4.049586776859504e-06, "loss": 0.489, "step": 245 }, { "epoch": 0.12212477246400795, "grad_norm": 0.5754988789558411, "learning_rate": 4.066115702479339e-06, "loss": 0.494, "step": 246 }, { "epoch": 0.12262121462849578, "grad_norm": 0.6045129299163818, "learning_rate": 4.082644628099174e-06, "loss": 0.5001, "step": 247 }, { "epoch": 0.12311765679298362, "grad_norm": 0.6243667602539062, "learning_rate": 4.099173553719009e-06, "loss": 0.5117, "step": 248 }, { "epoch": 0.12361409895747145, "grad_norm": 0.6193716526031494, "learning_rate": 4.115702479338843e-06, "loss": 0.5208, "step": 249 }, { "epoch": 0.1241105411219593, "grad_norm": 0.6187137961387634, "learning_rate": 4.132231404958678e-06, "loss": 0.5554, "step": 250 }, { "epoch": 0.12460698328644713, "grad_norm": 0.529241144657135, "learning_rate": 4.148760330578513e-06, "loss": 0.486, "step": 251 }, { "epoch": 0.12510342545093497, "grad_norm": 0.5498828887939453, "learning_rate": 4.165289256198348e-06, "loss": 0.5033, "step": 252 }, { "epoch": 0.1255998676154228, "grad_norm": 0.578742504119873, "learning_rate": 4.181818181818182e-06, "loss": 0.4809, "step": 253 }, { "epoch": 0.12609630977991065, "grad_norm": 0.5757569074630737, "learning_rate": 4.198347107438017e-06, "loss": 0.5051, "step": 254 }, { "epoch": 0.12659275194439848, "grad_norm": 0.5443708896636963, "learning_rate": 4.214876033057851e-06, "loss": 0.5174, "step": 255 }, { "epoch": 0.1270891941088863, "grad_norm": 0.5324555039405823, "learning_rate": 4.231404958677686e-06, "loss": 0.5202, "step": 256 }, { "epoch": 0.12758563627337416, "grad_norm": 0.5439510345458984, "learning_rate": 4.247933884297521e-06, "loss": 0.5268, "step": 257 }, { "epoch": 0.12808207843786198, "grad_norm": 0.49793317914009094, "learning_rate": 4.264462809917356e-06, "loss": 0.529, "step": 258 }, { "epoch": 0.12857852060234984, "grad_norm": 0.5244342088699341, "learning_rate": 4.28099173553719e-06, "loss": 0.4868, "step": 259 }, { "epoch": 0.12907496276683766, "grad_norm": 0.5378307104110718, "learning_rate": 4.297520661157025e-06, "loss": 0.4705, "step": 260 }, { "epoch": 0.1295714049313255, "grad_norm": 0.5725069046020508, "learning_rate": 4.31404958677686e-06, "loss": 0.4977, "step": 261 }, { "epoch": 0.13006784709581334, "grad_norm": 0.5870641469955444, "learning_rate": 4.330578512396695e-06, "loss": 0.5098, "step": 262 }, { "epoch": 0.13056428926030117, "grad_norm": 0.5429021120071411, "learning_rate": 4.347107438016529e-06, "loss": 0.4614, "step": 263 }, { "epoch": 0.13106073142478902, "grad_norm": 0.5630292296409607, "learning_rate": 4.363636363636364e-06, "loss": 0.5052, "step": 264 }, { "epoch": 0.13155717358927685, "grad_norm": 0.5364632606506348, "learning_rate": 4.3801652892561984e-06, "loss": 0.4822, "step": 265 }, { "epoch": 0.13205361575376467, "grad_norm": 0.5884023308753967, "learning_rate": 4.396694214876033e-06, "loss": 0.5154, "step": 266 }, { "epoch": 0.13255005791825253, "grad_norm": 0.4767780900001526, "learning_rate": 4.413223140495868e-06, "loss": 0.5202, "step": 267 }, { "epoch": 0.13304650008274035, "grad_norm": 0.5498198866844177, "learning_rate": 4.429752066115703e-06, "loss": 0.4933, "step": 268 }, { "epoch": 0.1335429422472282, "grad_norm": 0.5989260673522949, "learning_rate": 4.4462809917355374e-06, "loss": 0.5098, "step": 269 }, { "epoch": 0.13403938441171603, "grad_norm": 0.5769941806793213, "learning_rate": 4.462809917355372e-06, "loss": 0.5264, "step": 270 }, { "epoch": 0.13453582657620386, "grad_norm": 0.5134031772613525, "learning_rate": 4.479338842975207e-06, "loss": 0.4744, "step": 271 }, { "epoch": 0.1350322687406917, "grad_norm": 0.5534682273864746, "learning_rate": 4.495867768595042e-06, "loss": 0.4938, "step": 272 }, { "epoch": 0.13552871090517954, "grad_norm": 0.6034852266311646, "learning_rate": 4.5123966942148764e-06, "loss": 0.4847, "step": 273 }, { "epoch": 0.1360251530696674, "grad_norm": 0.5511651039123535, "learning_rate": 4.528925619834711e-06, "loss": 0.5095, "step": 274 }, { "epoch": 0.13652159523415522, "grad_norm": 0.5239616632461548, "learning_rate": 4.5454545454545455e-06, "loss": 0.5265, "step": 275 }, { "epoch": 0.13701803739864304, "grad_norm": 0.5133403539657593, "learning_rate": 4.56198347107438e-06, "loss": 0.4822, "step": 276 }, { "epoch": 0.1375144795631309, "grad_norm": 0.5781416296958923, "learning_rate": 4.5785123966942154e-06, "loss": 0.5278, "step": 277 }, { "epoch": 0.13801092172761872, "grad_norm": 0.5739282965660095, "learning_rate": 4.59504132231405e-06, "loss": 0.5023, "step": 278 }, { "epoch": 0.13850736389210658, "grad_norm": 0.6267958879470825, "learning_rate": 4.6115702479338845e-06, "loss": 0.4991, "step": 279 }, { "epoch": 0.1390038060565944, "grad_norm": 0.5171827077865601, "learning_rate": 4.62809917355372e-06, "loss": 0.4858, "step": 280 }, { "epoch": 0.13950024822108226, "grad_norm": 0.5680434703826904, "learning_rate": 4.6446280991735544e-06, "loss": 0.504, "step": 281 }, { "epoch": 0.13999669038557008, "grad_norm": 0.5966777801513672, "learning_rate": 4.661157024793389e-06, "loss": 0.505, "step": 282 }, { "epoch": 0.1404931325500579, "grad_norm": 0.536106288433075, "learning_rate": 4.6776859504132235e-06, "loss": 0.5274, "step": 283 }, { "epoch": 0.14098957471454576, "grad_norm": 0.5439618229866028, "learning_rate": 4.694214876033058e-06, "loss": 0.5067, "step": 284 }, { "epoch": 0.1414860168790336, "grad_norm": 0.621277391910553, "learning_rate": 4.710743801652893e-06, "loss": 0.5172, "step": 285 }, { "epoch": 0.14198245904352144, "grad_norm": 0.5810826420783997, "learning_rate": 4.727272727272728e-06, "loss": 0.4659, "step": 286 }, { "epoch": 0.14247890120800927, "grad_norm": 0.4972521662712097, "learning_rate": 4.7438016528925625e-06, "loss": 0.511, "step": 287 }, { "epoch": 0.1429753433724971, "grad_norm": 0.5812990069389343, "learning_rate": 4.760330578512397e-06, "loss": 0.5057, "step": 288 }, { "epoch": 0.14347178553698495, "grad_norm": 0.6155485510826111, "learning_rate": 4.776859504132232e-06, "loss": 0.5155, "step": 289 }, { "epoch": 0.14396822770147277, "grad_norm": 0.4836181402206421, "learning_rate": 4.793388429752067e-06, "loss": 0.508, "step": 290 }, { "epoch": 0.14446466986596063, "grad_norm": 0.53159499168396, "learning_rate": 4.8099173553719015e-06, "loss": 0.5199, "step": 291 }, { "epoch": 0.14496111203044845, "grad_norm": 0.5128259658813477, "learning_rate": 4.826446280991736e-06, "loss": 0.4985, "step": 292 }, { "epoch": 0.14545755419493628, "grad_norm": 0.5124875903129578, "learning_rate": 4.842975206611571e-06, "loss": 0.4835, "step": 293 }, { "epoch": 0.14595399635942413, "grad_norm": 0.542765736579895, "learning_rate": 4.859504132231405e-06, "loss": 0.5172, "step": 294 }, { "epoch": 0.14645043852391196, "grad_norm": 0.5733925700187683, "learning_rate": 4.87603305785124e-06, "loss": 0.4466, "step": 295 }, { "epoch": 0.1469468806883998, "grad_norm": 0.5007134675979614, "learning_rate": 4.892561983471075e-06, "loss": 0.5053, "step": 296 }, { "epoch": 0.14744332285288764, "grad_norm": 0.5409964323043823, "learning_rate": 4.90909090909091e-06, "loss": 0.496, "step": 297 }, { "epoch": 0.14793976501737546, "grad_norm": 0.5684898495674133, "learning_rate": 4.925619834710744e-06, "loss": 0.4924, "step": 298 }, { "epoch": 0.14843620718186332, "grad_norm": 0.5757994651794434, "learning_rate": 4.942148760330579e-06, "loss": 0.5247, "step": 299 }, { "epoch": 0.14893264934635114, "grad_norm": 0.5239609479904175, "learning_rate": 4.958677685950414e-06, "loss": 0.4947, "step": 300 }, { "epoch": 0.149429091510839, "grad_norm": 0.5761334896087646, "learning_rate": 4.975206611570249e-06, "loss": 0.5261, "step": 301 }, { "epoch": 0.14992553367532682, "grad_norm": 0.5478906631469727, "learning_rate": 4.991735537190083e-06, "loss": 0.4989, "step": 302 }, { "epoch": 0.15042197583981465, "grad_norm": 0.5287495255470276, "learning_rate": 5.008264462809918e-06, "loss": 0.5261, "step": 303 }, { "epoch": 0.1509184180043025, "grad_norm": 0.517784833908081, "learning_rate": 5.024793388429753e-06, "loss": 0.5275, "step": 304 }, { "epoch": 0.15141486016879033, "grad_norm": 0.5558972954750061, "learning_rate": 5.041322314049587e-06, "loss": 0.4944, "step": 305 }, { "epoch": 0.15191130233327818, "grad_norm": 0.5396081805229187, "learning_rate": 5.057851239669422e-06, "loss": 0.4807, "step": 306 }, { "epoch": 0.152407744497766, "grad_norm": 0.5222194790840149, "learning_rate": 5.074380165289257e-06, "loss": 0.5126, "step": 307 }, { "epoch": 0.15290418666225386, "grad_norm": 0.514030396938324, "learning_rate": 5.090909090909091e-06, "loss": 0.5007, "step": 308 }, { "epoch": 0.1534006288267417, "grad_norm": 0.5079663991928101, "learning_rate": 5.107438016528926e-06, "loss": 0.5042, "step": 309 }, { "epoch": 0.1538970709912295, "grad_norm": 0.5170904397964478, "learning_rate": 5.12396694214876e-06, "loss": 0.488, "step": 310 }, { "epoch": 0.15439351315571737, "grad_norm": 0.5865115523338318, "learning_rate": 5.140495867768596e-06, "loss": 0.5364, "step": 311 }, { "epoch": 0.1548899553202052, "grad_norm": 0.5727144479751587, "learning_rate": 5.15702479338843e-06, "loss": 0.5262, "step": 312 }, { "epoch": 0.15538639748469305, "grad_norm": 0.5333884358406067, "learning_rate": 5.173553719008266e-06, "loss": 0.5118, "step": 313 }, { "epoch": 0.15588283964918087, "grad_norm": 0.5417286157608032, "learning_rate": 5.190082644628099e-06, "loss": 0.4793, "step": 314 }, { "epoch": 0.1563792818136687, "grad_norm": 0.5488592386245728, "learning_rate": 5.206611570247935e-06, "loss": 0.4908, "step": 315 }, { "epoch": 0.15687572397815655, "grad_norm": 0.5863795876502991, "learning_rate": 5.223140495867769e-06, "loss": 0.4719, "step": 316 }, { "epoch": 0.15737216614264438, "grad_norm": 0.5509049296379089, "learning_rate": 5.239669421487605e-06, "loss": 0.4591, "step": 317 }, { "epoch": 0.15786860830713223, "grad_norm": 0.5878096222877502, "learning_rate": 5.256198347107438e-06, "loss": 0.5042, "step": 318 }, { "epoch": 0.15836505047162006, "grad_norm": 0.5409635305404663, "learning_rate": 5.272727272727273e-06, "loss": 0.4406, "step": 319 }, { "epoch": 0.15886149263610788, "grad_norm": 0.5758359432220459, "learning_rate": 5.289256198347108e-06, "loss": 0.4686, "step": 320 }, { "epoch": 0.15935793480059574, "grad_norm": 0.6446367502212524, "learning_rate": 5.305785123966942e-06, "loss": 0.5103, "step": 321 }, { "epoch": 0.15985437696508356, "grad_norm": 0.5064903497695923, "learning_rate": 5.322314049586777e-06, "loss": 0.4933, "step": 322 }, { "epoch": 0.16035081912957141, "grad_norm": 0.6471164226531982, "learning_rate": 5.338842975206612e-06, "loss": 0.5277, "step": 323 }, { "epoch": 0.16084726129405924, "grad_norm": 0.6208307147026062, "learning_rate": 5.355371900826447e-06, "loss": 0.4674, "step": 324 }, { "epoch": 0.16134370345854707, "grad_norm": 0.5468797087669373, "learning_rate": 5.371900826446281e-06, "loss": 0.49, "step": 325 }, { "epoch": 0.16184014562303492, "grad_norm": 0.6068199276924133, "learning_rate": 5.388429752066116e-06, "loss": 0.5336, "step": 326 }, { "epoch": 0.16233658778752275, "grad_norm": 0.6579068899154663, "learning_rate": 5.404958677685951e-06, "loss": 0.501, "step": 327 }, { "epoch": 0.1628330299520106, "grad_norm": 0.6190075278282166, "learning_rate": 5.421487603305785e-06, "loss": 0.5181, "step": 328 }, { "epoch": 0.16332947211649843, "grad_norm": 0.5081790685653687, "learning_rate": 5.438016528925621e-06, "loss": 0.5018, "step": 329 }, { "epoch": 0.16382591428098628, "grad_norm": 0.6088923215866089, "learning_rate": 5.4545454545454545e-06, "loss": 0.5115, "step": 330 }, { "epoch": 0.1643223564454741, "grad_norm": 0.6793801188468933, "learning_rate": 5.47107438016529e-06, "loss": 0.495, "step": 331 }, { "epoch": 0.16481879860996193, "grad_norm": 0.5481202006340027, "learning_rate": 5.487603305785124e-06, "loss": 0.5079, "step": 332 }, { "epoch": 0.16531524077444978, "grad_norm": 0.5712063312530518, "learning_rate": 5.50413223140496e-06, "loss": 0.5074, "step": 333 }, { "epoch": 0.1658116829389376, "grad_norm": 0.5955901145935059, "learning_rate": 5.5206611570247935e-06, "loss": 0.4865, "step": 334 }, { "epoch": 0.16630812510342546, "grad_norm": 0.5747078657150269, "learning_rate": 5.537190082644629e-06, "loss": 0.5039, "step": 335 }, { "epoch": 0.1668045672679133, "grad_norm": 0.6026285290718079, "learning_rate": 5.553719008264463e-06, "loss": 0.4741, "step": 336 }, { "epoch": 0.16730100943240112, "grad_norm": 0.5330896377563477, "learning_rate": 5.570247933884299e-06, "loss": 0.4848, "step": 337 }, { "epoch": 0.16779745159688897, "grad_norm": 0.6218036413192749, "learning_rate": 5.5867768595041325e-06, "loss": 0.5288, "step": 338 }, { "epoch": 0.1682938937613768, "grad_norm": 0.6961645483970642, "learning_rate": 5.603305785123967e-06, "loss": 0.4772, "step": 339 }, { "epoch": 0.16879033592586465, "grad_norm": 0.5017281174659729, "learning_rate": 5.619834710743802e-06, "loss": 0.5003, "step": 340 }, { "epoch": 0.16928677809035247, "grad_norm": 0.5988336801528931, "learning_rate": 5.636363636363636e-06, "loss": 0.4549, "step": 341 }, { "epoch": 0.1697832202548403, "grad_norm": 0.5328327417373657, "learning_rate": 5.6528925619834715e-06, "loss": 0.5089, "step": 342 }, { "epoch": 0.17027966241932815, "grad_norm": 0.5314119458198547, "learning_rate": 5.669421487603306e-06, "loss": 0.4796, "step": 343 }, { "epoch": 0.17077610458381598, "grad_norm": 0.5430477857589722, "learning_rate": 5.685950413223141e-06, "loss": 0.4787, "step": 344 }, { "epoch": 0.17127254674830383, "grad_norm": 0.5381618738174438, "learning_rate": 5.702479338842976e-06, "loss": 0.5101, "step": 345 }, { "epoch": 0.17176898891279166, "grad_norm": 0.6082320213317871, "learning_rate": 5.7190082644628105e-06, "loss": 0.4898, "step": 346 }, { "epoch": 0.17226543107727949, "grad_norm": 0.5712150931358337, "learning_rate": 5.735537190082645e-06, "loss": 0.5111, "step": 347 }, { "epoch": 0.17276187324176734, "grad_norm": 0.5161234736442566, "learning_rate": 5.7520661157024795e-06, "loss": 0.4652, "step": 348 }, { "epoch": 0.17325831540625516, "grad_norm": 0.5934410691261292, "learning_rate": 5.768595041322315e-06, "loss": 0.4607, "step": 349 }, { "epoch": 0.17375475757074302, "grad_norm": 0.545677661895752, "learning_rate": 5.785123966942149e-06, "loss": 0.4684, "step": 350 }, { "epoch": 0.17425119973523084, "grad_norm": 0.6063624620437622, "learning_rate": 5.801652892561984e-06, "loss": 0.4786, "step": 351 }, { "epoch": 0.17474764189971867, "grad_norm": 0.548876941204071, "learning_rate": 5.8181818181818185e-06, "loss": 0.5113, "step": 352 }, { "epoch": 0.17524408406420652, "grad_norm": 0.5993521809577942, "learning_rate": 5.834710743801654e-06, "loss": 0.5109, "step": 353 }, { "epoch": 0.17574052622869435, "grad_norm": 0.636073887348175, "learning_rate": 5.851239669421488e-06, "loss": 0.4801, "step": 354 }, { "epoch": 0.1762369683931822, "grad_norm": 0.6291443109512329, "learning_rate": 5.867768595041323e-06, "loss": 0.5413, "step": 355 }, { "epoch": 0.17673341055767003, "grad_norm": 0.6420701742172241, "learning_rate": 5.8842975206611575e-06, "loss": 0.5476, "step": 356 }, { "epoch": 0.17722985272215788, "grad_norm": 0.5654237866401672, "learning_rate": 5.900826446280993e-06, "loss": 0.4913, "step": 357 }, { "epoch": 0.1777262948866457, "grad_norm": 0.5032205581665039, "learning_rate": 5.917355371900827e-06, "loss": 0.4891, "step": 358 }, { "epoch": 0.17822273705113353, "grad_norm": 0.5491820573806763, "learning_rate": 5.933884297520661e-06, "loss": 0.5028, "step": 359 }, { "epoch": 0.1787191792156214, "grad_norm": 0.5815576314926147, "learning_rate": 5.9504132231404965e-06, "loss": 0.5203, "step": 360 }, { "epoch": 0.1792156213801092, "grad_norm": 0.6246623992919922, "learning_rate": 5.966942148760331e-06, "loss": 0.4755, "step": 361 }, { "epoch": 0.17971206354459707, "grad_norm": 0.49324890971183777, "learning_rate": 5.9834710743801665e-06, "loss": 0.4883, "step": 362 }, { "epoch": 0.1802085057090849, "grad_norm": 0.49096959829330444, "learning_rate": 6e-06, "loss": 0.4762, "step": 363 }, { "epoch": 0.18070494787357272, "grad_norm": 0.5761922597885132, "learning_rate": 6.0165289256198355e-06, "loss": 0.4719, "step": 364 }, { "epoch": 0.18120139003806057, "grad_norm": 0.5839566588401794, "learning_rate": 6.03305785123967e-06, "loss": 0.5092, "step": 365 }, { "epoch": 0.1816978322025484, "grad_norm": 0.5687536597251892, "learning_rate": 6.0495867768595055e-06, "loss": 0.4812, "step": 366 }, { "epoch": 0.18219427436703625, "grad_norm": 0.560708224773407, "learning_rate": 6.066115702479339e-06, "loss": 0.4715, "step": 367 }, { "epoch": 0.18269071653152408, "grad_norm": 0.5136208534240723, "learning_rate": 6.082644628099174e-06, "loss": 0.4858, "step": 368 }, { "epoch": 0.1831871586960119, "grad_norm": 0.5554033517837524, "learning_rate": 6.099173553719009e-06, "loss": 0.4904, "step": 369 }, { "epoch": 0.18368360086049976, "grad_norm": 0.5771739482879639, "learning_rate": 6.115702479338843e-06, "loss": 0.5012, "step": 370 }, { "epoch": 0.18418004302498758, "grad_norm": 0.5767238736152649, "learning_rate": 6.132231404958678e-06, "loss": 0.48, "step": 371 }, { "epoch": 0.18467648518947544, "grad_norm": 0.5710912346839905, "learning_rate": 6.148760330578513e-06, "loss": 0.4911, "step": 372 }, { "epoch": 0.18517292735396326, "grad_norm": 0.5057599544525146, "learning_rate": 6.165289256198348e-06, "loss": 0.4707, "step": 373 }, { "epoch": 0.1856693695184511, "grad_norm": 0.5763569474220276, "learning_rate": 6.181818181818182e-06, "loss": 0.4605, "step": 374 }, { "epoch": 0.18616581168293894, "grad_norm": 0.5901451110839844, "learning_rate": 6.198347107438017e-06, "loss": 0.4999, "step": 375 }, { "epoch": 0.18666225384742677, "grad_norm": 0.6185732483863831, "learning_rate": 6.214876033057852e-06, "loss": 0.5106, "step": 376 }, { "epoch": 0.18715869601191462, "grad_norm": 0.5454049110412598, "learning_rate": 6.231404958677686e-06, "loss": 0.4911, "step": 377 }, { "epoch": 0.18765513817640245, "grad_norm": 0.5122085213661194, "learning_rate": 6.247933884297522e-06, "loss": 0.4606, "step": 378 }, { "epoch": 0.18815158034089027, "grad_norm": 0.5916205644607544, "learning_rate": 6.264462809917355e-06, "loss": 0.5125, "step": 379 }, { "epoch": 0.18864802250537813, "grad_norm": 0.6075770258903503, "learning_rate": 6.280991735537191e-06, "loss": 0.4795, "step": 380 }, { "epoch": 0.18914446466986595, "grad_norm": 0.5932941436767578, "learning_rate": 6.297520661157025e-06, "loss": 0.4462, "step": 381 }, { "epoch": 0.1896409068343538, "grad_norm": 0.6165408492088318, "learning_rate": 6.314049586776861e-06, "loss": 0.5312, "step": 382 }, { "epoch": 0.19013734899884163, "grad_norm": 0.5720821619033813, "learning_rate": 6.330578512396694e-06, "loss": 0.475, "step": 383 }, { "epoch": 0.19063379116332949, "grad_norm": 0.5584930777549744, "learning_rate": 6.34710743801653e-06, "loss": 0.4726, "step": 384 }, { "epoch": 0.1911302333278173, "grad_norm": 0.6879786849021912, "learning_rate": 6.363636363636364e-06, "loss": 0.4937, "step": 385 }, { "epoch": 0.19162667549230514, "grad_norm": 0.5424039363861084, "learning_rate": 6.3801652892562e-06, "loss": 0.503, "step": 386 }, { "epoch": 0.192123117656793, "grad_norm": 0.5901550054550171, "learning_rate": 6.396694214876033e-06, "loss": 0.5048, "step": 387 }, { "epoch": 0.19261955982128082, "grad_norm": 0.5668871998786926, "learning_rate": 6.413223140495868e-06, "loss": 0.4952, "step": 388 }, { "epoch": 0.19311600198576867, "grad_norm": 0.5771505832672119, "learning_rate": 6.429752066115703e-06, "loss": 0.4613, "step": 389 }, { "epoch": 0.1936124441502565, "grad_norm": 0.6106002330780029, "learning_rate": 6.446280991735537e-06, "loss": 0.4731, "step": 390 }, { "epoch": 0.19410888631474432, "grad_norm": 0.5980228185653687, "learning_rate": 6.462809917355372e-06, "loss": 0.4881, "step": 391 }, { "epoch": 0.19460532847923218, "grad_norm": 0.5479972958564758, "learning_rate": 6.479338842975207e-06, "loss": 0.5026, "step": 392 }, { "epoch": 0.19510177064372, "grad_norm": 0.589094340801239, "learning_rate": 6.495867768595042e-06, "loss": 0.4754, "step": 393 }, { "epoch": 0.19559821280820786, "grad_norm": 0.6254631280899048, "learning_rate": 6.512396694214877e-06, "loss": 0.4871, "step": 394 }, { "epoch": 0.19609465497269568, "grad_norm": 0.5559399127960205, "learning_rate": 6.528925619834712e-06, "loss": 0.4744, "step": 395 }, { "epoch": 0.1965910971371835, "grad_norm": 0.6020338535308838, "learning_rate": 6.545454545454546e-06, "loss": 0.5296, "step": 396 }, { "epoch": 0.19708753930167136, "grad_norm": 0.5346516370773315, "learning_rate": 6.56198347107438e-06, "loss": 0.5095, "step": 397 }, { "epoch": 0.1975839814661592, "grad_norm": 0.580679714679718, "learning_rate": 6.578512396694216e-06, "loss": 0.5015, "step": 398 }, { "epoch": 0.19808042363064704, "grad_norm": 0.5276569128036499, "learning_rate": 6.5950413223140495e-06, "loss": 0.4998, "step": 399 }, { "epoch": 0.19857686579513487, "grad_norm": 0.5087226033210754, "learning_rate": 6.611570247933885e-06, "loss": 0.4892, "step": 400 }, { "epoch": 0.1990733079596227, "grad_norm": 0.5640090703964233, "learning_rate": 6.628099173553719e-06, "loss": 0.4699, "step": 401 }, { "epoch": 0.19956975012411055, "grad_norm": 0.5902432203292847, "learning_rate": 6.644628099173555e-06, "loss": 0.482, "step": 402 }, { "epoch": 0.20006619228859837, "grad_norm": 0.5467512011528015, "learning_rate": 6.6611570247933885e-06, "loss": 0.5024, "step": 403 }, { "epoch": 0.20056263445308622, "grad_norm": 0.5484504103660583, "learning_rate": 6.677685950413224e-06, "loss": 0.5026, "step": 404 }, { "epoch": 0.20105907661757405, "grad_norm": 0.5800901055335999, "learning_rate": 6.694214876033058e-06, "loss": 0.503, "step": 405 }, { "epoch": 0.20155551878206188, "grad_norm": 0.5092427730560303, "learning_rate": 6.710743801652894e-06, "loss": 0.4653, "step": 406 }, { "epoch": 0.20205196094654973, "grad_norm": 0.5506454110145569, "learning_rate": 6.7272727272727275e-06, "loss": 0.4912, "step": 407 }, { "epoch": 0.20254840311103756, "grad_norm": 0.5091981887817383, "learning_rate": 6.743801652892562e-06, "loss": 0.505, "step": 408 }, { "epoch": 0.2030448452755254, "grad_norm": 0.5818343758583069, "learning_rate": 6.760330578512397e-06, "loss": 0.5218, "step": 409 }, { "epoch": 0.20354128744001324, "grad_norm": 0.51610267162323, "learning_rate": 6.776859504132232e-06, "loss": 0.4805, "step": 410 }, { "epoch": 0.2040377296045011, "grad_norm": 0.6030058264732361, "learning_rate": 6.793388429752067e-06, "loss": 0.5378, "step": 411 }, { "epoch": 0.20453417176898891, "grad_norm": 0.6080808043479919, "learning_rate": 6.809917355371901e-06, "loss": 0.4786, "step": 412 }, { "epoch": 0.20503061393347674, "grad_norm": 0.5455076098442078, "learning_rate": 6.826446280991736e-06, "loss": 0.4431, "step": 413 }, { "epoch": 0.2055270560979646, "grad_norm": 0.6591982245445251, "learning_rate": 6.842975206611571e-06, "loss": 0.4955, "step": 414 }, { "epoch": 0.20602349826245242, "grad_norm": 0.702680230140686, "learning_rate": 6.859504132231406e-06, "loss": 0.5062, "step": 415 }, { "epoch": 0.20651994042694027, "grad_norm": 0.6165181398391724, "learning_rate": 6.87603305785124e-06, "loss": 0.4492, "step": 416 }, { "epoch": 0.2070163825914281, "grad_norm": 0.6168249249458313, "learning_rate": 6.8925619834710745e-06, "loss": 0.4943, "step": 417 }, { "epoch": 0.20751282475591593, "grad_norm": 0.592224657535553, "learning_rate": 6.90909090909091e-06, "loss": 0.4549, "step": 418 }, { "epoch": 0.20800926692040378, "grad_norm": 0.5401844382286072, "learning_rate": 6.925619834710744e-06, "loss": 0.4567, "step": 419 }, { "epoch": 0.2085057090848916, "grad_norm": 0.5484288930892944, "learning_rate": 6.942148760330579e-06, "loss": 0.4613, "step": 420 }, { "epoch": 0.20900215124937946, "grad_norm": 0.5484537482261658, "learning_rate": 6.9586776859504135e-06, "loss": 0.5018, "step": 421 }, { "epoch": 0.20949859341386728, "grad_norm": 0.5106563568115234, "learning_rate": 6.975206611570249e-06, "loss": 0.5065, "step": 422 }, { "epoch": 0.2099950355783551, "grad_norm": 0.5537340641021729, "learning_rate": 6.991735537190083e-06, "loss": 0.4666, "step": 423 }, { "epoch": 0.21049147774284296, "grad_norm": 0.6060318350791931, "learning_rate": 7.008264462809918e-06, "loss": 0.5047, "step": 424 }, { "epoch": 0.2109879199073308, "grad_norm": 0.5297520756721497, "learning_rate": 7.0247933884297525e-06, "loss": 0.4733, "step": 425 }, { "epoch": 0.21148436207181864, "grad_norm": 0.6519132852554321, "learning_rate": 7.041322314049588e-06, "loss": 0.4828, "step": 426 }, { "epoch": 0.21198080423630647, "grad_norm": 0.5696131587028503, "learning_rate": 7.0578512396694225e-06, "loss": 0.5149, "step": 427 }, { "epoch": 0.2124772464007943, "grad_norm": 0.5480486154556274, "learning_rate": 7.074380165289256e-06, "loss": 0.4928, "step": 428 }, { "epoch": 0.21297368856528215, "grad_norm": 0.595615565776825, "learning_rate": 7.0909090909090916e-06, "loss": 0.4928, "step": 429 }, { "epoch": 0.21347013072976997, "grad_norm": 0.5577583909034729, "learning_rate": 7.107438016528926e-06, "loss": 0.4686, "step": 430 }, { "epoch": 0.21396657289425783, "grad_norm": 0.604645848274231, "learning_rate": 7.1239669421487615e-06, "loss": 0.473, "step": 431 }, { "epoch": 0.21446301505874565, "grad_norm": 0.5617847442626953, "learning_rate": 7.140495867768595e-06, "loss": 0.4993, "step": 432 }, { "epoch": 0.21495945722323348, "grad_norm": 0.5992624163627625, "learning_rate": 7.1570247933884306e-06, "loss": 0.4946, "step": 433 }, { "epoch": 0.21545589938772133, "grad_norm": 0.5881482362747192, "learning_rate": 7.173553719008265e-06, "loss": 0.4924, "step": 434 }, { "epoch": 0.21595234155220916, "grad_norm": 0.5150551199913025, "learning_rate": 7.1900826446281005e-06, "loss": 0.4437, "step": 435 }, { "epoch": 0.216448783716697, "grad_norm": 0.7200685739517212, "learning_rate": 7.206611570247934e-06, "loss": 0.5104, "step": 436 }, { "epoch": 0.21694522588118484, "grad_norm": 0.6635454297065735, "learning_rate": 7.223140495867769e-06, "loss": 0.5007, "step": 437 }, { "epoch": 0.2174416680456727, "grad_norm": 0.5761476159095764, "learning_rate": 7.239669421487604e-06, "loss": 0.5013, "step": 438 }, { "epoch": 0.21793811021016052, "grad_norm": 0.5798068642616272, "learning_rate": 7.256198347107438e-06, "loss": 0.4911, "step": 439 }, { "epoch": 0.21843455237464834, "grad_norm": 0.6837860345840454, "learning_rate": 7.272727272727273e-06, "loss": 0.4829, "step": 440 }, { "epoch": 0.2189309945391362, "grad_norm": 0.6114369630813599, "learning_rate": 7.289256198347108e-06, "loss": 0.4758, "step": 441 }, { "epoch": 0.21942743670362402, "grad_norm": 0.532677948474884, "learning_rate": 7.305785123966943e-06, "loss": 0.4795, "step": 442 }, { "epoch": 0.21992387886811188, "grad_norm": 0.5753225684165955, "learning_rate": 7.322314049586778e-06, "loss": 0.4859, "step": 443 }, { "epoch": 0.2204203210325997, "grad_norm": 0.5690421462059021, "learning_rate": 7.338842975206613e-06, "loss": 0.4657, "step": 444 }, { "epoch": 0.22091676319708753, "grad_norm": 0.5476955771446228, "learning_rate": 7.355371900826447e-06, "loss": 0.4402, "step": 445 }, { "epoch": 0.22141320536157538, "grad_norm": 0.6237830519676208, "learning_rate": 7.371900826446282e-06, "loss": 0.495, "step": 446 }, { "epoch": 0.2219096475260632, "grad_norm": 0.6103525161743164, "learning_rate": 7.388429752066117e-06, "loss": 0.467, "step": 447 }, { "epoch": 0.22240608969055106, "grad_norm": 0.5530771613121033, "learning_rate": 7.40495867768595e-06, "loss": 0.5001, "step": 448 }, { "epoch": 0.2229025318550389, "grad_norm": 0.5842785835266113, "learning_rate": 7.421487603305786e-06, "loss": 0.457, "step": 449 }, { "epoch": 0.2233989740195267, "grad_norm": 0.5954055190086365, "learning_rate": 7.43801652892562e-06, "loss": 0.494, "step": 450 }, { "epoch": 0.22389541618401457, "grad_norm": 0.5628727078437805, "learning_rate": 7.454545454545456e-06, "loss": 0.47, "step": 451 }, { "epoch": 0.2243918583485024, "grad_norm": 0.5199972987174988, "learning_rate": 7.471074380165289e-06, "loss": 0.4521, "step": 452 }, { "epoch": 0.22488830051299025, "grad_norm": 0.5456050038337708, "learning_rate": 7.487603305785125e-06, "loss": 0.4685, "step": 453 }, { "epoch": 0.22538474267747807, "grad_norm": 0.5575901865959167, "learning_rate": 7.504132231404959e-06, "loss": 0.4922, "step": 454 }, { "epoch": 0.2258811848419659, "grad_norm": 0.5811330676078796, "learning_rate": 7.520661157024795e-06, "loss": 0.4984, "step": 455 }, { "epoch": 0.22637762700645375, "grad_norm": 0.7549650073051453, "learning_rate": 7.537190082644628e-06, "loss": 0.5067, "step": 456 }, { "epoch": 0.22687406917094158, "grad_norm": 0.5694323182106018, "learning_rate": 7.553719008264463e-06, "loss": 0.5054, "step": 457 }, { "epoch": 0.22737051133542943, "grad_norm": 0.5265411138534546, "learning_rate": 7.570247933884298e-06, "loss": 0.4676, "step": 458 }, { "epoch": 0.22786695349991726, "grad_norm": 0.5417258739471436, "learning_rate": 7.586776859504133e-06, "loss": 0.4821, "step": 459 }, { "epoch": 0.22836339566440508, "grad_norm": 0.5784080028533936, "learning_rate": 7.603305785123968e-06, "loss": 0.4942, "step": 460 }, { "epoch": 0.22885983782889294, "grad_norm": 0.5631738305091858, "learning_rate": 7.619834710743802e-06, "loss": 0.5073, "step": 461 }, { "epoch": 0.22935627999338076, "grad_norm": 0.552476167678833, "learning_rate": 7.636363636363638e-06, "loss": 0.4966, "step": 462 }, { "epoch": 0.22985272215786862, "grad_norm": 0.545906126499176, "learning_rate": 7.652892561983471e-06, "loss": 0.4854, "step": 463 }, { "epoch": 0.23034916432235644, "grad_norm": 0.524661123752594, "learning_rate": 7.669421487603307e-06, "loss": 0.4848, "step": 464 }, { "epoch": 0.2308456064868443, "grad_norm": 0.5401702523231506, "learning_rate": 7.685950413223142e-06, "loss": 0.4702, "step": 465 }, { "epoch": 0.23134204865133212, "grad_norm": 0.535542368888855, "learning_rate": 7.702479338842976e-06, "loss": 0.4692, "step": 466 }, { "epoch": 0.23183849081581995, "grad_norm": 0.518203854560852, "learning_rate": 7.71900826446281e-06, "loss": 0.4762, "step": 467 }, { "epoch": 0.2323349329803078, "grad_norm": 0.5746379494667053, "learning_rate": 7.735537190082645e-06, "loss": 0.4885, "step": 468 }, { "epoch": 0.23283137514479563, "grad_norm": 0.5678719282150269, "learning_rate": 7.75206611570248e-06, "loss": 0.4563, "step": 469 }, { "epoch": 0.23332781730928348, "grad_norm": 0.6048430800437927, "learning_rate": 7.768595041322314e-06, "loss": 0.4532, "step": 470 }, { "epoch": 0.2338242594737713, "grad_norm": 0.6642693877220154, "learning_rate": 7.785123966942149e-06, "loss": 0.4987, "step": 471 }, { "epoch": 0.23432070163825913, "grad_norm": 0.4879727363586426, "learning_rate": 7.801652892561983e-06, "loss": 0.4757, "step": 472 }, { "epoch": 0.23481714380274699, "grad_norm": 0.6083024144172668, "learning_rate": 7.81818181818182e-06, "loss": 0.4974, "step": 473 }, { "epoch": 0.2353135859672348, "grad_norm": 0.5646515488624573, "learning_rate": 7.834710743801654e-06, "loss": 0.5049, "step": 474 }, { "epoch": 0.23581002813172267, "grad_norm": 0.6288661956787109, "learning_rate": 7.851239669421489e-06, "loss": 0.4804, "step": 475 }, { "epoch": 0.2363064702962105, "grad_norm": 0.5288645029067993, "learning_rate": 7.867768595041323e-06, "loss": 0.4482, "step": 476 }, { "epoch": 0.23680291246069832, "grad_norm": 0.5560475587844849, "learning_rate": 7.884297520661158e-06, "loss": 0.485, "step": 477 }, { "epoch": 0.23729935462518617, "grad_norm": 0.5170780420303345, "learning_rate": 7.900826446280992e-06, "loss": 0.4602, "step": 478 }, { "epoch": 0.237795796789674, "grad_norm": 0.4935104250907898, "learning_rate": 7.917355371900827e-06, "loss": 0.4474, "step": 479 }, { "epoch": 0.23829223895416185, "grad_norm": 0.5225276947021484, "learning_rate": 7.933884297520661e-06, "loss": 0.4918, "step": 480 }, { "epoch": 0.23878868111864968, "grad_norm": 0.6079731583595276, "learning_rate": 7.950413223140496e-06, "loss": 0.4594, "step": 481 }, { "epoch": 0.2392851232831375, "grad_norm": 0.4985310137271881, "learning_rate": 7.966942148760332e-06, "loss": 0.472, "step": 482 }, { "epoch": 0.23978156544762536, "grad_norm": 0.5632686018943787, "learning_rate": 7.983471074380165e-06, "loss": 0.497, "step": 483 }, { "epoch": 0.24027800761211318, "grad_norm": 0.5241639018058777, "learning_rate": 8.000000000000001e-06, "loss": 0.4995, "step": 484 }, { "epoch": 0.24077444977660103, "grad_norm": 0.5031239986419678, "learning_rate": 8.016528925619836e-06, "loss": 0.4604, "step": 485 }, { "epoch": 0.24127089194108886, "grad_norm": 0.5742601156234741, "learning_rate": 8.033057851239669e-06, "loss": 0.4862, "step": 486 }, { "epoch": 0.2417673341055767, "grad_norm": 0.5736064314842224, "learning_rate": 8.049586776859505e-06, "loss": 0.5169, "step": 487 }, { "epoch": 0.24226377627006454, "grad_norm": 0.5929110646247864, "learning_rate": 8.06611570247934e-06, "loss": 0.4846, "step": 488 }, { "epoch": 0.24276021843455237, "grad_norm": 0.5669609308242798, "learning_rate": 8.082644628099174e-06, "loss": 0.4485, "step": 489 }, { "epoch": 0.24325666059904022, "grad_norm": 0.5302844047546387, "learning_rate": 8.099173553719009e-06, "loss": 0.4871, "step": 490 }, { "epoch": 0.24375310276352805, "grad_norm": 0.558034360408783, "learning_rate": 8.115702479338843e-06, "loss": 0.4859, "step": 491 }, { "epoch": 0.2442495449280159, "grad_norm": 0.5532898306846619, "learning_rate": 8.132231404958678e-06, "loss": 0.4656, "step": 492 }, { "epoch": 0.24474598709250373, "grad_norm": 0.5431181788444519, "learning_rate": 8.148760330578514e-06, "loss": 0.4466, "step": 493 }, { "epoch": 0.24524242925699155, "grad_norm": 0.5674943327903748, "learning_rate": 8.165289256198348e-06, "loss": 0.4795, "step": 494 }, { "epoch": 0.2457388714214794, "grad_norm": 0.5600504279136658, "learning_rate": 8.181818181818183e-06, "loss": 0.4525, "step": 495 }, { "epoch": 0.24623531358596723, "grad_norm": 0.5376918911933899, "learning_rate": 8.198347107438017e-06, "loss": 0.4844, "step": 496 }, { "epoch": 0.24673175575045508, "grad_norm": 0.5321533679962158, "learning_rate": 8.214876033057852e-06, "loss": 0.5025, "step": 497 }, { "epoch": 0.2472281979149429, "grad_norm": 0.6604514122009277, "learning_rate": 8.231404958677687e-06, "loss": 0.4756, "step": 498 }, { "epoch": 0.24772464007943074, "grad_norm": 0.5960185527801514, "learning_rate": 8.247933884297521e-06, "loss": 0.4321, "step": 499 }, { "epoch": 0.2482210822439186, "grad_norm": 0.5111812949180603, "learning_rate": 8.264462809917356e-06, "loss": 0.4637, "step": 500 }, { "epoch": 0.24871752440840642, "grad_norm": 0.6967871189117432, "learning_rate": 8.28099173553719e-06, "loss": 0.4764, "step": 501 }, { "epoch": 0.24921396657289427, "grad_norm": 0.5870413780212402, "learning_rate": 8.297520661157026e-06, "loss": 0.4988, "step": 502 }, { "epoch": 0.2497104087373821, "grad_norm": 0.5482614636421204, "learning_rate": 8.31404958677686e-06, "loss": 0.47, "step": 503 }, { "epoch": 0.25020685090186995, "grad_norm": 0.6019531488418579, "learning_rate": 8.330578512396695e-06, "loss": 0.4728, "step": 504 }, { "epoch": 0.25070329306635775, "grad_norm": 0.5875377655029297, "learning_rate": 8.34710743801653e-06, "loss": 0.489, "step": 505 }, { "epoch": 0.2511997352308456, "grad_norm": 0.6015675663948059, "learning_rate": 8.363636363636365e-06, "loss": 0.4786, "step": 506 }, { "epoch": 0.25169617739533345, "grad_norm": 0.6096805930137634, "learning_rate": 8.380165289256199e-06, "loss": 0.4881, "step": 507 }, { "epoch": 0.2521926195598213, "grad_norm": 0.5580508708953857, "learning_rate": 8.396694214876034e-06, "loss": 0.4811, "step": 508 }, { "epoch": 0.2526890617243091, "grad_norm": 0.5842412114143372, "learning_rate": 8.413223140495868e-06, "loss": 0.501, "step": 509 }, { "epoch": 0.25318550388879696, "grad_norm": 0.5924214720726013, "learning_rate": 8.429752066115703e-06, "loss": 0.4557, "step": 510 }, { "epoch": 0.2536819460532848, "grad_norm": 0.5741751790046692, "learning_rate": 8.446280991735539e-06, "loss": 0.4701, "step": 511 }, { "epoch": 0.2541783882177726, "grad_norm": 0.5943068861961365, "learning_rate": 8.462809917355372e-06, "loss": 0.4765, "step": 512 }, { "epoch": 0.25467483038226046, "grad_norm": 0.5044859051704407, "learning_rate": 8.479338842975208e-06, "loss": 0.4331, "step": 513 }, { "epoch": 0.2551712725467483, "grad_norm": 0.6460961699485779, "learning_rate": 8.495867768595043e-06, "loss": 0.475, "step": 514 }, { "epoch": 0.2556677147112361, "grad_norm": 0.5889000296592712, "learning_rate": 8.512396694214877e-06, "loss": 0.4598, "step": 515 }, { "epoch": 0.25616415687572397, "grad_norm": 0.5838192105293274, "learning_rate": 8.528925619834712e-06, "loss": 0.5052, "step": 516 }, { "epoch": 0.2566605990402118, "grad_norm": 0.5946657657623291, "learning_rate": 8.545454545454546e-06, "loss": 0.4445, "step": 517 }, { "epoch": 0.2571570412046997, "grad_norm": 0.6138104796409607, "learning_rate": 8.56198347107438e-06, "loss": 0.4878, "step": 518 }, { "epoch": 0.2576534833691875, "grad_norm": 0.5762873291969299, "learning_rate": 8.578512396694215e-06, "loss": 0.4983, "step": 519 }, { "epoch": 0.25814992553367533, "grad_norm": 0.60455322265625, "learning_rate": 8.59504132231405e-06, "loss": 0.4883, "step": 520 }, { "epoch": 0.2586463676981632, "grad_norm": 0.5931981205940247, "learning_rate": 8.611570247933884e-06, "loss": 0.4592, "step": 521 }, { "epoch": 0.259142809862651, "grad_norm": 0.5712152123451233, "learning_rate": 8.62809917355372e-06, "loss": 0.497, "step": 522 }, { "epoch": 0.25963925202713883, "grad_norm": 0.6638737320899963, "learning_rate": 8.644628099173555e-06, "loss": 0.482, "step": 523 }, { "epoch": 0.2601356941916267, "grad_norm": 0.556867778301239, "learning_rate": 8.66115702479339e-06, "loss": 0.4587, "step": 524 }, { "epoch": 0.2606321363561145, "grad_norm": 0.5985377430915833, "learning_rate": 8.677685950413224e-06, "loss": 0.4939, "step": 525 }, { "epoch": 0.26112857852060234, "grad_norm": 0.6211187243461609, "learning_rate": 8.694214876033059e-06, "loss": 0.4686, "step": 526 }, { "epoch": 0.2616250206850902, "grad_norm": 0.5734577178955078, "learning_rate": 8.710743801652893e-06, "loss": 0.5106, "step": 527 }, { "epoch": 0.26212146284957805, "grad_norm": 0.610625147819519, "learning_rate": 8.727272727272728e-06, "loss": 0.4257, "step": 528 }, { "epoch": 0.26261790501406584, "grad_norm": 0.5390804409980774, "learning_rate": 8.743801652892562e-06, "loss": 0.45, "step": 529 }, { "epoch": 0.2631143471785537, "grad_norm": 0.5611861348152161, "learning_rate": 8.760330578512397e-06, "loss": 0.4653, "step": 530 }, { "epoch": 0.26361078934304155, "grad_norm": 0.6018905639648438, "learning_rate": 8.776859504132233e-06, "loss": 0.4704, "step": 531 }, { "epoch": 0.26410723150752935, "grad_norm": 0.607869565486908, "learning_rate": 8.793388429752066e-06, "loss": 0.4737, "step": 532 }, { "epoch": 0.2646036736720172, "grad_norm": 0.657197117805481, "learning_rate": 8.809917355371902e-06, "loss": 0.4649, "step": 533 }, { "epoch": 0.26510011583650506, "grad_norm": 0.6494818925857544, "learning_rate": 8.826446280991737e-06, "loss": 0.4565, "step": 534 }, { "epoch": 0.2655965580009929, "grad_norm": 0.5649163722991943, "learning_rate": 8.842975206611571e-06, "loss": 0.4936, "step": 535 }, { "epoch": 0.2660930001654807, "grad_norm": 0.6429181694984436, "learning_rate": 8.859504132231406e-06, "loss": 0.5173, "step": 536 }, { "epoch": 0.26658944232996856, "grad_norm": 0.6212757229804993, "learning_rate": 8.87603305785124e-06, "loss": 0.4995, "step": 537 }, { "epoch": 0.2670858844944564, "grad_norm": 0.568551242351532, "learning_rate": 8.892561983471075e-06, "loss": 0.4431, "step": 538 }, { "epoch": 0.2675823266589442, "grad_norm": 0.5709211826324463, "learning_rate": 8.90909090909091e-06, "loss": 0.4684, "step": 539 }, { "epoch": 0.26807876882343207, "grad_norm": 0.547429621219635, "learning_rate": 8.925619834710744e-06, "loss": 0.4845, "step": 540 }, { "epoch": 0.2685752109879199, "grad_norm": 0.5361472964286804, "learning_rate": 8.942148760330578e-06, "loss": 0.4818, "step": 541 }, { "epoch": 0.2690716531524077, "grad_norm": 0.49591484665870667, "learning_rate": 8.958677685950415e-06, "loss": 0.4852, "step": 542 }, { "epoch": 0.2695680953168956, "grad_norm": 0.5087563991546631, "learning_rate": 8.97520661157025e-06, "loss": 0.4677, "step": 543 }, { "epoch": 0.2700645374813834, "grad_norm": 0.5428006649017334, "learning_rate": 8.991735537190084e-06, "loss": 0.5286, "step": 544 }, { "epoch": 0.2705609796458713, "grad_norm": 0.5427055358886719, "learning_rate": 9.008264462809918e-06, "loss": 0.4911, "step": 545 }, { "epoch": 0.2710574218103591, "grad_norm": 0.6294534802436829, "learning_rate": 9.024793388429753e-06, "loss": 0.4834, "step": 546 }, { "epoch": 0.27155386397484693, "grad_norm": 0.7055789828300476, "learning_rate": 9.041322314049587e-06, "loss": 0.5394, "step": 547 }, { "epoch": 0.2720503061393348, "grad_norm": 0.5609902143478394, "learning_rate": 9.057851239669422e-06, "loss": 0.4812, "step": 548 }, { "epoch": 0.2725467483038226, "grad_norm": 0.6609641909599304, "learning_rate": 9.074380165289256e-06, "loss": 0.4703, "step": 549 }, { "epoch": 0.27304319046831044, "grad_norm": 0.8120158314704895, "learning_rate": 9.090909090909091e-06, "loss": 0.4732, "step": 550 }, { "epoch": 0.2735396326327983, "grad_norm": 0.6212154626846313, "learning_rate": 9.107438016528927e-06, "loss": 0.4922, "step": 551 }, { "epoch": 0.2740360747972861, "grad_norm": 0.5858011245727539, "learning_rate": 9.12396694214876e-06, "loss": 0.5026, "step": 552 }, { "epoch": 0.27453251696177394, "grad_norm": 0.6022577881813049, "learning_rate": 9.140495867768596e-06, "loss": 0.468, "step": 553 }, { "epoch": 0.2750289591262618, "grad_norm": 0.6517961025238037, "learning_rate": 9.157024793388431e-06, "loss": 0.4314, "step": 554 }, { "epoch": 0.27552540129074965, "grad_norm": 0.6646472215652466, "learning_rate": 9.173553719008265e-06, "loss": 0.4774, "step": 555 }, { "epoch": 0.27602184345523745, "grad_norm": 0.5969814658164978, "learning_rate": 9.1900826446281e-06, "loss": 0.4705, "step": 556 }, { "epoch": 0.2765182856197253, "grad_norm": 0.5736490488052368, "learning_rate": 9.206611570247935e-06, "loss": 0.4793, "step": 557 }, { "epoch": 0.27701472778421316, "grad_norm": 0.5077117085456848, "learning_rate": 9.223140495867769e-06, "loss": 0.457, "step": 558 }, { "epoch": 0.27751116994870095, "grad_norm": 0.5538498759269714, "learning_rate": 9.239669421487604e-06, "loss": 0.4544, "step": 559 }, { "epoch": 0.2780076121131888, "grad_norm": 0.596728503704071, "learning_rate": 9.25619834710744e-06, "loss": 0.4788, "step": 560 }, { "epoch": 0.27850405427767666, "grad_norm": 0.55035001039505, "learning_rate": 9.272727272727273e-06, "loss": 0.4844, "step": 561 }, { "epoch": 0.2790004964421645, "grad_norm": 0.5837007164955139, "learning_rate": 9.289256198347109e-06, "loss": 0.4483, "step": 562 }, { "epoch": 0.2794969386066523, "grad_norm": 0.6499337553977966, "learning_rate": 9.305785123966943e-06, "loss": 0.4748, "step": 563 }, { "epoch": 0.27999338077114017, "grad_norm": 0.6035741567611694, "learning_rate": 9.322314049586778e-06, "loss": 0.4801, "step": 564 }, { "epoch": 0.280489822935628, "grad_norm": 0.6332799196243286, "learning_rate": 9.338842975206613e-06, "loss": 0.4987, "step": 565 }, { "epoch": 0.2809862651001158, "grad_norm": 0.6195710897445679, "learning_rate": 9.355371900826447e-06, "loss": 0.4583, "step": 566 }, { "epoch": 0.28148270726460367, "grad_norm": 0.5590093731880188, "learning_rate": 9.371900826446282e-06, "loss": 0.4664, "step": 567 }, { "epoch": 0.2819791494290915, "grad_norm": 0.6507348418235779, "learning_rate": 9.388429752066116e-06, "loss": 0.4976, "step": 568 }, { "epoch": 0.2824755915935793, "grad_norm": 0.6516704559326172, "learning_rate": 9.40495867768595e-06, "loss": 0.4636, "step": 569 }, { "epoch": 0.2829720337580672, "grad_norm": 0.5283865332603455, "learning_rate": 9.421487603305785e-06, "loss": 0.4757, "step": 570 }, { "epoch": 0.28346847592255503, "grad_norm": 0.570365846157074, "learning_rate": 9.438016528925621e-06, "loss": 0.4525, "step": 571 }, { "epoch": 0.2839649180870429, "grad_norm": 0.5509682297706604, "learning_rate": 9.454545454545456e-06, "loss": 0.4332, "step": 572 }, { "epoch": 0.2844613602515307, "grad_norm": 0.5959538817405701, "learning_rate": 9.47107438016529e-06, "loss": 0.4681, "step": 573 }, { "epoch": 0.28495780241601854, "grad_norm": 0.5520719885826111, "learning_rate": 9.487603305785125e-06, "loss": 0.4862, "step": 574 }, { "epoch": 0.2854542445805064, "grad_norm": 0.5845164656639099, "learning_rate": 9.50413223140496e-06, "loss": 0.4425, "step": 575 }, { "epoch": 0.2859506867449942, "grad_norm": 0.5471561551094055, "learning_rate": 9.520661157024794e-06, "loss": 0.4557, "step": 576 }, { "epoch": 0.28644712890948204, "grad_norm": 0.5656152367591858, "learning_rate": 9.537190082644629e-06, "loss": 0.4701, "step": 577 }, { "epoch": 0.2869435710739699, "grad_norm": 0.62446528673172, "learning_rate": 9.553719008264463e-06, "loss": 0.4579, "step": 578 }, { "epoch": 0.2874400132384577, "grad_norm": 0.558929443359375, "learning_rate": 9.570247933884298e-06, "loss": 0.4319, "step": 579 }, { "epoch": 0.28793645540294555, "grad_norm": 0.5847190022468567, "learning_rate": 9.586776859504134e-06, "loss": 0.4613, "step": 580 }, { "epoch": 0.2884328975674334, "grad_norm": 0.6001784801483154, "learning_rate": 9.603305785123967e-06, "loss": 0.4644, "step": 581 }, { "epoch": 0.28892933973192125, "grad_norm": 0.5458738207817078, "learning_rate": 9.619834710743803e-06, "loss": 0.4865, "step": 582 }, { "epoch": 0.28942578189640905, "grad_norm": 0.5827302932739258, "learning_rate": 9.636363636363638e-06, "loss": 0.4819, "step": 583 }, { "epoch": 0.2899222240608969, "grad_norm": 0.511719822883606, "learning_rate": 9.652892561983472e-06, "loss": 0.4704, "step": 584 }, { "epoch": 0.29041866622538476, "grad_norm": 0.5585824251174927, "learning_rate": 9.669421487603307e-06, "loss": 0.4641, "step": 585 }, { "epoch": 0.29091510838987256, "grad_norm": 0.588890552520752, "learning_rate": 9.685950413223141e-06, "loss": 0.4822, "step": 586 }, { "epoch": 0.2914115505543604, "grad_norm": 0.5970147252082825, "learning_rate": 9.702479338842976e-06, "loss": 0.4912, "step": 587 }, { "epoch": 0.29190799271884826, "grad_norm": 0.5893604755401611, "learning_rate": 9.71900826446281e-06, "loss": 0.47, "step": 588 }, { "epoch": 0.2924044348833361, "grad_norm": 0.5946270227432251, "learning_rate": 9.735537190082645e-06, "loss": 0.4556, "step": 589 }, { "epoch": 0.2929008770478239, "grad_norm": 0.5550459027290344, "learning_rate": 9.75206611570248e-06, "loss": 0.458, "step": 590 }, { "epoch": 0.29339731921231177, "grad_norm": 0.5310551524162292, "learning_rate": 9.768595041322316e-06, "loss": 0.4641, "step": 591 }, { "epoch": 0.2938937613767996, "grad_norm": 0.5575319528579712, "learning_rate": 9.78512396694215e-06, "loss": 0.4522, "step": 592 }, { "epoch": 0.2943902035412874, "grad_norm": 0.666354775428772, "learning_rate": 9.801652892561985e-06, "loss": 0.4589, "step": 593 }, { "epoch": 0.2948866457057753, "grad_norm": 0.571260392665863, "learning_rate": 9.81818181818182e-06, "loss": 0.4682, "step": 594 }, { "epoch": 0.29538308787026313, "grad_norm": 0.5564945936203003, "learning_rate": 9.834710743801654e-06, "loss": 0.4657, "step": 595 }, { "epoch": 0.2958795300347509, "grad_norm": 0.5981553196907043, "learning_rate": 9.851239669421488e-06, "loss": 0.4475, "step": 596 }, { "epoch": 0.2963759721992388, "grad_norm": 0.546642541885376, "learning_rate": 9.867768595041323e-06, "loss": 0.4682, "step": 597 }, { "epoch": 0.29687241436372663, "grad_norm": 0.6417818665504456, "learning_rate": 9.884297520661157e-06, "loss": 0.4635, "step": 598 }, { "epoch": 0.2973688565282145, "grad_norm": 0.6661314964294434, "learning_rate": 9.900826446280992e-06, "loss": 0.4785, "step": 599 }, { "epoch": 0.2978652986927023, "grad_norm": 0.5763871669769287, "learning_rate": 9.917355371900828e-06, "loss": 0.4688, "step": 600 }, { "epoch": 0.29836174085719014, "grad_norm": 0.6581618785858154, "learning_rate": 9.933884297520661e-06, "loss": 0.475, "step": 601 }, { "epoch": 0.298858183021678, "grad_norm": 0.7018885612487793, "learning_rate": 9.950413223140497e-06, "loss": 0.4949, "step": 602 }, { "epoch": 0.2993546251861658, "grad_norm": 0.5467149615287781, "learning_rate": 9.966942148760332e-06, "loss": 0.4656, "step": 603 }, { "epoch": 0.29985106735065364, "grad_norm": 0.599861741065979, "learning_rate": 9.983471074380166e-06, "loss": 0.4993, "step": 604 }, { "epoch": 0.3003475095151415, "grad_norm": 0.5989638566970825, "learning_rate": 1e-05, "loss": 0.4934, "step": 605 }, { "epoch": 0.3008439516796293, "grad_norm": 0.5814307928085327, "learning_rate": 9.999999165317946e-06, "loss": 0.4872, "step": 606 }, { "epoch": 0.30134039384411715, "grad_norm": 0.5735344886779785, "learning_rate": 9.999996661272064e-06, "loss": 0.4643, "step": 607 }, { "epoch": 0.301836836008605, "grad_norm": 0.6105080246925354, "learning_rate": 9.999992487863189e-06, "loss": 0.4691, "step": 608 }, { "epoch": 0.30233327817309286, "grad_norm": 0.7019006609916687, "learning_rate": 9.999986645092714e-06, "loss": 0.4859, "step": 609 }, { "epoch": 0.30282972033758065, "grad_norm": 0.5550382733345032, "learning_rate": 9.99997913296259e-06, "loss": 0.4613, "step": 610 }, { "epoch": 0.3033261625020685, "grad_norm": 0.5882860422134399, "learning_rate": 9.999969951475326e-06, "loss": 0.4756, "step": 611 }, { "epoch": 0.30382260466655636, "grad_norm": 0.6381136178970337, "learning_rate": 9.999959100633987e-06, "loss": 0.4671, "step": 612 }, { "epoch": 0.30431904683104416, "grad_norm": 0.6112157106399536, "learning_rate": 9.999946580442195e-06, "loss": 0.4841, "step": 613 }, { "epoch": 0.304815488995532, "grad_norm": 0.548315167427063, "learning_rate": 9.999932390904133e-06, "loss": 0.4716, "step": 614 }, { "epoch": 0.30531193116001987, "grad_norm": 0.6650294661521912, "learning_rate": 9.999916532024533e-06, "loss": 0.4747, "step": 615 }, { "epoch": 0.3058083733245077, "grad_norm": 0.6084309816360474, "learning_rate": 9.999899003808695e-06, "loss": 0.4789, "step": 616 }, { "epoch": 0.3063048154889955, "grad_norm": 0.6000365018844604, "learning_rate": 9.99987980626247e-06, "loss": 0.5045, "step": 617 }, { "epoch": 0.3068012576534834, "grad_norm": 0.6562034487724304, "learning_rate": 9.999858939392263e-06, "loss": 0.4893, "step": 618 }, { "epoch": 0.3072976998179712, "grad_norm": 0.6300380825996399, "learning_rate": 9.99983640320505e-06, "loss": 0.4482, "step": 619 }, { "epoch": 0.307794141982459, "grad_norm": 0.6020801663398743, "learning_rate": 9.999812197708347e-06, "loss": 0.4303, "step": 620 }, { "epoch": 0.3082905841469469, "grad_norm": 0.6070950627326965, "learning_rate": 9.999786322910239e-06, "loss": 0.4813, "step": 621 }, { "epoch": 0.30878702631143473, "grad_norm": 0.7042976021766663, "learning_rate": 9.999758778819363e-06, "loss": 0.4689, "step": 622 }, { "epoch": 0.30928346847592253, "grad_norm": 0.6233851909637451, "learning_rate": 9.99972956544492e-06, "loss": 0.4635, "step": 623 }, { "epoch": 0.3097799106404104, "grad_norm": 0.5716886520385742, "learning_rate": 9.999698682796658e-06, "loss": 0.4474, "step": 624 }, { "epoch": 0.31027635280489824, "grad_norm": 0.569075882434845, "learning_rate": 9.99966613088489e-06, "loss": 0.5012, "step": 625 }, { "epoch": 0.3107727949693861, "grad_norm": 0.643073320388794, "learning_rate": 9.999631909720487e-06, "loss": 0.5014, "step": 626 }, { "epoch": 0.3112692371338739, "grad_norm": 0.558023989200592, "learning_rate": 9.999596019314868e-06, "loss": 0.5218, "step": 627 }, { "epoch": 0.31176567929836174, "grad_norm": 0.5722401738166809, "learning_rate": 9.999558459680022e-06, "loss": 0.4913, "step": 628 }, { "epoch": 0.3122621214628496, "grad_norm": 0.5979071259498596, "learning_rate": 9.999519230828486e-06, "loss": 0.4708, "step": 629 }, { "epoch": 0.3127585636273374, "grad_norm": 0.6055789589881897, "learning_rate": 9.999478332773357e-06, "loss": 0.4872, "step": 630 }, { "epoch": 0.31325500579182525, "grad_norm": 0.6576817035675049, "learning_rate": 9.999435765528293e-06, "loss": 0.4469, "step": 631 }, { "epoch": 0.3137514479563131, "grad_norm": 0.6066470742225647, "learning_rate": 9.999391529107504e-06, "loss": 0.4777, "step": 632 }, { "epoch": 0.3142478901208009, "grad_norm": 0.5625318288803101, "learning_rate": 9.999345623525758e-06, "loss": 0.4757, "step": 633 }, { "epoch": 0.31474433228528875, "grad_norm": 0.7003360390663147, "learning_rate": 9.999298048798385e-06, "loss": 0.449, "step": 634 }, { "epoch": 0.3152407744497766, "grad_norm": 0.5636935234069824, "learning_rate": 9.999248804941265e-06, "loss": 0.445, "step": 635 }, { "epoch": 0.31573721661426446, "grad_norm": 0.5840704441070557, "learning_rate": 9.999197891970843e-06, "loss": 0.474, "step": 636 }, { "epoch": 0.31623365877875226, "grad_norm": 0.6671532392501831, "learning_rate": 9.999145309904112e-06, "loss": 0.4626, "step": 637 }, { "epoch": 0.3167301009432401, "grad_norm": 0.601431131362915, "learning_rate": 9.999091058758634e-06, "loss": 0.4627, "step": 638 }, { "epoch": 0.31722654310772797, "grad_norm": 0.587620735168457, "learning_rate": 9.99903513855252e-06, "loss": 0.4512, "step": 639 }, { "epoch": 0.31772298527221576, "grad_norm": 0.5470988750457764, "learning_rate": 9.998977549304436e-06, "loss": 0.4732, "step": 640 }, { "epoch": 0.3182194274367036, "grad_norm": 0.6828019022941589, "learning_rate": 9.998918291033617e-06, "loss": 0.4895, "step": 641 }, { "epoch": 0.31871586960119147, "grad_norm": 0.594344437122345, "learning_rate": 9.998857363759842e-06, "loss": 0.4656, "step": 642 }, { "epoch": 0.3192123117656793, "grad_norm": 0.6507296562194824, "learning_rate": 9.998794767503455e-06, "loss": 0.502, "step": 643 }, { "epoch": 0.3197087539301671, "grad_norm": 0.5916901230812073, "learning_rate": 9.998730502285354e-06, "loss": 0.4843, "step": 644 }, { "epoch": 0.320205196094655, "grad_norm": 0.65090012550354, "learning_rate": 9.998664568126996e-06, "loss": 0.4787, "step": 645 }, { "epoch": 0.32070163825914283, "grad_norm": 0.8044404983520508, "learning_rate": 9.998596965050395e-06, "loss": 0.4808, "step": 646 }, { "epoch": 0.3211980804236306, "grad_norm": 0.6018081307411194, "learning_rate": 9.998527693078122e-06, "loss": 0.446, "step": 647 }, { "epoch": 0.3216945225881185, "grad_norm": 0.5921911001205444, "learning_rate": 9.998456752233305e-06, "loss": 0.4682, "step": 648 }, { "epoch": 0.32219096475260633, "grad_norm": 0.7134881019592285, "learning_rate": 9.99838414253963e-06, "loss": 0.4882, "step": 649 }, { "epoch": 0.32268740691709413, "grad_norm": 0.6117831468582153, "learning_rate": 9.998309864021337e-06, "loss": 0.4653, "step": 650 }, { "epoch": 0.323183849081582, "grad_norm": 0.6605574488639832, "learning_rate": 9.998233916703225e-06, "loss": 0.5061, "step": 651 }, { "epoch": 0.32368029124606984, "grad_norm": 0.5409277677536011, "learning_rate": 9.998156300610658e-06, "loss": 0.4504, "step": 652 }, { "epoch": 0.3241767334105577, "grad_norm": 0.5586496591567993, "learning_rate": 9.99807701576954e-06, "loss": 0.4547, "step": 653 }, { "epoch": 0.3246731755750455, "grad_norm": 0.6110630631446838, "learning_rate": 9.997996062206348e-06, "loss": 0.4773, "step": 654 }, { "epoch": 0.32516961773953335, "grad_norm": 0.5422811508178711, "learning_rate": 9.99791343994811e-06, "loss": 0.521, "step": 655 }, { "epoch": 0.3256660599040212, "grad_norm": 0.667647123336792, "learning_rate": 9.997829149022408e-06, "loss": 0.451, "step": 656 }, { "epoch": 0.326162502068509, "grad_norm": 0.6123071312904358, "learning_rate": 9.997743189457387e-06, "loss": 0.4863, "step": 657 }, { "epoch": 0.32665894423299685, "grad_norm": 0.5621511936187744, "learning_rate": 9.997655561281747e-06, "loss": 0.4871, "step": 658 }, { "epoch": 0.3271553863974847, "grad_norm": 0.5090628266334534, "learning_rate": 9.997566264524745e-06, "loss": 0.4673, "step": 659 }, { "epoch": 0.32765182856197256, "grad_norm": 0.625612199306488, "learning_rate": 9.997475299216191e-06, "loss": 0.459, "step": 660 }, { "epoch": 0.32814827072646036, "grad_norm": 0.5244866013526917, "learning_rate": 9.99738266538646e-06, "loss": 0.4627, "step": 661 }, { "epoch": 0.3286447128909482, "grad_norm": 0.5177583694458008, "learning_rate": 9.997288363066479e-06, "loss": 0.4407, "step": 662 }, { "epoch": 0.32914115505543606, "grad_norm": 0.634233832359314, "learning_rate": 9.99719239228773e-06, "loss": 0.4609, "step": 663 }, { "epoch": 0.32963759721992386, "grad_norm": 0.5054285526275635, "learning_rate": 9.99709475308226e-06, "loss": 0.4844, "step": 664 }, { "epoch": 0.3301340393844117, "grad_norm": 0.5654548406600952, "learning_rate": 9.996995445482664e-06, "loss": 0.4539, "step": 665 }, { "epoch": 0.33063048154889957, "grad_norm": 0.529575765132904, "learning_rate": 9.9968944695221e-06, "loss": 0.4723, "step": 666 }, { "epoch": 0.33112692371338737, "grad_norm": 0.5582539439201355, "learning_rate": 9.99679182523428e-06, "loss": 0.452, "step": 667 }, { "epoch": 0.3316233658778752, "grad_norm": 0.5566109418869019, "learning_rate": 9.996687512653476e-06, "loss": 0.4758, "step": 668 }, { "epoch": 0.3321198080423631, "grad_norm": 0.6441712379455566, "learning_rate": 9.996581531814513e-06, "loss": 0.5027, "step": 669 }, { "epoch": 0.3326162502068509, "grad_norm": 0.5492421388626099, "learning_rate": 9.996473882752777e-06, "loss": 0.4601, "step": 670 }, { "epoch": 0.3331126923713387, "grad_norm": 0.6243419051170349, "learning_rate": 9.996364565504208e-06, "loss": 0.4887, "step": 671 }, { "epoch": 0.3336091345358266, "grad_norm": 0.5897534489631653, "learning_rate": 9.996253580105302e-06, "loss": 0.4702, "step": 672 }, { "epoch": 0.33410557670031443, "grad_norm": 0.5758418440818787, "learning_rate": 9.996140926593119e-06, "loss": 0.4445, "step": 673 }, { "epoch": 0.33460201886480223, "grad_norm": 0.5882619619369507, "learning_rate": 9.996026605005266e-06, "loss": 0.4747, "step": 674 }, { "epoch": 0.3350984610292901, "grad_norm": 0.6536632776260376, "learning_rate": 9.995910615379917e-06, "loss": 0.4502, "step": 675 }, { "epoch": 0.33559490319377794, "grad_norm": 0.6516268849372864, "learning_rate": 9.995792957755793e-06, "loss": 0.5103, "step": 676 }, { "epoch": 0.33609134535826574, "grad_norm": 0.6254358887672424, "learning_rate": 9.995673632172179e-06, "loss": 0.4582, "step": 677 }, { "epoch": 0.3365877875227536, "grad_norm": 0.7100464105606079, "learning_rate": 9.995552638668912e-06, "loss": 0.4719, "step": 678 }, { "epoch": 0.33708422968724144, "grad_norm": 0.5953912138938904, "learning_rate": 9.995429977286394e-06, "loss": 0.4857, "step": 679 }, { "epoch": 0.3375806718517293, "grad_norm": 0.6551641821861267, "learning_rate": 9.995305648065573e-06, "loss": 0.4826, "step": 680 }, { "epoch": 0.3380771140162171, "grad_norm": 0.5488927960395813, "learning_rate": 9.995179651047961e-06, "loss": 0.4401, "step": 681 }, { "epoch": 0.33857355618070495, "grad_norm": 0.6154251098632812, "learning_rate": 9.995051986275626e-06, "loss": 0.4793, "step": 682 }, { "epoch": 0.3390699983451928, "grad_norm": 0.5878848433494568, "learning_rate": 9.99492265379119e-06, "loss": 0.4729, "step": 683 }, { "epoch": 0.3395664405096806, "grad_norm": 0.5885066986083984, "learning_rate": 9.994791653637834e-06, "loss": 0.4467, "step": 684 }, { "epoch": 0.34006288267416845, "grad_norm": 0.5630676746368408, "learning_rate": 9.994658985859295e-06, "loss": 0.4549, "step": 685 }, { "epoch": 0.3405593248386563, "grad_norm": 0.6363318562507629, "learning_rate": 9.99452465049987e-06, "loss": 0.4599, "step": 686 }, { "epoch": 0.34105576700314416, "grad_norm": 0.6042366623878479, "learning_rate": 9.994388647604408e-06, "loss": 0.4573, "step": 687 }, { "epoch": 0.34155220916763196, "grad_norm": 0.5951661467552185, "learning_rate": 9.994250977218313e-06, "loss": 0.4703, "step": 688 }, { "epoch": 0.3420486513321198, "grad_norm": 0.5871053338050842, "learning_rate": 9.994111639387557e-06, "loss": 0.4594, "step": 689 }, { "epoch": 0.34254509349660767, "grad_norm": 0.6044358015060425, "learning_rate": 9.993970634158656e-06, "loss": 0.4993, "step": 690 }, { "epoch": 0.34304153566109546, "grad_norm": 0.4842963218688965, "learning_rate": 9.993827961578688e-06, "loss": 0.4506, "step": 691 }, { "epoch": 0.3435379778255833, "grad_norm": 0.5399145483970642, "learning_rate": 9.993683621695287e-06, "loss": 0.4971, "step": 692 }, { "epoch": 0.34403441999007117, "grad_norm": 0.5532727837562561, "learning_rate": 9.993537614556648e-06, "loss": 0.4843, "step": 693 }, { "epoch": 0.34453086215455897, "grad_norm": 0.5703217387199402, "learning_rate": 9.993389940211515e-06, "loss": 0.453, "step": 694 }, { "epoch": 0.3450273043190468, "grad_norm": 0.5410152077674866, "learning_rate": 9.993240598709195e-06, "loss": 0.4349, "step": 695 }, { "epoch": 0.3455237464835347, "grad_norm": 0.6282304525375366, "learning_rate": 9.993089590099547e-06, "loss": 0.4304, "step": 696 }, { "epoch": 0.34602018864802253, "grad_norm": 0.6877906322479248, "learning_rate": 9.99293691443299e-06, "loss": 0.4674, "step": 697 }, { "epoch": 0.34651663081251033, "grad_norm": 0.5707480907440186, "learning_rate": 9.992782571760497e-06, "loss": 0.4464, "step": 698 }, { "epoch": 0.3470130729769982, "grad_norm": 0.6348291039466858, "learning_rate": 9.9926265621336e-06, "loss": 0.4454, "step": 699 }, { "epoch": 0.34750951514148604, "grad_norm": 0.6165683269500732, "learning_rate": 9.992468885604385e-06, "loss": 0.4677, "step": 700 }, { "epoch": 0.34800595730597383, "grad_norm": 0.5776503682136536, "learning_rate": 9.992309542225497e-06, "loss": 0.4284, "step": 701 }, { "epoch": 0.3485023994704617, "grad_norm": 0.596569299697876, "learning_rate": 9.992148532050139e-06, "loss": 0.4266, "step": 702 }, { "epoch": 0.34899884163494954, "grad_norm": 0.5887807011604309, "learning_rate": 9.991985855132062e-06, "loss": 0.4547, "step": 703 }, { "epoch": 0.34949528379943734, "grad_norm": 0.5734627842903137, "learning_rate": 9.991821511525584e-06, "loss": 0.5154, "step": 704 }, { "epoch": 0.3499917259639252, "grad_norm": 0.6112487316131592, "learning_rate": 9.991655501285574e-06, "loss": 0.48, "step": 705 }, { "epoch": 0.35048816812841305, "grad_norm": 0.636520266532898, "learning_rate": 9.991487824467458e-06, "loss": 0.5137, "step": 706 }, { "epoch": 0.3509846102929009, "grad_norm": 0.5615051984786987, "learning_rate": 9.991318481127218e-06, "loss": 0.4735, "step": 707 }, { "epoch": 0.3514810524573887, "grad_norm": 0.5920907258987427, "learning_rate": 9.991147471321392e-06, "loss": 0.4848, "step": 708 }, { "epoch": 0.35197749462187655, "grad_norm": 0.6375461220741272, "learning_rate": 9.990974795107078e-06, "loss": 0.4871, "step": 709 }, { "epoch": 0.3524739367863644, "grad_norm": 0.6332018971443176, "learning_rate": 9.990800452541929e-06, "loss": 0.5125, "step": 710 }, { "epoch": 0.3529703789508522, "grad_norm": 0.6216305494308472, "learning_rate": 9.99062444368415e-06, "loss": 0.4853, "step": 711 }, { "epoch": 0.35346682111534006, "grad_norm": 0.7522878050804138, "learning_rate": 9.990446768592507e-06, "loss": 0.4876, "step": 712 }, { "epoch": 0.3539632632798279, "grad_norm": 0.5638903379440308, "learning_rate": 9.99026742732632e-06, "loss": 0.4478, "step": 713 }, { "epoch": 0.35445970544431576, "grad_norm": 0.6338232755661011, "learning_rate": 9.990086419945469e-06, "loss": 0.4717, "step": 714 }, { "epoch": 0.35495614760880356, "grad_norm": 0.5656048059463501, "learning_rate": 9.989903746510383e-06, "loss": 0.5069, "step": 715 }, { "epoch": 0.3554525897732914, "grad_norm": 0.5839251279830933, "learning_rate": 9.989719407082056e-06, "loss": 0.4488, "step": 716 }, { "epoch": 0.35594903193777927, "grad_norm": 0.5226553678512573, "learning_rate": 9.989533401722031e-06, "loss": 0.445, "step": 717 }, { "epoch": 0.35644547410226707, "grad_norm": 0.5391815900802612, "learning_rate": 9.98934573049241e-06, "loss": 0.4453, "step": 718 }, { "epoch": 0.3569419162667549, "grad_norm": 0.592984676361084, "learning_rate": 9.989156393455856e-06, "loss": 0.4461, "step": 719 }, { "epoch": 0.3574383584312428, "grad_norm": 0.5609174966812134, "learning_rate": 9.988965390675578e-06, "loss": 0.4719, "step": 720 }, { "epoch": 0.3579348005957306, "grad_norm": 0.53073650598526, "learning_rate": 9.988772722215348e-06, "loss": 0.4759, "step": 721 }, { "epoch": 0.3584312427602184, "grad_norm": 0.6437492966651917, "learning_rate": 9.988578388139493e-06, "loss": 0.4997, "step": 722 }, { "epoch": 0.3589276849247063, "grad_norm": 0.5265535712242126, "learning_rate": 9.988382388512898e-06, "loss": 0.4445, "step": 723 }, { "epoch": 0.35942412708919413, "grad_norm": 0.6196646690368652, "learning_rate": 9.988184723400999e-06, "loss": 0.4764, "step": 724 }, { "epoch": 0.35992056925368193, "grad_norm": 0.546890914440155, "learning_rate": 9.987985392869792e-06, "loss": 0.4551, "step": 725 }, { "epoch": 0.3604170114181698, "grad_norm": 0.5816842913627625, "learning_rate": 9.987784396985829e-06, "loss": 0.4632, "step": 726 }, { "epoch": 0.36091345358265764, "grad_norm": 0.5734960436820984, "learning_rate": 9.987581735816216e-06, "loss": 0.4767, "step": 727 }, { "epoch": 0.36140989574714544, "grad_norm": 0.6691988706588745, "learning_rate": 9.987377409428617e-06, "loss": 0.4783, "step": 728 }, { "epoch": 0.3619063379116333, "grad_norm": 0.4974777400493622, "learning_rate": 9.98717141789125e-06, "loss": 0.4782, "step": 729 }, { "epoch": 0.36240278007612114, "grad_norm": 0.5978480577468872, "learning_rate": 9.98696376127289e-06, "loss": 0.4582, "step": 730 }, { "epoch": 0.36289922224060894, "grad_norm": 0.5835424065589905, "learning_rate": 9.98675443964287e-06, "loss": 0.4962, "step": 731 }, { "epoch": 0.3633956644050968, "grad_norm": 0.5229841470718384, "learning_rate": 9.986543453071074e-06, "loss": 0.4766, "step": 732 }, { "epoch": 0.36389210656958465, "grad_norm": 0.6138909459114075, "learning_rate": 9.986330801627944e-06, "loss": 0.4536, "step": 733 }, { "epoch": 0.3643885487340725, "grad_norm": 0.6031911969184875, "learning_rate": 9.986116485384481e-06, "loss": 0.4823, "step": 734 }, { "epoch": 0.3648849908985603, "grad_norm": 0.6340209245681763, "learning_rate": 9.98590050441224e-06, "loss": 0.4522, "step": 735 }, { "epoch": 0.36538143306304816, "grad_norm": 0.5872398614883423, "learning_rate": 9.98568285878333e-06, "loss": 0.47, "step": 736 }, { "epoch": 0.365877875227536, "grad_norm": 0.7160881161689758, "learning_rate": 9.985463548570416e-06, "loss": 0.4773, "step": 737 }, { "epoch": 0.3663743173920238, "grad_norm": 0.6246534585952759, "learning_rate": 9.985242573846721e-06, "loss": 0.4913, "step": 738 }, { "epoch": 0.36687075955651166, "grad_norm": 0.5309376120567322, "learning_rate": 9.98501993468602e-06, "loss": 0.499, "step": 739 }, { "epoch": 0.3673672017209995, "grad_norm": 0.6275343894958496, "learning_rate": 9.984795631162651e-06, "loss": 0.4583, "step": 740 }, { "epoch": 0.36786364388548737, "grad_norm": 0.5849477648735046, "learning_rate": 9.984569663351497e-06, "loss": 0.4894, "step": 741 }, { "epoch": 0.36836008604997517, "grad_norm": 0.5356663465499878, "learning_rate": 9.984342031328007e-06, "loss": 0.4768, "step": 742 }, { "epoch": 0.368856528214463, "grad_norm": 0.5999839305877686, "learning_rate": 9.984112735168182e-06, "loss": 0.4237, "step": 743 }, { "epoch": 0.3693529703789509, "grad_norm": 0.6123843789100647, "learning_rate": 9.983881774948572e-06, "loss": 0.4841, "step": 744 }, { "epoch": 0.36984941254343867, "grad_norm": 0.5758894681930542, "learning_rate": 9.983649150746292e-06, "loss": 0.4645, "step": 745 }, { "epoch": 0.3703458547079265, "grad_norm": 0.5598086714744568, "learning_rate": 9.983414862639011e-06, "loss": 0.4536, "step": 746 }, { "epoch": 0.3708422968724144, "grad_norm": 0.48257583379745483, "learning_rate": 9.983178910704947e-06, "loss": 0.4769, "step": 747 }, { "epoch": 0.3713387390369022, "grad_norm": 0.6119939684867859, "learning_rate": 9.982941295022881e-06, "loss": 0.4457, "step": 748 }, { "epoch": 0.37183518120139003, "grad_norm": 0.6829886436462402, "learning_rate": 9.982702015672145e-06, "loss": 0.4929, "step": 749 }, { "epoch": 0.3723316233658779, "grad_norm": 0.5432550311088562, "learning_rate": 9.982461072732628e-06, "loss": 0.4504, "step": 750 }, { "epoch": 0.37282806553036574, "grad_norm": 0.5155565142631531, "learning_rate": 9.982218466284775e-06, "loss": 0.4604, "step": 751 }, { "epoch": 0.37332450769485354, "grad_norm": 0.5790650844573975, "learning_rate": 9.981974196409586e-06, "loss": 0.468, "step": 752 }, { "epoch": 0.3738209498593414, "grad_norm": 0.6230031251907349, "learning_rate": 9.981728263188615e-06, "loss": 0.4837, "step": 753 }, { "epoch": 0.37431739202382924, "grad_norm": 0.5523694157600403, "learning_rate": 9.98148066670397e-06, "loss": 0.4716, "step": 754 }, { "epoch": 0.37481383418831704, "grad_norm": 0.6392102241516113, "learning_rate": 9.981231407038324e-06, "loss": 0.4688, "step": 755 }, { "epoch": 0.3753102763528049, "grad_norm": 0.5826284885406494, "learning_rate": 9.98098048427489e-06, "loss": 0.4365, "step": 756 }, { "epoch": 0.37580671851729275, "grad_norm": 0.627784788608551, "learning_rate": 9.98072789849745e-06, "loss": 0.4949, "step": 757 }, { "epoch": 0.37630316068178055, "grad_norm": 0.6046406030654907, "learning_rate": 9.980473649790333e-06, "loss": 0.4971, "step": 758 }, { "epoch": 0.3767996028462684, "grad_norm": 0.5522487163543701, "learning_rate": 9.980217738238427e-06, "loss": 0.4658, "step": 759 }, { "epoch": 0.37729604501075625, "grad_norm": 0.5662911534309387, "learning_rate": 9.979960163927172e-06, "loss": 0.4665, "step": 760 }, { "epoch": 0.3777924871752441, "grad_norm": 0.5223079919815063, "learning_rate": 9.979700926942564e-06, "loss": 0.4513, "step": 761 }, { "epoch": 0.3782889293397319, "grad_norm": 0.5548537969589233, "learning_rate": 9.97944002737116e-06, "loss": 0.4998, "step": 762 }, { "epoch": 0.37878537150421976, "grad_norm": 0.6004071831703186, "learning_rate": 9.979177465300063e-06, "loss": 0.4483, "step": 763 }, { "epoch": 0.3792818136687076, "grad_norm": 0.6103107929229736, "learning_rate": 9.978913240816938e-06, "loss": 0.5054, "step": 764 }, { "epoch": 0.3797782558331954, "grad_norm": 0.5363016128540039, "learning_rate": 9.978647354010002e-06, "loss": 0.4644, "step": 765 }, { "epoch": 0.38027469799768326, "grad_norm": 0.5729445815086365, "learning_rate": 9.978379804968026e-06, "loss": 0.4532, "step": 766 }, { "epoch": 0.3807711401621711, "grad_norm": 0.5967592000961304, "learning_rate": 9.978110593780338e-06, "loss": 0.4917, "step": 767 }, { "epoch": 0.38126758232665897, "grad_norm": 0.6371681690216064, "learning_rate": 9.977839720536818e-06, "loss": 0.492, "step": 768 }, { "epoch": 0.38176402449114677, "grad_norm": 0.5581688284873962, "learning_rate": 9.977567185327907e-06, "loss": 0.4946, "step": 769 }, { "epoch": 0.3822604666556346, "grad_norm": 0.6137298941612244, "learning_rate": 9.977292988244597e-06, "loss": 0.4395, "step": 770 }, { "epoch": 0.3827569088201225, "grad_norm": 0.671826958656311, "learning_rate": 9.977017129378432e-06, "loss": 0.4493, "step": 771 }, { "epoch": 0.3832533509846103, "grad_norm": 0.5748264789581299, "learning_rate": 9.976739608821515e-06, "loss": 0.5086, "step": 772 }, { "epoch": 0.38374979314909813, "grad_norm": 0.5649125576019287, "learning_rate": 9.976460426666505e-06, "loss": 0.5096, "step": 773 }, { "epoch": 0.384246235313586, "grad_norm": 0.5817816257476807, "learning_rate": 9.976179583006608e-06, "loss": 0.4634, "step": 774 }, { "epoch": 0.3847426774780738, "grad_norm": 0.558176577091217, "learning_rate": 9.975897077935597e-06, "loss": 0.451, "step": 775 }, { "epoch": 0.38523911964256163, "grad_norm": 0.5981376767158508, "learning_rate": 9.975612911547787e-06, "loss": 0.4392, "step": 776 }, { "epoch": 0.3857355618070495, "grad_norm": 0.5933478474617004, "learning_rate": 9.975327083938056e-06, "loss": 0.4622, "step": 777 }, { "epoch": 0.38623200397153734, "grad_norm": 0.5830017924308777, "learning_rate": 9.975039595201833e-06, "loss": 0.4795, "step": 778 }, { "epoch": 0.38672844613602514, "grad_norm": 0.6203069686889648, "learning_rate": 9.974750445435104e-06, "loss": 0.4651, "step": 779 }, { "epoch": 0.387224888300513, "grad_norm": 0.592688262462616, "learning_rate": 9.974459634734407e-06, "loss": 0.451, "step": 780 }, { "epoch": 0.38772133046500085, "grad_norm": 0.5821216702461243, "learning_rate": 9.974167163196837e-06, "loss": 0.447, "step": 781 }, { "epoch": 0.38821777262948864, "grad_norm": 0.6830419898033142, "learning_rate": 9.97387303092004e-06, "loss": 0.464, "step": 782 }, { "epoch": 0.3887142147939765, "grad_norm": 0.6093246340751648, "learning_rate": 9.97357723800222e-06, "loss": 0.4853, "step": 783 }, { "epoch": 0.38921065695846435, "grad_norm": 0.5356206893920898, "learning_rate": 9.973279784542137e-06, "loss": 0.4674, "step": 784 }, { "epoch": 0.38970709912295215, "grad_norm": 0.5805352330207825, "learning_rate": 9.972980670639098e-06, "loss": 0.4364, "step": 785 }, { "epoch": 0.39020354128744, "grad_norm": 0.6718869805335999, "learning_rate": 9.972679896392973e-06, "loss": 0.4501, "step": 786 }, { "epoch": 0.39069998345192786, "grad_norm": 0.5219154953956604, "learning_rate": 9.97237746190418e-06, "loss": 0.4754, "step": 787 }, { "epoch": 0.3911964256164157, "grad_norm": 0.6348830461502075, "learning_rate": 9.972073367273694e-06, "loss": 0.4264, "step": 788 }, { "epoch": 0.3916928677809035, "grad_norm": 0.5595013499259949, "learning_rate": 9.971767612603045e-06, "loss": 0.4261, "step": 789 }, { "epoch": 0.39218930994539136, "grad_norm": 0.5607438087463379, "learning_rate": 9.971460197994314e-06, "loss": 0.4638, "step": 790 }, { "epoch": 0.3926857521098792, "grad_norm": 0.6973970532417297, "learning_rate": 9.97115112355014e-06, "loss": 0.471, "step": 791 }, { "epoch": 0.393182194274367, "grad_norm": 0.5851337313652039, "learning_rate": 9.970840389373715e-06, "loss": 0.4126, "step": 792 }, { "epoch": 0.39367863643885487, "grad_norm": 0.656575083732605, "learning_rate": 9.970527995568783e-06, "loss": 0.4961, "step": 793 }, { "epoch": 0.3941750786033427, "grad_norm": 0.5848230123519897, "learning_rate": 9.970213942239644e-06, "loss": 0.4622, "step": 794 }, { "epoch": 0.3946715207678306, "grad_norm": 0.6512570381164551, "learning_rate": 9.969898229491155e-06, "loss": 0.4699, "step": 795 }, { "epoch": 0.3951679629323184, "grad_norm": 0.578198254108429, "learning_rate": 9.96958085742872e-06, "loss": 0.4688, "step": 796 }, { "epoch": 0.3956644050968062, "grad_norm": 0.6184307336807251, "learning_rate": 9.969261826158303e-06, "loss": 0.4475, "step": 797 }, { "epoch": 0.3961608472612941, "grad_norm": 0.6091291904449463, "learning_rate": 9.968941135786418e-06, "loss": 0.4367, "step": 798 }, { "epoch": 0.3966572894257819, "grad_norm": 0.5075336694717407, "learning_rate": 9.968618786420136e-06, "loss": 0.4585, "step": 799 }, { "epoch": 0.39715373159026973, "grad_norm": 0.6913009881973267, "learning_rate": 9.968294778167083e-06, "loss": 0.4623, "step": 800 }, { "epoch": 0.3976501737547576, "grad_norm": 0.7731044888496399, "learning_rate": 9.967969111135434e-06, "loss": 0.4758, "step": 801 }, { "epoch": 0.3981466159192454, "grad_norm": 0.5711026191711426, "learning_rate": 9.96764178543392e-06, "loss": 0.4568, "step": 802 }, { "epoch": 0.39864305808373324, "grad_norm": 0.6603060960769653, "learning_rate": 9.967312801171825e-06, "loss": 0.4565, "step": 803 }, { "epoch": 0.3991395002482211, "grad_norm": 0.7891825437545776, "learning_rate": 9.966982158458992e-06, "loss": 0.4803, "step": 804 }, { "epoch": 0.39963594241270894, "grad_norm": 0.6418102979660034, "learning_rate": 9.96664985740581e-06, "loss": 0.4209, "step": 805 }, { "epoch": 0.40013238457719674, "grad_norm": 0.519429087638855, "learning_rate": 9.96631589812323e-06, "loss": 0.5037, "step": 806 }, { "epoch": 0.4006288267416846, "grad_norm": 0.6470623016357422, "learning_rate": 9.965980280722744e-06, "loss": 0.4789, "step": 807 }, { "epoch": 0.40112526890617245, "grad_norm": 0.7030336856842041, "learning_rate": 9.965643005316413e-06, "loss": 0.4967, "step": 808 }, { "epoch": 0.40162171107066025, "grad_norm": 0.5825676918029785, "learning_rate": 9.965304072016842e-06, "loss": 0.4629, "step": 809 }, { "epoch": 0.4021181532351481, "grad_norm": 0.5922484993934631, "learning_rate": 9.964963480937189e-06, "loss": 0.4564, "step": 810 }, { "epoch": 0.40261459539963595, "grad_norm": 0.6525038480758667, "learning_rate": 9.964621232191169e-06, "loss": 0.4577, "step": 811 }, { "epoch": 0.40311103756412375, "grad_norm": 0.5141238570213318, "learning_rate": 9.964277325893053e-06, "loss": 0.4797, "step": 812 }, { "epoch": 0.4036074797286116, "grad_norm": 0.5516549348831177, "learning_rate": 9.963931762157657e-06, "loss": 0.432, "step": 813 }, { "epoch": 0.40410392189309946, "grad_norm": 0.5856518149375916, "learning_rate": 9.96358454110036e-06, "loss": 0.4477, "step": 814 }, { "epoch": 0.4046003640575873, "grad_norm": 0.5053486227989197, "learning_rate": 9.963235662837085e-06, "loss": 0.4744, "step": 815 }, { "epoch": 0.4050968062220751, "grad_norm": 0.571459949016571, "learning_rate": 9.962885127484318e-06, "loss": 0.4441, "step": 816 }, { "epoch": 0.40559324838656297, "grad_norm": 0.5924246907234192, "learning_rate": 9.96253293515909e-06, "loss": 0.423, "step": 817 }, { "epoch": 0.4060896905510508, "grad_norm": 0.5508887767791748, "learning_rate": 9.96217908597899e-06, "loss": 0.4713, "step": 818 }, { "epoch": 0.4065861327155386, "grad_norm": 0.61686110496521, "learning_rate": 9.961823580062155e-06, "loss": 0.4879, "step": 819 }, { "epoch": 0.40708257488002647, "grad_norm": 0.635216474533081, "learning_rate": 9.961466417527283e-06, "loss": 0.4781, "step": 820 }, { "epoch": 0.4075790170445143, "grad_norm": 0.5738227963447571, "learning_rate": 9.96110759849362e-06, "loss": 0.4253, "step": 821 }, { "epoch": 0.4080754592090022, "grad_norm": 0.6115068197250366, "learning_rate": 9.960747123080965e-06, "loss": 0.4682, "step": 822 }, { "epoch": 0.40857190137349, "grad_norm": 0.6142166256904602, "learning_rate": 9.96038499140967e-06, "loss": 0.4378, "step": 823 }, { "epoch": 0.40906834353797783, "grad_norm": 0.538521945476532, "learning_rate": 9.960021203600642e-06, "loss": 0.4554, "step": 824 }, { "epoch": 0.4095647857024657, "grad_norm": 0.5782569050788879, "learning_rate": 9.959655759775342e-06, "loss": 0.4526, "step": 825 }, { "epoch": 0.4100612278669535, "grad_norm": 0.5420348048210144, "learning_rate": 9.95928866005578e-06, "loss": 0.457, "step": 826 }, { "epoch": 0.41055767003144134, "grad_norm": 0.5827574729919434, "learning_rate": 9.958919904564519e-06, "loss": 0.4702, "step": 827 }, { "epoch": 0.4110541121959292, "grad_norm": 0.6292406916618347, "learning_rate": 9.958549493424678e-06, "loss": 0.457, "step": 828 }, { "epoch": 0.411550554360417, "grad_norm": 0.5374106168746948, "learning_rate": 9.958177426759928e-06, "loss": 0.4522, "step": 829 }, { "epoch": 0.41204699652490484, "grad_norm": 0.5417695045471191, "learning_rate": 9.957803704694488e-06, "loss": 0.4688, "step": 830 }, { "epoch": 0.4125434386893927, "grad_norm": 0.6463310718536377, "learning_rate": 9.95742832735314e-06, "loss": 0.4982, "step": 831 }, { "epoch": 0.41303988085388055, "grad_norm": 0.6234830021858215, "learning_rate": 9.957051294861208e-06, "loss": 0.4666, "step": 832 }, { "epoch": 0.41353632301836835, "grad_norm": 0.6719191670417786, "learning_rate": 9.956672607344572e-06, "loss": 0.4313, "step": 833 }, { "epoch": 0.4140327651828562, "grad_norm": 0.5802842378616333, "learning_rate": 9.95629226492967e-06, "loss": 0.4404, "step": 834 }, { "epoch": 0.41452920734734405, "grad_norm": 0.6921095252037048, "learning_rate": 9.955910267743486e-06, "loss": 0.4499, "step": 835 }, { "epoch": 0.41502564951183185, "grad_norm": 0.7400733828544617, "learning_rate": 9.955526615913554e-06, "loss": 0.4759, "step": 836 }, { "epoch": 0.4155220916763197, "grad_norm": 0.5363679528236389, "learning_rate": 9.95514130956797e-06, "loss": 0.4698, "step": 837 }, { "epoch": 0.41601853384080756, "grad_norm": 0.6984108090400696, "learning_rate": 9.954754348835379e-06, "loss": 0.4741, "step": 838 }, { "epoch": 0.41651497600529536, "grad_norm": 0.7196961641311646, "learning_rate": 9.954365733844971e-06, "loss": 0.4727, "step": 839 }, { "epoch": 0.4170114181697832, "grad_norm": 0.5767598748207092, "learning_rate": 9.953975464726495e-06, "loss": 0.458, "step": 840 }, { "epoch": 0.41750786033427106, "grad_norm": 0.6403599381446838, "learning_rate": 9.953583541610257e-06, "loss": 0.4438, "step": 841 }, { "epoch": 0.4180043024987589, "grad_norm": 0.5899956822395325, "learning_rate": 9.953189964627102e-06, "loss": 0.4447, "step": 842 }, { "epoch": 0.4185007446632467, "grad_norm": 0.5701733231544495, "learning_rate": 9.95279473390844e-06, "loss": 0.4702, "step": 843 }, { "epoch": 0.41899718682773457, "grad_norm": 0.5714786648750305, "learning_rate": 9.952397849586225e-06, "loss": 0.4281, "step": 844 }, { "epoch": 0.4194936289922224, "grad_norm": 0.6508054733276367, "learning_rate": 9.951999311792966e-06, "loss": 0.4778, "step": 845 }, { "epoch": 0.4199900711567102, "grad_norm": 0.5201603174209595, "learning_rate": 9.951599120661726e-06, "loss": 0.4381, "step": 846 }, { "epoch": 0.4204865133211981, "grad_norm": 0.6205934882164001, "learning_rate": 9.951197276326117e-06, "loss": 0.4443, "step": 847 }, { "epoch": 0.42098295548568593, "grad_norm": 0.6824631690979004, "learning_rate": 9.950793778920303e-06, "loss": 0.4819, "step": 848 }, { "epoch": 0.4214793976501738, "grad_norm": 0.5860981345176697, "learning_rate": 9.950388628579e-06, "loss": 0.4808, "step": 849 }, { "epoch": 0.4219758398146616, "grad_norm": 0.6551135182380676, "learning_rate": 9.94998182543748e-06, "loss": 0.4348, "step": 850 }, { "epoch": 0.42247228197914943, "grad_norm": 0.6599870324134827, "learning_rate": 9.94957336963156e-06, "loss": 0.4817, "step": 851 }, { "epoch": 0.4229687241436373, "grad_norm": 0.6042893528938293, "learning_rate": 9.949163261297616e-06, "loss": 0.4567, "step": 852 }, { "epoch": 0.4234651663081251, "grad_norm": 0.5517352819442749, "learning_rate": 9.948751500572568e-06, "loss": 0.4441, "step": 853 }, { "epoch": 0.42396160847261294, "grad_norm": 0.5570823550224304, "learning_rate": 9.948338087593894e-06, "loss": 0.4299, "step": 854 }, { "epoch": 0.4244580506371008, "grad_norm": 0.6479265689849854, "learning_rate": 9.94792302249962e-06, "loss": 0.4536, "step": 855 }, { "epoch": 0.4249544928015886, "grad_norm": 0.5390287637710571, "learning_rate": 9.947506305428328e-06, "loss": 0.4697, "step": 856 }, { "epoch": 0.42545093496607644, "grad_norm": 0.5986117124557495, "learning_rate": 9.947087936519143e-06, "loss": 0.4394, "step": 857 }, { "epoch": 0.4259473771305643, "grad_norm": 0.5844370722770691, "learning_rate": 9.946667915911754e-06, "loss": 0.4584, "step": 858 }, { "epoch": 0.42644381929505215, "grad_norm": 0.5930382609367371, "learning_rate": 9.94624624374639e-06, "loss": 0.4512, "step": 859 }, { "epoch": 0.42694026145953995, "grad_norm": 0.6441788673400879, "learning_rate": 9.945822920163835e-06, "loss": 0.457, "step": 860 }, { "epoch": 0.4274367036240278, "grad_norm": 0.551365077495575, "learning_rate": 9.945397945305428e-06, "loss": 0.4318, "step": 861 }, { "epoch": 0.42793314578851566, "grad_norm": 0.6384586691856384, "learning_rate": 9.944971319313055e-06, "loss": 0.4536, "step": 862 }, { "epoch": 0.42842958795300345, "grad_norm": 0.5195631384849548, "learning_rate": 9.944543042329157e-06, "loss": 0.4348, "step": 863 }, { "epoch": 0.4289260301174913, "grad_norm": 0.6326119303703308, "learning_rate": 9.94411311449672e-06, "loss": 0.4953, "step": 864 }, { "epoch": 0.42942247228197916, "grad_norm": 0.6283515691757202, "learning_rate": 9.94368153595929e-06, "loss": 0.4373, "step": 865 }, { "epoch": 0.42991891444646696, "grad_norm": 0.5547547340393066, "learning_rate": 9.943248306860956e-06, "loss": 0.4872, "step": 866 }, { "epoch": 0.4304153566109548, "grad_norm": 0.643791139125824, "learning_rate": 9.942813427346363e-06, "loss": 0.4337, "step": 867 }, { "epoch": 0.43091179877544267, "grad_norm": 0.6802789568901062, "learning_rate": 9.942376897560703e-06, "loss": 0.4924, "step": 868 }, { "epoch": 0.4314082409399305, "grad_norm": 0.5925860404968262, "learning_rate": 9.941938717649724e-06, "loss": 0.4299, "step": 869 }, { "epoch": 0.4319046831044183, "grad_norm": 0.7297766208648682, "learning_rate": 9.941498887759724e-06, "loss": 0.4573, "step": 870 }, { "epoch": 0.4324011252689062, "grad_norm": 0.5683950185775757, "learning_rate": 9.941057408037546e-06, "loss": 0.4621, "step": 871 }, { "epoch": 0.432897567433394, "grad_norm": 0.5645796656608582, "learning_rate": 9.94061427863059e-06, "loss": 0.4862, "step": 872 }, { "epoch": 0.4333940095978818, "grad_norm": 0.5694764852523804, "learning_rate": 9.940169499686803e-06, "loss": 0.4458, "step": 873 }, { "epoch": 0.4338904517623697, "grad_norm": 0.5827603340148926, "learning_rate": 9.93972307135469e-06, "loss": 0.4887, "step": 874 }, { "epoch": 0.43438689392685753, "grad_norm": 0.607937216758728, "learning_rate": 9.939274993783295e-06, "loss": 0.4418, "step": 875 }, { "epoch": 0.4348833360913454, "grad_norm": 0.5169119834899902, "learning_rate": 9.938825267122223e-06, "loss": 0.442, "step": 876 }, { "epoch": 0.4353797782558332, "grad_norm": 0.6418021321296692, "learning_rate": 9.938373891521622e-06, "loss": 0.4646, "step": 877 }, { "epoch": 0.43587622042032104, "grad_norm": 0.6169459223747253, "learning_rate": 9.937920867132199e-06, "loss": 0.4572, "step": 878 }, { "epoch": 0.4363726625848089, "grad_norm": 0.533040463924408, "learning_rate": 9.937466194105202e-06, "loss": 0.4594, "step": 879 }, { "epoch": 0.4368691047492967, "grad_norm": 0.5577148199081421, "learning_rate": 9.937009872592435e-06, "loss": 0.4872, "step": 880 }, { "epoch": 0.43736554691378454, "grad_norm": 0.5590134859085083, "learning_rate": 9.936551902746255e-06, "loss": 0.4712, "step": 881 }, { "epoch": 0.4378619890782724, "grad_norm": 0.5516287088394165, "learning_rate": 9.93609228471956e-06, "loss": 0.4607, "step": 882 }, { "epoch": 0.4383584312427602, "grad_norm": 0.5595844388008118, "learning_rate": 9.935631018665808e-06, "loss": 0.4711, "step": 883 }, { "epoch": 0.43885487340724805, "grad_norm": 0.579397976398468, "learning_rate": 9.935168104739002e-06, "loss": 0.4672, "step": 884 }, { "epoch": 0.4393513155717359, "grad_norm": 0.5682505369186401, "learning_rate": 9.934703543093695e-06, "loss": 0.4389, "step": 885 }, { "epoch": 0.43984775773622375, "grad_norm": 0.5594995617866516, "learning_rate": 9.934237333884994e-06, "loss": 0.4485, "step": 886 }, { "epoch": 0.44034419990071155, "grad_norm": 0.6025765538215637, "learning_rate": 9.933769477268552e-06, "loss": 0.4616, "step": 887 }, { "epoch": 0.4408406420651994, "grad_norm": 0.6198194026947021, "learning_rate": 9.933299973400574e-06, "loss": 0.4748, "step": 888 }, { "epoch": 0.44133708422968726, "grad_norm": 0.5865346789360046, "learning_rate": 9.932828822437815e-06, "loss": 0.4618, "step": 889 }, { "epoch": 0.44183352639417506, "grad_norm": 0.5655702948570251, "learning_rate": 9.932356024537577e-06, "loss": 0.4157, "step": 890 }, { "epoch": 0.4423299685586629, "grad_norm": 0.6345211863517761, "learning_rate": 9.931881579857719e-06, "loss": 0.4479, "step": 891 }, { "epoch": 0.44282641072315077, "grad_norm": 0.6352914571762085, "learning_rate": 9.931405488556642e-06, "loss": 0.4589, "step": 892 }, { "epoch": 0.44332285288763856, "grad_norm": 0.5720461010932922, "learning_rate": 9.930927750793298e-06, "loss": 0.4446, "step": 893 }, { "epoch": 0.4438192950521264, "grad_norm": 0.5331560969352722, "learning_rate": 9.930448366727197e-06, "loss": 0.4563, "step": 894 }, { "epoch": 0.44431573721661427, "grad_norm": 0.5432044267654419, "learning_rate": 9.929967336518387e-06, "loss": 0.471, "step": 895 }, { "epoch": 0.4448121793811021, "grad_norm": 0.5145241022109985, "learning_rate": 9.929484660327472e-06, "loss": 0.4527, "step": 896 }, { "epoch": 0.4453086215455899, "grad_norm": 0.6716557741165161, "learning_rate": 9.929000338315604e-06, "loss": 0.4756, "step": 897 }, { "epoch": 0.4458050637100778, "grad_norm": 0.5955334901809692, "learning_rate": 9.928514370644487e-06, "loss": 0.4207, "step": 898 }, { "epoch": 0.44630150587456563, "grad_norm": 0.6092278361320496, "learning_rate": 9.92802675747637e-06, "loss": 0.4629, "step": 899 }, { "epoch": 0.4467979480390534, "grad_norm": 0.6189857125282288, "learning_rate": 9.927537498974059e-06, "loss": 0.4935, "step": 900 }, { "epoch": 0.4472943902035413, "grad_norm": 0.571831226348877, "learning_rate": 9.927046595300895e-06, "loss": 0.4204, "step": 901 }, { "epoch": 0.44779083236802913, "grad_norm": 0.5818679928779602, "learning_rate": 9.926554046620785e-06, "loss": 0.4698, "step": 902 }, { "epoch": 0.448287274532517, "grad_norm": 0.4938901960849762, "learning_rate": 9.926059853098175e-06, "loss": 0.4514, "step": 903 }, { "epoch": 0.4487837166970048, "grad_norm": 0.5413110256195068, "learning_rate": 9.925564014898063e-06, "loss": 0.4382, "step": 904 }, { "epoch": 0.44928015886149264, "grad_norm": 0.5519100427627563, "learning_rate": 9.925066532185996e-06, "loss": 0.4541, "step": 905 }, { "epoch": 0.4497766010259805, "grad_norm": 0.5705522298812866, "learning_rate": 9.924567405128069e-06, "loss": 0.4632, "step": 906 }, { "epoch": 0.4502730431904683, "grad_norm": 0.549027144908905, "learning_rate": 9.924066633890929e-06, "loss": 0.4782, "step": 907 }, { "epoch": 0.45076948535495615, "grad_norm": 0.6326151490211487, "learning_rate": 9.923564218641768e-06, "loss": 0.4208, "step": 908 }, { "epoch": 0.451265927519444, "grad_norm": 0.630217969417572, "learning_rate": 9.92306015954833e-06, "loss": 0.4693, "step": 909 }, { "epoch": 0.4517623696839318, "grad_norm": 0.6052801012992859, "learning_rate": 9.922554456778905e-06, "loss": 0.4746, "step": 910 }, { "epoch": 0.45225881184841965, "grad_norm": 0.6216434836387634, "learning_rate": 9.922047110502335e-06, "loss": 0.4433, "step": 911 }, { "epoch": 0.4527552540129075, "grad_norm": 0.5720237493515015, "learning_rate": 9.921538120888007e-06, "loss": 0.445, "step": 912 }, { "epoch": 0.45325169617739536, "grad_norm": 0.5607985258102417, "learning_rate": 9.921027488105864e-06, "loss": 0.4395, "step": 913 }, { "epoch": 0.45374813834188316, "grad_norm": 0.5706934332847595, "learning_rate": 9.920515212326386e-06, "loss": 0.4517, "step": 914 }, { "epoch": 0.454244580506371, "grad_norm": 0.598118782043457, "learning_rate": 9.920001293720612e-06, "loss": 0.4483, "step": 915 }, { "epoch": 0.45474102267085886, "grad_norm": 0.5979647636413574, "learning_rate": 9.919485732460123e-06, "loss": 0.4768, "step": 916 }, { "epoch": 0.45523746483534666, "grad_norm": 0.5547130703926086, "learning_rate": 9.918968528717053e-06, "loss": 0.4653, "step": 917 }, { "epoch": 0.4557339069998345, "grad_norm": 0.6306782364845276, "learning_rate": 9.918449682664082e-06, "loss": 0.4948, "step": 918 }, { "epoch": 0.45623034916432237, "grad_norm": 0.6055850386619568, "learning_rate": 9.917929194474438e-06, "loss": 0.4673, "step": 919 }, { "epoch": 0.45672679132881017, "grad_norm": 0.6051064133644104, "learning_rate": 9.917407064321897e-06, "loss": 0.4833, "step": 920 }, { "epoch": 0.457223233493298, "grad_norm": 0.5719088315963745, "learning_rate": 9.916883292380786e-06, "loss": 0.4872, "step": 921 }, { "epoch": 0.4577196756577859, "grad_norm": 0.6364821791648865, "learning_rate": 9.916357878825974e-06, "loss": 0.4494, "step": 922 }, { "epoch": 0.4582161178222737, "grad_norm": 0.6275284886360168, "learning_rate": 9.91583082383289e-06, "loss": 0.4325, "step": 923 }, { "epoch": 0.4587125599867615, "grad_norm": 0.5457890033721924, "learning_rate": 9.915302127577496e-06, "loss": 0.4599, "step": 924 }, { "epoch": 0.4592090021512494, "grad_norm": 0.7632023096084595, "learning_rate": 9.914771790236313e-06, "loss": 0.46, "step": 925 }, { "epoch": 0.45970544431573723, "grad_norm": 0.6196309328079224, "learning_rate": 9.914239811986406e-06, "loss": 0.4645, "step": 926 }, { "epoch": 0.46020188648022503, "grad_norm": 0.6275529861450195, "learning_rate": 9.913706193005386e-06, "loss": 0.4772, "step": 927 }, { "epoch": 0.4606983286447129, "grad_norm": 0.6084286570549011, "learning_rate": 9.913170933471416e-06, "loss": 0.4382, "step": 928 }, { "epoch": 0.46119477080920074, "grad_norm": 0.6103651523590088, "learning_rate": 9.912634033563205e-06, "loss": 0.4321, "step": 929 }, { "epoch": 0.4616912129736886, "grad_norm": 0.6446409821510315, "learning_rate": 9.912095493460005e-06, "loss": 0.4327, "step": 930 }, { "epoch": 0.4621876551381764, "grad_norm": 0.6004866361618042, "learning_rate": 9.911555313341625e-06, "loss": 0.4259, "step": 931 }, { "epoch": 0.46268409730266424, "grad_norm": 0.6525911688804626, "learning_rate": 9.911013493388416e-06, "loss": 0.4393, "step": 932 }, { "epoch": 0.4631805394671521, "grad_norm": 0.653540849685669, "learning_rate": 9.910470033781274e-06, "loss": 0.4506, "step": 933 }, { "epoch": 0.4636769816316399, "grad_norm": 0.6028352975845337, "learning_rate": 9.909924934701647e-06, "loss": 0.4502, "step": 934 }, { "epoch": 0.46417342379612775, "grad_norm": 0.5618104338645935, "learning_rate": 9.909378196331527e-06, "loss": 0.428, "step": 935 }, { "epoch": 0.4646698659606156, "grad_norm": 0.6070424318313599, "learning_rate": 9.908829818853459e-06, "loss": 0.4338, "step": 936 }, { "epoch": 0.4651663081251034, "grad_norm": 0.5997883677482605, "learning_rate": 9.908279802450529e-06, "loss": 0.4732, "step": 937 }, { "epoch": 0.46566275028959125, "grad_norm": 0.5680132508277893, "learning_rate": 9.907728147306373e-06, "loss": 0.4382, "step": 938 }, { "epoch": 0.4661591924540791, "grad_norm": 0.5455333590507507, "learning_rate": 9.90717485360517e-06, "loss": 0.4604, "step": 939 }, { "epoch": 0.46665563461856696, "grad_norm": 0.5591575503349304, "learning_rate": 9.906619921531658e-06, "loss": 0.4638, "step": 940 }, { "epoch": 0.46715207678305476, "grad_norm": 0.604485273361206, "learning_rate": 9.906063351271104e-06, "loss": 0.4692, "step": 941 }, { "epoch": 0.4676485189475426, "grad_norm": 0.5896407961845398, "learning_rate": 9.90550514300934e-06, "loss": 0.4356, "step": 942 }, { "epoch": 0.46814496111203047, "grad_norm": 0.6322289109230042, "learning_rate": 9.904945296932731e-06, "loss": 0.5013, "step": 943 }, { "epoch": 0.46864140327651826, "grad_norm": 0.5494778156280518, "learning_rate": 9.904383813228197e-06, "loss": 0.4534, "step": 944 }, { "epoch": 0.4691378454410061, "grad_norm": 0.5788816809654236, "learning_rate": 9.9038206920832e-06, "loss": 0.4571, "step": 945 }, { "epoch": 0.46963428760549397, "grad_norm": 0.6223641633987427, "learning_rate": 9.903255933685755e-06, "loss": 0.4138, "step": 946 }, { "epoch": 0.47013072976998177, "grad_norm": 0.582351803779602, "learning_rate": 9.902689538224415e-06, "loss": 0.4678, "step": 947 }, { "epoch": 0.4706271719344696, "grad_norm": 0.6602117419242859, "learning_rate": 9.902121505888287e-06, "loss": 0.4675, "step": 948 }, { "epoch": 0.4711236140989575, "grad_norm": 0.6985843181610107, "learning_rate": 9.90155183686702e-06, "loss": 0.5112, "step": 949 }, { "epoch": 0.47162005626344533, "grad_norm": 0.601648211479187, "learning_rate": 9.900980531350813e-06, "loss": 0.4204, "step": 950 }, { "epoch": 0.47211649842793313, "grad_norm": 0.6571403741836548, "learning_rate": 9.900407589530405e-06, "loss": 0.4495, "step": 951 }, { "epoch": 0.472612940592421, "grad_norm": 0.6431176662445068, "learning_rate": 9.89983301159709e-06, "loss": 0.4804, "step": 952 }, { "epoch": 0.47310938275690884, "grad_norm": 0.4892394542694092, "learning_rate": 9.899256797742702e-06, "loss": 0.4306, "step": 953 }, { "epoch": 0.47360582492139663, "grad_norm": 0.7257311344146729, "learning_rate": 9.898678948159625e-06, "loss": 0.4609, "step": 954 }, { "epoch": 0.4741022670858845, "grad_norm": 0.6158337593078613, "learning_rate": 9.898099463040784e-06, "loss": 0.4431, "step": 955 }, { "epoch": 0.47459870925037234, "grad_norm": 0.6085072755813599, "learning_rate": 9.897518342579657e-06, "loss": 0.4392, "step": 956 }, { "epoch": 0.4750951514148602, "grad_norm": 0.7046692967414856, "learning_rate": 9.896935586970262e-06, "loss": 0.4556, "step": 957 }, { "epoch": 0.475591593579348, "grad_norm": 0.5827057957649231, "learning_rate": 9.896351196407166e-06, "loss": 0.4385, "step": 958 }, { "epoch": 0.47608803574383585, "grad_norm": 0.6459434032440186, "learning_rate": 9.89576517108548e-06, "loss": 0.4778, "step": 959 }, { "epoch": 0.4765844779083237, "grad_norm": 0.5758009552955627, "learning_rate": 9.895177511200864e-06, "loss": 0.4454, "step": 960 }, { "epoch": 0.4770809200728115, "grad_norm": 0.6074600219726562, "learning_rate": 9.89458821694952e-06, "loss": 0.4361, "step": 961 }, { "epoch": 0.47757736223729935, "grad_norm": 0.5639742016792297, "learning_rate": 9.893997288528198e-06, "loss": 0.4477, "step": 962 }, { "epoch": 0.4780738044017872, "grad_norm": 0.5208935737609863, "learning_rate": 9.893404726134193e-06, "loss": 0.4402, "step": 963 }, { "epoch": 0.478570246566275, "grad_norm": 0.5632034540176392, "learning_rate": 9.892810529965344e-06, "loss": 0.4367, "step": 964 }, { "epoch": 0.47906668873076286, "grad_norm": 0.5871676206588745, "learning_rate": 9.89221470022004e-06, "loss": 0.4492, "step": 965 }, { "epoch": 0.4795631308952507, "grad_norm": 0.5698586702346802, "learning_rate": 9.891617237097209e-06, "loss": 0.4623, "step": 966 }, { "epoch": 0.48005957305973856, "grad_norm": 0.590275764465332, "learning_rate": 9.891018140796332e-06, "loss": 0.4682, "step": 967 }, { "epoch": 0.48055601522422636, "grad_norm": 0.601129412651062, "learning_rate": 9.890417411517426e-06, "loss": 0.4946, "step": 968 }, { "epoch": 0.4810524573887142, "grad_norm": 0.5358284115791321, "learning_rate": 9.889815049461062e-06, "loss": 0.4485, "step": 969 }, { "epoch": 0.48154889955320207, "grad_norm": 0.6163699626922607, "learning_rate": 9.88921105482835e-06, "loss": 0.4865, "step": 970 }, { "epoch": 0.48204534171768987, "grad_norm": 0.5826394557952881, "learning_rate": 9.888605427820947e-06, "loss": 0.4579, "step": 971 }, { "epoch": 0.4825417838821777, "grad_norm": 0.5492720603942871, "learning_rate": 9.887998168641057e-06, "loss": 0.4393, "step": 972 }, { "epoch": 0.4830382260466656, "grad_norm": 0.6193668246269226, "learning_rate": 9.887389277491429e-06, "loss": 0.4915, "step": 973 }, { "epoch": 0.4835346682111534, "grad_norm": 0.6325623393058777, "learning_rate": 9.88677875457535e-06, "loss": 0.4657, "step": 974 }, { "epoch": 0.4840311103756412, "grad_norm": 0.5437896251678467, "learning_rate": 9.886166600096662e-06, "loss": 0.4636, "step": 975 }, { "epoch": 0.4845275525401291, "grad_norm": 0.6048393845558167, "learning_rate": 9.885552814259746e-06, "loss": 0.4491, "step": 976 }, { "epoch": 0.48502399470461693, "grad_norm": 0.5315700769424438, "learning_rate": 9.884937397269525e-06, "loss": 0.422, "step": 977 }, { "epoch": 0.48552043686910473, "grad_norm": 0.5243560075759888, "learning_rate": 9.884320349331474e-06, "loss": 0.4296, "step": 978 }, { "epoch": 0.4860168790335926, "grad_norm": 0.4885232746601105, "learning_rate": 9.883701670651607e-06, "loss": 0.4338, "step": 979 }, { "epoch": 0.48651332119808044, "grad_norm": 0.5639747977256775, "learning_rate": 9.883081361436482e-06, "loss": 0.4609, "step": 980 }, { "epoch": 0.48700976336256824, "grad_norm": 0.5286743640899658, "learning_rate": 9.882459421893206e-06, "loss": 0.4595, "step": 981 }, { "epoch": 0.4875062055270561, "grad_norm": 0.4950232207775116, "learning_rate": 9.881835852229427e-06, "loss": 0.4439, "step": 982 }, { "epoch": 0.48800264769154394, "grad_norm": 0.5400747656822205, "learning_rate": 9.881210652653338e-06, "loss": 0.4702, "step": 983 }, { "epoch": 0.4884990898560318, "grad_norm": 0.5721049308776855, "learning_rate": 9.880583823373676e-06, "loss": 0.474, "step": 984 }, { "epoch": 0.4889955320205196, "grad_norm": 0.5697023272514343, "learning_rate": 9.879955364599722e-06, "loss": 0.4469, "step": 985 }, { "epoch": 0.48949197418500745, "grad_norm": 0.651923656463623, "learning_rate": 9.879325276541303e-06, "loss": 0.4353, "step": 986 }, { "epoch": 0.4899884163494953, "grad_norm": 0.5232157707214355, "learning_rate": 9.878693559408785e-06, "loss": 0.4373, "step": 987 }, { "epoch": 0.4904848585139831, "grad_norm": 0.6123502254486084, "learning_rate": 9.878060213413083e-06, "loss": 0.4803, "step": 988 }, { "epoch": 0.49098130067847096, "grad_norm": 0.5617421865463257, "learning_rate": 9.877425238765657e-06, "loss": 0.442, "step": 989 }, { "epoch": 0.4914777428429588, "grad_norm": 0.5848252177238464, "learning_rate": 9.876788635678502e-06, "loss": 0.4372, "step": 990 }, { "epoch": 0.4919741850074466, "grad_norm": 0.6082578897476196, "learning_rate": 9.876150404364166e-06, "loss": 0.4655, "step": 991 }, { "epoch": 0.49247062717193446, "grad_norm": 0.575847327709198, "learning_rate": 9.875510545035736e-06, "loss": 0.4384, "step": 992 }, { "epoch": 0.4929670693364223, "grad_norm": 0.6035521030426025, "learning_rate": 9.874869057906844e-06, "loss": 0.4764, "step": 993 }, { "epoch": 0.49346351150091017, "grad_norm": 0.5103117823600769, "learning_rate": 9.874225943191666e-06, "loss": 0.425, "step": 994 }, { "epoch": 0.49395995366539797, "grad_norm": 0.6662722229957581, "learning_rate": 9.87358120110492e-06, "loss": 0.4686, "step": 995 }, { "epoch": 0.4944563958298858, "grad_norm": 0.5862585306167603, "learning_rate": 9.872934831861867e-06, "loss": 0.4697, "step": 996 }, { "epoch": 0.4949528379943737, "grad_norm": 0.5902515649795532, "learning_rate": 9.872286835678313e-06, "loss": 0.4164, "step": 997 }, { "epoch": 0.49544928015886147, "grad_norm": 0.6023157238960266, "learning_rate": 9.871637212770606e-06, "loss": 0.4756, "step": 998 }, { "epoch": 0.4959457223233493, "grad_norm": 0.6857558488845825, "learning_rate": 9.870985963355636e-06, "loss": 0.4414, "step": 999 }, { "epoch": 0.4964421644878372, "grad_norm": 0.5725039839744568, "learning_rate": 9.87033308765084e-06, "loss": 0.4497, "step": 1000 }, { "epoch": 0.496938606652325, "grad_norm": 0.6090866923332214, "learning_rate": 9.869678585874193e-06, "loss": 0.4613, "step": 1001 }, { "epoch": 0.49743504881681283, "grad_norm": 0.5878522396087646, "learning_rate": 9.86902245824422e-06, "loss": 0.425, "step": 1002 }, { "epoch": 0.4979314909813007, "grad_norm": 0.6251345872879028, "learning_rate": 9.868364704979977e-06, "loss": 0.4701, "step": 1003 }, { "epoch": 0.49842793314578854, "grad_norm": 0.5989763736724854, "learning_rate": 9.867705326301077e-06, "loss": 0.4715, "step": 1004 }, { "epoch": 0.49892437531027634, "grad_norm": 0.6005432605743408, "learning_rate": 9.867044322427663e-06, "loss": 0.4396, "step": 1005 }, { "epoch": 0.4994208174747642, "grad_norm": 0.597500741481781, "learning_rate": 9.86638169358043e-06, "loss": 0.464, "step": 1006 }, { "epoch": 0.49991725963925204, "grad_norm": 0.5395328998565674, "learning_rate": 9.865717439980611e-06, "loss": 0.4427, "step": 1007 }, { "epoch": 0.5004137018037399, "grad_norm": 0.5938105583190918, "learning_rate": 9.86505156184998e-06, "loss": 0.4315, "step": 1008 }, { "epoch": 0.5009101439682278, "grad_norm": 0.532762348651886, "learning_rate": 9.864384059410858e-06, "loss": 0.4516, "step": 1009 }, { "epoch": 0.5014065861327155, "grad_norm": 0.5764479637145996, "learning_rate": 9.863714932886106e-06, "loss": 0.4429, "step": 1010 }, { "epoch": 0.5019030282972033, "grad_norm": 0.6884803771972656, "learning_rate": 9.863044182499126e-06, "loss": 0.4521, "step": 1011 }, { "epoch": 0.5023994704616912, "grad_norm": 0.4860242009162903, "learning_rate": 9.862371808473862e-06, "loss": 0.4245, "step": 1012 }, { "epoch": 0.502895912626179, "grad_norm": 0.6113942265510559, "learning_rate": 9.861697811034805e-06, "loss": 0.4505, "step": 1013 }, { "epoch": 0.5033923547906669, "grad_norm": 0.6692773103713989, "learning_rate": 9.861022190406982e-06, "loss": 0.4368, "step": 1014 }, { "epoch": 0.5038887969551548, "grad_norm": 0.6185814738273621, "learning_rate": 9.860344946815966e-06, "loss": 0.466, "step": 1015 }, { "epoch": 0.5043852391196426, "grad_norm": 0.7360732555389404, "learning_rate": 9.859666080487868e-06, "loss": 0.4497, "step": 1016 }, { "epoch": 0.5048816812841304, "grad_norm": 0.5051026940345764, "learning_rate": 9.858985591649343e-06, "loss": 0.411, "step": 1017 }, { "epoch": 0.5053781234486182, "grad_norm": 0.7091212272644043, "learning_rate": 9.85830348052759e-06, "loss": 0.4364, "step": 1018 }, { "epoch": 0.5058745656131061, "grad_norm": 0.7252426743507385, "learning_rate": 9.857619747350346e-06, "loss": 0.4455, "step": 1019 }, { "epoch": 0.5063710077775939, "grad_norm": 0.5383073687553406, "learning_rate": 9.856934392345892e-06, "loss": 0.443, "step": 1020 }, { "epoch": 0.5068674499420818, "grad_norm": 0.666297435760498, "learning_rate": 9.856247415743048e-06, "loss": 0.4092, "step": 1021 }, { "epoch": 0.5073638921065696, "grad_norm": 0.5978301763534546, "learning_rate": 9.855558817771177e-06, "loss": 0.4452, "step": 1022 }, { "epoch": 0.5078603342710574, "grad_norm": 0.6052388548851013, "learning_rate": 9.854868598660184e-06, "loss": 0.4226, "step": 1023 }, { "epoch": 0.5083567764355452, "grad_norm": 0.572261393070221, "learning_rate": 9.854176758640513e-06, "loss": 0.4312, "step": 1024 }, { "epoch": 0.5088532186000331, "grad_norm": 0.5447362661361694, "learning_rate": 9.853483297943153e-06, "loss": 0.4738, "step": 1025 }, { "epoch": 0.5093496607645209, "grad_norm": 0.6350544691085815, "learning_rate": 9.85278821679963e-06, "loss": 0.4442, "step": 1026 }, { "epoch": 0.5098461029290088, "grad_norm": 0.6432090997695923, "learning_rate": 9.852091515442012e-06, "loss": 0.4561, "step": 1027 }, { "epoch": 0.5103425450934966, "grad_norm": 0.500100314617157, "learning_rate": 9.85139319410291e-06, "loss": 0.4512, "step": 1028 }, { "epoch": 0.5108389872579845, "grad_norm": 0.717689573764801, "learning_rate": 9.850693253015473e-06, "loss": 0.4626, "step": 1029 }, { "epoch": 0.5113354294224722, "grad_norm": 0.555020809173584, "learning_rate": 9.849991692413394e-06, "loss": 0.4372, "step": 1030 }, { "epoch": 0.5118318715869601, "grad_norm": 0.5757158994674683, "learning_rate": 9.849288512530906e-06, "loss": 0.4904, "step": 1031 }, { "epoch": 0.5123283137514479, "grad_norm": 0.6115471720695496, "learning_rate": 9.848583713602777e-06, "loss": 0.4547, "step": 1032 }, { "epoch": 0.5128247559159358, "grad_norm": 0.5386384129524231, "learning_rate": 9.847877295864326e-06, "loss": 0.4254, "step": 1033 }, { "epoch": 0.5133211980804236, "grad_norm": 0.5753943920135498, "learning_rate": 9.847169259551403e-06, "loss": 0.4414, "step": 1034 }, { "epoch": 0.5138176402449115, "grad_norm": 0.5859472751617432, "learning_rate": 9.846459604900403e-06, "loss": 0.4678, "step": 1035 }, { "epoch": 0.5143140824093994, "grad_norm": 0.5723085999488831, "learning_rate": 9.845748332148259e-06, "loss": 0.4949, "step": 1036 }, { "epoch": 0.5148105245738871, "grad_norm": 0.5379884839057922, "learning_rate": 9.845035441532448e-06, "loss": 0.4722, "step": 1037 }, { "epoch": 0.515306966738375, "grad_norm": 0.5732872486114502, "learning_rate": 9.844320933290986e-06, "loss": 0.4268, "step": 1038 }, { "epoch": 0.5158034089028628, "grad_norm": 0.6455171704292297, "learning_rate": 9.843604807662422e-06, "loss": 0.4499, "step": 1039 }, { "epoch": 0.5162998510673507, "grad_norm": 0.5448759198188782, "learning_rate": 9.842887064885856e-06, "loss": 0.4602, "step": 1040 }, { "epoch": 0.5167962932318385, "grad_norm": 0.6587372422218323, "learning_rate": 9.842167705200923e-06, "loss": 0.4562, "step": 1041 }, { "epoch": 0.5172927353963264, "grad_norm": 0.6155374050140381, "learning_rate": 9.841446728847795e-06, "loss": 0.4447, "step": 1042 }, { "epoch": 0.5177891775608142, "grad_norm": 0.5448647141456604, "learning_rate": 9.840724136067186e-06, "loss": 0.4645, "step": 1043 }, { "epoch": 0.518285619725302, "grad_norm": 0.6560261845588684, "learning_rate": 9.839999927100354e-06, "loss": 0.4381, "step": 1044 }, { "epoch": 0.5187820618897898, "grad_norm": 0.6950559616088867, "learning_rate": 9.839274102189089e-06, "loss": 0.4576, "step": 1045 }, { "epoch": 0.5192785040542777, "grad_norm": 0.6056340932846069, "learning_rate": 9.838546661575725e-06, "loss": 0.4589, "step": 1046 }, { "epoch": 0.5197749462187655, "grad_norm": 0.7459995150566101, "learning_rate": 9.837817605503134e-06, "loss": 0.4657, "step": 1047 }, { "epoch": 0.5202713883832534, "grad_norm": 0.6246460676193237, "learning_rate": 9.837086934214733e-06, "loss": 0.4565, "step": 1048 }, { "epoch": 0.5207678305477412, "grad_norm": 0.6117191314697266, "learning_rate": 9.836354647954467e-06, "loss": 0.4867, "step": 1049 }, { "epoch": 0.521264272712229, "grad_norm": 0.6422696709632874, "learning_rate": 9.835620746966829e-06, "loss": 0.4628, "step": 1050 }, { "epoch": 0.5217607148767168, "grad_norm": 0.5251749753952026, "learning_rate": 9.834885231496847e-06, "loss": 0.4409, "step": 1051 }, { "epoch": 0.5222571570412047, "grad_norm": 0.5926701426506042, "learning_rate": 9.834148101790093e-06, "loss": 0.4459, "step": 1052 }, { "epoch": 0.5227535992056925, "grad_norm": 0.5840223431587219, "learning_rate": 9.833409358092673e-06, "loss": 0.4329, "step": 1053 }, { "epoch": 0.5232500413701804, "grad_norm": 0.5884864330291748, "learning_rate": 9.832669000651231e-06, "loss": 0.4566, "step": 1054 }, { "epoch": 0.5237464835346682, "grad_norm": 0.6731551885604858, "learning_rate": 9.831927029712957e-06, "loss": 0.4746, "step": 1055 }, { "epoch": 0.5242429256991561, "grad_norm": 0.6504162549972534, "learning_rate": 9.831183445525571e-06, "loss": 0.4474, "step": 1056 }, { "epoch": 0.5247393678636438, "grad_norm": 0.5918163061141968, "learning_rate": 9.830438248337337e-06, "loss": 0.4889, "step": 1057 }, { "epoch": 0.5252358100281317, "grad_norm": 0.6740034222602844, "learning_rate": 9.829691438397056e-06, "loss": 0.4453, "step": 1058 }, { "epoch": 0.5257322521926195, "grad_norm": 0.5471624135971069, "learning_rate": 9.828943015954066e-06, "loss": 0.4372, "step": 1059 }, { "epoch": 0.5262286943571074, "grad_norm": 0.547701895236969, "learning_rate": 9.828192981258249e-06, "loss": 0.4379, "step": 1060 }, { "epoch": 0.5267251365215952, "grad_norm": 0.5749586224555969, "learning_rate": 9.827441334560017e-06, "loss": 0.487, "step": 1061 }, { "epoch": 0.5272215786860831, "grad_norm": 0.5433669090270996, "learning_rate": 9.826688076110328e-06, "loss": 0.465, "step": 1062 }, { "epoch": 0.527718020850571, "grad_norm": 0.5684483051300049, "learning_rate": 9.82593320616067e-06, "loss": 0.4609, "step": 1063 }, { "epoch": 0.5282144630150587, "grad_norm": 0.5279474854469299, "learning_rate": 9.825176724963075e-06, "loss": 0.4848, "step": 1064 }, { "epoch": 0.5287109051795466, "grad_norm": 0.5513960719108582, "learning_rate": 9.824418632770115e-06, "loss": 0.461, "step": 1065 }, { "epoch": 0.5292073473440344, "grad_norm": 0.6358182430267334, "learning_rate": 9.823658929834892e-06, "loss": 0.4605, "step": 1066 }, { "epoch": 0.5297037895085223, "grad_norm": 0.517944872379303, "learning_rate": 9.822897616411055e-06, "loss": 0.4887, "step": 1067 }, { "epoch": 0.5302002316730101, "grad_norm": 0.6297049522399902, "learning_rate": 9.82213469275278e-06, "loss": 0.4933, "step": 1068 }, { "epoch": 0.530696673837498, "grad_norm": 0.5742437839508057, "learning_rate": 9.821370159114792e-06, "loss": 0.4488, "step": 1069 }, { "epoch": 0.5311931160019858, "grad_norm": 0.486621618270874, "learning_rate": 9.820604015752344e-06, "loss": 0.4516, "step": 1070 }, { "epoch": 0.5316895581664736, "grad_norm": 0.5812432765960693, "learning_rate": 9.819836262921231e-06, "loss": 0.4831, "step": 1071 }, { "epoch": 0.5321860003309614, "grad_norm": 0.5651341676712036, "learning_rate": 9.819066900877787e-06, "loss": 0.4591, "step": 1072 }, { "epoch": 0.5326824424954493, "grad_norm": 0.5016430616378784, "learning_rate": 9.81829592987888e-06, "loss": 0.4184, "step": 1073 }, { "epoch": 0.5331788846599371, "grad_norm": 0.5695505142211914, "learning_rate": 9.817523350181916e-06, "loss": 0.443, "step": 1074 }, { "epoch": 0.533675326824425, "grad_norm": 0.607589602470398, "learning_rate": 9.81674916204484e-06, "loss": 0.4738, "step": 1075 }, { "epoch": 0.5341717689889128, "grad_norm": 0.6675355434417725, "learning_rate": 9.815973365726126e-06, "loss": 0.4334, "step": 1076 }, { "epoch": 0.5346682111534006, "grad_norm": 0.6298385262489319, "learning_rate": 9.8151959614848e-06, "loss": 0.4512, "step": 1077 }, { "epoch": 0.5351646533178884, "grad_norm": 0.6344515681266785, "learning_rate": 9.814416949580412e-06, "loss": 0.473, "step": 1078 }, { "epoch": 0.5356610954823763, "grad_norm": 0.6583170294761658, "learning_rate": 9.813636330273051e-06, "loss": 0.4218, "step": 1079 }, { "epoch": 0.5361575376468641, "grad_norm": 0.6544238924980164, "learning_rate": 9.812854103823349e-06, "loss": 0.4713, "step": 1080 }, { "epoch": 0.536653979811352, "grad_norm": 0.7164194583892822, "learning_rate": 9.812070270492467e-06, "loss": 0.4088, "step": 1081 }, { "epoch": 0.5371504219758398, "grad_norm": 0.5986360907554626, "learning_rate": 9.811284830542105e-06, "loss": 0.4743, "step": 1082 }, { "epoch": 0.5376468641403277, "grad_norm": 0.6138706207275391, "learning_rate": 9.810497784234503e-06, "loss": 0.4645, "step": 1083 }, { "epoch": 0.5381433063048154, "grad_norm": 0.6618176698684692, "learning_rate": 9.809709131832431e-06, "loss": 0.4805, "step": 1084 }, { "epoch": 0.5386397484693033, "grad_norm": 0.5988572835922241, "learning_rate": 9.808918873599205e-06, "loss": 0.4515, "step": 1085 }, { "epoch": 0.5391361906337911, "grad_norm": 0.7287824153900146, "learning_rate": 9.808127009798662e-06, "loss": 0.4787, "step": 1086 }, { "epoch": 0.539632632798279, "grad_norm": 0.6417304873466492, "learning_rate": 9.807333540695192e-06, "loss": 0.4638, "step": 1087 }, { "epoch": 0.5401290749627669, "grad_norm": 0.7080192565917969, "learning_rate": 9.806538466553705e-06, "loss": 0.4065, "step": 1088 }, { "epoch": 0.5406255171272547, "grad_norm": 0.568427562713623, "learning_rate": 9.80574178763966e-06, "loss": 0.4614, "step": 1089 }, { "epoch": 0.5411219592917426, "grad_norm": 0.5805076360702515, "learning_rate": 9.804943504219046e-06, "loss": 0.4598, "step": 1090 }, { "epoch": 0.5416184014562303, "grad_norm": 0.567419707775116, "learning_rate": 9.804143616558387e-06, "loss": 0.442, "step": 1091 }, { "epoch": 0.5421148436207182, "grad_norm": 0.7031585574150085, "learning_rate": 9.803342124924742e-06, "loss": 0.4355, "step": 1092 }, { "epoch": 0.542611285785206, "grad_norm": 0.5469849705696106, "learning_rate": 9.802539029585709e-06, "loss": 0.4454, "step": 1093 }, { "epoch": 0.5431077279496939, "grad_norm": 0.573331892490387, "learning_rate": 9.80173433080942e-06, "loss": 0.4334, "step": 1094 }, { "epoch": 0.5436041701141817, "grad_norm": 0.6324737071990967, "learning_rate": 9.800928028864543e-06, "loss": 0.4705, "step": 1095 }, { "epoch": 0.5441006122786696, "grad_norm": 0.5884754657745361, "learning_rate": 9.80012012402028e-06, "loss": 0.4227, "step": 1096 }, { "epoch": 0.5445970544431574, "grad_norm": 0.6385104060173035, "learning_rate": 9.799310616546367e-06, "loss": 0.4487, "step": 1097 }, { "epoch": 0.5450934966076452, "grad_norm": 0.5854458808898926, "learning_rate": 9.798499506713075e-06, "loss": 0.396, "step": 1098 }, { "epoch": 0.545589938772133, "grad_norm": 0.5839739441871643, "learning_rate": 9.797686794791216e-06, "loss": 0.4357, "step": 1099 }, { "epoch": 0.5460863809366209, "grad_norm": 0.5969468951225281, "learning_rate": 9.79687248105213e-06, "loss": 0.4621, "step": 1100 }, { "epoch": 0.5465828231011087, "grad_norm": 0.5973749756813049, "learning_rate": 9.796056565767694e-06, "loss": 0.4531, "step": 1101 }, { "epoch": 0.5470792652655966, "grad_norm": 0.5620649456977844, "learning_rate": 9.79523904921032e-06, "loss": 0.4636, "step": 1102 }, { "epoch": 0.5475757074300844, "grad_norm": 0.6660063862800598, "learning_rate": 9.794419931652954e-06, "loss": 0.4755, "step": 1103 }, { "epoch": 0.5480721495945722, "grad_norm": 0.5329774618148804, "learning_rate": 9.793599213369078e-06, "loss": 0.4425, "step": 1104 }, { "epoch": 0.54856859175906, "grad_norm": 0.6592510342597961, "learning_rate": 9.792776894632709e-06, "loss": 0.4649, "step": 1105 }, { "epoch": 0.5490650339235479, "grad_norm": 0.5413240194320679, "learning_rate": 9.791952975718395e-06, "loss": 0.459, "step": 1106 }, { "epoch": 0.5495614760880357, "grad_norm": 0.6274532079696655, "learning_rate": 9.791127456901219e-06, "loss": 0.4208, "step": 1107 }, { "epoch": 0.5500579182525236, "grad_norm": 0.5738719701766968, "learning_rate": 9.790300338456802e-06, "loss": 0.4802, "step": 1108 }, { "epoch": 0.5505543604170114, "grad_norm": 0.5603674650192261, "learning_rate": 9.789471620661296e-06, "loss": 0.5129, "step": 1109 }, { "epoch": 0.5510508025814993, "grad_norm": 0.5310673117637634, "learning_rate": 9.788641303791384e-06, "loss": 0.4179, "step": 1110 }, { "epoch": 0.551547244745987, "grad_norm": 0.5755075216293335, "learning_rate": 9.78780938812429e-06, "loss": 0.4664, "step": 1111 }, { "epoch": 0.5520436869104749, "grad_norm": 0.5947471857070923, "learning_rate": 9.786975873937768e-06, "loss": 0.4814, "step": 1112 }, { "epoch": 0.5525401290749627, "grad_norm": 0.5506641268730164, "learning_rate": 9.786140761510103e-06, "loss": 0.4504, "step": 1113 }, { "epoch": 0.5530365712394506, "grad_norm": 0.6543846130371094, "learning_rate": 9.785304051120117e-06, "loss": 0.491, "step": 1114 }, { "epoch": 0.5535330134039385, "grad_norm": 0.604807436466217, "learning_rate": 9.784465743047168e-06, "loss": 0.4631, "step": 1115 }, { "epoch": 0.5540294555684263, "grad_norm": 0.5983553528785706, "learning_rate": 9.78362583757114e-06, "loss": 0.4313, "step": 1116 }, { "epoch": 0.5545258977329142, "grad_norm": 0.6479515433311462, "learning_rate": 9.782784334972459e-06, "loss": 0.4609, "step": 1117 }, { "epoch": 0.5550223398974019, "grad_norm": 0.564041793346405, "learning_rate": 9.781941235532076e-06, "loss": 0.4243, "step": 1118 }, { "epoch": 0.5555187820618898, "grad_norm": 0.6237730383872986, "learning_rate": 9.781096539531479e-06, "loss": 0.44, "step": 1119 }, { "epoch": 0.5560152242263776, "grad_norm": 0.6409626007080078, "learning_rate": 9.780250247252692e-06, "loss": 0.4459, "step": 1120 }, { "epoch": 0.5565116663908655, "grad_norm": 0.8023943901062012, "learning_rate": 9.779402358978267e-06, "loss": 0.4832, "step": 1121 }, { "epoch": 0.5570081085553533, "grad_norm": 0.5997129082679749, "learning_rate": 9.778552874991291e-06, "loss": 0.4536, "step": 1122 }, { "epoch": 0.5575045507198412, "grad_norm": 0.578983724117279, "learning_rate": 9.777701795575385e-06, "loss": 0.4267, "step": 1123 }, { "epoch": 0.558000992884329, "grad_norm": 0.6628795266151428, "learning_rate": 9.7768491210147e-06, "loss": 0.4558, "step": 1124 }, { "epoch": 0.5584974350488168, "grad_norm": 0.6671413779258728, "learning_rate": 9.775994851593921e-06, "loss": 0.4541, "step": 1125 }, { "epoch": 0.5589938772133046, "grad_norm": 0.5701093077659607, "learning_rate": 9.775138987598264e-06, "loss": 0.4389, "step": 1126 }, { "epoch": 0.5594903193777925, "grad_norm": 0.576451301574707, "learning_rate": 9.774281529313483e-06, "loss": 0.4394, "step": 1127 }, { "epoch": 0.5599867615422803, "grad_norm": 0.6742432117462158, "learning_rate": 9.773422477025854e-06, "loss": 0.4811, "step": 1128 }, { "epoch": 0.5604832037067682, "grad_norm": 0.5538901090621948, "learning_rate": 9.772561831022195e-06, "loss": 0.4389, "step": 1129 }, { "epoch": 0.560979645871256, "grad_norm": 0.5413223505020142, "learning_rate": 9.771699591589854e-06, "loss": 0.4416, "step": 1130 }, { "epoch": 0.5614760880357438, "grad_norm": 0.5663831830024719, "learning_rate": 9.770835759016704e-06, "loss": 0.4622, "step": 1131 }, { "epoch": 0.5619725302002316, "grad_norm": 0.5842031836509705, "learning_rate": 9.76997033359116e-06, "loss": 0.4248, "step": 1132 }, { "epoch": 0.5624689723647195, "grad_norm": 0.5851379632949829, "learning_rate": 9.769103315602161e-06, "loss": 0.4915, "step": 1133 }, { "epoch": 0.5629654145292073, "grad_norm": 0.5787515044212341, "learning_rate": 9.768234705339184e-06, "loss": 0.4123, "step": 1134 }, { "epoch": 0.5634618566936952, "grad_norm": 0.5883163213729858, "learning_rate": 9.76736450309223e-06, "loss": 0.4148, "step": 1135 }, { "epoch": 0.563958298858183, "grad_norm": 0.6022423505783081, "learning_rate": 9.76649270915184e-06, "loss": 0.4494, "step": 1136 }, { "epoch": 0.5644547410226709, "grad_norm": 0.6078398823738098, "learning_rate": 9.765619323809078e-06, "loss": 0.4389, "step": 1137 }, { "epoch": 0.5649511831871586, "grad_norm": 0.6205084919929504, "learning_rate": 9.76474434735555e-06, "loss": 0.4561, "step": 1138 }, { "epoch": 0.5654476253516465, "grad_norm": 0.6373727917671204, "learning_rate": 9.76386778008338e-06, "loss": 0.4548, "step": 1139 }, { "epoch": 0.5659440675161344, "grad_norm": 0.6644636392593384, "learning_rate": 9.762989622285234e-06, "loss": 0.4478, "step": 1140 }, { "epoch": 0.5664405096806222, "grad_norm": 0.559161901473999, "learning_rate": 9.762109874254305e-06, "loss": 0.421, "step": 1141 }, { "epoch": 0.5669369518451101, "grad_norm": 0.6848572492599487, "learning_rate": 9.761228536284313e-06, "loss": 0.4406, "step": 1142 }, { "epoch": 0.5674333940095979, "grad_norm": 0.5845863819122314, "learning_rate": 9.76034560866952e-06, "loss": 0.4409, "step": 1143 }, { "epoch": 0.5679298361740858, "grad_norm": 0.5199159979820251, "learning_rate": 9.759461091704703e-06, "loss": 0.4334, "step": 1144 }, { "epoch": 0.5684262783385735, "grad_norm": 0.618832528591156, "learning_rate": 9.758574985685186e-06, "loss": 0.4339, "step": 1145 }, { "epoch": 0.5689227205030614, "grad_norm": 0.6033197641372681, "learning_rate": 9.75768729090681e-06, "loss": 0.4891, "step": 1146 }, { "epoch": 0.5694191626675492, "grad_norm": 0.5497013330459595, "learning_rate": 9.756798007665954e-06, "loss": 0.4338, "step": 1147 }, { "epoch": 0.5699156048320371, "grad_norm": 0.635118842124939, "learning_rate": 9.755907136259525e-06, "loss": 0.4609, "step": 1148 }, { "epoch": 0.5704120469965249, "grad_norm": 0.5897179245948792, "learning_rate": 9.755014676984965e-06, "loss": 0.4857, "step": 1149 }, { "epoch": 0.5709084891610128, "grad_norm": 0.5024181008338928, "learning_rate": 9.754120630140237e-06, "loss": 0.4423, "step": 1150 }, { "epoch": 0.5714049313255006, "grad_norm": 0.5655117630958557, "learning_rate": 9.75322499602384e-06, "loss": 0.4428, "step": 1151 }, { "epoch": 0.5719013734899884, "grad_norm": 0.6595343351364136, "learning_rate": 9.752327774934802e-06, "loss": 0.5054, "step": 1152 }, { "epoch": 0.5723978156544762, "grad_norm": 0.5222578048706055, "learning_rate": 9.751428967172683e-06, "loss": 0.428, "step": 1153 }, { "epoch": 0.5728942578189641, "grad_norm": 0.5471052527427673, "learning_rate": 9.750528573037566e-06, "loss": 0.437, "step": 1154 }, { "epoch": 0.5733906999834519, "grad_norm": 0.5422949194908142, "learning_rate": 9.749626592830073e-06, "loss": 0.4668, "step": 1155 }, { "epoch": 0.5738871421479398, "grad_norm": 0.5633902549743652, "learning_rate": 9.748723026851346e-06, "loss": 0.4582, "step": 1156 }, { "epoch": 0.5743835843124276, "grad_norm": 0.6789720058441162, "learning_rate": 9.747817875403066e-06, "loss": 0.4925, "step": 1157 }, { "epoch": 0.5748800264769154, "grad_norm": 0.5519346594810486, "learning_rate": 9.746911138787434e-06, "loss": 0.4347, "step": 1158 }, { "epoch": 0.5753764686414032, "grad_norm": 0.6288570165634155, "learning_rate": 9.746002817307187e-06, "loss": 0.4838, "step": 1159 }, { "epoch": 0.5758729108058911, "grad_norm": 0.5391813516616821, "learning_rate": 9.745092911265587e-06, "loss": 0.4253, "step": 1160 }, { "epoch": 0.576369352970379, "grad_norm": 0.5741091370582581, "learning_rate": 9.744181420966432e-06, "loss": 0.4514, "step": 1161 }, { "epoch": 0.5768657951348668, "grad_norm": 0.6315009593963623, "learning_rate": 9.743268346714037e-06, "loss": 0.4142, "step": 1162 }, { "epoch": 0.5773622372993547, "grad_norm": 0.5397639870643616, "learning_rate": 9.742353688813257e-06, "loss": 0.4379, "step": 1163 }, { "epoch": 0.5778586794638425, "grad_norm": 0.5935172438621521, "learning_rate": 9.741437447569473e-06, "loss": 0.4427, "step": 1164 }, { "epoch": 0.5783551216283302, "grad_norm": 0.575933039188385, "learning_rate": 9.740519623288587e-06, "loss": 0.4454, "step": 1165 }, { "epoch": 0.5788515637928181, "grad_norm": 0.6504976749420166, "learning_rate": 9.73960021627704e-06, "loss": 0.4697, "step": 1166 }, { "epoch": 0.579348005957306, "grad_norm": 0.5776121020317078, "learning_rate": 9.738679226841796e-06, "loss": 0.4398, "step": 1167 }, { "epoch": 0.5798444481217938, "grad_norm": 0.5595331788063049, "learning_rate": 9.737756655290348e-06, "loss": 0.4437, "step": 1168 }, { "epoch": 0.5803408902862817, "grad_norm": 0.5998561382293701, "learning_rate": 9.736832501930717e-06, "loss": 0.4746, "step": 1169 }, { "epoch": 0.5808373324507695, "grad_norm": 0.5837823748588562, "learning_rate": 9.735906767071456e-06, "loss": 0.4084, "step": 1170 }, { "epoch": 0.5813337746152574, "grad_norm": 0.5267230272293091, "learning_rate": 9.73497945102164e-06, "loss": 0.4696, "step": 1171 }, { "epoch": 0.5818302167797451, "grad_norm": 0.5833373069763184, "learning_rate": 9.734050554090872e-06, "loss": 0.4158, "step": 1172 }, { "epoch": 0.582326658944233, "grad_norm": 0.6048559546470642, "learning_rate": 9.733120076589291e-06, "loss": 0.4303, "step": 1173 }, { "epoch": 0.5828231011087208, "grad_norm": 0.519048810005188, "learning_rate": 9.732188018827556e-06, "loss": 0.4725, "step": 1174 }, { "epoch": 0.5833195432732087, "grad_norm": 0.5740111470222473, "learning_rate": 9.731254381116852e-06, "loss": 0.4768, "step": 1175 }, { "epoch": 0.5838159854376965, "grad_norm": 0.5625897645950317, "learning_rate": 9.730319163768902e-06, "loss": 0.4542, "step": 1176 }, { "epoch": 0.5843124276021844, "grad_norm": 0.5533280372619629, "learning_rate": 9.729382367095944e-06, "loss": 0.4198, "step": 1177 }, { "epoch": 0.5848088697666722, "grad_norm": 0.617614209651947, "learning_rate": 9.728443991410752e-06, "loss": 0.5006, "step": 1178 }, { "epoch": 0.58530531193116, "grad_norm": 0.5622063279151917, "learning_rate": 9.727504037026623e-06, "loss": 0.4512, "step": 1179 }, { "epoch": 0.5858017540956478, "grad_norm": 0.5960741639137268, "learning_rate": 9.726562504257383e-06, "loss": 0.4279, "step": 1180 }, { "epoch": 0.5862981962601357, "grad_norm": 0.550359308719635, "learning_rate": 9.725619393417382e-06, "loss": 0.4304, "step": 1181 }, { "epoch": 0.5867946384246235, "grad_norm": 0.5168578028678894, "learning_rate": 9.724674704821503e-06, "loss": 0.4375, "step": 1182 }, { "epoch": 0.5872910805891114, "grad_norm": 0.537912130355835, "learning_rate": 9.72372843878515e-06, "loss": 0.4348, "step": 1183 }, { "epoch": 0.5877875227535992, "grad_norm": 0.640807569026947, "learning_rate": 9.722780595624253e-06, "loss": 0.4442, "step": 1184 }, { "epoch": 0.588283964918087, "grad_norm": 0.5694154500961304, "learning_rate": 9.721831175655274e-06, "loss": 0.4647, "step": 1185 }, { "epoch": 0.5887804070825748, "grad_norm": 0.6483800411224365, "learning_rate": 9.720880179195196e-06, "loss": 0.4668, "step": 1186 }, { "epoch": 0.5892768492470627, "grad_norm": 0.6253471374511719, "learning_rate": 9.719927606561534e-06, "loss": 0.4253, "step": 1187 }, { "epoch": 0.5897732914115505, "grad_norm": 0.5358550548553467, "learning_rate": 9.718973458072325e-06, "loss": 0.4788, "step": 1188 }, { "epoch": 0.5902697335760384, "grad_norm": 0.5972486734390259, "learning_rate": 9.718017734046134e-06, "loss": 0.4551, "step": 1189 }, { "epoch": 0.5907661757405263, "grad_norm": 0.6481278538703918, "learning_rate": 9.717060434802049e-06, "loss": 0.4718, "step": 1190 }, { "epoch": 0.5912626179050141, "grad_norm": 0.5232759118080139, "learning_rate": 9.716101560659688e-06, "loss": 0.4576, "step": 1191 }, { "epoch": 0.5917590600695019, "grad_norm": 0.5357466340065002, "learning_rate": 9.715141111939192e-06, "loss": 0.4266, "step": 1192 }, { "epoch": 0.5922555022339897, "grad_norm": 0.5327233076095581, "learning_rate": 9.714179088961228e-06, "loss": 0.4286, "step": 1193 }, { "epoch": 0.5927519443984776, "grad_norm": 0.5237245559692383, "learning_rate": 9.713215492046992e-06, "loss": 0.422, "step": 1194 }, { "epoch": 0.5932483865629654, "grad_norm": 0.5165470242500305, "learning_rate": 9.712250321518201e-06, "loss": 0.4415, "step": 1195 }, { "epoch": 0.5937448287274533, "grad_norm": 0.5545601844787598, "learning_rate": 9.711283577697099e-06, "loss": 0.4408, "step": 1196 }, { "epoch": 0.5942412708919411, "grad_norm": 0.5641174912452698, "learning_rate": 9.710315260906456e-06, "loss": 0.4333, "step": 1197 }, { "epoch": 0.594737713056429, "grad_norm": 0.594059407711029, "learning_rate": 9.709345371469567e-06, "loss": 0.4714, "step": 1198 }, { "epoch": 0.5952341552209167, "grad_norm": 0.581364095211029, "learning_rate": 9.708373909710251e-06, "loss": 0.4518, "step": 1199 }, { "epoch": 0.5957305973854046, "grad_norm": 0.5431079268455505, "learning_rate": 9.707400875952856e-06, "loss": 0.4414, "step": 1200 }, { "epoch": 0.5962270395498924, "grad_norm": 0.511829137802124, "learning_rate": 9.706426270522244e-06, "loss": 0.4694, "step": 1201 }, { "epoch": 0.5967234817143803, "grad_norm": 0.5661177635192871, "learning_rate": 9.705450093743815e-06, "loss": 0.4453, "step": 1202 }, { "epoch": 0.5972199238788681, "grad_norm": 0.5798479914665222, "learning_rate": 9.704472345943489e-06, "loss": 0.4559, "step": 1203 }, { "epoch": 0.597716366043356, "grad_norm": 0.4813724756240845, "learning_rate": 9.703493027447705e-06, "loss": 0.4309, "step": 1204 }, { "epoch": 0.5982128082078438, "grad_norm": 0.5502196550369263, "learning_rate": 9.702512138583435e-06, "loss": 0.4581, "step": 1205 }, { "epoch": 0.5987092503723316, "grad_norm": 0.5731869339942932, "learning_rate": 9.701529679678168e-06, "loss": 0.4531, "step": 1206 }, { "epoch": 0.5992056925368194, "grad_norm": 0.507214367389679, "learning_rate": 9.700545651059921e-06, "loss": 0.4129, "step": 1207 }, { "epoch": 0.5997021347013073, "grad_norm": 0.4984944760799408, "learning_rate": 9.699560053057236e-06, "loss": 0.4724, "step": 1208 }, { "epoch": 0.6001985768657951, "grad_norm": 0.5749444961547852, "learning_rate": 9.698572885999174e-06, "loss": 0.4657, "step": 1209 }, { "epoch": 0.600695019030283, "grad_norm": 0.5570077300071716, "learning_rate": 9.697584150215326e-06, "loss": 0.4451, "step": 1210 }, { "epoch": 0.6011914611947708, "grad_norm": 0.5148751735687256, "learning_rate": 9.696593846035807e-06, "loss": 0.4337, "step": 1211 }, { "epoch": 0.6016879033592586, "grad_norm": 0.5888288021087646, "learning_rate": 9.695601973791245e-06, "loss": 0.463, "step": 1212 }, { "epoch": 0.6021843455237464, "grad_norm": 0.5786775946617126, "learning_rate": 9.694608533812807e-06, "loss": 0.4572, "step": 1213 }, { "epoch": 0.6026807876882343, "grad_norm": 0.5414060354232788, "learning_rate": 9.693613526432168e-06, "loss": 0.4398, "step": 1214 }, { "epoch": 0.6031772298527222, "grad_norm": 0.507391631603241, "learning_rate": 9.692616951981539e-06, "loss": 0.44, "step": 1215 }, { "epoch": 0.60367367201721, "grad_norm": 0.5531654953956604, "learning_rate": 9.69161881079365e-06, "loss": 0.4606, "step": 1216 }, { "epoch": 0.6041701141816979, "grad_norm": 0.5442899465560913, "learning_rate": 9.690619103201751e-06, "loss": 0.4404, "step": 1217 }, { "epoch": 0.6046665563461857, "grad_norm": 0.5535522699356079, "learning_rate": 9.689617829539616e-06, "loss": 0.4513, "step": 1218 }, { "epoch": 0.6051629985106735, "grad_norm": 0.541619062423706, "learning_rate": 9.688614990141545e-06, "loss": 0.453, "step": 1219 }, { "epoch": 0.6056594406751613, "grad_norm": 0.5307257771492004, "learning_rate": 9.687610585342358e-06, "loss": 0.4688, "step": 1220 }, { "epoch": 0.6061558828396492, "grad_norm": 0.49863725900650024, "learning_rate": 9.686604615477398e-06, "loss": 0.4142, "step": 1221 }, { "epoch": 0.606652325004137, "grad_norm": 0.5913330316543579, "learning_rate": 9.685597080882533e-06, "loss": 0.4425, "step": 1222 }, { "epoch": 0.6071487671686249, "grad_norm": 0.5615970492362976, "learning_rate": 9.684587981894148e-06, "loss": 0.4552, "step": 1223 }, { "epoch": 0.6076452093331127, "grad_norm": 0.5642855763435364, "learning_rate": 9.68357731884916e-06, "loss": 0.4532, "step": 1224 }, { "epoch": 0.6081416514976006, "grad_norm": 0.6529304385185242, "learning_rate": 9.682565092084994e-06, "loss": 0.4579, "step": 1225 }, { "epoch": 0.6086380936620883, "grad_norm": 0.6697067022323608, "learning_rate": 9.681551301939612e-06, "loss": 0.4566, "step": 1226 }, { "epoch": 0.6091345358265762, "grad_norm": 0.5266802310943604, "learning_rate": 9.680535948751485e-06, "loss": 0.4466, "step": 1227 }, { "epoch": 0.609630977991064, "grad_norm": 0.685796320438385, "learning_rate": 9.679519032859616e-06, "loss": 0.4624, "step": 1228 }, { "epoch": 0.6101274201555519, "grad_norm": 0.5447752475738525, "learning_rate": 9.678500554603524e-06, "loss": 0.455, "step": 1229 }, { "epoch": 0.6106238623200397, "grad_norm": 0.5499634742736816, "learning_rate": 9.677480514323253e-06, "loss": 0.4376, "step": 1230 }, { "epoch": 0.6111203044845276, "grad_norm": 0.6218085289001465, "learning_rate": 9.676458912359362e-06, "loss": 0.4493, "step": 1231 }, { "epoch": 0.6116167466490154, "grad_norm": 0.6070442795753479, "learning_rate": 9.675435749052941e-06, "loss": 0.4277, "step": 1232 }, { "epoch": 0.6121131888135032, "grad_norm": 0.5411662459373474, "learning_rate": 9.674411024745593e-06, "loss": 0.4375, "step": 1233 }, { "epoch": 0.612609630977991, "grad_norm": 0.5433435440063477, "learning_rate": 9.67338473977945e-06, "loss": 0.4212, "step": 1234 }, { "epoch": 0.6131060731424789, "grad_norm": 0.6882891058921814, "learning_rate": 9.672356894497157e-06, "loss": 0.4002, "step": 1235 }, { "epoch": 0.6136025153069667, "grad_norm": 0.6055873036384583, "learning_rate": 9.671327489241884e-06, "loss": 0.4884, "step": 1236 }, { "epoch": 0.6140989574714546, "grad_norm": 0.5510829091072083, "learning_rate": 9.670296524357322e-06, "loss": 0.4266, "step": 1237 }, { "epoch": 0.6145953996359425, "grad_norm": 0.6319456696510315, "learning_rate": 9.669264000187681e-06, "loss": 0.4278, "step": 1238 }, { "epoch": 0.6150918418004302, "grad_norm": 0.5201278328895569, "learning_rate": 9.668229917077696e-06, "loss": 0.4564, "step": 1239 }, { "epoch": 0.615588283964918, "grad_norm": 0.5340493321418762, "learning_rate": 9.667194275372618e-06, "loss": 0.4622, "step": 1240 }, { "epoch": 0.6160847261294059, "grad_norm": 0.6237800717353821, "learning_rate": 9.666157075418216e-06, "loss": 0.4551, "step": 1241 }, { "epoch": 0.6165811682938938, "grad_norm": 0.5983462333679199, "learning_rate": 9.665118317560786e-06, "loss": 0.4596, "step": 1242 }, { "epoch": 0.6170776104583816, "grad_norm": 0.4770101010799408, "learning_rate": 9.664078002147143e-06, "loss": 0.4361, "step": 1243 }, { "epoch": 0.6175740526228695, "grad_norm": 0.5247612595558167, "learning_rate": 9.663036129524616e-06, "loss": 0.4534, "step": 1244 }, { "epoch": 0.6180704947873573, "grad_norm": 0.5783234238624573, "learning_rate": 9.66199270004106e-06, "loss": 0.4122, "step": 1245 }, { "epoch": 0.6185669369518451, "grad_norm": 0.5445291996002197, "learning_rate": 9.660947714044846e-06, "loss": 0.4683, "step": 1246 }, { "epoch": 0.6190633791163329, "grad_norm": 0.5317956209182739, "learning_rate": 9.659901171884869e-06, "loss": 0.4306, "step": 1247 }, { "epoch": 0.6195598212808208, "grad_norm": 0.5008754134178162, "learning_rate": 9.658853073910541e-06, "loss": 0.4454, "step": 1248 }, { "epoch": 0.6200562634453086, "grad_norm": 0.536970317363739, "learning_rate": 9.65780342047179e-06, "loss": 0.4571, "step": 1249 }, { "epoch": 0.6205527056097965, "grad_norm": 0.5829573273658752, "learning_rate": 9.65675221191907e-06, "loss": 0.4206, "step": 1250 }, { "epoch": 0.6210491477742843, "grad_norm": 0.5711414813995361, "learning_rate": 9.65569944860335e-06, "loss": 0.4423, "step": 1251 }, { "epoch": 0.6215455899387722, "grad_norm": 0.6208679676055908, "learning_rate": 9.65464513087612e-06, "loss": 0.4305, "step": 1252 }, { "epoch": 0.6220420321032599, "grad_norm": 0.5921888947486877, "learning_rate": 9.653589259089386e-06, "loss": 0.4505, "step": 1253 }, { "epoch": 0.6225384742677478, "grad_norm": 0.5208152532577515, "learning_rate": 9.652531833595675e-06, "loss": 0.4323, "step": 1254 }, { "epoch": 0.6230349164322356, "grad_norm": 0.5845632553100586, "learning_rate": 9.651472854748036e-06, "loss": 0.4601, "step": 1255 }, { "epoch": 0.6235313585967235, "grad_norm": 0.6094711422920227, "learning_rate": 9.65041232290003e-06, "loss": 0.4481, "step": 1256 }, { "epoch": 0.6240278007612113, "grad_norm": 0.55179762840271, "learning_rate": 9.649350238405739e-06, "loss": 0.4717, "step": 1257 }, { "epoch": 0.6245242429256992, "grad_norm": 0.6117375493049622, "learning_rate": 9.648286601619766e-06, "loss": 0.4532, "step": 1258 }, { "epoch": 0.625020685090187, "grad_norm": 0.605010986328125, "learning_rate": 9.647221412897232e-06, "loss": 0.4161, "step": 1259 }, { "epoch": 0.6255171272546748, "grad_norm": 0.5556288957595825, "learning_rate": 9.646154672593771e-06, "loss": 0.4382, "step": 1260 }, { "epoch": 0.6260135694191626, "grad_norm": 0.5320722460746765, "learning_rate": 9.64508638106554e-06, "loss": 0.4332, "step": 1261 }, { "epoch": 0.6265100115836505, "grad_norm": 0.5968083739280701, "learning_rate": 9.644016538669214e-06, "loss": 0.4247, "step": 1262 }, { "epoch": 0.6270064537481383, "grad_norm": 0.5405896902084351, "learning_rate": 9.642945145761983e-06, "loss": 0.4635, "step": 1263 }, { "epoch": 0.6275028959126262, "grad_norm": 0.5168482065200806, "learning_rate": 9.641872202701557e-06, "loss": 0.4208, "step": 1264 }, { "epoch": 0.627999338077114, "grad_norm": 0.5075944662094116, "learning_rate": 9.640797709846159e-06, "loss": 0.438, "step": 1265 }, { "epoch": 0.6284957802416018, "grad_norm": 0.5591109991073608, "learning_rate": 9.639721667554537e-06, "loss": 0.4315, "step": 1266 }, { "epoch": 0.6289922224060897, "grad_norm": 0.5791926980018616, "learning_rate": 9.638644076185953e-06, "loss": 0.4151, "step": 1267 }, { "epoch": 0.6294886645705775, "grad_norm": 0.5376565456390381, "learning_rate": 9.63756493610018e-06, "loss": 0.4619, "step": 1268 }, { "epoch": 0.6299851067350654, "grad_norm": 0.4602423310279846, "learning_rate": 9.636484247657519e-06, "loss": 0.4371, "step": 1269 }, { "epoch": 0.6304815488995532, "grad_norm": 0.4830094575881958, "learning_rate": 9.635402011218778e-06, "loss": 0.4096, "step": 1270 }, { "epoch": 0.6309779910640411, "grad_norm": 0.4936559796333313, "learning_rate": 9.634318227145291e-06, "loss": 0.4369, "step": 1271 }, { "epoch": 0.6314744332285289, "grad_norm": 0.5132246613502502, "learning_rate": 9.633232895798901e-06, "loss": 0.447, "step": 1272 }, { "epoch": 0.6319708753930167, "grad_norm": 0.5098681449890137, "learning_rate": 9.63214601754197e-06, "loss": 0.4234, "step": 1273 }, { "epoch": 0.6324673175575045, "grad_norm": 0.49768054485321045, "learning_rate": 9.63105759273738e-06, "loss": 0.4425, "step": 1274 }, { "epoch": 0.6329637597219924, "grad_norm": 0.556969404220581, "learning_rate": 9.629967621748527e-06, "loss": 0.4278, "step": 1275 }, { "epoch": 0.6334602018864802, "grad_norm": 0.5210133194923401, "learning_rate": 9.628876104939318e-06, "loss": 0.4845, "step": 1276 }, { "epoch": 0.6339566440509681, "grad_norm": 0.5318720936775208, "learning_rate": 9.627783042674182e-06, "loss": 0.4523, "step": 1277 }, { "epoch": 0.6344530862154559, "grad_norm": 0.48020273447036743, "learning_rate": 9.626688435318066e-06, "loss": 0.4449, "step": 1278 }, { "epoch": 0.6349495283799438, "grad_norm": 0.49945199489593506, "learning_rate": 9.62559228323643e-06, "loss": 0.4804, "step": 1279 }, { "epoch": 0.6354459705444315, "grad_norm": 0.561797022819519, "learning_rate": 9.624494586795243e-06, "loss": 0.4283, "step": 1280 }, { "epoch": 0.6359424127089194, "grad_norm": 0.4954497218132019, "learning_rate": 9.623395346361004e-06, "loss": 0.4468, "step": 1281 }, { "epoch": 0.6364388548734072, "grad_norm": 0.5648560523986816, "learning_rate": 9.622294562300714e-06, "loss": 0.4451, "step": 1282 }, { "epoch": 0.6369352970378951, "grad_norm": 0.6128668785095215, "learning_rate": 9.621192234981897e-06, "loss": 0.4453, "step": 1283 }, { "epoch": 0.6374317392023829, "grad_norm": 0.50145024061203, "learning_rate": 9.620088364772589e-06, "loss": 0.4756, "step": 1284 }, { "epoch": 0.6379281813668708, "grad_norm": 0.6170275211334229, "learning_rate": 9.618982952041344e-06, "loss": 0.4467, "step": 1285 }, { "epoch": 0.6384246235313586, "grad_norm": 0.477433443069458, "learning_rate": 9.61787599715723e-06, "loss": 0.4179, "step": 1286 }, { "epoch": 0.6389210656958464, "grad_norm": 0.502993106842041, "learning_rate": 9.616767500489822e-06, "loss": 0.4327, "step": 1287 }, { "epoch": 0.6394175078603342, "grad_norm": 0.5686522722244263, "learning_rate": 9.615657462409227e-06, "loss": 0.4412, "step": 1288 }, { "epoch": 0.6399139500248221, "grad_norm": 0.5733565092086792, "learning_rate": 9.614545883286051e-06, "loss": 0.4549, "step": 1289 }, { "epoch": 0.64041039218931, "grad_norm": 0.5683903098106384, "learning_rate": 9.613432763491422e-06, "loss": 0.4153, "step": 1290 }, { "epoch": 0.6409068343537978, "grad_norm": 0.5265888571739197, "learning_rate": 9.612318103396977e-06, "loss": 0.4331, "step": 1291 }, { "epoch": 0.6414032765182857, "grad_norm": 0.5418409705162048, "learning_rate": 9.611201903374873e-06, "loss": 0.4113, "step": 1292 }, { "epoch": 0.6418997186827734, "grad_norm": 0.6175366640090942, "learning_rate": 9.610084163797782e-06, "loss": 0.4808, "step": 1293 }, { "epoch": 0.6423961608472613, "grad_norm": 0.48724982142448425, "learning_rate": 9.608964885038882e-06, "loss": 0.4179, "step": 1294 }, { "epoch": 0.6428926030117491, "grad_norm": 0.5819126963615417, "learning_rate": 9.607844067471871e-06, "loss": 0.4498, "step": 1295 }, { "epoch": 0.643389045176237, "grad_norm": 0.6375250816345215, "learning_rate": 9.606721711470962e-06, "loss": 0.4902, "step": 1296 }, { "epoch": 0.6438854873407248, "grad_norm": 0.5593791007995605, "learning_rate": 9.605597817410875e-06, "loss": 0.4431, "step": 1297 }, { "epoch": 0.6443819295052127, "grad_norm": 0.6574770212173462, "learning_rate": 9.604472385666851e-06, "loss": 0.4533, "step": 1298 }, { "epoch": 0.6448783716697005, "grad_norm": 0.5549986362457275, "learning_rate": 9.60334541661464e-06, "loss": 0.4688, "step": 1299 }, { "epoch": 0.6453748138341883, "grad_norm": 0.6403267979621887, "learning_rate": 9.602216910630507e-06, "loss": 0.4254, "step": 1300 }, { "epoch": 0.6458712559986761, "grad_norm": 0.55754154920578, "learning_rate": 9.60108686809123e-06, "loss": 0.4341, "step": 1301 }, { "epoch": 0.646367698163164, "grad_norm": 0.5636479258537292, "learning_rate": 9.599955289374097e-06, "loss": 0.4372, "step": 1302 }, { "epoch": 0.6468641403276518, "grad_norm": 0.6760838031768799, "learning_rate": 9.598822174856912e-06, "loss": 0.4361, "step": 1303 }, { "epoch": 0.6473605824921397, "grad_norm": 0.5079112648963928, "learning_rate": 9.597687524917992e-06, "loss": 0.4527, "step": 1304 }, { "epoch": 0.6478570246566275, "grad_norm": 0.6224894523620605, "learning_rate": 9.596551339936167e-06, "loss": 0.4369, "step": 1305 }, { "epoch": 0.6483534668211154, "grad_norm": 0.5648080110549927, "learning_rate": 9.595413620290774e-06, "loss": 0.4254, "step": 1306 }, { "epoch": 0.6488499089856031, "grad_norm": 0.56905597448349, "learning_rate": 9.594274366361673e-06, "loss": 0.4203, "step": 1307 }, { "epoch": 0.649346351150091, "grad_norm": 0.541673481464386, "learning_rate": 9.593133578529224e-06, "loss": 0.411, "step": 1308 }, { "epoch": 0.6498427933145788, "grad_norm": 0.5095140933990479, "learning_rate": 9.59199125717431e-06, "loss": 0.4133, "step": 1309 }, { "epoch": 0.6503392354790667, "grad_norm": 0.5697533488273621, "learning_rate": 9.590847402678316e-06, "loss": 0.4394, "step": 1310 }, { "epoch": 0.6508356776435545, "grad_norm": 0.6379334330558777, "learning_rate": 9.589702015423148e-06, "loss": 0.4584, "step": 1311 }, { "epoch": 0.6513321198080424, "grad_norm": 0.5399661660194397, "learning_rate": 9.588555095791219e-06, "loss": 0.4608, "step": 1312 }, { "epoch": 0.6518285619725303, "grad_norm": 0.5818194150924683, "learning_rate": 9.587406644165453e-06, "loss": 0.4546, "step": 1313 }, { "epoch": 0.652325004137018, "grad_norm": 0.5863329768180847, "learning_rate": 9.586256660929287e-06, "loss": 0.4244, "step": 1314 }, { "epoch": 0.6528214463015058, "grad_norm": 0.6387655735015869, "learning_rate": 9.585105146466668e-06, "loss": 0.4233, "step": 1315 }, { "epoch": 0.6533178884659937, "grad_norm": 0.5146544575691223, "learning_rate": 9.58395210116206e-06, "loss": 0.4295, "step": 1316 }, { "epoch": 0.6538143306304816, "grad_norm": 0.5838323831558228, "learning_rate": 9.582797525400428e-06, "loss": 0.4119, "step": 1317 }, { "epoch": 0.6543107727949694, "grad_norm": 0.669072687625885, "learning_rate": 9.581641419567256e-06, "loss": 0.4617, "step": 1318 }, { "epoch": 0.6548072149594573, "grad_norm": 0.5799645185470581, "learning_rate": 9.580483784048537e-06, "loss": 0.4737, "step": 1319 }, { "epoch": 0.6553036571239451, "grad_norm": 0.6691159605979919, "learning_rate": 9.579324619230772e-06, "loss": 0.4596, "step": 1320 }, { "epoch": 0.6558000992884329, "grad_norm": 0.5202569365501404, "learning_rate": 9.578163925500978e-06, "loss": 0.439, "step": 1321 }, { "epoch": 0.6562965414529207, "grad_norm": 0.7119522094726562, "learning_rate": 9.577001703246676e-06, "loss": 0.462, "step": 1322 }, { "epoch": 0.6567929836174086, "grad_norm": 0.5135021209716797, "learning_rate": 9.5758379528559e-06, "loss": 0.4222, "step": 1323 }, { "epoch": 0.6572894257818964, "grad_norm": 0.5196523070335388, "learning_rate": 9.574672674717196e-06, "loss": 0.4476, "step": 1324 }, { "epoch": 0.6577858679463843, "grad_norm": 0.5175487995147705, "learning_rate": 9.57350586921962e-06, "loss": 0.425, "step": 1325 }, { "epoch": 0.6582823101108721, "grad_norm": 0.6086983680725098, "learning_rate": 9.572337536752733e-06, "loss": 0.4793, "step": 1326 }, { "epoch": 0.6587787522753599, "grad_norm": 0.5586812496185303, "learning_rate": 9.571167677706615e-06, "loss": 0.4625, "step": 1327 }, { "epoch": 0.6592751944398477, "grad_norm": 0.47752371430397034, "learning_rate": 9.569996292471844e-06, "loss": 0.4359, "step": 1328 }, { "epoch": 0.6597716366043356, "grad_norm": 0.6078988909721375, "learning_rate": 9.568823381439518e-06, "loss": 0.4567, "step": 1329 }, { "epoch": 0.6602680787688234, "grad_norm": 0.5701612234115601, "learning_rate": 9.567648945001238e-06, "loss": 0.4566, "step": 1330 }, { "epoch": 0.6607645209333113, "grad_norm": 0.5643782019615173, "learning_rate": 9.566472983549118e-06, "loss": 0.4758, "step": 1331 }, { "epoch": 0.6612609630977991, "grad_norm": 0.5645661354064941, "learning_rate": 9.565295497475777e-06, "loss": 0.4062, "step": 1332 }, { "epoch": 0.661757405262287, "grad_norm": 0.6096187233924866, "learning_rate": 9.564116487174348e-06, "loss": 0.4305, "step": 1333 }, { "epoch": 0.6622538474267747, "grad_norm": 0.5431627035140991, "learning_rate": 9.56293595303847e-06, "loss": 0.4061, "step": 1334 }, { "epoch": 0.6627502895912626, "grad_norm": 0.6135295629501343, "learning_rate": 9.561753895462292e-06, "loss": 0.4403, "step": 1335 }, { "epoch": 0.6632467317557504, "grad_norm": 0.5764193534851074, "learning_rate": 9.560570314840469e-06, "loss": 0.4464, "step": 1336 }, { "epoch": 0.6637431739202383, "grad_norm": 0.6366569399833679, "learning_rate": 9.559385211568167e-06, "loss": 0.4698, "step": 1337 }, { "epoch": 0.6642396160847261, "grad_norm": 0.5993988513946533, "learning_rate": 9.558198586041062e-06, "loss": 0.4449, "step": 1338 }, { "epoch": 0.664736058249214, "grad_norm": 0.5432702302932739, "learning_rate": 9.557010438655332e-06, "loss": 0.4681, "step": 1339 }, { "epoch": 0.6652325004137019, "grad_norm": 0.5668163299560547, "learning_rate": 9.555820769807668e-06, "loss": 0.4673, "step": 1340 }, { "epoch": 0.6657289425781896, "grad_norm": 0.5853536128997803, "learning_rate": 9.554629579895272e-06, "loss": 0.4338, "step": 1341 }, { "epoch": 0.6662253847426775, "grad_norm": 0.5000286102294922, "learning_rate": 9.553436869315846e-06, "loss": 0.4542, "step": 1342 }, { "epoch": 0.6667218269071653, "grad_norm": 0.582520067691803, "learning_rate": 9.552242638467604e-06, "loss": 0.4443, "step": 1343 }, { "epoch": 0.6672182690716532, "grad_norm": 0.5484603047370911, "learning_rate": 9.55104688774927e-06, "loss": 0.4248, "step": 1344 }, { "epoch": 0.667714711236141, "grad_norm": 0.5816395878791809, "learning_rate": 9.54984961756007e-06, "loss": 0.4203, "step": 1345 }, { "epoch": 0.6682111534006289, "grad_norm": 0.478975385427475, "learning_rate": 9.548650828299742e-06, "loss": 0.4513, "step": 1346 }, { "epoch": 0.6687075955651167, "grad_norm": 0.5252837538719177, "learning_rate": 9.547450520368526e-06, "loss": 0.4494, "step": 1347 }, { "epoch": 0.6692040377296045, "grad_norm": 0.5393924117088318, "learning_rate": 9.546248694167175e-06, "loss": 0.4627, "step": 1348 }, { "epoch": 0.6697004798940923, "grad_norm": 0.5630596876144409, "learning_rate": 9.545045350096944e-06, "loss": 0.4525, "step": 1349 }, { "epoch": 0.6701969220585802, "grad_norm": 0.5713055729866028, "learning_rate": 9.5438404885596e-06, "loss": 0.4146, "step": 1350 }, { "epoch": 0.670693364223068, "grad_norm": 0.5112431049346924, "learning_rate": 9.54263410995741e-06, "loss": 0.4427, "step": 1351 }, { "epoch": 0.6711898063875559, "grad_norm": 0.5249531865119934, "learning_rate": 9.541426214693153e-06, "loss": 0.4264, "step": 1352 }, { "epoch": 0.6716862485520437, "grad_norm": 0.5685523748397827, "learning_rate": 9.540216803170113e-06, "loss": 0.4526, "step": 1353 }, { "epoch": 0.6721826907165315, "grad_norm": 0.5299743413925171, "learning_rate": 9.539005875792077e-06, "loss": 0.449, "step": 1354 }, { "epoch": 0.6726791328810193, "grad_norm": 0.5392031669616699, "learning_rate": 9.537793432963345e-06, "loss": 0.4563, "step": 1355 }, { "epoch": 0.6731755750455072, "grad_norm": 0.7439344525337219, "learning_rate": 9.536579475088714e-06, "loss": 0.4484, "step": 1356 }, { "epoch": 0.673672017209995, "grad_norm": 0.5113942623138428, "learning_rate": 9.535364002573495e-06, "loss": 0.4515, "step": 1357 }, { "epoch": 0.6741684593744829, "grad_norm": 0.5553276538848877, "learning_rate": 9.534147015823498e-06, "loss": 0.4157, "step": 1358 }, { "epoch": 0.6746649015389707, "grad_norm": 0.6232392191886902, "learning_rate": 9.532928515245046e-06, "loss": 0.4508, "step": 1359 }, { "epoch": 0.6751613437034586, "grad_norm": 0.547154426574707, "learning_rate": 9.531708501244958e-06, "loss": 0.4593, "step": 1360 }, { "epoch": 0.6756577858679463, "grad_norm": 0.5360016226768494, "learning_rate": 9.530486974230568e-06, "loss": 0.4537, "step": 1361 }, { "epoch": 0.6761542280324342, "grad_norm": 0.6055577397346497, "learning_rate": 9.52926393460971e-06, "loss": 0.4519, "step": 1362 }, { "epoch": 0.676650670196922, "grad_norm": 0.5525018572807312, "learning_rate": 9.528039382790722e-06, "loss": 0.4319, "step": 1363 }, { "epoch": 0.6771471123614099, "grad_norm": 0.5303156971931458, "learning_rate": 9.526813319182449e-06, "loss": 0.4503, "step": 1364 }, { "epoch": 0.6776435545258978, "grad_norm": 0.5124494433403015, "learning_rate": 9.525585744194243e-06, "loss": 0.4256, "step": 1365 }, { "epoch": 0.6781399966903856, "grad_norm": 0.6383893489837646, "learning_rate": 9.524356658235954e-06, "loss": 0.4637, "step": 1366 }, { "epoch": 0.6786364388548735, "grad_norm": 0.5125488638877869, "learning_rate": 9.52312606171794e-06, "loss": 0.4172, "step": 1367 }, { "epoch": 0.6791328810193612, "grad_norm": 0.6082428693771362, "learning_rate": 9.52189395505107e-06, "loss": 0.422, "step": 1368 }, { "epoch": 0.679629323183849, "grad_norm": 0.5299893617630005, "learning_rate": 9.520660338646702e-06, "loss": 0.4428, "step": 1369 }, { "epoch": 0.6801257653483369, "grad_norm": 0.5432389378547668, "learning_rate": 9.519425212916714e-06, "loss": 0.4783, "step": 1370 }, { "epoch": 0.6806222075128248, "grad_norm": 0.5132809281349182, "learning_rate": 9.51818857827348e-06, "loss": 0.4743, "step": 1371 }, { "epoch": 0.6811186496773126, "grad_norm": 0.5567704439163208, "learning_rate": 9.516950435129875e-06, "loss": 0.4441, "step": 1372 }, { "epoch": 0.6816150918418005, "grad_norm": 0.6049127578735352, "learning_rate": 9.515710783899284e-06, "loss": 0.4324, "step": 1373 }, { "epoch": 0.6821115340062883, "grad_norm": 0.521929144859314, "learning_rate": 9.514469624995593e-06, "loss": 0.4427, "step": 1374 }, { "epoch": 0.6826079761707761, "grad_norm": 0.5339908599853516, "learning_rate": 9.51322695883319e-06, "loss": 0.4114, "step": 1375 }, { "epoch": 0.6831044183352639, "grad_norm": 0.5728681087493896, "learning_rate": 9.51198278582697e-06, "loss": 0.4214, "step": 1376 }, { "epoch": 0.6836008604997518, "grad_norm": 0.5253012180328369, "learning_rate": 9.510737106392325e-06, "loss": 0.4275, "step": 1377 }, { "epoch": 0.6840973026642396, "grad_norm": 0.5466620922088623, "learning_rate": 9.509489920945155e-06, "loss": 0.4456, "step": 1378 }, { "epoch": 0.6845937448287275, "grad_norm": 0.6012613773345947, "learning_rate": 9.508241229901862e-06, "loss": 0.4412, "step": 1379 }, { "epoch": 0.6850901869932153, "grad_norm": 0.6182264685630798, "learning_rate": 9.50699103367935e-06, "loss": 0.4519, "step": 1380 }, { "epoch": 0.6855866291577031, "grad_norm": 0.5224515199661255, "learning_rate": 9.505739332695026e-06, "loss": 0.4575, "step": 1381 }, { "epoch": 0.6860830713221909, "grad_norm": 0.574854850769043, "learning_rate": 9.504486127366796e-06, "loss": 0.4411, "step": 1382 }, { "epoch": 0.6865795134866788, "grad_norm": 0.5909528136253357, "learning_rate": 9.503231418113073e-06, "loss": 0.4328, "step": 1383 }, { "epoch": 0.6870759556511666, "grad_norm": 0.5950061678886414, "learning_rate": 9.501975205352772e-06, "loss": 0.4432, "step": 1384 }, { "epoch": 0.6875723978156545, "grad_norm": 0.61399906873703, "learning_rate": 9.500717489505307e-06, "loss": 0.4466, "step": 1385 }, { "epoch": 0.6880688399801423, "grad_norm": 0.5697099566459656, "learning_rate": 9.499458270990593e-06, "loss": 0.4599, "step": 1386 }, { "epoch": 0.6885652821446302, "grad_norm": 0.6305244565010071, "learning_rate": 9.498197550229054e-06, "loss": 0.4527, "step": 1387 }, { "epoch": 0.6890617243091179, "grad_norm": 0.5557189583778381, "learning_rate": 9.496935327641605e-06, "loss": 0.4382, "step": 1388 }, { "epoch": 0.6895581664736058, "grad_norm": 0.5588181614875793, "learning_rate": 9.49567160364967e-06, "loss": 0.4469, "step": 1389 }, { "epoch": 0.6900546086380936, "grad_norm": 0.5517104268074036, "learning_rate": 9.494406378675173e-06, "loss": 0.4703, "step": 1390 }, { "epoch": 0.6905510508025815, "grad_norm": 0.5536298751831055, "learning_rate": 9.493139653140537e-06, "loss": 0.4387, "step": 1391 }, { "epoch": 0.6910474929670694, "grad_norm": 0.5622080564498901, "learning_rate": 9.491871427468687e-06, "loss": 0.4343, "step": 1392 }, { "epoch": 0.6915439351315572, "grad_norm": 0.5567677617073059, "learning_rate": 9.490601702083051e-06, "loss": 0.4266, "step": 1393 }, { "epoch": 0.6920403772960451, "grad_norm": 0.5907567739486694, "learning_rate": 9.489330477407554e-06, "loss": 0.5111, "step": 1394 }, { "epoch": 0.6925368194605328, "grad_norm": 0.5452378988265991, "learning_rate": 9.488057753866623e-06, "loss": 0.453, "step": 1395 }, { "epoch": 0.6930332616250207, "grad_norm": 0.5620105862617493, "learning_rate": 9.486783531885187e-06, "loss": 0.4186, "step": 1396 }, { "epoch": 0.6935297037895085, "grad_norm": 0.5404983758926392, "learning_rate": 9.485507811888673e-06, "loss": 0.4094, "step": 1397 }, { "epoch": 0.6940261459539964, "grad_norm": 0.5625578165054321, "learning_rate": 9.48423059430301e-06, "loss": 0.4283, "step": 1398 }, { "epoch": 0.6945225881184842, "grad_norm": 0.6118019819259644, "learning_rate": 9.482951879554628e-06, "loss": 0.4739, "step": 1399 }, { "epoch": 0.6950190302829721, "grad_norm": 0.5911295413970947, "learning_rate": 9.481671668070452e-06, "loss": 0.4556, "step": 1400 }, { "epoch": 0.6955154724474599, "grad_norm": 0.5447912216186523, "learning_rate": 9.480389960277911e-06, "loss": 0.4535, "step": 1401 }, { "epoch": 0.6960119146119477, "grad_norm": 0.5497490167617798, "learning_rate": 9.479106756604935e-06, "loss": 0.4791, "step": 1402 }, { "epoch": 0.6965083567764355, "grad_norm": 0.6257814168930054, "learning_rate": 9.477822057479945e-06, "loss": 0.4705, "step": 1403 }, { "epoch": 0.6970047989409234, "grad_norm": 0.5963369011878967, "learning_rate": 9.476535863331873e-06, "loss": 0.4582, "step": 1404 }, { "epoch": 0.6975012411054112, "grad_norm": 0.5791458487510681, "learning_rate": 9.47524817459014e-06, "loss": 0.4204, "step": 1405 }, { "epoch": 0.6979976832698991, "grad_norm": 0.4923294186592102, "learning_rate": 9.473958991684671e-06, "loss": 0.4017, "step": 1406 }, { "epoch": 0.6984941254343869, "grad_norm": 0.6571674942970276, "learning_rate": 9.472668315045893e-06, "loss": 0.412, "step": 1407 }, { "epoch": 0.6989905675988747, "grad_norm": 0.5297819972038269, "learning_rate": 9.471376145104723e-06, "loss": 0.4747, "step": 1408 }, { "epoch": 0.6994870097633625, "grad_norm": 0.6224269270896912, "learning_rate": 9.470082482292585e-06, "loss": 0.452, "step": 1409 }, { "epoch": 0.6999834519278504, "grad_norm": 0.49564531445503235, "learning_rate": 9.468787327041394e-06, "loss": 0.4078, "step": 1410 }, { "epoch": 0.7004798940923382, "grad_norm": 0.4884668290615082, "learning_rate": 9.467490679783571e-06, "loss": 0.4032, "step": 1411 }, { "epoch": 0.7009763362568261, "grad_norm": 0.6143863797187805, "learning_rate": 9.46619254095203e-06, "loss": 0.4095, "step": 1412 }, { "epoch": 0.701472778421314, "grad_norm": 0.601227343082428, "learning_rate": 9.464892910980184e-06, "loss": 0.4293, "step": 1413 }, { "epoch": 0.7019692205858018, "grad_norm": 0.5319937467575073, "learning_rate": 9.463591790301942e-06, "loss": 0.4278, "step": 1414 }, { "epoch": 0.7024656627502895, "grad_norm": 0.6343777179718018, "learning_rate": 9.462289179351716e-06, "loss": 0.431, "step": 1415 }, { "epoch": 0.7029621049147774, "grad_norm": 0.5992676019668579, "learning_rate": 9.460985078564414e-06, "loss": 0.4092, "step": 1416 }, { "epoch": 0.7034585470792653, "grad_norm": 0.5552161931991577, "learning_rate": 9.459679488375432e-06, "loss": 0.4202, "step": 1417 }, { "epoch": 0.7039549892437531, "grad_norm": 0.6074199676513672, "learning_rate": 9.45837240922068e-06, "loss": 0.4763, "step": 1418 }, { "epoch": 0.704451431408241, "grad_norm": 0.532774031162262, "learning_rate": 9.45706384153655e-06, "loss": 0.427, "step": 1419 }, { "epoch": 0.7049478735727288, "grad_norm": 0.5691224932670593, "learning_rate": 9.455753785759942e-06, "loss": 0.4437, "step": 1420 }, { "epoch": 0.7054443157372167, "grad_norm": 0.5561055541038513, "learning_rate": 9.454442242328246e-06, "loss": 0.4502, "step": 1421 }, { "epoch": 0.7059407579017044, "grad_norm": 0.5590347051620483, "learning_rate": 9.453129211679348e-06, "loss": 0.4491, "step": 1422 }, { "epoch": 0.7064372000661923, "grad_norm": 0.6085344552993774, "learning_rate": 9.451814694251636e-06, "loss": 0.4479, "step": 1423 }, { "epoch": 0.7069336422306801, "grad_norm": 0.544177234172821, "learning_rate": 9.450498690483993e-06, "loss": 0.4407, "step": 1424 }, { "epoch": 0.707430084395168, "grad_norm": 0.47989514470100403, "learning_rate": 9.449181200815793e-06, "loss": 0.451, "step": 1425 }, { "epoch": 0.7079265265596558, "grad_norm": 0.5444818735122681, "learning_rate": 9.447862225686912e-06, "loss": 0.4397, "step": 1426 }, { "epoch": 0.7084229687241437, "grad_norm": 0.6024978756904602, "learning_rate": 9.446541765537723e-06, "loss": 0.433, "step": 1427 }, { "epoch": 0.7089194108886315, "grad_norm": 0.6839745044708252, "learning_rate": 9.445219820809086e-06, "loss": 0.4256, "step": 1428 }, { "epoch": 0.7094158530531193, "grad_norm": 0.5598261952400208, "learning_rate": 9.443896391942365e-06, "loss": 0.4765, "step": 1429 }, { "epoch": 0.7099122952176071, "grad_norm": 0.5280299782752991, "learning_rate": 9.442571479379419e-06, "loss": 0.4163, "step": 1430 }, { "epoch": 0.710408737382095, "grad_norm": 0.7106832265853882, "learning_rate": 9.441245083562597e-06, "loss": 0.4343, "step": 1431 }, { "epoch": 0.7109051795465828, "grad_norm": 0.7199373841285706, "learning_rate": 9.439917204934748e-06, "loss": 0.4651, "step": 1432 }, { "epoch": 0.7114016217110707, "grad_norm": 0.5404292941093445, "learning_rate": 9.438587843939216e-06, "loss": 0.4479, "step": 1433 }, { "epoch": 0.7118980638755585, "grad_norm": 0.5436515808105469, "learning_rate": 9.437257001019835e-06, "loss": 0.4261, "step": 1434 }, { "epoch": 0.7123945060400463, "grad_norm": 0.6173946857452393, "learning_rate": 9.435924676620941e-06, "loss": 0.4624, "step": 1435 }, { "epoch": 0.7128909482045341, "grad_norm": 0.4887685477733612, "learning_rate": 9.43459087118736e-06, "loss": 0.4285, "step": 1436 }, { "epoch": 0.713387390369022, "grad_norm": 0.5495191812515259, "learning_rate": 9.43325558516441e-06, "loss": 0.4551, "step": 1437 }, { "epoch": 0.7138838325335098, "grad_norm": 0.5641023516654968, "learning_rate": 9.43191881899791e-06, "loss": 0.4458, "step": 1438 }, { "epoch": 0.7143802746979977, "grad_norm": 0.5989640355110168, "learning_rate": 9.430580573134169e-06, "loss": 0.4219, "step": 1439 }, { "epoch": 0.7148767168624856, "grad_norm": 0.5417524576187134, "learning_rate": 9.429240848019992e-06, "loss": 0.4373, "step": 1440 }, { "epoch": 0.7153731590269734, "grad_norm": 0.5612537264823914, "learning_rate": 9.427899644102676e-06, "loss": 0.4627, "step": 1441 }, { "epoch": 0.7158696011914611, "grad_norm": 0.5522523522377014, "learning_rate": 9.426556961830013e-06, "loss": 0.4425, "step": 1442 }, { "epoch": 0.716366043355949, "grad_norm": 0.5678358674049377, "learning_rate": 9.425212801650286e-06, "loss": 0.4355, "step": 1443 }, { "epoch": 0.7168624855204369, "grad_norm": 0.6351677179336548, "learning_rate": 9.423867164012276e-06, "loss": 0.4238, "step": 1444 }, { "epoch": 0.7173589276849247, "grad_norm": 0.5011627674102783, "learning_rate": 9.422520049365254e-06, "loss": 0.4276, "step": 1445 }, { "epoch": 0.7178553698494126, "grad_norm": 0.46632397174835205, "learning_rate": 9.421171458158986e-06, "loss": 0.4223, "step": 1446 }, { "epoch": 0.7183518120139004, "grad_norm": 0.6402161717414856, "learning_rate": 9.419821390843728e-06, "loss": 0.4308, "step": 1447 }, { "epoch": 0.7188482541783883, "grad_norm": 0.5804960131645203, "learning_rate": 9.41846984787023e-06, "loss": 0.3884, "step": 1448 }, { "epoch": 0.719344696342876, "grad_norm": 0.5963141322135925, "learning_rate": 9.41711682968974e-06, "loss": 0.4505, "step": 1449 }, { "epoch": 0.7198411385073639, "grad_norm": 0.5864262580871582, "learning_rate": 9.41576233675399e-06, "loss": 0.4699, "step": 1450 }, { "epoch": 0.7203375806718517, "grad_norm": 0.5355082154273987, "learning_rate": 9.414406369515208e-06, "loss": 0.4106, "step": 1451 }, { "epoch": 0.7208340228363396, "grad_norm": 0.5055246353149414, "learning_rate": 9.413048928426118e-06, "loss": 0.4346, "step": 1452 }, { "epoch": 0.7213304650008274, "grad_norm": 0.5422968864440918, "learning_rate": 9.411690013939932e-06, "loss": 0.4209, "step": 1453 }, { "epoch": 0.7218269071653153, "grad_norm": 0.5988758206367493, "learning_rate": 9.41032962651035e-06, "loss": 0.4565, "step": 1454 }, { "epoch": 0.7223233493298031, "grad_norm": 0.501494824886322, "learning_rate": 9.408967766591574e-06, "loss": 0.4456, "step": 1455 }, { "epoch": 0.7228197914942909, "grad_norm": 0.4979725480079651, "learning_rate": 9.40760443463829e-06, "loss": 0.4494, "step": 1456 }, { "epoch": 0.7233162336587787, "grad_norm": 0.49973613023757935, "learning_rate": 9.406239631105675e-06, "loss": 0.4609, "step": 1457 }, { "epoch": 0.7238126758232666, "grad_norm": 0.5644683837890625, "learning_rate": 9.404873356449406e-06, "loss": 0.4341, "step": 1458 }, { "epoch": 0.7243091179877544, "grad_norm": 0.5251160264015198, "learning_rate": 9.403505611125638e-06, "loss": 0.419, "step": 1459 }, { "epoch": 0.7248055601522423, "grad_norm": 0.5271303653717041, "learning_rate": 9.402136395591028e-06, "loss": 0.4434, "step": 1460 }, { "epoch": 0.7253020023167301, "grad_norm": 0.5572744607925415, "learning_rate": 9.40076571030272e-06, "loss": 0.3912, "step": 1461 }, { "epoch": 0.7257984444812179, "grad_norm": 0.5114668607711792, "learning_rate": 9.399393555718346e-06, "loss": 0.4612, "step": 1462 }, { "epoch": 0.7262948866457057, "grad_norm": 0.5315856337547302, "learning_rate": 9.398019932296033e-06, "loss": 0.4312, "step": 1463 }, { "epoch": 0.7267913288101936, "grad_norm": 0.5733614563941956, "learning_rate": 9.396644840494396e-06, "loss": 0.429, "step": 1464 }, { "epoch": 0.7272877709746814, "grad_norm": 0.5949275493621826, "learning_rate": 9.395268280772542e-06, "loss": 0.4433, "step": 1465 }, { "epoch": 0.7277842131391693, "grad_norm": 0.5500174164772034, "learning_rate": 9.393890253590064e-06, "loss": 0.4058, "step": 1466 }, { "epoch": 0.7282806553036572, "grad_norm": 0.6163674592971802, "learning_rate": 9.392510759407053e-06, "loss": 0.4392, "step": 1467 }, { "epoch": 0.728777097468145, "grad_norm": 0.5145508646965027, "learning_rate": 9.391129798684078e-06, "loss": 0.4484, "step": 1468 }, { "epoch": 0.7292735396326328, "grad_norm": 0.5447497963905334, "learning_rate": 9.389747371882207e-06, "loss": 0.4313, "step": 1469 }, { "epoch": 0.7297699817971206, "grad_norm": 0.5020238161087036, "learning_rate": 9.388363479462997e-06, "loss": 0.4083, "step": 1470 }, { "epoch": 0.7302664239616085, "grad_norm": 0.4987616240978241, "learning_rate": 9.38697812188849e-06, "loss": 0.4335, "step": 1471 }, { "epoch": 0.7307628661260963, "grad_norm": 0.5499151945114136, "learning_rate": 9.38559129962122e-06, "loss": 0.4404, "step": 1472 }, { "epoch": 0.7312593082905842, "grad_norm": 0.5357805490493774, "learning_rate": 9.384203013124209e-06, "loss": 0.4428, "step": 1473 }, { "epoch": 0.731755750455072, "grad_norm": 0.5687552094459534, "learning_rate": 9.382813262860968e-06, "loss": 0.4661, "step": 1474 }, { "epoch": 0.7322521926195599, "grad_norm": 0.5788300037384033, "learning_rate": 9.381422049295496e-06, "loss": 0.4249, "step": 1475 }, { "epoch": 0.7327486347840476, "grad_norm": 0.5166923403739929, "learning_rate": 9.380029372892282e-06, "loss": 0.4478, "step": 1476 }, { "epoch": 0.7332450769485355, "grad_norm": 0.4838884770870209, "learning_rate": 9.378635234116303e-06, "loss": 0.4804, "step": 1477 }, { "epoch": 0.7337415191130233, "grad_norm": 0.5336625576019287, "learning_rate": 9.377239633433026e-06, "loss": 0.427, "step": 1478 }, { "epoch": 0.7342379612775112, "grad_norm": 0.5679466724395752, "learning_rate": 9.3758425713084e-06, "loss": 0.4429, "step": 1479 }, { "epoch": 0.734734403441999, "grad_norm": 0.5302140712738037, "learning_rate": 9.374444048208868e-06, "loss": 0.3822, "step": 1480 }, { "epoch": 0.7352308456064869, "grad_norm": 0.5472661852836609, "learning_rate": 9.37304406460136e-06, "loss": 0.4577, "step": 1481 }, { "epoch": 0.7357272877709747, "grad_norm": 0.520281970500946, "learning_rate": 9.371642620953293e-06, "loss": 0.4252, "step": 1482 }, { "epoch": 0.7362237299354625, "grad_norm": 0.6001216173171997, "learning_rate": 9.370239717732567e-06, "loss": 0.4909, "step": 1483 }, { "epoch": 0.7367201720999503, "grad_norm": 0.60647052526474, "learning_rate": 9.368835355407577e-06, "loss": 0.4746, "step": 1484 }, { "epoch": 0.7372166142644382, "grad_norm": 0.5043294429779053, "learning_rate": 9.367429534447199e-06, "loss": 0.4182, "step": 1485 }, { "epoch": 0.737713056428926, "grad_norm": 0.5762001872062683, "learning_rate": 9.3660222553208e-06, "loss": 0.4849, "step": 1486 }, { "epoch": 0.7382094985934139, "grad_norm": 0.5673403143882751, "learning_rate": 9.364613518498233e-06, "loss": 0.4277, "step": 1487 }, { "epoch": 0.7387059407579017, "grad_norm": 0.5483721494674683, "learning_rate": 9.363203324449837e-06, "loss": 0.4177, "step": 1488 }, { "epoch": 0.7392023829223895, "grad_norm": 0.5513754487037659, "learning_rate": 9.361791673646434e-06, "loss": 0.4407, "step": 1489 }, { "epoch": 0.7396988250868773, "grad_norm": 0.5092954635620117, "learning_rate": 9.360378566559338e-06, "loss": 0.4269, "step": 1490 }, { "epoch": 0.7401952672513652, "grad_norm": 0.5422273874282837, "learning_rate": 9.358964003660347e-06, "loss": 0.471, "step": 1491 }, { "epoch": 0.740691709415853, "grad_norm": 0.5499087572097778, "learning_rate": 9.357547985421746e-06, "loss": 0.4073, "step": 1492 }, { "epoch": 0.7411881515803409, "grad_norm": 0.5604990720748901, "learning_rate": 9.356130512316306e-06, "loss": 0.4246, "step": 1493 }, { "epoch": 0.7416845937448288, "grad_norm": 0.5565416812896729, "learning_rate": 9.354711584817278e-06, "loss": 0.4191, "step": 1494 }, { "epoch": 0.7421810359093166, "grad_norm": 0.47673845291137695, "learning_rate": 9.353291203398409e-06, "loss": 0.3926, "step": 1495 }, { "epoch": 0.7426774780738044, "grad_norm": 0.5705013871192932, "learning_rate": 9.351869368533921e-06, "loss": 0.4252, "step": 1496 }, { "epoch": 0.7431739202382922, "grad_norm": 0.5457376837730408, "learning_rate": 9.350446080698528e-06, "loss": 0.4429, "step": 1497 }, { "epoch": 0.7436703624027801, "grad_norm": 0.5997578501701355, "learning_rate": 9.349021340367429e-06, "loss": 0.4446, "step": 1498 }, { "epoch": 0.7441668045672679, "grad_norm": 0.5166521668434143, "learning_rate": 9.347595148016304e-06, "loss": 0.4535, "step": 1499 }, { "epoch": 0.7446632467317558, "grad_norm": 0.6460052132606506, "learning_rate": 9.34616750412132e-06, "loss": 0.4523, "step": 1500 }, { "epoch": 0.7451596888962436, "grad_norm": 0.6248716711997986, "learning_rate": 9.344738409159126e-06, "loss": 0.4481, "step": 1501 }, { "epoch": 0.7456561310607315, "grad_norm": 0.5373780727386475, "learning_rate": 9.343307863606865e-06, "loss": 0.4341, "step": 1502 }, { "epoch": 0.7461525732252192, "grad_norm": 0.6093046069145203, "learning_rate": 9.34187586794215e-06, "loss": 0.4705, "step": 1503 }, { "epoch": 0.7466490153897071, "grad_norm": 0.5253204107284546, "learning_rate": 9.340442422643087e-06, "loss": 0.4649, "step": 1504 }, { "epoch": 0.7471454575541949, "grad_norm": 0.5135625600814819, "learning_rate": 9.33900752818827e-06, "loss": 0.4408, "step": 1505 }, { "epoch": 0.7476418997186828, "grad_norm": 0.6057254076004028, "learning_rate": 9.337571185056764e-06, "loss": 0.4372, "step": 1506 }, { "epoch": 0.7481383418831706, "grad_norm": 0.5051776170730591, "learning_rate": 9.336133393728128e-06, "loss": 0.4232, "step": 1507 }, { "epoch": 0.7486347840476585, "grad_norm": 0.5237805843353271, "learning_rate": 9.334694154682403e-06, "loss": 0.4578, "step": 1508 }, { "epoch": 0.7491312262121463, "grad_norm": 0.5281199216842651, "learning_rate": 9.33325346840011e-06, "loss": 0.4487, "step": 1509 }, { "epoch": 0.7496276683766341, "grad_norm": 0.5701234936714172, "learning_rate": 9.331811335362256e-06, "loss": 0.444, "step": 1510 }, { "epoch": 0.7501241105411219, "grad_norm": 0.5097557306289673, "learning_rate": 9.330367756050326e-06, "loss": 0.4388, "step": 1511 }, { "epoch": 0.7506205527056098, "grad_norm": 0.5308884382247925, "learning_rate": 9.328922730946297e-06, "loss": 0.4522, "step": 1512 }, { "epoch": 0.7511169948700976, "grad_norm": 0.587111234664917, "learning_rate": 9.327476260532623e-06, "loss": 0.4422, "step": 1513 }, { "epoch": 0.7516134370345855, "grad_norm": 0.5243600606918335, "learning_rate": 9.326028345292237e-06, "loss": 0.4274, "step": 1514 }, { "epoch": 0.7521098791990734, "grad_norm": 0.503616213798523, "learning_rate": 9.324578985708563e-06, "loss": 0.4311, "step": 1515 }, { "epoch": 0.7526063213635611, "grad_norm": 0.560653567314148, "learning_rate": 9.323128182265502e-06, "loss": 0.4586, "step": 1516 }, { "epoch": 0.753102763528049, "grad_norm": 0.6238027811050415, "learning_rate": 9.321675935447436e-06, "loss": 0.4397, "step": 1517 }, { "epoch": 0.7535992056925368, "grad_norm": 0.5202054381370544, "learning_rate": 9.320222245739233e-06, "loss": 0.488, "step": 1518 }, { "epoch": 0.7540956478570247, "grad_norm": 0.5677753686904907, "learning_rate": 9.318767113626237e-06, "loss": 0.437, "step": 1519 }, { "epoch": 0.7545920900215125, "grad_norm": 0.47325894236564636, "learning_rate": 9.317310539594282e-06, "loss": 0.4602, "step": 1520 }, { "epoch": 0.7550885321860004, "grad_norm": 0.5085488557815552, "learning_rate": 9.315852524129673e-06, "loss": 0.4359, "step": 1521 }, { "epoch": 0.7555849743504882, "grad_norm": 0.5548588037490845, "learning_rate": 9.314393067719208e-06, "loss": 0.4553, "step": 1522 }, { "epoch": 0.756081416514976, "grad_norm": 0.561051607131958, "learning_rate": 9.312932170850153e-06, "loss": 0.4649, "step": 1523 }, { "epoch": 0.7565778586794638, "grad_norm": 0.5337622761726379, "learning_rate": 9.311469834010267e-06, "loss": 0.4562, "step": 1524 }, { "epoch": 0.7570743008439517, "grad_norm": 0.5527084469795227, "learning_rate": 9.310006057687782e-06, "loss": 0.4054, "step": 1525 }, { "epoch": 0.7575707430084395, "grad_norm": 0.5620992183685303, "learning_rate": 9.308540842371415e-06, "loss": 0.4246, "step": 1526 }, { "epoch": 0.7580671851729274, "grad_norm": 0.492828905582428, "learning_rate": 9.30707418855036e-06, "loss": 0.4353, "step": 1527 }, { "epoch": 0.7585636273374152, "grad_norm": 0.5356407165527344, "learning_rate": 9.305606096714292e-06, "loss": 0.4208, "step": 1528 }, { "epoch": 0.7590600695019031, "grad_norm": 0.5134130120277405, "learning_rate": 9.304136567353371e-06, "loss": 0.4525, "step": 1529 }, { "epoch": 0.7595565116663908, "grad_norm": 0.5331559181213379, "learning_rate": 9.302665600958227e-06, "loss": 0.4572, "step": 1530 }, { "epoch": 0.7600529538308787, "grad_norm": 0.567231297492981, "learning_rate": 9.30119319801998e-06, "loss": 0.4272, "step": 1531 }, { "epoch": 0.7605493959953665, "grad_norm": 0.5165696144104004, "learning_rate": 9.299719359030224e-06, "loss": 0.4683, "step": 1532 }, { "epoch": 0.7610458381598544, "grad_norm": 0.5045085549354553, "learning_rate": 9.298244084481034e-06, "loss": 0.4526, "step": 1533 }, { "epoch": 0.7615422803243422, "grad_norm": 0.48476773500442505, "learning_rate": 9.296767374864963e-06, "loss": 0.4546, "step": 1534 }, { "epoch": 0.7620387224888301, "grad_norm": 0.48874950408935547, "learning_rate": 9.295289230675046e-06, "loss": 0.4351, "step": 1535 }, { "epoch": 0.7625351646533179, "grad_norm": 0.5410566926002502, "learning_rate": 9.293809652404795e-06, "loss": 0.4377, "step": 1536 }, { "epoch": 0.7630316068178057, "grad_norm": 0.5258738398551941, "learning_rate": 9.292328640548201e-06, "loss": 0.4437, "step": 1537 }, { "epoch": 0.7635280489822935, "grad_norm": 0.4979988634586334, "learning_rate": 9.290846195599732e-06, "loss": 0.4278, "step": 1538 }, { "epoch": 0.7640244911467814, "grad_norm": 0.5359347462654114, "learning_rate": 9.289362318054337e-06, "loss": 0.4565, "step": 1539 }, { "epoch": 0.7645209333112692, "grad_norm": 0.5632565021514893, "learning_rate": 9.28787700840744e-06, "loss": 0.4373, "step": 1540 }, { "epoch": 0.7650173754757571, "grad_norm": 0.5066002011299133, "learning_rate": 9.286390267154951e-06, "loss": 0.4352, "step": 1541 }, { "epoch": 0.765513817640245, "grad_norm": 0.575319766998291, "learning_rate": 9.284902094793248e-06, "loss": 0.4634, "step": 1542 }, { "epoch": 0.7660102598047327, "grad_norm": 0.5488958358764648, "learning_rate": 9.283412491819194e-06, "loss": 0.4458, "step": 1543 }, { "epoch": 0.7665067019692205, "grad_norm": 0.5566610097885132, "learning_rate": 9.281921458730126e-06, "loss": 0.4085, "step": 1544 }, { "epoch": 0.7670031441337084, "grad_norm": 0.5832508206367493, "learning_rate": 9.280428996023857e-06, "loss": 0.4801, "step": 1545 }, { "epoch": 0.7674995862981963, "grad_norm": 0.5083023905754089, "learning_rate": 9.278935104198682e-06, "loss": 0.4412, "step": 1546 }, { "epoch": 0.7679960284626841, "grad_norm": 0.5665803551673889, "learning_rate": 9.277439783753373e-06, "loss": 0.3946, "step": 1547 }, { "epoch": 0.768492470627172, "grad_norm": 0.6288432478904724, "learning_rate": 9.275943035187173e-06, "loss": 0.464, "step": 1548 }, { "epoch": 0.7689889127916598, "grad_norm": 0.5556529760360718, "learning_rate": 9.274444858999808e-06, "loss": 0.4314, "step": 1549 }, { "epoch": 0.7694853549561476, "grad_norm": 0.6026809811592102, "learning_rate": 9.272945255691476e-06, "loss": 0.4428, "step": 1550 }, { "epoch": 0.7699817971206354, "grad_norm": 0.6030560731887817, "learning_rate": 9.271444225762857e-06, "loss": 0.4639, "step": 1551 }, { "epoch": 0.7704782392851233, "grad_norm": 0.5519431829452515, "learning_rate": 9.269941769715102e-06, "loss": 0.4608, "step": 1552 }, { "epoch": 0.7709746814496111, "grad_norm": 0.6464009284973145, "learning_rate": 9.268437888049839e-06, "loss": 0.4334, "step": 1553 }, { "epoch": 0.771471123614099, "grad_norm": 0.5299685597419739, "learning_rate": 9.266932581269177e-06, "loss": 0.3824, "step": 1554 }, { "epoch": 0.7719675657785868, "grad_norm": 0.5132258534431458, "learning_rate": 9.265425849875696e-06, "loss": 0.4313, "step": 1555 }, { "epoch": 0.7724640079430747, "grad_norm": 0.5118081569671631, "learning_rate": 9.26391769437245e-06, "loss": 0.4334, "step": 1556 }, { "epoch": 0.7729604501075624, "grad_norm": 0.6036546230316162, "learning_rate": 9.262408115262971e-06, "loss": 0.4205, "step": 1557 }, { "epoch": 0.7734568922720503, "grad_norm": 0.5508906841278076, "learning_rate": 9.26089711305127e-06, "loss": 0.4182, "step": 1558 }, { "epoch": 0.7739533344365381, "grad_norm": 0.5402917265892029, "learning_rate": 9.259384688241828e-06, "loss": 0.4287, "step": 1559 }, { "epoch": 0.774449776601026, "grad_norm": 0.5624039173126221, "learning_rate": 9.257870841339601e-06, "loss": 0.4135, "step": 1560 }, { "epoch": 0.7749462187655138, "grad_norm": 0.6440000534057617, "learning_rate": 9.256355572850024e-06, "loss": 0.4135, "step": 1561 }, { "epoch": 0.7754426609300017, "grad_norm": 0.6305055022239685, "learning_rate": 9.254838883279002e-06, "loss": 0.3958, "step": 1562 }, { "epoch": 0.7759391030944895, "grad_norm": 0.59746915102005, "learning_rate": 9.253320773132917e-06, "loss": 0.4353, "step": 1563 }, { "epoch": 0.7764355452589773, "grad_norm": 0.5780338048934937, "learning_rate": 9.251801242918623e-06, "loss": 0.4439, "step": 1564 }, { "epoch": 0.7769319874234651, "grad_norm": 0.6020218133926392, "learning_rate": 9.250280293143455e-06, "loss": 0.438, "step": 1565 }, { "epoch": 0.777428429587953, "grad_norm": 0.5848191976547241, "learning_rate": 9.248757924315211e-06, "loss": 0.422, "step": 1566 }, { "epoch": 0.7779248717524408, "grad_norm": 0.48993244767189026, "learning_rate": 9.24723413694217e-06, "loss": 0.4036, "step": 1567 }, { "epoch": 0.7784213139169287, "grad_norm": 0.5309478044509888, "learning_rate": 9.245708931533087e-06, "loss": 0.4344, "step": 1568 }, { "epoch": 0.7789177560814166, "grad_norm": 0.6281633973121643, "learning_rate": 9.24418230859718e-06, "loss": 0.4497, "step": 1569 }, { "epoch": 0.7794141982459043, "grad_norm": 0.5675585269927979, "learning_rate": 9.242654268644153e-06, "loss": 0.439, "step": 1570 }, { "epoch": 0.7799106404103922, "grad_norm": 0.5997617840766907, "learning_rate": 9.241124812184176e-06, "loss": 0.4456, "step": 1571 }, { "epoch": 0.78040708257488, "grad_norm": 0.591719925403595, "learning_rate": 9.239593939727889e-06, "loss": 0.4617, "step": 1572 }, { "epoch": 0.7809035247393679, "grad_norm": 0.5426728129386902, "learning_rate": 9.238061651786414e-06, "loss": 0.431, "step": 1573 }, { "epoch": 0.7813999669038557, "grad_norm": 0.5264804363250732, "learning_rate": 9.236527948871335e-06, "loss": 0.4747, "step": 1574 }, { "epoch": 0.7818964090683436, "grad_norm": 0.6222091317176819, "learning_rate": 9.234992831494718e-06, "loss": 0.4374, "step": 1575 }, { "epoch": 0.7823928512328314, "grad_norm": 0.5049533247947693, "learning_rate": 9.233456300169093e-06, "loss": 0.4415, "step": 1576 }, { "epoch": 0.7828892933973192, "grad_norm": 0.5087575316429138, "learning_rate": 9.23191835540747e-06, "loss": 0.4217, "step": 1577 }, { "epoch": 0.783385735561807, "grad_norm": 0.5682765245437622, "learning_rate": 9.230378997723326e-06, "loss": 0.4675, "step": 1578 }, { "epoch": 0.7838821777262949, "grad_norm": 0.5627255439758301, "learning_rate": 9.228838227630609e-06, "loss": 0.4484, "step": 1579 }, { "epoch": 0.7843786198907827, "grad_norm": 0.5500094294548035, "learning_rate": 9.22729604564374e-06, "loss": 0.4243, "step": 1580 }, { "epoch": 0.7848750620552706, "grad_norm": 0.5067808032035828, "learning_rate": 9.225752452277617e-06, "loss": 0.4153, "step": 1581 }, { "epoch": 0.7853715042197584, "grad_norm": 0.6325167417526245, "learning_rate": 9.224207448047594e-06, "loss": 0.4285, "step": 1582 }, { "epoch": 0.7858679463842463, "grad_norm": 0.5216733813285828, "learning_rate": 9.222661033469517e-06, "loss": 0.4322, "step": 1583 }, { "epoch": 0.786364388548734, "grad_norm": 0.5267024636268616, "learning_rate": 9.221113209059684e-06, "loss": 0.4148, "step": 1584 }, { "epoch": 0.7868608307132219, "grad_norm": 0.561464250087738, "learning_rate": 9.219563975334875e-06, "loss": 0.4375, "step": 1585 }, { "epoch": 0.7873572728777097, "grad_norm": 0.5648907423019409, "learning_rate": 9.218013332812334e-06, "loss": 0.4552, "step": 1586 }, { "epoch": 0.7878537150421976, "grad_norm": 0.513913631439209, "learning_rate": 9.216461282009783e-06, "loss": 0.422, "step": 1587 }, { "epoch": 0.7883501572066854, "grad_norm": 0.5804303288459778, "learning_rate": 9.214907823445405e-06, "loss": 0.4204, "step": 1588 }, { "epoch": 0.7888465993711733, "grad_norm": 0.5746512413024902, "learning_rate": 9.213352957637862e-06, "loss": 0.444, "step": 1589 }, { "epoch": 0.7893430415356611, "grad_norm": 0.6075983643531799, "learning_rate": 9.211796685106275e-06, "loss": 0.4217, "step": 1590 }, { "epoch": 0.7898394837001489, "grad_norm": 0.5673282742500305, "learning_rate": 9.210239006370249e-06, "loss": 0.4329, "step": 1591 }, { "epoch": 0.7903359258646367, "grad_norm": 0.5772711038589478, "learning_rate": 9.208679921949845e-06, "loss": 0.4333, "step": 1592 }, { "epoch": 0.7908323680291246, "grad_norm": 0.5618643164634705, "learning_rate": 9.2071194323656e-06, "loss": 0.4695, "step": 1593 }, { "epoch": 0.7913288101936125, "grad_norm": 0.5830571055412292, "learning_rate": 9.205557538138522e-06, "loss": 0.439, "step": 1594 }, { "epoch": 0.7918252523581003, "grad_norm": 0.7346681952476501, "learning_rate": 9.203994239790081e-06, "loss": 0.4574, "step": 1595 }, { "epoch": 0.7923216945225882, "grad_norm": 0.6190969944000244, "learning_rate": 9.202429537842221e-06, "loss": 0.4234, "step": 1596 }, { "epoch": 0.7928181366870759, "grad_norm": 0.5416748523712158, "learning_rate": 9.200863432817355e-06, "loss": 0.4331, "step": 1597 }, { "epoch": 0.7933145788515638, "grad_norm": 0.537095844745636, "learning_rate": 9.199295925238362e-06, "loss": 0.4304, "step": 1598 }, { "epoch": 0.7938110210160516, "grad_norm": 0.5391631126403809, "learning_rate": 9.19772701562859e-06, "loss": 0.4073, "step": 1599 }, { "epoch": 0.7943074631805395, "grad_norm": 0.557813823223114, "learning_rate": 9.196156704511856e-06, "loss": 0.4056, "step": 1600 }, { "epoch": 0.7948039053450273, "grad_norm": 0.5373549461364746, "learning_rate": 9.194584992412442e-06, "loss": 0.4371, "step": 1601 }, { "epoch": 0.7953003475095152, "grad_norm": 0.5625040531158447, "learning_rate": 9.193011879855103e-06, "loss": 0.4454, "step": 1602 }, { "epoch": 0.795796789674003, "grad_norm": 0.5398711562156677, "learning_rate": 9.191437367365056e-06, "loss": 0.4354, "step": 1603 }, { "epoch": 0.7962932318384908, "grad_norm": 0.5407079458236694, "learning_rate": 9.18986145546799e-06, "loss": 0.4266, "step": 1604 }, { "epoch": 0.7967896740029786, "grad_norm": 0.5729132890701294, "learning_rate": 9.188284144690057e-06, "loss": 0.4139, "step": 1605 }, { "epoch": 0.7972861161674665, "grad_norm": 0.5204187631607056, "learning_rate": 9.18670543555788e-06, "loss": 0.4298, "step": 1606 }, { "epoch": 0.7977825583319543, "grad_norm": 0.6151060461997986, "learning_rate": 9.185125328598547e-06, "loss": 0.4564, "step": 1607 }, { "epoch": 0.7982790004964422, "grad_norm": 0.5229493379592896, "learning_rate": 9.183543824339612e-06, "loss": 0.4537, "step": 1608 }, { "epoch": 0.79877544266093, "grad_norm": 0.4937582314014435, "learning_rate": 9.181960923309094e-06, "loss": 0.4319, "step": 1609 }, { "epoch": 0.7992718848254179, "grad_norm": 0.6324421763420105, "learning_rate": 9.180376626035486e-06, "loss": 0.4791, "step": 1610 }, { "epoch": 0.7997683269899056, "grad_norm": 0.5774857997894287, "learning_rate": 9.178790933047739e-06, "loss": 0.4563, "step": 1611 }, { "epoch": 0.8002647691543935, "grad_norm": 0.5309554934501648, "learning_rate": 9.17720384487527e-06, "loss": 0.4574, "step": 1612 }, { "epoch": 0.8007612113188813, "grad_norm": 0.5274905562400818, "learning_rate": 9.175615362047969e-06, "loss": 0.442, "step": 1613 }, { "epoch": 0.8012576534833692, "grad_norm": 0.5394960641860962, "learning_rate": 9.174025485096188e-06, "loss": 0.4242, "step": 1614 }, { "epoch": 0.801754095647857, "grad_norm": 0.49949130415916443, "learning_rate": 9.172434214550739e-06, "loss": 0.4067, "step": 1615 }, { "epoch": 0.8022505378123449, "grad_norm": 0.6044440865516663, "learning_rate": 9.170841550942905e-06, "loss": 0.4162, "step": 1616 }, { "epoch": 0.8027469799768328, "grad_norm": 0.5172641277313232, "learning_rate": 9.169247494804436e-06, "loss": 0.4085, "step": 1617 }, { "epoch": 0.8032434221413205, "grad_norm": 0.7316107749938965, "learning_rate": 9.167652046667542e-06, "loss": 0.4275, "step": 1618 }, { "epoch": 0.8037398643058083, "grad_norm": 0.5519174933433533, "learning_rate": 9.166055207064899e-06, "loss": 0.4695, "step": 1619 }, { "epoch": 0.8042363064702962, "grad_norm": 0.5604727268218994, "learning_rate": 9.16445697652965e-06, "loss": 0.434, "step": 1620 }, { "epoch": 0.8047327486347841, "grad_norm": 0.6401044130325317, "learning_rate": 9.162857355595401e-06, "loss": 0.4165, "step": 1621 }, { "epoch": 0.8052291907992719, "grad_norm": 0.5021212100982666, "learning_rate": 9.161256344796221e-06, "loss": 0.4428, "step": 1622 }, { "epoch": 0.8057256329637598, "grad_norm": 0.6644954681396484, "learning_rate": 9.159653944666643e-06, "loss": 0.447, "step": 1623 }, { "epoch": 0.8062220751282475, "grad_norm": 0.5076836943626404, "learning_rate": 9.158050155741667e-06, "loss": 0.44, "step": 1624 }, { "epoch": 0.8067185172927354, "grad_norm": 0.5257400870323181, "learning_rate": 9.156444978556753e-06, "loss": 0.4331, "step": 1625 }, { "epoch": 0.8072149594572232, "grad_norm": 0.5866564512252808, "learning_rate": 9.154838413647828e-06, "loss": 0.4268, "step": 1626 }, { "epoch": 0.8077114016217111, "grad_norm": 0.6111658215522766, "learning_rate": 9.153230461551276e-06, "loss": 0.4518, "step": 1627 }, { "epoch": 0.8082078437861989, "grad_norm": 0.5225775241851807, "learning_rate": 9.151621122803954e-06, "loss": 0.4469, "step": 1628 }, { "epoch": 0.8087042859506868, "grad_norm": 0.5336846113204956, "learning_rate": 9.150010397943175e-06, "loss": 0.4029, "step": 1629 }, { "epoch": 0.8092007281151746, "grad_norm": 0.5863685011863708, "learning_rate": 9.148398287506713e-06, "loss": 0.367, "step": 1630 }, { "epoch": 0.8096971702796624, "grad_norm": 0.5972265601158142, "learning_rate": 9.14678479203281e-06, "loss": 0.459, "step": 1631 }, { "epoch": 0.8101936124441502, "grad_norm": 0.5554679036140442, "learning_rate": 9.145169912060168e-06, "loss": 0.4503, "step": 1632 }, { "epoch": 0.8106900546086381, "grad_norm": 0.574024498462677, "learning_rate": 9.143553648127954e-06, "loss": 0.4608, "step": 1633 }, { "epoch": 0.8111864967731259, "grad_norm": 0.6086248159408569, "learning_rate": 9.14193600077579e-06, "loss": 0.4425, "step": 1634 }, { "epoch": 0.8116829389376138, "grad_norm": 0.49857181310653687, "learning_rate": 9.140316970543768e-06, "loss": 0.4318, "step": 1635 }, { "epoch": 0.8121793811021016, "grad_norm": 0.5506787300109863, "learning_rate": 9.138696557972437e-06, "loss": 0.4327, "step": 1636 }, { "epoch": 0.8126758232665895, "grad_norm": 0.6392481327056885, "learning_rate": 9.137074763602809e-06, "loss": 0.4824, "step": 1637 }, { "epoch": 0.8131722654310772, "grad_norm": 0.6029486060142517, "learning_rate": 9.135451587976357e-06, "loss": 0.4361, "step": 1638 }, { "epoch": 0.8136687075955651, "grad_norm": 0.5503445863723755, "learning_rate": 9.133827031635015e-06, "loss": 0.4279, "step": 1639 }, { "epoch": 0.8141651497600529, "grad_norm": 0.5959994196891785, "learning_rate": 9.132201095121178e-06, "loss": 0.4237, "step": 1640 }, { "epoch": 0.8146615919245408, "grad_norm": 0.5765507817268372, "learning_rate": 9.130573778977702e-06, "loss": 0.4319, "step": 1641 }, { "epoch": 0.8151580340890286, "grad_norm": 0.5816723704338074, "learning_rate": 9.128945083747906e-06, "loss": 0.4179, "step": 1642 }, { "epoch": 0.8156544762535165, "grad_norm": 0.5627021193504333, "learning_rate": 9.127315009975564e-06, "loss": 0.4248, "step": 1643 }, { "epoch": 0.8161509184180044, "grad_norm": 0.589637041091919, "learning_rate": 9.125683558204914e-06, "loss": 0.4448, "step": 1644 }, { "epoch": 0.8166473605824921, "grad_norm": 0.5704853534698486, "learning_rate": 9.124050728980652e-06, "loss": 0.4685, "step": 1645 }, { "epoch": 0.81714380274698, "grad_norm": 0.5607720613479614, "learning_rate": 9.122416522847939e-06, "loss": 0.4416, "step": 1646 }, { "epoch": 0.8176402449114678, "grad_norm": 0.517994225025177, "learning_rate": 9.12078094035239e-06, "loss": 0.4342, "step": 1647 }, { "epoch": 0.8181366870759557, "grad_norm": 0.558856189250946, "learning_rate": 9.119143982040082e-06, "loss": 0.4332, "step": 1648 }, { "epoch": 0.8186331292404435, "grad_norm": 0.5226592421531677, "learning_rate": 9.117505648457549e-06, "loss": 0.4543, "step": 1649 }, { "epoch": 0.8191295714049314, "grad_norm": 0.588062047958374, "learning_rate": 9.115865940151788e-06, "loss": 0.4419, "step": 1650 }, { "epoch": 0.8196260135694191, "grad_norm": 0.5603100657463074, "learning_rate": 9.114224857670255e-06, "loss": 0.3993, "step": 1651 }, { "epoch": 0.820122455733907, "grad_norm": 0.512185275554657, "learning_rate": 9.112582401560858e-06, "loss": 0.4512, "step": 1652 }, { "epoch": 0.8206188978983948, "grad_norm": 0.5559622645378113, "learning_rate": 9.110938572371972e-06, "loss": 0.4419, "step": 1653 }, { "epoch": 0.8211153400628827, "grad_norm": 0.5673766732215881, "learning_rate": 9.109293370652426e-06, "loss": 0.4439, "step": 1654 }, { "epoch": 0.8216117822273705, "grad_norm": 0.5307457447052002, "learning_rate": 9.107646796951507e-06, "loss": 0.4472, "step": 1655 }, { "epoch": 0.8221082243918584, "grad_norm": 0.5498424172401428, "learning_rate": 9.105998851818963e-06, "loss": 0.4485, "step": 1656 }, { "epoch": 0.8226046665563462, "grad_norm": 0.5538991689682007, "learning_rate": 9.104349535804996e-06, "loss": 0.4243, "step": 1657 }, { "epoch": 0.823101108720834, "grad_norm": 0.48336324095726013, "learning_rate": 9.102698849460269e-06, "loss": 0.4616, "step": 1658 }, { "epoch": 0.8235975508853218, "grad_norm": 0.5233404040336609, "learning_rate": 9.101046793335904e-06, "loss": 0.4545, "step": 1659 }, { "epoch": 0.8240939930498097, "grad_norm": 0.5155025720596313, "learning_rate": 9.099393367983473e-06, "loss": 0.4367, "step": 1660 }, { "epoch": 0.8245904352142975, "grad_norm": 0.4975949227809906, "learning_rate": 9.09773857395501e-06, "loss": 0.4067, "step": 1661 }, { "epoch": 0.8250868773787854, "grad_norm": 0.5854753851890564, "learning_rate": 9.09608241180301e-06, "loss": 0.4702, "step": 1662 }, { "epoch": 0.8255833195432732, "grad_norm": 0.45376160740852356, "learning_rate": 9.094424882080419e-06, "loss": 0.4308, "step": 1663 }, { "epoch": 0.8260797617077611, "grad_norm": 0.47028806805610657, "learning_rate": 9.092765985340639e-06, "loss": 0.4426, "step": 1664 }, { "epoch": 0.8265762038722488, "grad_norm": 0.5525215268135071, "learning_rate": 9.09110572213753e-06, "loss": 0.4253, "step": 1665 }, { "epoch": 0.8270726460367367, "grad_norm": 0.49917829036712646, "learning_rate": 9.089444093025412e-06, "loss": 0.4326, "step": 1666 }, { "epoch": 0.8275690882012245, "grad_norm": 0.5036566853523254, "learning_rate": 9.087781098559056e-06, "loss": 0.4536, "step": 1667 }, { "epoch": 0.8280655303657124, "grad_norm": 0.5644833445549011, "learning_rate": 9.086116739293692e-06, "loss": 0.4479, "step": 1668 }, { "epoch": 0.8285619725302003, "grad_norm": 0.5556514859199524, "learning_rate": 9.084451015785001e-06, "loss": 0.443, "step": 1669 }, { "epoch": 0.8290584146946881, "grad_norm": 0.4909750521183014, "learning_rate": 9.082783928589127e-06, "loss": 0.3893, "step": 1670 }, { "epoch": 0.829554856859176, "grad_norm": 0.5294464230537415, "learning_rate": 9.081115478262664e-06, "loss": 0.4252, "step": 1671 }, { "epoch": 0.8300512990236637, "grad_norm": 0.45590072870254517, "learning_rate": 9.079445665362659e-06, "loss": 0.4594, "step": 1672 }, { "epoch": 0.8305477411881516, "grad_norm": 0.5185277462005615, "learning_rate": 9.077774490446619e-06, "loss": 0.4381, "step": 1673 }, { "epoch": 0.8310441833526394, "grad_norm": 0.530820906162262, "learning_rate": 9.076101954072506e-06, "loss": 0.4139, "step": 1674 }, { "epoch": 0.8315406255171273, "grad_norm": 0.5152429342269897, "learning_rate": 9.074428056798733e-06, "loss": 0.4215, "step": 1675 }, { "epoch": 0.8320370676816151, "grad_norm": 0.5308616757392883, "learning_rate": 9.072752799184167e-06, "loss": 0.4322, "step": 1676 }, { "epoch": 0.832533509846103, "grad_norm": 0.5033403635025024, "learning_rate": 9.071076181788134e-06, "loss": 0.4377, "step": 1677 }, { "epoch": 0.8330299520105907, "grad_norm": 0.5242775678634644, "learning_rate": 9.06939820517041e-06, "loss": 0.401, "step": 1678 }, { "epoch": 0.8335263941750786, "grad_norm": 0.6543769836425781, "learning_rate": 9.067718869891226e-06, "loss": 0.4484, "step": 1679 }, { "epoch": 0.8340228363395664, "grad_norm": 0.6296330690383911, "learning_rate": 9.066038176511265e-06, "loss": 0.4544, "step": 1680 }, { "epoch": 0.8345192785040543, "grad_norm": 0.500861644744873, "learning_rate": 9.064356125591664e-06, "loss": 0.4133, "step": 1681 }, { "epoch": 0.8350157206685421, "grad_norm": 0.6682038307189941, "learning_rate": 9.062672717694019e-06, "loss": 0.445, "step": 1682 }, { "epoch": 0.83551216283303, "grad_norm": 0.5524898171424866, "learning_rate": 9.06098795338037e-06, "loss": 0.4089, "step": 1683 }, { "epoch": 0.8360086049975178, "grad_norm": 0.5515351891517639, "learning_rate": 9.059301833213213e-06, "loss": 0.4255, "step": 1684 }, { "epoch": 0.8365050471620056, "grad_norm": 0.5101115107536316, "learning_rate": 9.0576143577555e-06, "loss": 0.4379, "step": 1685 }, { "epoch": 0.8370014893264934, "grad_norm": 0.5463240742683411, "learning_rate": 9.055925527570633e-06, "loss": 0.4376, "step": 1686 }, { "epoch": 0.8374979314909813, "grad_norm": 0.5432209968566895, "learning_rate": 9.054235343222466e-06, "loss": 0.4292, "step": 1687 }, { "epoch": 0.8379943736554691, "grad_norm": 0.5974810123443604, "learning_rate": 9.052543805275307e-06, "loss": 0.4174, "step": 1688 }, { "epoch": 0.838490815819957, "grad_norm": 0.5347517132759094, "learning_rate": 9.050850914293914e-06, "loss": 0.4235, "step": 1689 }, { "epoch": 0.8389872579844448, "grad_norm": 0.48126131296157837, "learning_rate": 9.049156670843495e-06, "loss": 0.435, "step": 1690 }, { "epoch": 0.8394837001489327, "grad_norm": 0.6248517036437988, "learning_rate": 9.047461075489714e-06, "loss": 0.4327, "step": 1691 }, { "epoch": 0.8399801423134204, "grad_norm": 0.4766693115234375, "learning_rate": 9.045764128798684e-06, "loss": 0.4187, "step": 1692 }, { "epoch": 0.8404765844779083, "grad_norm": 0.4784223735332489, "learning_rate": 9.04406583133697e-06, "loss": 0.4381, "step": 1693 }, { "epoch": 0.8409730266423961, "grad_norm": 0.6314343214035034, "learning_rate": 9.042366183671585e-06, "loss": 0.4313, "step": 1694 }, { "epoch": 0.841469468806884, "grad_norm": 0.5457367897033691, "learning_rate": 9.040665186369999e-06, "loss": 0.4235, "step": 1695 }, { "epoch": 0.8419659109713719, "grad_norm": 0.5036778450012207, "learning_rate": 9.038962840000125e-06, "loss": 0.4249, "step": 1696 }, { "epoch": 0.8424623531358597, "grad_norm": 0.5816625952720642, "learning_rate": 9.03725914513033e-06, "loss": 0.4553, "step": 1697 }, { "epoch": 0.8429587953003476, "grad_norm": 0.5800036787986755, "learning_rate": 9.035554102329435e-06, "loss": 0.4451, "step": 1698 }, { "epoch": 0.8434552374648353, "grad_norm": 0.48050376772880554, "learning_rate": 9.033847712166706e-06, "loss": 0.402, "step": 1699 }, { "epoch": 0.8439516796293232, "grad_norm": 0.5135194659233093, "learning_rate": 9.03213997521186e-06, "loss": 0.4308, "step": 1700 }, { "epoch": 0.844448121793811, "grad_norm": 0.6084844470024109, "learning_rate": 9.030430892035062e-06, "loss": 0.4127, "step": 1701 }, { "epoch": 0.8449445639582989, "grad_norm": 0.7048614621162415, "learning_rate": 9.02872046320693e-06, "loss": 0.425, "step": 1702 }, { "epoch": 0.8454410061227867, "grad_norm": 0.538898229598999, "learning_rate": 9.027008689298531e-06, "loss": 0.4494, "step": 1703 }, { "epoch": 0.8459374482872746, "grad_norm": 0.48955434560775757, "learning_rate": 9.025295570881378e-06, "loss": 0.4182, "step": 1704 }, { "epoch": 0.8464338904517623, "grad_norm": 0.7288880944252014, "learning_rate": 9.023581108527437e-06, "loss": 0.4418, "step": 1705 }, { "epoch": 0.8469303326162502, "grad_norm": 0.49747708439826965, "learning_rate": 9.021865302809117e-06, "loss": 0.3928, "step": 1706 }, { "epoch": 0.847426774780738, "grad_norm": 0.5740458965301514, "learning_rate": 9.020148154299282e-06, "loss": 0.4553, "step": 1707 }, { "epoch": 0.8479232169452259, "grad_norm": 0.5902491211891174, "learning_rate": 9.01842966357124e-06, "loss": 0.434, "step": 1708 }, { "epoch": 0.8484196591097137, "grad_norm": 0.5555624961853027, "learning_rate": 9.016709831198746e-06, "loss": 0.3984, "step": 1709 }, { "epoch": 0.8489161012742016, "grad_norm": 0.472883403301239, "learning_rate": 9.01498865775601e-06, "loss": 0.4211, "step": 1710 }, { "epoch": 0.8494125434386894, "grad_norm": 0.5465968251228333, "learning_rate": 9.013266143817681e-06, "loss": 0.4577, "step": 1711 }, { "epoch": 0.8499089856031772, "grad_norm": 0.5829252600669861, "learning_rate": 9.011542289958861e-06, "loss": 0.4158, "step": 1712 }, { "epoch": 0.850405427767665, "grad_norm": 0.5676199197769165, "learning_rate": 9.009817096755098e-06, "loss": 0.4326, "step": 1713 }, { "epoch": 0.8509018699321529, "grad_norm": 0.5030863881111145, "learning_rate": 9.008090564782388e-06, "loss": 0.4242, "step": 1714 }, { "epoch": 0.8513983120966407, "grad_norm": 0.4817841351032257, "learning_rate": 9.006362694617173e-06, "loss": 0.4065, "step": 1715 }, { "epoch": 0.8518947542611286, "grad_norm": 0.5152532458305359, "learning_rate": 9.004633486836339e-06, "loss": 0.4261, "step": 1716 }, { "epoch": 0.8523911964256164, "grad_norm": 0.5304025411605835, "learning_rate": 9.002902942017225e-06, "loss": 0.3875, "step": 1717 }, { "epoch": 0.8528876385901043, "grad_norm": 0.6320633888244629, "learning_rate": 9.00117106073761e-06, "loss": 0.4664, "step": 1718 }, { "epoch": 0.853384080754592, "grad_norm": 0.6097387075424194, "learning_rate": 8.999437843575727e-06, "loss": 0.4867, "step": 1719 }, { "epoch": 0.8538805229190799, "grad_norm": 0.4960586130619049, "learning_rate": 8.997703291110243e-06, "loss": 0.4513, "step": 1720 }, { "epoch": 0.8543769650835678, "grad_norm": 0.42599567770957947, "learning_rate": 8.995967403920283e-06, "loss": 0.3955, "step": 1721 }, { "epoch": 0.8548734072480556, "grad_norm": 0.5902724862098694, "learning_rate": 8.994230182585412e-06, "loss": 0.4245, "step": 1722 }, { "epoch": 0.8553698494125435, "grad_norm": 0.5553848743438721, "learning_rate": 8.99249162768564e-06, "loss": 0.4048, "step": 1723 }, { "epoch": 0.8558662915770313, "grad_norm": 0.4749779999256134, "learning_rate": 8.990751739801424e-06, "loss": 0.4306, "step": 1724 }, { "epoch": 0.8563627337415192, "grad_norm": 0.53037029504776, "learning_rate": 8.989010519513664e-06, "loss": 0.4668, "step": 1725 }, { "epoch": 0.8568591759060069, "grad_norm": 0.5485416650772095, "learning_rate": 8.987267967403706e-06, "loss": 0.4258, "step": 1726 }, { "epoch": 0.8573556180704948, "grad_norm": 0.5324862003326416, "learning_rate": 8.985524084053342e-06, "loss": 0.4812, "step": 1727 }, { "epoch": 0.8578520602349826, "grad_norm": 0.5852982401847839, "learning_rate": 8.983778870044806e-06, "loss": 0.4273, "step": 1728 }, { "epoch": 0.8583485023994705, "grad_norm": 0.553841769695282, "learning_rate": 8.982032325960781e-06, "loss": 0.4451, "step": 1729 }, { "epoch": 0.8588449445639583, "grad_norm": 0.5063709616661072, "learning_rate": 8.980284452384387e-06, "loss": 0.4139, "step": 1730 }, { "epoch": 0.8593413867284462, "grad_norm": 0.5242361426353455, "learning_rate": 8.978535249899191e-06, "loss": 0.4302, "step": 1731 }, { "epoch": 0.8598378288929339, "grad_norm": 0.6199545860290527, "learning_rate": 8.976784719089206e-06, "loss": 0.4762, "step": 1732 }, { "epoch": 0.8603342710574218, "grad_norm": 0.6064724922180176, "learning_rate": 8.975032860538888e-06, "loss": 0.4666, "step": 1733 }, { "epoch": 0.8608307132219096, "grad_norm": 0.5021710991859436, "learning_rate": 8.973279674833133e-06, "loss": 0.4433, "step": 1734 }, { "epoch": 0.8613271553863975, "grad_norm": 0.5995794534683228, "learning_rate": 8.971525162557282e-06, "loss": 0.4694, "step": 1735 }, { "epoch": 0.8618235975508853, "grad_norm": 0.5587152242660522, "learning_rate": 8.969769324297118e-06, "loss": 0.4402, "step": 1736 }, { "epoch": 0.8623200397153732, "grad_norm": 0.512983500957489, "learning_rate": 8.96801216063887e-06, "loss": 0.4329, "step": 1737 }, { "epoch": 0.862816481879861, "grad_norm": 0.5441102981567383, "learning_rate": 8.966253672169206e-06, "loss": 0.4299, "step": 1738 }, { "epoch": 0.8633129240443488, "grad_norm": 0.6008766293525696, "learning_rate": 8.964493859475239e-06, "loss": 0.4283, "step": 1739 }, { "epoch": 0.8638093662088366, "grad_norm": 0.5494847893714905, "learning_rate": 8.962732723144518e-06, "loss": 0.4369, "step": 1740 }, { "epoch": 0.8643058083733245, "grad_norm": 0.5402365326881409, "learning_rate": 8.960970263765044e-06, "loss": 0.4548, "step": 1741 }, { "epoch": 0.8648022505378123, "grad_norm": 0.5482388138771057, "learning_rate": 8.959206481925252e-06, "loss": 0.3968, "step": 1742 }, { "epoch": 0.8652986927023002, "grad_norm": 0.551554799079895, "learning_rate": 8.957441378214021e-06, "loss": 0.4181, "step": 1743 }, { "epoch": 0.865795134866788, "grad_norm": 0.580487072467804, "learning_rate": 8.95567495322067e-06, "loss": 0.4287, "step": 1744 }, { "epoch": 0.8662915770312759, "grad_norm": 0.5487938523292542, "learning_rate": 8.953907207534964e-06, "loss": 0.4464, "step": 1745 }, { "epoch": 0.8667880191957636, "grad_norm": 0.6433790922164917, "learning_rate": 8.9521381417471e-06, "loss": 0.4161, "step": 1746 }, { "epoch": 0.8672844613602515, "grad_norm": 0.5853144526481628, "learning_rate": 8.950367756447727e-06, "loss": 0.4013, "step": 1747 }, { "epoch": 0.8677809035247394, "grad_norm": 0.601159393787384, "learning_rate": 8.948596052227921e-06, "loss": 0.4352, "step": 1748 }, { "epoch": 0.8682773456892272, "grad_norm": 0.5605200529098511, "learning_rate": 8.946823029679213e-06, "loss": 0.4343, "step": 1749 }, { "epoch": 0.8687737878537151, "grad_norm": 0.5800423622131348, "learning_rate": 8.945048689393563e-06, "loss": 0.4539, "step": 1750 }, { "epoch": 0.8692702300182029, "grad_norm": 0.5459901094436646, "learning_rate": 8.943273031963375e-06, "loss": 0.4927, "step": 1751 }, { "epoch": 0.8697666721826908, "grad_norm": 0.5402246713638306, "learning_rate": 8.941496057981495e-06, "loss": 0.4602, "step": 1752 }, { "epoch": 0.8702631143471785, "grad_norm": 0.5166112184524536, "learning_rate": 8.939717768041206e-06, "loss": 0.4051, "step": 1753 }, { "epoch": 0.8707595565116664, "grad_norm": 0.5454505085945129, "learning_rate": 8.937938162736229e-06, "loss": 0.4369, "step": 1754 }, { "epoch": 0.8712559986761542, "grad_norm": 0.5163682699203491, "learning_rate": 8.936157242660726e-06, "loss": 0.4471, "step": 1755 }, { "epoch": 0.8717524408406421, "grad_norm": 0.46487587690353394, "learning_rate": 8.9343750084093e-06, "loss": 0.441, "step": 1756 }, { "epoch": 0.8722488830051299, "grad_norm": 0.5229008197784424, "learning_rate": 8.932591460576988e-06, "loss": 0.4533, "step": 1757 }, { "epoch": 0.8727453251696178, "grad_norm": 0.5630226135253906, "learning_rate": 8.93080659975927e-06, "loss": 0.4169, "step": 1758 }, { "epoch": 0.8732417673341055, "grad_norm": 0.5616797208786011, "learning_rate": 8.92902042655206e-06, "loss": 0.4317, "step": 1759 }, { "epoch": 0.8737382094985934, "grad_norm": 0.49621760845184326, "learning_rate": 8.927232941551716e-06, "loss": 0.4121, "step": 1760 }, { "epoch": 0.8742346516630812, "grad_norm": 0.5726821422576904, "learning_rate": 8.92544414535503e-06, "loss": 0.4515, "step": 1761 }, { "epoch": 0.8747310938275691, "grad_norm": 0.6012055277824402, "learning_rate": 8.92365403855923e-06, "loss": 0.422, "step": 1762 }, { "epoch": 0.8752275359920569, "grad_norm": 0.4987521171569824, "learning_rate": 8.921862621761985e-06, "loss": 0.4252, "step": 1763 }, { "epoch": 0.8757239781565448, "grad_norm": 0.5278068780899048, "learning_rate": 8.920069895561403e-06, "loss": 0.4533, "step": 1764 }, { "epoch": 0.8762204203210326, "grad_norm": 0.5759723782539368, "learning_rate": 8.918275860556022e-06, "loss": 0.4409, "step": 1765 }, { "epoch": 0.8767168624855204, "grad_norm": 0.5995286703109741, "learning_rate": 8.916480517344826e-06, "loss": 0.4219, "step": 1766 }, { "epoch": 0.8772133046500082, "grad_norm": 0.48819640278816223, "learning_rate": 8.914683866527227e-06, "loss": 0.4254, "step": 1767 }, { "epoch": 0.8777097468144961, "grad_norm": 0.6572971343994141, "learning_rate": 8.912885908703083e-06, "loss": 0.4418, "step": 1768 }, { "epoch": 0.878206188978984, "grad_norm": 0.581867516040802, "learning_rate": 8.911086644472679e-06, "loss": 0.4372, "step": 1769 }, { "epoch": 0.8787026311434718, "grad_norm": 0.6188267469406128, "learning_rate": 8.909286074436742e-06, "loss": 0.4568, "step": 1770 }, { "epoch": 0.8791990733079597, "grad_norm": 0.5480692982673645, "learning_rate": 8.907484199196432e-06, "loss": 0.4319, "step": 1771 }, { "epoch": 0.8796955154724475, "grad_norm": 0.528197169303894, "learning_rate": 8.905681019353349e-06, "loss": 0.3905, "step": 1772 }, { "epoch": 0.8801919576369353, "grad_norm": 0.5527967810630798, "learning_rate": 8.903876535509524e-06, "loss": 0.4146, "step": 1773 }, { "epoch": 0.8806883998014231, "grad_norm": 0.5546196103096008, "learning_rate": 8.902070748267425e-06, "loss": 0.4231, "step": 1774 }, { "epoch": 0.881184841965911, "grad_norm": 0.5447160005569458, "learning_rate": 8.900263658229954e-06, "loss": 0.4058, "step": 1775 }, { "epoch": 0.8816812841303988, "grad_norm": 0.5791531205177307, "learning_rate": 8.898455266000455e-06, "loss": 0.444, "step": 1776 }, { "epoch": 0.8821777262948867, "grad_norm": 0.6416099667549133, "learning_rate": 8.896645572182694e-06, "loss": 0.4578, "step": 1777 }, { "epoch": 0.8826741684593745, "grad_norm": 0.5542735457420349, "learning_rate": 8.894834577380882e-06, "loss": 0.4473, "step": 1778 }, { "epoch": 0.8831706106238624, "grad_norm": 0.6295015811920166, "learning_rate": 8.89302228219966e-06, "loss": 0.4358, "step": 1779 }, { "epoch": 0.8836670527883501, "grad_norm": 0.5795145630836487, "learning_rate": 8.891208687244104e-06, "loss": 0.4325, "step": 1780 }, { "epoch": 0.884163494952838, "grad_norm": 0.5134644508361816, "learning_rate": 8.889393793119725e-06, "loss": 0.4206, "step": 1781 }, { "epoch": 0.8846599371173258, "grad_norm": 0.5982595086097717, "learning_rate": 8.887577600432466e-06, "loss": 0.4417, "step": 1782 }, { "epoch": 0.8851563792818137, "grad_norm": 0.5896233320236206, "learning_rate": 8.885760109788705e-06, "loss": 0.4379, "step": 1783 }, { "epoch": 0.8856528214463015, "grad_norm": 0.6269838213920593, "learning_rate": 8.883941321795254e-06, "loss": 0.4309, "step": 1784 }, { "epoch": 0.8861492636107894, "grad_norm": 0.5304679274559021, "learning_rate": 8.882121237059353e-06, "loss": 0.391, "step": 1785 }, { "epoch": 0.8866457057752771, "grad_norm": 0.5302784442901611, "learning_rate": 8.880299856188681e-06, "loss": 0.4527, "step": 1786 }, { "epoch": 0.887142147939765, "grad_norm": 0.6124231219291687, "learning_rate": 8.878477179791349e-06, "loss": 0.4098, "step": 1787 }, { "epoch": 0.8876385901042528, "grad_norm": 0.6121025085449219, "learning_rate": 8.876653208475898e-06, "loss": 0.4418, "step": 1788 }, { "epoch": 0.8881350322687407, "grad_norm": 0.6045516729354858, "learning_rate": 8.874827942851302e-06, "loss": 0.4439, "step": 1789 }, { "epoch": 0.8886314744332285, "grad_norm": 0.6030242443084717, "learning_rate": 8.873001383526966e-06, "loss": 0.4419, "step": 1790 }, { "epoch": 0.8891279165977164, "grad_norm": 0.5613811016082764, "learning_rate": 8.871173531112733e-06, "loss": 0.4083, "step": 1791 }, { "epoch": 0.8896243587622042, "grad_norm": 0.6209923624992371, "learning_rate": 8.86934438621887e-06, "loss": 0.4327, "step": 1792 }, { "epoch": 0.890120800926692, "grad_norm": 0.570731520652771, "learning_rate": 8.86751394945608e-06, "loss": 0.4184, "step": 1793 }, { "epoch": 0.8906172430911798, "grad_norm": 0.5369892716407776, "learning_rate": 8.865682221435495e-06, "loss": 0.4317, "step": 1794 }, { "epoch": 0.8911136852556677, "grad_norm": 0.6593745946884155, "learning_rate": 8.863849202768677e-06, "loss": 0.4603, "step": 1795 }, { "epoch": 0.8916101274201556, "grad_norm": 0.5539768934249878, "learning_rate": 8.862014894067627e-06, "loss": 0.431, "step": 1796 }, { "epoch": 0.8921065695846434, "grad_norm": 0.5808566212654114, "learning_rate": 8.860179295944766e-06, "loss": 0.4283, "step": 1797 }, { "epoch": 0.8926030117491313, "grad_norm": 0.5353223085403442, "learning_rate": 8.858342409012953e-06, "loss": 0.4429, "step": 1798 }, { "epoch": 0.8930994539136191, "grad_norm": 0.5380842089653015, "learning_rate": 8.856504233885473e-06, "loss": 0.4134, "step": 1799 }, { "epoch": 0.8935958960781069, "grad_norm": 0.6138882637023926, "learning_rate": 8.854664771176044e-06, "loss": 0.464, "step": 1800 }, { "epoch": 0.8940923382425947, "grad_norm": 0.5829402804374695, "learning_rate": 8.852824021498811e-06, "loss": 0.4353, "step": 1801 }, { "epoch": 0.8945887804070826, "grad_norm": 0.5596102476119995, "learning_rate": 8.850981985468351e-06, "loss": 0.4589, "step": 1802 }, { "epoch": 0.8950852225715704, "grad_norm": 0.6228837370872498, "learning_rate": 8.849138663699671e-06, "loss": 0.4599, "step": 1803 }, { "epoch": 0.8955816647360583, "grad_norm": 0.5054922103881836, "learning_rate": 8.847294056808204e-06, "loss": 0.4228, "step": 1804 }, { "epoch": 0.8960781069005461, "grad_norm": 0.5560739636421204, "learning_rate": 8.845448165409815e-06, "loss": 0.4492, "step": 1805 }, { "epoch": 0.896574549065034, "grad_norm": 0.5766074657440186, "learning_rate": 8.8436009901208e-06, "loss": 0.4589, "step": 1806 }, { "epoch": 0.8970709912295217, "grad_norm": 0.565342366695404, "learning_rate": 8.841752531557875e-06, "loss": 0.4356, "step": 1807 }, { "epoch": 0.8975674333940096, "grad_norm": 0.6789494156837463, "learning_rate": 8.839902790338193e-06, "loss": 0.4399, "step": 1808 }, { "epoch": 0.8980638755584974, "grad_norm": 0.5705914497375488, "learning_rate": 8.838051767079332e-06, "loss": 0.4415, "step": 1809 }, { "epoch": 0.8985603177229853, "grad_norm": 0.6257730722427368, "learning_rate": 8.836199462399298e-06, "loss": 0.4656, "step": 1810 }, { "epoch": 0.8990567598874731, "grad_norm": 0.5985350012779236, "learning_rate": 8.834345876916526e-06, "loss": 0.4331, "step": 1811 }, { "epoch": 0.899553202051961, "grad_norm": 0.5551367998123169, "learning_rate": 8.832491011249878e-06, "loss": 0.4559, "step": 1812 }, { "epoch": 0.9000496442164487, "grad_norm": 0.6118077635765076, "learning_rate": 8.830634866018641e-06, "loss": 0.4088, "step": 1813 }, { "epoch": 0.9005460863809366, "grad_norm": 0.687336802482605, "learning_rate": 8.828777441842536e-06, "loss": 0.4437, "step": 1814 }, { "epoch": 0.9010425285454244, "grad_norm": 0.5353473424911499, "learning_rate": 8.826918739341701e-06, "loss": 0.4082, "step": 1815 }, { "epoch": 0.9015389707099123, "grad_norm": 0.6167910695075989, "learning_rate": 8.82505875913671e-06, "loss": 0.4245, "step": 1816 }, { "epoch": 0.9020354128744001, "grad_norm": 0.46991169452667236, "learning_rate": 8.82319750184856e-06, "loss": 0.4324, "step": 1817 }, { "epoch": 0.902531855038888, "grad_norm": 0.5620924234390259, "learning_rate": 8.821334968098671e-06, "loss": 0.4358, "step": 1818 }, { "epoch": 0.9030282972033759, "grad_norm": 0.632743775844574, "learning_rate": 8.819471158508894e-06, "loss": 0.4321, "step": 1819 }, { "epoch": 0.9035247393678636, "grad_norm": 0.6145474910736084, "learning_rate": 8.817606073701505e-06, "loss": 0.4634, "step": 1820 }, { "epoch": 0.9040211815323514, "grad_norm": 0.5922397375106812, "learning_rate": 8.815739714299206e-06, "loss": 0.4474, "step": 1821 }, { "epoch": 0.9045176236968393, "grad_norm": 0.6047663688659668, "learning_rate": 8.813872080925122e-06, "loss": 0.4352, "step": 1822 }, { "epoch": 0.9050140658613272, "grad_norm": 0.5270055532455444, "learning_rate": 8.812003174202803e-06, "loss": 0.432, "step": 1823 }, { "epoch": 0.905510508025815, "grad_norm": 0.5392149686813354, "learning_rate": 8.810132994756232e-06, "loss": 0.4142, "step": 1824 }, { "epoch": 0.9060069501903029, "grad_norm": 0.5890448689460754, "learning_rate": 8.808261543209807e-06, "loss": 0.3996, "step": 1825 }, { "epoch": 0.9065033923547907, "grad_norm": 0.5297775268554688, "learning_rate": 8.806388820188354e-06, "loss": 0.4326, "step": 1826 }, { "epoch": 0.9069998345192785, "grad_norm": 0.613181471824646, "learning_rate": 8.804514826317125e-06, "loss": 0.463, "step": 1827 }, { "epoch": 0.9074962766837663, "grad_norm": 0.5629436373710632, "learning_rate": 8.8026395622218e-06, "loss": 0.4724, "step": 1828 }, { "epoch": 0.9079927188482542, "grad_norm": 0.5205148458480835, "learning_rate": 8.800763028528472e-06, "loss": 0.4173, "step": 1829 }, { "epoch": 0.908489161012742, "grad_norm": 0.5331025123596191, "learning_rate": 8.79888522586367e-06, "loss": 0.4066, "step": 1830 }, { "epoch": 0.9089856031772299, "grad_norm": 0.5812790393829346, "learning_rate": 8.797006154854338e-06, "loss": 0.4694, "step": 1831 }, { "epoch": 0.9094820453417177, "grad_norm": 0.5471412539482117, "learning_rate": 8.795125816127849e-06, "loss": 0.4309, "step": 1832 }, { "epoch": 0.9099784875062056, "grad_norm": 0.5645981431007385, "learning_rate": 8.793244210311995e-06, "loss": 0.4136, "step": 1833 }, { "epoch": 0.9104749296706933, "grad_norm": 0.5445948839187622, "learning_rate": 8.791361338034993e-06, "loss": 0.4241, "step": 1834 }, { "epoch": 0.9109713718351812, "grad_norm": 0.5183908343315125, "learning_rate": 8.789477199925485e-06, "loss": 0.46, "step": 1835 }, { "epoch": 0.911467813999669, "grad_norm": 0.5413535237312317, "learning_rate": 8.787591796612531e-06, "loss": 0.4138, "step": 1836 }, { "epoch": 0.9119642561641569, "grad_norm": 0.528959333896637, "learning_rate": 8.785705128725618e-06, "loss": 0.4172, "step": 1837 }, { "epoch": 0.9124606983286447, "grad_norm": 0.5554860234260559, "learning_rate": 8.783817196894652e-06, "loss": 0.445, "step": 1838 }, { "epoch": 0.9129571404931326, "grad_norm": 0.5726234316825867, "learning_rate": 8.781928001749961e-06, "loss": 0.4131, "step": 1839 }, { "epoch": 0.9134535826576203, "grad_norm": 0.5625305771827698, "learning_rate": 8.780037543922299e-06, "loss": 0.4428, "step": 1840 }, { "epoch": 0.9139500248221082, "grad_norm": 0.4896819293498993, "learning_rate": 8.778145824042838e-06, "loss": 0.4092, "step": 1841 }, { "epoch": 0.914446466986596, "grad_norm": 0.5608265995979309, "learning_rate": 8.776252842743169e-06, "loss": 0.41, "step": 1842 }, { "epoch": 0.9149429091510839, "grad_norm": 0.5511738061904907, "learning_rate": 8.774358600655309e-06, "loss": 0.3965, "step": 1843 }, { "epoch": 0.9154393513155717, "grad_norm": 0.4804808795452118, "learning_rate": 8.772463098411694e-06, "loss": 0.4297, "step": 1844 }, { "epoch": 0.9159357934800596, "grad_norm": 0.5344271659851074, "learning_rate": 8.77056633664518e-06, "loss": 0.4581, "step": 1845 }, { "epoch": 0.9164322356445475, "grad_norm": 0.47074583172798157, "learning_rate": 8.768668315989045e-06, "loss": 0.3821, "step": 1846 }, { "epoch": 0.9169286778090352, "grad_norm": 0.5754554271697998, "learning_rate": 8.766769037076986e-06, "loss": 0.4361, "step": 1847 }, { "epoch": 0.917425119973523, "grad_norm": 0.467873215675354, "learning_rate": 8.76486850054312e-06, "loss": 0.4188, "step": 1848 }, { "epoch": 0.9179215621380109, "grad_norm": 0.43534114956855774, "learning_rate": 8.762966707021985e-06, "loss": 0.4295, "step": 1849 }, { "epoch": 0.9184180043024988, "grad_norm": 0.5038536787033081, "learning_rate": 8.761063657148537e-06, "loss": 0.4496, "step": 1850 }, { "epoch": 0.9189144464669866, "grad_norm": 0.5346387028694153, "learning_rate": 8.759159351558155e-06, "loss": 0.4145, "step": 1851 }, { "epoch": 0.9194108886314745, "grad_norm": 0.5176717042922974, "learning_rate": 8.757253790886635e-06, "loss": 0.4089, "step": 1852 }, { "epoch": 0.9199073307959623, "grad_norm": 0.5240582823753357, "learning_rate": 8.75534697577019e-06, "loss": 0.414, "step": 1853 }, { "epoch": 0.9204037729604501, "grad_norm": 0.4714455306529999, "learning_rate": 8.753438906845454e-06, "loss": 0.4109, "step": 1854 }, { "epoch": 0.9209002151249379, "grad_norm": 0.5242617726325989, "learning_rate": 8.751529584749482e-06, "loss": 0.4433, "step": 1855 }, { "epoch": 0.9213966572894258, "grad_norm": 0.5449681878089905, "learning_rate": 8.749619010119738e-06, "loss": 0.4407, "step": 1856 }, { "epoch": 0.9218930994539136, "grad_norm": 0.5364516377449036, "learning_rate": 8.74770718359412e-06, "loss": 0.4227, "step": 1857 }, { "epoch": 0.9223895416184015, "grad_norm": 0.6049516797065735, "learning_rate": 8.745794105810928e-06, "loss": 0.4185, "step": 1858 }, { "epoch": 0.9228859837828893, "grad_norm": 0.6199399828910828, "learning_rate": 8.74387977740889e-06, "loss": 0.4126, "step": 1859 }, { "epoch": 0.9233824259473772, "grad_norm": 0.5637511014938354, "learning_rate": 8.741964199027147e-06, "loss": 0.4271, "step": 1860 }, { "epoch": 0.9238788681118649, "grad_norm": 0.45303842425346375, "learning_rate": 8.740047371305259e-06, "loss": 0.4273, "step": 1861 }, { "epoch": 0.9243753102763528, "grad_norm": 0.5761847496032715, "learning_rate": 8.738129294883202e-06, "loss": 0.4361, "step": 1862 }, { "epoch": 0.9248717524408406, "grad_norm": 0.6092985272407532, "learning_rate": 8.73620997040137e-06, "loss": 0.4271, "step": 1863 }, { "epoch": 0.9253681946053285, "grad_norm": 0.5013222098350525, "learning_rate": 8.734289398500576e-06, "loss": 0.4238, "step": 1864 }, { "epoch": 0.9258646367698163, "grad_norm": 0.5292612314224243, "learning_rate": 8.732367579822043e-06, "loss": 0.4362, "step": 1865 }, { "epoch": 0.9263610789343042, "grad_norm": 0.5607077479362488, "learning_rate": 8.730444515007413e-06, "loss": 0.4378, "step": 1866 }, { "epoch": 0.9268575210987919, "grad_norm": 0.5158674716949463, "learning_rate": 8.72852020469875e-06, "loss": 0.4149, "step": 1867 }, { "epoch": 0.9273539632632798, "grad_norm": 0.6492262482643127, "learning_rate": 8.726594649538524e-06, "loss": 0.3981, "step": 1868 }, { "epoch": 0.9278504054277676, "grad_norm": 0.5811569094657898, "learning_rate": 8.72466785016963e-06, "loss": 0.3943, "step": 1869 }, { "epoch": 0.9283468475922555, "grad_norm": 0.5623005032539368, "learning_rate": 8.72273980723537e-06, "loss": 0.4078, "step": 1870 }, { "epoch": 0.9288432897567434, "grad_norm": 0.5014375448226929, "learning_rate": 8.720810521379467e-06, "loss": 0.4096, "step": 1871 }, { "epoch": 0.9293397319212312, "grad_norm": 0.5757625102996826, "learning_rate": 8.718879993246058e-06, "loss": 0.3979, "step": 1872 }, { "epoch": 0.9298361740857191, "grad_norm": 0.6123849153518677, "learning_rate": 8.716948223479693e-06, "loss": 0.4309, "step": 1873 }, { "epoch": 0.9303326162502068, "grad_norm": 0.5052618384361267, "learning_rate": 8.715015212725336e-06, "loss": 0.4205, "step": 1874 }, { "epoch": 0.9308290584146947, "grad_norm": 0.594577968120575, "learning_rate": 8.713080961628368e-06, "loss": 0.4422, "step": 1875 }, { "epoch": 0.9313255005791825, "grad_norm": 0.5295659899711609, "learning_rate": 8.711145470834584e-06, "loss": 0.454, "step": 1876 }, { "epoch": 0.9318219427436704, "grad_norm": 0.5543389916419983, "learning_rate": 8.709208740990189e-06, "loss": 0.4361, "step": 1877 }, { "epoch": 0.9323183849081582, "grad_norm": 0.5224103331565857, "learning_rate": 8.707270772741807e-06, "loss": 0.4341, "step": 1878 }, { "epoch": 0.9328148270726461, "grad_norm": 0.4790569543838501, "learning_rate": 8.705331566736473e-06, "loss": 0.4463, "step": 1879 }, { "epoch": 0.9333112692371339, "grad_norm": 0.4508906602859497, "learning_rate": 8.703391123621632e-06, "loss": 0.4145, "step": 1880 }, { "epoch": 0.9338077114016217, "grad_norm": 0.5547817945480347, "learning_rate": 8.701449444045149e-06, "loss": 0.4438, "step": 1881 }, { "epoch": 0.9343041535661095, "grad_norm": 0.5121026039123535, "learning_rate": 8.699506528655297e-06, "loss": 0.4357, "step": 1882 }, { "epoch": 0.9348005957305974, "grad_norm": 0.5174432992935181, "learning_rate": 8.697562378100761e-06, "loss": 0.4584, "step": 1883 }, { "epoch": 0.9352970378950852, "grad_norm": 0.4870593845844269, "learning_rate": 8.695616993030642e-06, "loss": 0.4249, "step": 1884 }, { "epoch": 0.9357934800595731, "grad_norm": 0.6217696666717529, "learning_rate": 8.69367037409445e-06, "loss": 0.4256, "step": 1885 }, { "epoch": 0.9362899222240609, "grad_norm": 0.49673694372177124, "learning_rate": 8.691722521942107e-06, "loss": 0.4079, "step": 1886 }, { "epoch": 0.9367863643885488, "grad_norm": 0.6067440509796143, "learning_rate": 8.68977343722395e-06, "loss": 0.4134, "step": 1887 }, { "epoch": 0.9372828065530365, "grad_norm": 0.5228959918022156, "learning_rate": 8.687823120590727e-06, "loss": 0.4602, "step": 1888 }, { "epoch": 0.9377792487175244, "grad_norm": 0.6241065263748169, "learning_rate": 8.685871572693592e-06, "loss": 0.4286, "step": 1889 }, { "epoch": 0.9382756908820122, "grad_norm": 0.5820610523223877, "learning_rate": 8.683918794184115e-06, "loss": 0.4221, "step": 1890 }, { "epoch": 0.9387721330465001, "grad_norm": 0.5014218091964722, "learning_rate": 8.681964785714275e-06, "loss": 0.4395, "step": 1891 }, { "epoch": 0.9392685752109879, "grad_norm": 0.5033730268478394, "learning_rate": 8.680009547936465e-06, "loss": 0.4043, "step": 1892 }, { "epoch": 0.9397650173754758, "grad_norm": 0.5794035196304321, "learning_rate": 8.678053081503484e-06, "loss": 0.4583, "step": 1893 }, { "epoch": 0.9402614595399635, "grad_norm": 0.4863968789577484, "learning_rate": 8.676095387068542e-06, "loss": 0.4248, "step": 1894 }, { "epoch": 0.9407579017044514, "grad_norm": 0.5104725360870361, "learning_rate": 8.674136465285261e-06, "loss": 0.4144, "step": 1895 }, { "epoch": 0.9412543438689392, "grad_norm": 0.5124814510345459, "learning_rate": 8.672176316807672e-06, "loss": 0.444, "step": 1896 }, { "epoch": 0.9417507860334271, "grad_norm": 0.5211929082870483, "learning_rate": 8.670214942290215e-06, "loss": 0.4138, "step": 1897 }, { "epoch": 0.942247228197915, "grad_norm": 0.5938140153884888, "learning_rate": 8.66825234238774e-06, "loss": 0.4508, "step": 1898 }, { "epoch": 0.9427436703624028, "grad_norm": 0.47483140230178833, "learning_rate": 8.666288517755505e-06, "loss": 0.4197, "step": 1899 }, { "epoch": 0.9432401125268907, "grad_norm": 0.583417534828186, "learning_rate": 8.66432346904918e-06, "loss": 0.4218, "step": 1900 }, { "epoch": 0.9437365546913784, "grad_norm": 0.5486711859703064, "learning_rate": 8.662357196924838e-06, "loss": 0.4634, "step": 1901 }, { "epoch": 0.9442329968558663, "grad_norm": 0.5625196695327759, "learning_rate": 8.660389702038965e-06, "loss": 0.3909, "step": 1902 }, { "epoch": 0.9447294390203541, "grad_norm": 0.524493932723999, "learning_rate": 8.658420985048455e-06, "loss": 0.4283, "step": 1903 }, { "epoch": 0.945225881184842, "grad_norm": 0.5421998500823975, "learning_rate": 8.656451046610607e-06, "loss": 0.4357, "step": 1904 }, { "epoch": 0.9457223233493298, "grad_norm": 0.5529059767723083, "learning_rate": 8.654479887383134e-06, "loss": 0.4289, "step": 1905 }, { "epoch": 0.9462187655138177, "grad_norm": 0.6018701791763306, "learning_rate": 8.652507508024148e-06, "loss": 0.413, "step": 1906 }, { "epoch": 0.9467152076783055, "grad_norm": 0.5109067559242249, "learning_rate": 8.650533909192174e-06, "loss": 0.4443, "step": 1907 }, { "epoch": 0.9472116498427933, "grad_norm": 0.6005603075027466, "learning_rate": 8.648559091546145e-06, "loss": 0.4532, "step": 1908 }, { "epoch": 0.9477080920072811, "grad_norm": 0.5940963625907898, "learning_rate": 8.646583055745398e-06, "loss": 0.4301, "step": 1909 }, { "epoch": 0.948204534171769, "grad_norm": 0.5260499715805054, "learning_rate": 8.644605802449677e-06, "loss": 0.416, "step": 1910 }, { "epoch": 0.9487009763362568, "grad_norm": 0.606785237789154, "learning_rate": 8.642627332319133e-06, "loss": 0.426, "step": 1911 }, { "epoch": 0.9491974185007447, "grad_norm": 0.5093328952789307, "learning_rate": 8.640647646014324e-06, "loss": 0.4208, "step": 1912 }, { "epoch": 0.9496938606652325, "grad_norm": 0.5291666388511658, "learning_rate": 8.638666744196213e-06, "loss": 0.441, "step": 1913 }, { "epoch": 0.9501903028297204, "grad_norm": 0.6605219841003418, "learning_rate": 8.636684627526171e-06, "loss": 0.4319, "step": 1914 }, { "epoch": 0.9506867449942081, "grad_norm": 0.472433865070343, "learning_rate": 8.63470129666597e-06, "loss": 0.4353, "step": 1915 }, { "epoch": 0.951183187158696, "grad_norm": 0.554178774356842, "learning_rate": 8.632716752277792e-06, "loss": 0.3957, "step": 1916 }, { "epoch": 0.9516796293231838, "grad_norm": 0.5093095302581787, "learning_rate": 8.630730995024224e-06, "loss": 0.4135, "step": 1917 }, { "epoch": 0.9521760714876717, "grad_norm": 0.4878700375556946, "learning_rate": 8.628744025568252e-06, "loss": 0.4494, "step": 1918 }, { "epoch": 0.9526725136521595, "grad_norm": 0.5248449444770813, "learning_rate": 8.626755844573274e-06, "loss": 0.4354, "step": 1919 }, { "epoch": 0.9531689558166474, "grad_norm": 0.536544680595398, "learning_rate": 8.62476645270309e-06, "loss": 0.4269, "step": 1920 }, { "epoch": 0.9536653979811351, "grad_norm": 0.5358530879020691, "learning_rate": 8.622775850621904e-06, "loss": 0.4502, "step": 1921 }, { "epoch": 0.954161840145623, "grad_norm": 0.5174581408500671, "learning_rate": 8.62078403899432e-06, "loss": 0.4378, "step": 1922 }, { "epoch": 0.9546582823101109, "grad_norm": 0.5066552758216858, "learning_rate": 8.618791018485357e-06, "loss": 0.4379, "step": 1923 }, { "epoch": 0.9551547244745987, "grad_norm": 0.5197569131851196, "learning_rate": 8.616796789760424e-06, "loss": 0.405, "step": 1924 }, { "epoch": 0.9556511666390866, "grad_norm": 0.4998132586479187, "learning_rate": 8.614801353485343e-06, "loss": 0.403, "step": 1925 }, { "epoch": 0.9561476088035744, "grad_norm": 0.5210258364677429, "learning_rate": 8.612804710326332e-06, "loss": 0.4358, "step": 1926 }, { "epoch": 0.9566440509680623, "grad_norm": 0.5344130992889404, "learning_rate": 8.610806860950023e-06, "loss": 0.42, "step": 1927 }, { "epoch": 0.95714049313255, "grad_norm": 0.5282506346702576, "learning_rate": 8.608807806023436e-06, "loss": 0.4492, "step": 1928 }, { "epoch": 0.9576369352970379, "grad_norm": 0.49642491340637207, "learning_rate": 8.606807546214007e-06, "loss": 0.4076, "step": 1929 }, { "epoch": 0.9581333774615257, "grad_norm": 0.5523301362991333, "learning_rate": 8.604806082189564e-06, "loss": 0.4222, "step": 1930 }, { "epoch": 0.9586298196260136, "grad_norm": 0.4780605137348175, "learning_rate": 8.602803414618343e-06, "loss": 0.4021, "step": 1931 }, { "epoch": 0.9591262617905014, "grad_norm": 0.541985809803009, "learning_rate": 8.600799544168983e-06, "loss": 0.47, "step": 1932 }, { "epoch": 0.9596227039549893, "grad_norm": 0.5229997634887695, "learning_rate": 8.598794471510519e-06, "loss": 0.4334, "step": 1933 }, { "epoch": 0.9601191461194771, "grad_norm": 0.535997211933136, "learning_rate": 8.596788197312389e-06, "loss": 0.4374, "step": 1934 }, { "epoch": 0.9606155882839649, "grad_norm": 0.5105700492858887, "learning_rate": 8.594780722244436e-06, "loss": 0.4236, "step": 1935 }, { "epoch": 0.9611120304484527, "grad_norm": 0.5266976952552795, "learning_rate": 8.592772046976901e-06, "loss": 0.4265, "step": 1936 }, { "epoch": 0.9616084726129406, "grad_norm": 0.4920440912246704, "learning_rate": 8.590762172180426e-06, "loss": 0.4198, "step": 1937 }, { "epoch": 0.9621049147774284, "grad_norm": 0.47827622294425964, "learning_rate": 8.588751098526053e-06, "loss": 0.4367, "step": 1938 }, { "epoch": 0.9626013569419163, "grad_norm": 0.5506331920623779, "learning_rate": 8.586738826685223e-06, "loss": 0.4441, "step": 1939 }, { "epoch": 0.9630977991064041, "grad_norm": 0.46391645073890686, "learning_rate": 8.584725357329784e-06, "loss": 0.4423, "step": 1940 }, { "epoch": 0.963594241270892, "grad_norm": 0.5064931511878967, "learning_rate": 8.582710691131975e-06, "loss": 0.4245, "step": 1941 }, { "epoch": 0.9640906834353797, "grad_norm": 0.4886590242385864, "learning_rate": 8.580694828764438e-06, "loss": 0.408, "step": 1942 }, { "epoch": 0.9645871255998676, "grad_norm": 0.5776588320732117, "learning_rate": 8.578677770900215e-06, "loss": 0.4243, "step": 1943 }, { "epoch": 0.9650835677643554, "grad_norm": 0.49474671483039856, "learning_rate": 8.57665951821275e-06, "loss": 0.4038, "step": 1944 }, { "epoch": 0.9655800099288433, "grad_norm": 0.4920937120914459, "learning_rate": 8.574640071375877e-06, "loss": 0.4446, "step": 1945 }, { "epoch": 0.9660764520933312, "grad_norm": 0.604865312576294, "learning_rate": 8.572619431063839e-06, "loss": 0.4354, "step": 1946 }, { "epoch": 0.966572894257819, "grad_norm": 0.5341542363166809, "learning_rate": 8.570597597951272e-06, "loss": 0.4382, "step": 1947 }, { "epoch": 0.9670693364223067, "grad_norm": 0.5385371446609497, "learning_rate": 8.568574572713208e-06, "loss": 0.4038, "step": 1948 }, { "epoch": 0.9675657785867946, "grad_norm": 0.5662792325019836, "learning_rate": 8.566550356025083e-06, "loss": 0.4331, "step": 1949 }, { "epoch": 0.9680622207512825, "grad_norm": 0.48516252636909485, "learning_rate": 8.56452494856273e-06, "loss": 0.3945, "step": 1950 }, { "epoch": 0.9685586629157703, "grad_norm": 0.5713328123092651, "learning_rate": 8.562498351002375e-06, "loss": 0.4172, "step": 1951 }, { "epoch": 0.9690551050802582, "grad_norm": 0.496072381734848, "learning_rate": 8.560470564020642e-06, "loss": 0.42, "step": 1952 }, { "epoch": 0.969551547244746, "grad_norm": 0.5175489187240601, "learning_rate": 8.558441588294556e-06, "loss": 0.4251, "step": 1953 }, { "epoch": 0.9700479894092339, "grad_norm": 0.601966917514801, "learning_rate": 8.556411424501539e-06, "loss": 0.4411, "step": 1954 }, { "epoch": 0.9705444315737216, "grad_norm": 0.5943797826766968, "learning_rate": 8.554380073319403e-06, "loss": 0.4076, "step": 1955 }, { "epoch": 0.9710408737382095, "grad_norm": 0.5534605979919434, "learning_rate": 8.552347535426365e-06, "loss": 0.4162, "step": 1956 }, { "epoch": 0.9715373159026973, "grad_norm": 0.616706371307373, "learning_rate": 8.55031381150103e-06, "loss": 0.4682, "step": 1957 }, { "epoch": 0.9720337580671852, "grad_norm": 0.5168439745903015, "learning_rate": 8.548278902222408e-06, "loss": 0.4072, "step": 1958 }, { "epoch": 0.972530200231673, "grad_norm": 0.4984270930290222, "learning_rate": 8.546242808269895e-06, "loss": 0.4358, "step": 1959 }, { "epoch": 0.9730266423961609, "grad_norm": 0.6158453226089478, "learning_rate": 8.544205530323294e-06, "loss": 0.4298, "step": 1960 }, { "epoch": 0.9735230845606487, "grad_norm": 0.530850350856781, "learning_rate": 8.542167069062788e-06, "loss": 0.4633, "step": 1961 }, { "epoch": 0.9740195267251365, "grad_norm": 0.6034401655197144, "learning_rate": 8.54012742516897e-06, "loss": 0.4594, "step": 1962 }, { "epoch": 0.9745159688896243, "grad_norm": 0.5335168838500977, "learning_rate": 8.538086599322821e-06, "loss": 0.4247, "step": 1963 }, { "epoch": 0.9750124110541122, "grad_norm": 0.48932790756225586, "learning_rate": 8.536044592205716e-06, "loss": 0.4621, "step": 1964 }, { "epoch": 0.9755088532186, "grad_norm": 0.525940477848053, "learning_rate": 8.534001404499426e-06, "loss": 0.4188, "step": 1965 }, { "epoch": 0.9760052953830879, "grad_norm": 0.5755713582038879, "learning_rate": 8.531957036886114e-06, "loss": 0.4218, "step": 1966 }, { "epoch": 0.9765017375475757, "grad_norm": 0.49353525042533875, "learning_rate": 8.529911490048343e-06, "loss": 0.4692, "step": 1967 }, { "epoch": 0.9769981797120636, "grad_norm": 0.4995236396789551, "learning_rate": 8.527864764669063e-06, "loss": 0.4437, "step": 1968 }, { "epoch": 0.9774946218765513, "grad_norm": 0.5050954818725586, "learning_rate": 8.525816861431617e-06, "loss": 0.4028, "step": 1969 }, { "epoch": 0.9779910640410392, "grad_norm": 0.5010354518890381, "learning_rate": 8.523767781019752e-06, "loss": 0.4327, "step": 1970 }, { "epoch": 0.978487506205527, "grad_norm": 0.47992169857025146, "learning_rate": 8.521717524117592e-06, "loss": 0.412, "step": 1971 }, { "epoch": 0.9789839483700149, "grad_norm": 0.527184247970581, "learning_rate": 8.519666091409669e-06, "loss": 0.4271, "step": 1972 }, { "epoch": 0.9794803905345028, "grad_norm": 0.5123530626296997, "learning_rate": 8.517613483580893e-06, "loss": 0.4075, "step": 1973 }, { "epoch": 0.9799768326989906, "grad_norm": 0.528393566608429, "learning_rate": 8.515559701316583e-06, "loss": 0.4181, "step": 1974 }, { "epoch": 0.9804732748634784, "grad_norm": 0.4722403287887573, "learning_rate": 8.513504745302432e-06, "loss": 0.4265, "step": 1975 }, { "epoch": 0.9809697170279662, "grad_norm": 0.48667579889297485, "learning_rate": 8.51144861622454e-06, "loss": 0.4256, "step": 1976 }, { "epoch": 0.9814661591924541, "grad_norm": 0.4917706847190857, "learning_rate": 8.509391314769394e-06, "loss": 0.4157, "step": 1977 }, { "epoch": 0.9819626013569419, "grad_norm": 0.4952443242073059, "learning_rate": 8.507332841623862e-06, "loss": 0.4373, "step": 1978 }, { "epoch": 0.9824590435214298, "grad_norm": 0.47040024399757385, "learning_rate": 8.505273197475224e-06, "loss": 0.4107, "step": 1979 }, { "epoch": 0.9829554856859176, "grad_norm": 0.4929357171058655, "learning_rate": 8.50321238301113e-06, "loss": 0.4249, "step": 1980 }, { "epoch": 0.9834519278504055, "grad_norm": 0.5388676524162292, "learning_rate": 8.501150398919634e-06, "loss": 0.4164, "step": 1981 }, { "epoch": 0.9839483700148932, "grad_norm": 0.5139812231063843, "learning_rate": 8.499087245889176e-06, "loss": 0.4493, "step": 1982 }, { "epoch": 0.9844448121793811, "grad_norm": 0.5088293552398682, "learning_rate": 8.497022924608587e-06, "loss": 0.4659, "step": 1983 }, { "epoch": 0.9849412543438689, "grad_norm": 0.5986707806587219, "learning_rate": 8.494957435767086e-06, "loss": 0.406, "step": 1984 }, { "epoch": 0.9854376965083568, "grad_norm": 0.5391343832015991, "learning_rate": 8.492890780054285e-06, "loss": 0.4412, "step": 1985 }, { "epoch": 0.9859341386728446, "grad_norm": 0.5085428953170776, "learning_rate": 8.490822958160186e-06, "loss": 0.3892, "step": 1986 }, { "epoch": 0.9864305808373325, "grad_norm": 0.6246312260627747, "learning_rate": 8.488753970775176e-06, "loss": 0.4193, "step": 1987 }, { "epoch": 0.9869270230018203, "grad_norm": 0.7054706811904907, "learning_rate": 8.486683818590033e-06, "loss": 0.4305, "step": 1988 }, { "epoch": 0.9874234651663081, "grad_norm": 0.5165708661079407, "learning_rate": 8.484612502295926e-06, "loss": 0.4287, "step": 1989 }, { "epoch": 0.9879199073307959, "grad_norm": 0.5002493858337402, "learning_rate": 8.48254002258441e-06, "loss": 0.4171, "step": 1990 }, { "epoch": 0.9884163494952838, "grad_norm": 0.5562887191772461, "learning_rate": 8.480466380147435e-06, "loss": 0.4647, "step": 1991 }, { "epoch": 0.9889127916597716, "grad_norm": 0.49416297674179077, "learning_rate": 8.478391575677325e-06, "loss": 0.3953, "step": 1992 }, { "epoch": 0.9894092338242595, "grad_norm": 0.5194969177246094, "learning_rate": 8.476315609866807e-06, "loss": 0.4307, "step": 1993 }, { "epoch": 0.9899056759887473, "grad_norm": 0.6112486124038696, "learning_rate": 8.474238483408987e-06, "loss": 0.4088, "step": 1994 }, { "epoch": 0.9904021181532352, "grad_norm": 0.5487306714057922, "learning_rate": 8.472160196997364e-06, "loss": 0.4235, "step": 1995 }, { "epoch": 0.9908985603177229, "grad_norm": 0.5976057648658752, "learning_rate": 8.470080751325816e-06, "loss": 0.3817, "step": 1996 }, { "epoch": 0.9913950024822108, "grad_norm": 0.5934441089630127, "learning_rate": 8.468000147088619e-06, "loss": 0.4237, "step": 1997 }, { "epoch": 0.9918914446466987, "grad_norm": 0.506049633026123, "learning_rate": 8.465918384980429e-06, "loss": 0.4272, "step": 1998 }, { "epoch": 0.9923878868111865, "grad_norm": 0.4762342870235443, "learning_rate": 8.463835465696286e-06, "loss": 0.3679, "step": 1999 }, { "epoch": 0.9928843289756744, "grad_norm": 0.6033463478088379, "learning_rate": 8.461751389931624e-06, "loss": 0.4171, "step": 2000 }, { "epoch": 0.9933807711401622, "grad_norm": 0.6052261590957642, "learning_rate": 8.459666158382257e-06, "loss": 0.4472, "step": 2001 }, { "epoch": 0.99387721330465, "grad_norm": 0.5185953378677368, "learning_rate": 8.457579771744391e-06, "loss": 0.4484, "step": 2002 }, { "epoch": 0.9943736554691378, "grad_norm": 0.524969756603241, "learning_rate": 8.455492230714611e-06, "loss": 0.4234, "step": 2003 }, { "epoch": 0.9948700976336257, "grad_norm": 0.5626683831214905, "learning_rate": 8.453403535989888e-06, "loss": 0.4189, "step": 2004 }, { "epoch": 0.9953665397981135, "grad_norm": 0.5009151101112366, "learning_rate": 8.451313688267582e-06, "loss": 0.4113, "step": 2005 }, { "epoch": 0.9958629819626014, "grad_norm": 0.4929693341255188, "learning_rate": 8.44922268824544e-06, "loss": 0.4001, "step": 2006 }, { "epoch": 0.9963594241270892, "grad_norm": 0.4741830825805664, "learning_rate": 8.447130536621584e-06, "loss": 0.4087, "step": 2007 }, { "epoch": 0.9968558662915771, "grad_norm": 0.4958761930465698, "learning_rate": 8.44503723409453e-06, "loss": 0.4053, "step": 2008 }, { "epoch": 0.9973523084560648, "grad_norm": 0.567940354347229, "learning_rate": 8.442942781363177e-06, "loss": 0.446, "step": 2009 }, { "epoch": 0.9978487506205527, "grad_norm": 0.5425493717193604, "learning_rate": 8.440847179126802e-06, "loss": 0.4263, "step": 2010 }, { "epoch": 0.9983451927850405, "grad_norm": 0.567949652671814, "learning_rate": 8.43875042808507e-06, "loss": 0.4454, "step": 2011 }, { "epoch": 0.9988416349495284, "grad_norm": 0.6608812212944031, "learning_rate": 8.43665252893803e-06, "loss": 0.4235, "step": 2012 }, { "epoch": 0.9993380771140162, "grad_norm": 0.5508207082748413, "learning_rate": 8.434553482386116e-06, "loss": 0.4243, "step": 2013 }, { "epoch": 0.9998345192785041, "grad_norm": 0.5308889150619507, "learning_rate": 8.432453289130139e-06, "loss": 0.415, "step": 2014 }, { "epoch": 1.0003309614429918, "grad_norm": 1.2246485948562622, "learning_rate": 8.430351949871298e-06, "loss": 0.6285, "step": 2015 }, { "epoch": 1.0008274036074798, "grad_norm": 0.504490077495575, "learning_rate": 8.42824946531117e-06, "loss": 0.3189, "step": 2016 }, { "epoch": 1.0013238457719675, "grad_norm": 0.5669925808906555, "learning_rate": 8.426145836151723e-06, "loss": 0.4072, "step": 2017 }, { "epoch": 1.0018202879364555, "grad_norm": 0.5614247918128967, "learning_rate": 8.424041063095298e-06, "loss": 0.4325, "step": 2018 }, { "epoch": 1.0023167301009432, "grad_norm": 0.44536080956459045, "learning_rate": 8.421935146844622e-06, "loss": 0.3665, "step": 2019 }, { "epoch": 1.002813172265431, "grad_norm": 0.5503876805305481, "learning_rate": 8.419828088102804e-06, "loss": 0.424, "step": 2020 }, { "epoch": 1.003309614429919, "grad_norm": 0.5410546660423279, "learning_rate": 8.417719887573334e-06, "loss": 0.3716, "step": 2021 }, { "epoch": 1.0038060565944067, "grad_norm": 0.5199005603790283, "learning_rate": 8.41561054596008e-06, "loss": 0.4104, "step": 2022 }, { "epoch": 1.0043024987588947, "grad_norm": 0.45783084630966187, "learning_rate": 8.413500063967296e-06, "loss": 0.3799, "step": 2023 }, { "epoch": 1.0047989409233824, "grad_norm": 0.5061880350112915, "learning_rate": 8.411388442299617e-06, "loss": 0.4205, "step": 2024 }, { "epoch": 1.0052953830878704, "grad_norm": 0.5421326160430908, "learning_rate": 8.40927568166205e-06, "loss": 0.3766, "step": 2025 }, { "epoch": 1.005791825252358, "grad_norm": 0.558122992515564, "learning_rate": 8.407161782759995e-06, "loss": 0.4505, "step": 2026 }, { "epoch": 1.0062882674168458, "grad_norm": 0.5281616449356079, "learning_rate": 8.405046746299221e-06, "loss": 0.3879, "step": 2027 }, { "epoch": 1.0067847095813338, "grad_norm": 0.5300270915031433, "learning_rate": 8.402930572985884e-06, "loss": 0.4027, "step": 2028 }, { "epoch": 1.0072811517458216, "grad_norm": 0.4316788613796234, "learning_rate": 8.400813263526512e-06, "loss": 0.3431, "step": 2029 }, { "epoch": 1.0077775939103095, "grad_norm": 0.5456583499908447, "learning_rate": 8.398694818628023e-06, "loss": 0.4075, "step": 2030 }, { "epoch": 1.0082740360747973, "grad_norm": 0.4435092508792877, "learning_rate": 8.396575238997704e-06, "loss": 0.3285, "step": 2031 }, { "epoch": 1.0087704782392852, "grad_norm": 0.6036511659622192, "learning_rate": 8.394454525343227e-06, "loss": 0.4348, "step": 2032 }, { "epoch": 1.009266920403773, "grad_norm": 0.5378240346908569, "learning_rate": 8.39233267837264e-06, "loss": 0.3996, "step": 2033 }, { "epoch": 1.0097633625682607, "grad_norm": 0.4933599531650543, "learning_rate": 8.390209698794371e-06, "loss": 0.4239, "step": 2034 }, { "epoch": 1.0102598047327487, "grad_norm": 0.48561903834342957, "learning_rate": 8.388085587317224e-06, "loss": 0.3784, "step": 2035 }, { "epoch": 1.0107562468972364, "grad_norm": 0.5196031332015991, "learning_rate": 8.38596034465038e-06, "loss": 0.3745, "step": 2036 }, { "epoch": 1.0112526890617244, "grad_norm": 0.5139244198799133, "learning_rate": 8.383833971503405e-06, "loss": 0.3343, "step": 2037 }, { "epoch": 1.0117491312262121, "grad_norm": 0.5157453417778015, "learning_rate": 8.381706468586234e-06, "loss": 0.3915, "step": 2038 }, { "epoch": 1.0122455733907, "grad_norm": 0.46451663970947266, "learning_rate": 8.379577836609183e-06, "loss": 0.3872, "step": 2039 }, { "epoch": 1.0127420155551878, "grad_norm": 0.5853928923606873, "learning_rate": 8.377448076282942e-06, "loss": 0.4504, "step": 2040 }, { "epoch": 1.0132384577196756, "grad_norm": 0.4682686924934387, "learning_rate": 8.375317188318586e-06, "loss": 0.3979, "step": 2041 }, { "epoch": 1.0137348998841635, "grad_norm": 0.4764529764652252, "learning_rate": 8.373185173427553e-06, "loss": 0.4009, "step": 2042 }, { "epoch": 1.0142313420486513, "grad_norm": 0.5426122546195984, "learning_rate": 8.371052032321672e-06, "loss": 0.3809, "step": 2043 }, { "epoch": 1.0147277842131393, "grad_norm": 0.5203511714935303, "learning_rate": 8.368917765713136e-06, "loss": 0.3677, "step": 2044 }, { "epoch": 1.015224226377627, "grad_norm": 0.4883820116519928, "learning_rate": 8.36678237431452e-06, "loss": 0.4053, "step": 2045 }, { "epoch": 1.0157206685421147, "grad_norm": 0.4574817419052124, "learning_rate": 8.364645858838773e-06, "loss": 0.3457, "step": 2046 }, { "epoch": 1.0162171107066027, "grad_norm": 0.5179929733276367, "learning_rate": 8.362508219999222e-06, "loss": 0.4673, "step": 2047 }, { "epoch": 1.0167135528710904, "grad_norm": 0.5367615222930908, "learning_rate": 8.36036945850956e-06, "loss": 0.3893, "step": 2048 }, { "epoch": 1.0172099950355784, "grad_norm": 0.44896242022514343, "learning_rate": 8.35822957508387e-06, "loss": 0.3317, "step": 2049 }, { "epoch": 1.0177064372000661, "grad_norm": 0.5446336269378662, "learning_rate": 8.356088570436593e-06, "loss": 0.4806, "step": 2050 }, { "epoch": 1.0182028793645541, "grad_norm": 0.4994073212146759, "learning_rate": 8.353946445282558e-06, "loss": 0.3483, "step": 2051 }, { "epoch": 1.0186993215290419, "grad_norm": 0.5717995762825012, "learning_rate": 8.35180320033696e-06, "loss": 0.3908, "step": 2052 }, { "epoch": 1.0191957636935296, "grad_norm": 0.47658032178878784, "learning_rate": 8.349658836315369e-06, "loss": 0.3852, "step": 2053 }, { "epoch": 1.0196922058580176, "grad_norm": 0.5444210767745972, "learning_rate": 8.347513353933733e-06, "loss": 0.3714, "step": 2054 }, { "epoch": 1.0201886480225053, "grad_norm": 0.5696029663085938, "learning_rate": 8.345366753908366e-06, "loss": 0.4198, "step": 2055 }, { "epoch": 1.0206850901869933, "grad_norm": 0.4542326331138611, "learning_rate": 8.343219036955965e-06, "loss": 0.3597, "step": 2056 }, { "epoch": 1.021181532351481, "grad_norm": 0.5669736862182617, "learning_rate": 8.34107020379359e-06, "loss": 0.413, "step": 2057 }, { "epoch": 1.021677974515969, "grad_norm": 0.4827122688293457, "learning_rate": 8.338920255138679e-06, "loss": 0.3823, "step": 2058 }, { "epoch": 1.0221744166804567, "grad_norm": 0.5072704553604126, "learning_rate": 8.336769191709041e-06, "loss": 0.4018, "step": 2059 }, { "epoch": 1.0226708588449445, "grad_norm": 0.5344552993774414, "learning_rate": 8.334617014222858e-06, "loss": 0.4088, "step": 2060 }, { "epoch": 1.0231673010094324, "grad_norm": 0.48542529344558716, "learning_rate": 8.332463723398684e-06, "loss": 0.3977, "step": 2061 }, { "epoch": 1.0236637431739202, "grad_norm": 0.5706508159637451, "learning_rate": 8.330309319955446e-06, "loss": 0.3677, "step": 2062 }, { "epoch": 1.0241601853384081, "grad_norm": 0.6180424094200134, "learning_rate": 8.328153804612437e-06, "loss": 0.3807, "step": 2063 }, { "epoch": 1.0246566275028959, "grad_norm": 0.5053064227104187, "learning_rate": 8.325997178089329e-06, "loss": 0.4037, "step": 2064 }, { "epoch": 1.0251530696673838, "grad_norm": 0.500701367855072, "learning_rate": 8.323839441106156e-06, "loss": 0.3665, "step": 2065 }, { "epoch": 1.0256495118318716, "grad_norm": 0.6187525391578674, "learning_rate": 8.321680594383332e-06, "loss": 0.4052, "step": 2066 }, { "epoch": 1.0261459539963593, "grad_norm": 0.4809967279434204, "learning_rate": 8.319520638641636e-06, "loss": 0.4113, "step": 2067 }, { "epoch": 1.0266423961608473, "grad_norm": 0.5311712622642517, "learning_rate": 8.317359574602217e-06, "loss": 0.4311, "step": 2068 }, { "epoch": 1.027138838325335, "grad_norm": 0.5633242726325989, "learning_rate": 8.315197402986599e-06, "loss": 0.3756, "step": 2069 }, { "epoch": 1.027635280489823, "grad_norm": 0.5576339960098267, "learning_rate": 8.313034124516668e-06, "loss": 0.3907, "step": 2070 }, { "epoch": 1.0281317226543107, "grad_norm": 0.5381814241409302, "learning_rate": 8.310869739914688e-06, "loss": 0.4179, "step": 2071 }, { "epoch": 1.0286281648187987, "grad_norm": 0.4494713246822357, "learning_rate": 8.308704249903286e-06, "loss": 0.4141, "step": 2072 }, { "epoch": 1.0291246069832864, "grad_norm": 0.5130305886268616, "learning_rate": 8.30653765520546e-06, "loss": 0.3829, "step": 2073 }, { "epoch": 1.0296210491477742, "grad_norm": 0.5894926190376282, "learning_rate": 8.304369956544576e-06, "loss": 0.3745, "step": 2074 }, { "epoch": 1.0301174913122622, "grad_norm": 0.5124627947807312, "learning_rate": 8.302201154644373e-06, "loss": 0.408, "step": 2075 }, { "epoch": 1.03061393347675, "grad_norm": 0.5214718580245972, "learning_rate": 8.300031250228954e-06, "loss": 0.3583, "step": 2076 }, { "epoch": 1.0311103756412379, "grad_norm": 0.5137425661087036, "learning_rate": 8.29786024402279e-06, "loss": 0.3868, "step": 2077 }, { "epoch": 1.0316068178057256, "grad_norm": 0.5461231470108032, "learning_rate": 8.295688136750721e-06, "loss": 0.3909, "step": 2078 }, { "epoch": 1.0321032599702136, "grad_norm": 0.44158878922462463, "learning_rate": 8.293514929137954e-06, "loss": 0.3082, "step": 2079 }, { "epoch": 1.0325997021347013, "grad_norm": 0.5096025466918945, "learning_rate": 8.291340621910066e-06, "loss": 0.3942, "step": 2080 }, { "epoch": 1.033096144299189, "grad_norm": 0.5919194221496582, "learning_rate": 8.289165215792998e-06, "loss": 0.4429, "step": 2081 }, { "epoch": 1.033592586463677, "grad_norm": 0.4644145965576172, "learning_rate": 8.28698871151306e-06, "loss": 0.377, "step": 2082 }, { "epoch": 1.0340890286281648, "grad_norm": 0.5376895070075989, "learning_rate": 8.284811109796926e-06, "loss": 0.4046, "step": 2083 }, { "epoch": 1.0345854707926527, "grad_norm": 0.5095142722129822, "learning_rate": 8.282632411371639e-06, "loss": 0.4064, "step": 2084 }, { "epoch": 1.0350819129571405, "grad_norm": 0.6036754846572876, "learning_rate": 8.280452616964604e-06, "loss": 0.422, "step": 2085 }, { "epoch": 1.0355783551216284, "grad_norm": 0.4580363631248474, "learning_rate": 8.278271727303602e-06, "loss": 0.3368, "step": 2086 }, { "epoch": 1.0360747972861162, "grad_norm": 0.5248947143554688, "learning_rate": 8.276089743116765e-06, "loss": 0.3759, "step": 2087 }, { "epoch": 1.036571239450604, "grad_norm": 0.575727105140686, "learning_rate": 8.273906665132605e-06, "loss": 0.4331, "step": 2088 }, { "epoch": 1.0370676816150919, "grad_norm": 0.45427361130714417, "learning_rate": 8.271722494079987e-06, "loss": 0.3354, "step": 2089 }, { "epoch": 1.0375641237795796, "grad_norm": 0.643614649772644, "learning_rate": 8.26953723068815e-06, "loss": 0.4426, "step": 2090 }, { "epoch": 1.0380605659440676, "grad_norm": 0.536368727684021, "learning_rate": 8.267350875686693e-06, "loss": 0.422, "step": 2091 }, { "epoch": 1.0385570081085553, "grad_norm": 0.5518601536750793, "learning_rate": 8.26516342980558e-06, "loss": 0.4111, "step": 2092 }, { "epoch": 1.0390534502730433, "grad_norm": 0.4771900177001953, "learning_rate": 8.26297489377514e-06, "loss": 0.3551, "step": 2093 }, { "epoch": 1.039549892437531, "grad_norm": 0.6214922666549683, "learning_rate": 8.260785268326066e-06, "loss": 0.4281, "step": 2094 }, { "epoch": 1.0400463346020188, "grad_norm": 0.4791383147239685, "learning_rate": 8.258594554189415e-06, "loss": 0.3734, "step": 2095 }, { "epoch": 1.0405427767665067, "grad_norm": 0.5762887001037598, "learning_rate": 8.256402752096603e-06, "loss": 0.4407, "step": 2096 }, { "epoch": 1.0410392189309945, "grad_norm": 0.5391703248023987, "learning_rate": 8.25420986277942e-06, "loss": 0.3427, "step": 2097 }, { "epoch": 1.0415356610954825, "grad_norm": 0.5155554413795471, "learning_rate": 8.252015886970005e-06, "loss": 0.4055, "step": 2098 }, { "epoch": 1.0420321032599702, "grad_norm": 0.6652713418006897, "learning_rate": 8.249820825400871e-06, "loss": 0.4449, "step": 2099 }, { "epoch": 1.042528545424458, "grad_norm": 0.5448864102363586, "learning_rate": 8.24762467880489e-06, "loss": 0.4138, "step": 2100 }, { "epoch": 1.043024987588946, "grad_norm": 0.46306294202804565, "learning_rate": 8.245427447915293e-06, "loss": 0.2963, "step": 2101 }, { "epoch": 1.0435214297534336, "grad_norm": 0.5970485806465149, "learning_rate": 8.243229133465677e-06, "loss": 0.4674, "step": 2102 }, { "epoch": 1.0440178719179216, "grad_norm": 0.47740209102630615, "learning_rate": 8.241029736190001e-06, "loss": 0.3736, "step": 2103 }, { "epoch": 1.0445143140824094, "grad_norm": 0.5369322896003723, "learning_rate": 8.23882925682258e-06, "loss": 0.4089, "step": 2104 }, { "epoch": 1.0450107562468973, "grad_norm": 0.460010290145874, "learning_rate": 8.236627696098099e-06, "loss": 0.2993, "step": 2105 }, { "epoch": 1.045507198411385, "grad_norm": 0.5666905641555786, "learning_rate": 8.234425054751595e-06, "loss": 0.4361, "step": 2106 }, { "epoch": 1.0460036405758728, "grad_norm": 0.526233434677124, "learning_rate": 8.232221333518474e-06, "loss": 0.4339, "step": 2107 }, { "epoch": 1.0465000827403608, "grad_norm": 0.4896683394908905, "learning_rate": 8.230016533134495e-06, "loss": 0.4095, "step": 2108 }, { "epoch": 1.0469965249048485, "grad_norm": 0.47087162733078003, "learning_rate": 8.227810654335784e-06, "loss": 0.373, "step": 2109 }, { "epoch": 1.0474929670693365, "grad_norm": 0.5129252076148987, "learning_rate": 8.225603697858822e-06, "loss": 0.3774, "step": 2110 }, { "epoch": 1.0479894092338242, "grad_norm": 0.4936344027519226, "learning_rate": 8.223395664440451e-06, "loss": 0.3816, "step": 2111 }, { "epoch": 1.0484858513983122, "grad_norm": 0.5597810745239258, "learning_rate": 8.221186554817877e-06, "loss": 0.4508, "step": 2112 }, { "epoch": 1.0489822935628, "grad_norm": 0.5091251730918884, "learning_rate": 8.218976369728658e-06, "loss": 0.3353, "step": 2113 }, { "epoch": 1.0494787357272877, "grad_norm": 0.48619434237480164, "learning_rate": 8.216765109910716e-06, "loss": 0.344, "step": 2114 }, { "epoch": 1.0499751778917756, "grad_norm": 0.46110278367996216, "learning_rate": 8.21455277610233e-06, "loss": 0.3565, "step": 2115 }, { "epoch": 1.0504716200562634, "grad_norm": 0.5786783695220947, "learning_rate": 8.212339369042139e-06, "loss": 0.411, "step": 2116 }, { "epoch": 1.0509680622207513, "grad_norm": 0.4878820478916168, "learning_rate": 8.21012488946914e-06, "loss": 0.3515, "step": 2117 }, { "epoch": 1.051464504385239, "grad_norm": 0.5269025564193726, "learning_rate": 8.207909338122687e-06, "loss": 0.3853, "step": 2118 }, { "epoch": 1.051960946549727, "grad_norm": 0.45867884159088135, "learning_rate": 8.205692715742491e-06, "loss": 0.3682, "step": 2119 }, { "epoch": 1.0524573887142148, "grad_norm": 0.4716324210166931, "learning_rate": 8.203475023068624e-06, "loss": 0.3584, "step": 2120 }, { "epoch": 1.0529538308787025, "grad_norm": 0.5852767825126648, "learning_rate": 8.201256260841513e-06, "loss": 0.4222, "step": 2121 }, { "epoch": 1.0534502730431905, "grad_norm": 0.48973000049591064, "learning_rate": 8.199036429801942e-06, "loss": 0.4013, "step": 2122 }, { "epoch": 1.0539467152076782, "grad_norm": 0.5256425738334656, "learning_rate": 8.19681553069105e-06, "loss": 0.4082, "step": 2123 }, { "epoch": 1.0544431573721662, "grad_norm": 0.5481334328651428, "learning_rate": 8.194593564250337e-06, "loss": 0.3874, "step": 2124 }, { "epoch": 1.054939599536654, "grad_norm": 0.5615260601043701, "learning_rate": 8.192370531221659e-06, "loss": 0.388, "step": 2125 }, { "epoch": 1.055436041701142, "grad_norm": 0.5095133185386658, "learning_rate": 8.190146432347223e-06, "loss": 0.3786, "step": 2126 }, { "epoch": 1.0559324838656297, "grad_norm": 0.5212976336479187, "learning_rate": 8.187921268369598e-06, "loss": 0.3468, "step": 2127 }, { "epoch": 1.0564289260301174, "grad_norm": 0.48635414242744446, "learning_rate": 8.185695040031702e-06, "loss": 0.3742, "step": 2128 }, { "epoch": 1.0569253681946054, "grad_norm": 0.5391606688499451, "learning_rate": 8.183467748076817e-06, "loss": 0.4009, "step": 2129 }, { "epoch": 1.057421810359093, "grad_norm": 0.5341262221336365, "learning_rate": 8.181239393248572e-06, "loss": 0.4191, "step": 2130 }, { "epoch": 1.057918252523581, "grad_norm": 0.5158146619796753, "learning_rate": 8.179009976290955e-06, "loss": 0.3726, "step": 2131 }, { "epoch": 1.0584146946880688, "grad_norm": 0.5499842762947083, "learning_rate": 8.176779497948308e-06, "loss": 0.4045, "step": 2132 }, { "epoch": 1.0589111368525568, "grad_norm": 0.478890597820282, "learning_rate": 8.174547958965325e-06, "loss": 0.4117, "step": 2133 }, { "epoch": 1.0594075790170445, "grad_norm": 0.4508815109729767, "learning_rate": 8.17231536008706e-06, "loss": 0.4032, "step": 2134 }, { "epoch": 1.0599040211815323, "grad_norm": 0.5430088639259338, "learning_rate": 8.170081702058914e-06, "loss": 0.4189, "step": 2135 }, { "epoch": 1.0604004633460202, "grad_norm": 0.4805150628089905, "learning_rate": 8.167846985626646e-06, "loss": 0.3393, "step": 2136 }, { "epoch": 1.060896905510508, "grad_norm": 0.4941422641277313, "learning_rate": 8.165611211536365e-06, "loss": 0.4044, "step": 2137 }, { "epoch": 1.061393347674996, "grad_norm": 0.4873533844947815, "learning_rate": 8.16337438053454e-06, "loss": 0.4461, "step": 2138 }, { "epoch": 1.0618897898394837, "grad_norm": 0.4595411419868469, "learning_rate": 8.161136493367983e-06, "loss": 0.4008, "step": 2139 }, { "epoch": 1.0623862320039716, "grad_norm": 0.535145103931427, "learning_rate": 8.158897550783868e-06, "loss": 0.4127, "step": 2140 }, { "epoch": 1.0628826741684594, "grad_norm": 0.4665602147579193, "learning_rate": 8.156657553529712e-06, "loss": 0.3682, "step": 2141 }, { "epoch": 1.0633791163329471, "grad_norm": 0.5599395632743835, "learning_rate": 8.154416502353394e-06, "loss": 0.4256, "step": 2142 }, { "epoch": 1.063875558497435, "grad_norm": 0.4939349293708801, "learning_rate": 8.152174398003138e-06, "loss": 0.3782, "step": 2143 }, { "epoch": 1.0643720006619228, "grad_norm": 0.4621708393096924, "learning_rate": 8.149931241227522e-06, "loss": 0.4093, "step": 2144 }, { "epoch": 1.0648684428264108, "grad_norm": 0.5672184228897095, "learning_rate": 8.147687032775473e-06, "loss": 0.4439, "step": 2145 }, { "epoch": 1.0653648849908985, "grad_norm": 0.46641841530799866, "learning_rate": 8.145441773396276e-06, "loss": 0.3729, "step": 2146 }, { "epoch": 1.0658613271553863, "grad_norm": 0.49514076113700867, "learning_rate": 8.143195463839557e-06, "loss": 0.425, "step": 2147 }, { "epoch": 1.0663577693198742, "grad_norm": 0.5052604675292969, "learning_rate": 8.140948104855301e-06, "loss": 0.4093, "step": 2148 }, { "epoch": 1.066854211484362, "grad_norm": 0.464165061712265, "learning_rate": 8.13869969719384e-06, "loss": 0.3566, "step": 2149 }, { "epoch": 1.06735065364885, "grad_norm": 0.5019583106040955, "learning_rate": 8.136450241605854e-06, "loss": 0.3846, "step": 2150 }, { "epoch": 1.0678470958133377, "grad_norm": 0.5531181693077087, "learning_rate": 8.134199738842376e-06, "loss": 0.4289, "step": 2151 }, { "epoch": 1.0683435379778257, "grad_norm": 0.47489720582962036, "learning_rate": 8.131948189654789e-06, "loss": 0.391, "step": 2152 }, { "epoch": 1.0688399801423134, "grad_norm": 0.4566818177700043, "learning_rate": 8.129695594794822e-06, "loss": 0.3621, "step": 2153 }, { "epoch": 1.0693364223068014, "grad_norm": 0.594458281993866, "learning_rate": 8.127441955014557e-06, "loss": 0.4635, "step": 2154 }, { "epoch": 1.0698328644712891, "grad_norm": 0.45793166756629944, "learning_rate": 8.12518727106642e-06, "loss": 0.3815, "step": 2155 }, { "epoch": 1.0703293066357769, "grad_norm": 0.5282652974128723, "learning_rate": 8.122931543703194e-06, "loss": 0.398, "step": 2156 }, { "epoch": 1.0708257488002648, "grad_norm": 0.4394441246986389, "learning_rate": 8.120674773678e-06, "loss": 0.3252, "step": 2157 }, { "epoch": 1.0713221909647526, "grad_norm": 0.5539484024047852, "learning_rate": 8.118416961744318e-06, "loss": 0.4505, "step": 2158 }, { "epoch": 1.0718186331292405, "grad_norm": 0.5146337151527405, "learning_rate": 8.116158108655964e-06, "loss": 0.3891, "step": 2159 }, { "epoch": 1.0723150752937283, "grad_norm": 0.5483566522598267, "learning_rate": 8.113898215167109e-06, "loss": 0.427, "step": 2160 }, { "epoch": 1.072811517458216, "grad_norm": 0.5484516620635986, "learning_rate": 8.111637282032273e-06, "loss": 0.4054, "step": 2161 }, { "epoch": 1.073307959622704, "grad_norm": 0.5098834037780762, "learning_rate": 8.109375310006317e-06, "loss": 0.4, "step": 2162 }, { "epoch": 1.0738044017871917, "grad_norm": 0.5233021974563599, "learning_rate": 8.107112299844453e-06, "loss": 0.409, "step": 2163 }, { "epoch": 1.0743008439516797, "grad_norm": 0.5173671245574951, "learning_rate": 8.10484825230224e-06, "loss": 0.3743, "step": 2164 }, { "epoch": 1.0747972861161674, "grad_norm": 0.43147969245910645, "learning_rate": 8.102583168135579e-06, "loss": 0.3475, "step": 2165 }, { "epoch": 1.0752937282806554, "grad_norm": 0.5318785905838013, "learning_rate": 8.100317048100722e-06, "loss": 0.3775, "step": 2166 }, { "epoch": 1.0757901704451431, "grad_norm": 0.49304646253585815, "learning_rate": 8.098049892954264e-06, "loss": 0.3342, "step": 2167 }, { "epoch": 1.0762866126096309, "grad_norm": 0.5694938898086548, "learning_rate": 8.095781703453149e-06, "loss": 0.4162, "step": 2168 }, { "epoch": 1.0767830547741188, "grad_norm": 0.5001404285430908, "learning_rate": 8.093512480354662e-06, "loss": 0.4397, "step": 2169 }, { "epoch": 1.0772794969386066, "grad_norm": 0.49296656250953674, "learning_rate": 8.091242224416434e-06, "loss": 0.3873, "step": 2170 }, { "epoch": 1.0777759391030945, "grad_norm": 0.5335666537284851, "learning_rate": 8.08897093639644e-06, "loss": 0.4118, "step": 2171 }, { "epoch": 1.0782723812675823, "grad_norm": 0.4937012195587158, "learning_rate": 8.086698617053009e-06, "loss": 0.3701, "step": 2172 }, { "epoch": 1.0787688234320703, "grad_norm": 0.5040202736854553, "learning_rate": 8.084425267144798e-06, "loss": 0.3937, "step": 2173 }, { "epoch": 1.079265265596558, "grad_norm": 0.5115582942962646, "learning_rate": 8.08215088743082e-06, "loss": 0.3588, "step": 2174 }, { "epoch": 1.0797617077610457, "grad_norm": 0.5061562061309814, "learning_rate": 8.079875478670431e-06, "loss": 0.3982, "step": 2175 }, { "epoch": 1.0802581499255337, "grad_norm": 0.5242905020713806, "learning_rate": 8.077599041623325e-06, "loss": 0.3741, "step": 2176 }, { "epoch": 1.0807545920900214, "grad_norm": 0.46747416257858276, "learning_rate": 8.075321577049545e-06, "loss": 0.3624, "step": 2177 }, { "epoch": 1.0812510342545094, "grad_norm": 0.5029147267341614, "learning_rate": 8.07304308570947e-06, "loss": 0.3784, "step": 2178 }, { "epoch": 1.0817474764189972, "grad_norm": 0.5279207825660706, "learning_rate": 8.07076356836383e-06, "loss": 0.4264, "step": 2179 }, { "epoch": 1.0822439185834851, "grad_norm": 0.5612996220588684, "learning_rate": 8.068483025773694e-06, "loss": 0.3218, "step": 2180 }, { "epoch": 1.0827403607479729, "grad_norm": 0.5373792052268982, "learning_rate": 8.066201458700474e-06, "loss": 0.3893, "step": 2181 }, { "epoch": 1.0832368029124606, "grad_norm": 0.527367353439331, "learning_rate": 8.06391886790592e-06, "loss": 0.4068, "step": 2182 }, { "epoch": 1.0837332450769486, "grad_norm": 0.48099225759506226, "learning_rate": 8.061635254152129e-06, "loss": 0.4204, "step": 2183 }, { "epoch": 1.0842296872414363, "grad_norm": 0.5215699076652527, "learning_rate": 8.059350618201538e-06, "loss": 0.4161, "step": 2184 }, { "epoch": 1.0847261294059243, "grad_norm": 0.5925598740577698, "learning_rate": 8.057064960816924e-06, "loss": 0.4046, "step": 2185 }, { "epoch": 1.085222571570412, "grad_norm": 0.4636925756931305, "learning_rate": 8.054778282761405e-06, "loss": 0.3533, "step": 2186 }, { "epoch": 1.0857190137349, "grad_norm": 0.5489734411239624, "learning_rate": 8.052490584798442e-06, "loss": 0.3862, "step": 2187 }, { "epoch": 1.0862154558993877, "grad_norm": 0.5155268907546997, "learning_rate": 8.050201867691836e-06, "loss": 0.4103, "step": 2188 }, { "epoch": 1.0867118980638755, "grad_norm": 0.5641425848007202, "learning_rate": 8.047912132205725e-06, "loss": 0.4328, "step": 2189 }, { "epoch": 1.0872083402283634, "grad_norm": 0.5191887617111206, "learning_rate": 8.045621379104592e-06, "loss": 0.4325, "step": 2190 }, { "epoch": 1.0877047823928512, "grad_norm": 0.4865204095840454, "learning_rate": 8.043329609153254e-06, "loss": 0.4096, "step": 2191 }, { "epoch": 1.0882012245573391, "grad_norm": 0.4860038757324219, "learning_rate": 8.041036823116874e-06, "loss": 0.3916, "step": 2192 }, { "epoch": 1.0886976667218269, "grad_norm": 0.4851226508617401, "learning_rate": 8.038743021760948e-06, "loss": 0.3716, "step": 2193 }, { "epoch": 1.0891941088863146, "grad_norm": 0.47856733202934265, "learning_rate": 8.036448205851316e-06, "loss": 0.4014, "step": 2194 }, { "epoch": 1.0896905510508026, "grad_norm": 0.4963163435459137, "learning_rate": 8.034152376154156e-06, "loss": 0.376, "step": 2195 }, { "epoch": 1.0901869932152903, "grad_norm": 0.5578581094741821, "learning_rate": 8.031855533435979e-06, "loss": 0.4592, "step": 2196 }, { "epoch": 1.0906834353797783, "grad_norm": 0.47674664855003357, "learning_rate": 8.029557678463642e-06, "loss": 0.3709, "step": 2197 }, { "epoch": 1.091179877544266, "grad_norm": 0.5033631920814514, "learning_rate": 8.027258812004335e-06, "loss": 0.3757, "step": 2198 }, { "epoch": 1.091676319708754, "grad_norm": 0.5142678618431091, "learning_rate": 8.024958934825587e-06, "loss": 0.357, "step": 2199 }, { "epoch": 1.0921727618732417, "grad_norm": 0.5034021735191345, "learning_rate": 8.022658047695264e-06, "loss": 0.4142, "step": 2200 }, { "epoch": 1.0926692040377297, "grad_norm": 0.48208341002464294, "learning_rate": 8.020356151381569e-06, "loss": 0.4051, "step": 2201 }, { "epoch": 1.0931656462022175, "grad_norm": 0.4659241735935211, "learning_rate": 8.018053246653047e-06, "loss": 0.4018, "step": 2202 }, { "epoch": 1.0936620883667052, "grad_norm": 0.44737911224365234, "learning_rate": 8.015749334278569e-06, "loss": 0.3363, "step": 2203 }, { "epoch": 1.0941585305311932, "grad_norm": 0.5837634801864624, "learning_rate": 8.013444415027352e-06, "loss": 0.4527, "step": 2204 }, { "epoch": 1.094654972695681, "grad_norm": 0.4736716151237488, "learning_rate": 8.011138489668948e-06, "loss": 0.392, "step": 2205 }, { "epoch": 1.0951514148601689, "grad_norm": 0.48115596175193787, "learning_rate": 8.008831558973237e-06, "loss": 0.3813, "step": 2206 }, { "epoch": 1.0956478570246566, "grad_norm": 0.49862587451934814, "learning_rate": 8.006523623710449e-06, "loss": 0.393, "step": 2207 }, { "epoch": 1.0961442991891444, "grad_norm": 0.4378868043422699, "learning_rate": 8.004214684651133e-06, "loss": 0.3781, "step": 2208 }, { "epoch": 1.0966407413536323, "grad_norm": 0.49637800455093384, "learning_rate": 8.001904742566183e-06, "loss": 0.3655, "step": 2209 }, { "epoch": 1.09713718351812, "grad_norm": 0.5318952202796936, "learning_rate": 7.999593798226827e-06, "loss": 0.4555, "step": 2210 }, { "epoch": 1.097633625682608, "grad_norm": 0.42233139276504517, "learning_rate": 7.997281852404629e-06, "loss": 0.3082, "step": 2211 }, { "epoch": 1.0981300678470958, "grad_norm": 0.530130922794342, "learning_rate": 7.994968905871479e-06, "loss": 0.4231, "step": 2212 }, { "epoch": 1.0986265100115837, "grad_norm": 0.43761947751045227, "learning_rate": 7.992654959399611e-06, "loss": 0.3453, "step": 2213 }, { "epoch": 1.0991229521760715, "grad_norm": 0.5687201023101807, "learning_rate": 7.990340013761587e-06, "loss": 0.4599, "step": 2214 }, { "epoch": 1.0996193943405594, "grad_norm": 0.42079707980155945, "learning_rate": 7.988024069730306e-06, "loss": 0.3296, "step": 2215 }, { "epoch": 1.1001158365050472, "grad_norm": 0.5425044298171997, "learning_rate": 7.985707128079e-06, "loss": 0.4312, "step": 2216 }, { "epoch": 1.100612278669535, "grad_norm": 0.511191189289093, "learning_rate": 7.983389189581227e-06, "loss": 0.4139, "step": 2217 }, { "epoch": 1.101108720834023, "grad_norm": 0.555756688117981, "learning_rate": 7.98107025501089e-06, "loss": 0.3481, "step": 2218 }, { "epoch": 1.1016051629985106, "grad_norm": 0.5367488861083984, "learning_rate": 7.978750325142217e-06, "loss": 0.3705, "step": 2219 }, { "epoch": 1.1021016051629986, "grad_norm": 0.5053282976150513, "learning_rate": 7.976429400749766e-06, "loss": 0.3893, "step": 2220 }, { "epoch": 1.1025980473274863, "grad_norm": 0.617702066898346, "learning_rate": 7.974107482608434e-06, "loss": 0.449, "step": 2221 }, { "epoch": 1.103094489491974, "grad_norm": 0.484821617603302, "learning_rate": 7.971784571493446e-06, "loss": 0.3904, "step": 2222 }, { "epoch": 1.103590931656462, "grad_norm": 0.4430968165397644, "learning_rate": 7.969460668180358e-06, "loss": 0.3302, "step": 2223 }, { "epoch": 1.1040873738209498, "grad_norm": 0.5765494108200073, "learning_rate": 7.967135773445059e-06, "loss": 0.4276, "step": 2224 }, { "epoch": 1.1045838159854378, "grad_norm": 0.5178293585777283, "learning_rate": 7.964809888063765e-06, "loss": 0.4117, "step": 2225 }, { "epoch": 1.1050802581499255, "grad_norm": 0.4665907323360443, "learning_rate": 7.962483012813029e-06, "loss": 0.3777, "step": 2226 }, { "epoch": 1.1055767003144135, "grad_norm": 0.47450587153434753, "learning_rate": 7.960155148469733e-06, "loss": 0.3398, "step": 2227 }, { "epoch": 1.1060731424789012, "grad_norm": 0.572920024394989, "learning_rate": 7.957826295811085e-06, "loss": 0.4031, "step": 2228 }, { "epoch": 1.106569584643389, "grad_norm": 0.581386387348175, "learning_rate": 7.955496455614624e-06, "loss": 0.4249, "step": 2229 }, { "epoch": 1.107066026807877, "grad_norm": 0.48992353677749634, "learning_rate": 7.953165628658224e-06, "loss": 0.3629, "step": 2230 }, { "epoch": 1.1075624689723647, "grad_norm": 0.49896398186683655, "learning_rate": 7.950833815720083e-06, "loss": 0.3936, "step": 2231 }, { "epoch": 1.1080589111368526, "grad_norm": 0.556266725063324, "learning_rate": 7.948501017578728e-06, "loss": 0.391, "step": 2232 }, { "epoch": 1.1085553533013404, "grad_norm": 0.5408854484558105, "learning_rate": 7.946167235013023e-06, "loss": 0.3876, "step": 2233 }, { "epoch": 1.1090517954658283, "grad_norm": 0.5070262551307678, "learning_rate": 7.94383246880215e-06, "loss": 0.4478, "step": 2234 }, { "epoch": 1.109548237630316, "grad_norm": 0.46250849962234497, "learning_rate": 7.941496719725622e-06, "loss": 0.3574, "step": 2235 }, { "epoch": 1.1100446797948038, "grad_norm": 0.5522990226745605, "learning_rate": 7.939159988563286e-06, "loss": 0.4385, "step": 2236 }, { "epoch": 1.1105411219592918, "grad_norm": 0.46751147508621216, "learning_rate": 7.936822276095312e-06, "loss": 0.3735, "step": 2237 }, { "epoch": 1.1110375641237795, "grad_norm": 0.4831351041793823, "learning_rate": 7.934483583102197e-06, "loss": 0.4025, "step": 2238 }, { "epoch": 1.1115340062882675, "grad_norm": 0.5295484066009521, "learning_rate": 7.932143910364771e-06, "loss": 0.4071, "step": 2239 }, { "epoch": 1.1120304484527552, "grad_norm": 0.45213034749031067, "learning_rate": 7.929803258664182e-06, "loss": 0.3319, "step": 2240 }, { "epoch": 1.1125268906172432, "grad_norm": 0.6124757528305054, "learning_rate": 7.927461628781915e-06, "loss": 0.4191, "step": 2241 }, { "epoch": 1.113023332781731, "grad_norm": 0.4875052869319916, "learning_rate": 7.925119021499771e-06, "loss": 0.3931, "step": 2242 }, { "epoch": 1.1135197749462187, "grad_norm": 0.46222200989723206, "learning_rate": 7.92277543759989e-06, "loss": 0.404, "step": 2243 }, { "epoch": 1.1140162171107066, "grad_norm": 0.5171132683753967, "learning_rate": 7.920430877864725e-06, "loss": 0.4237, "step": 2244 }, { "epoch": 1.1145126592751944, "grad_norm": 0.5520651936531067, "learning_rate": 7.918085343077062e-06, "loss": 0.4371, "step": 2245 }, { "epoch": 1.1150091014396823, "grad_norm": 0.4940972328186035, "learning_rate": 7.915738834020014e-06, "loss": 0.3545, "step": 2246 }, { "epoch": 1.11550554360417, "grad_norm": 0.5554943084716797, "learning_rate": 7.913391351477013e-06, "loss": 0.4121, "step": 2247 }, { "epoch": 1.116001985768658, "grad_norm": 0.5366815328598022, "learning_rate": 7.911042896231822e-06, "loss": 0.4064, "step": 2248 }, { "epoch": 1.1164984279331458, "grad_norm": 0.5211118459701538, "learning_rate": 7.908693469068525e-06, "loss": 0.3085, "step": 2249 }, { "epoch": 1.1169948700976335, "grad_norm": 0.5850318670272827, "learning_rate": 7.906343070771534e-06, "loss": 0.4413, "step": 2250 }, { "epoch": 1.1174913122621215, "grad_norm": 0.45290809869766235, "learning_rate": 7.903991702125583e-06, "loss": 0.3537, "step": 2251 }, { "epoch": 1.1179877544266092, "grad_norm": 0.5465347766876221, "learning_rate": 7.901639363915724e-06, "loss": 0.4451, "step": 2252 }, { "epoch": 1.1184841965910972, "grad_norm": 0.4900934398174286, "learning_rate": 7.899286056927347e-06, "loss": 0.3641, "step": 2253 }, { "epoch": 1.118980638755585, "grad_norm": 0.5147108435630798, "learning_rate": 7.896931781946153e-06, "loss": 0.3857, "step": 2254 }, { "epoch": 1.1194770809200727, "grad_norm": 0.5059351921081543, "learning_rate": 7.894576539758173e-06, "loss": 0.361, "step": 2255 }, { "epoch": 1.1199735230845607, "grad_norm": 0.5212256908416748, "learning_rate": 7.892220331149753e-06, "loss": 0.3971, "step": 2256 }, { "epoch": 1.1204699652490484, "grad_norm": 0.561529278755188, "learning_rate": 7.889863156907574e-06, "loss": 0.3976, "step": 2257 }, { "epoch": 1.1209664074135364, "grad_norm": 0.5042228698730469, "learning_rate": 7.887505017818626e-06, "loss": 0.3657, "step": 2258 }, { "epoch": 1.1214628495780241, "grad_norm": 0.5039868950843811, "learning_rate": 7.885145914670234e-06, "loss": 0.4632, "step": 2259 }, { "epoch": 1.121959291742512, "grad_norm": 0.4714663028717041, "learning_rate": 7.882785848250033e-06, "loss": 0.3283, "step": 2260 }, { "epoch": 1.1224557339069998, "grad_norm": 0.548709511756897, "learning_rate": 7.880424819345987e-06, "loss": 0.4011, "step": 2261 }, { "epoch": 1.1229521760714878, "grad_norm": 0.48952004313468933, "learning_rate": 7.87806282874638e-06, "loss": 0.3884, "step": 2262 }, { "epoch": 1.1234486182359755, "grad_norm": 0.5224816203117371, "learning_rate": 7.875699877239815e-06, "loss": 0.3918, "step": 2263 }, { "epoch": 1.1239450604004633, "grad_norm": 0.48628318309783936, "learning_rate": 7.873335965615219e-06, "loss": 0.3793, "step": 2264 }, { "epoch": 1.1244415025649512, "grad_norm": 0.5722745060920715, "learning_rate": 7.870971094661836e-06, "loss": 0.4288, "step": 2265 }, { "epoch": 1.124937944729439, "grad_norm": 0.49483349919319153, "learning_rate": 7.868605265169236e-06, "loss": 0.414, "step": 2266 }, { "epoch": 1.125434386893927, "grad_norm": 0.4790622293949127, "learning_rate": 7.8662384779273e-06, "loss": 0.3591, "step": 2267 }, { "epoch": 1.1259308290584147, "grad_norm": 0.547235906124115, "learning_rate": 7.863870733726237e-06, "loss": 0.4571, "step": 2268 }, { "epoch": 1.1264272712229024, "grad_norm": 0.4581131935119629, "learning_rate": 7.861502033356572e-06, "loss": 0.4174, "step": 2269 }, { "epoch": 1.1269237133873904, "grad_norm": 0.5310490727424622, "learning_rate": 7.859132377609146e-06, "loss": 0.4092, "step": 2270 }, { "epoch": 1.1274201555518781, "grad_norm": 0.4995870590209961, "learning_rate": 7.85676176727513e-06, "loss": 0.3647, "step": 2271 }, { "epoch": 1.127916597716366, "grad_norm": 0.4578181505203247, "learning_rate": 7.854390203146e-06, "loss": 0.3719, "step": 2272 }, { "epoch": 1.1284130398808538, "grad_norm": 0.523820161819458, "learning_rate": 7.852017686013561e-06, "loss": 0.4295, "step": 2273 }, { "epoch": 1.1289094820453418, "grad_norm": 0.5442314743995667, "learning_rate": 7.849644216669929e-06, "loss": 0.4008, "step": 2274 }, { "epoch": 1.1294059242098295, "grad_norm": 0.47748222947120667, "learning_rate": 7.847269795907543e-06, "loss": 0.3578, "step": 2275 }, { "epoch": 1.1299023663743175, "grad_norm": 0.48083415627479553, "learning_rate": 7.844894424519156e-06, "loss": 0.3767, "step": 2276 }, { "epoch": 1.1303988085388053, "grad_norm": 0.5618274211883545, "learning_rate": 7.842518103297842e-06, "loss": 0.4483, "step": 2277 }, { "epoch": 1.130895250703293, "grad_norm": 0.41209137439727783, "learning_rate": 7.840140833036987e-06, "loss": 0.3316, "step": 2278 }, { "epoch": 1.131391692867781, "grad_norm": 0.5562422275543213, "learning_rate": 7.8377626145303e-06, "loss": 0.3971, "step": 2279 }, { "epoch": 1.1318881350322687, "grad_norm": 0.4675149917602539, "learning_rate": 7.835383448571801e-06, "loss": 0.3525, "step": 2280 }, { "epoch": 1.1323845771967567, "grad_norm": 0.5130094885826111, "learning_rate": 7.83300333595583e-06, "loss": 0.3837, "step": 2281 }, { "epoch": 1.1328810193612444, "grad_norm": 0.47485724091529846, "learning_rate": 7.830622277477042e-06, "loss": 0.4194, "step": 2282 }, { "epoch": 1.1333774615257322, "grad_norm": 0.5375344157218933, "learning_rate": 7.828240273930408e-06, "loss": 0.3683, "step": 2283 }, { "epoch": 1.1338739036902201, "grad_norm": 0.5085262656211853, "learning_rate": 7.825857326111213e-06, "loss": 0.4132, "step": 2284 }, { "epoch": 1.1343703458547079, "grad_norm": 0.44602128863334656, "learning_rate": 7.82347343481506e-06, "loss": 0.3492, "step": 2285 }, { "epoch": 1.1348667880191958, "grad_norm": 0.5005250573158264, "learning_rate": 7.821088600837865e-06, "loss": 0.393, "step": 2286 }, { "epoch": 1.1353632301836836, "grad_norm": 0.5310102105140686, "learning_rate": 7.81870282497586e-06, "loss": 0.3903, "step": 2287 }, { "epoch": 1.1358596723481715, "grad_norm": 0.5499944686889648, "learning_rate": 7.816316108025588e-06, "loss": 0.4351, "step": 2288 }, { "epoch": 1.1363561145126593, "grad_norm": 0.4943830966949463, "learning_rate": 7.81392845078391e-06, "loss": 0.3099, "step": 2289 }, { "epoch": 1.136852556677147, "grad_norm": 0.5506677031517029, "learning_rate": 7.811539854048003e-06, "loss": 0.4198, "step": 2290 }, { "epoch": 1.137348998841635, "grad_norm": 0.5279464721679688, "learning_rate": 7.809150318615351e-06, "loss": 0.3899, "step": 2291 }, { "epoch": 1.1378454410061227, "grad_norm": 0.5648773312568665, "learning_rate": 7.806759845283755e-06, "loss": 0.3863, "step": 2292 }, { "epoch": 1.1383418831706107, "grad_norm": 0.48882856965065, "learning_rate": 7.804368434851333e-06, "loss": 0.3885, "step": 2293 }, { "epoch": 1.1388383253350984, "grad_norm": 0.5658355951309204, "learning_rate": 7.801976088116507e-06, "loss": 0.4149, "step": 2294 }, { "epoch": 1.1393347674995864, "grad_norm": 0.4686974287033081, "learning_rate": 7.799582805878022e-06, "loss": 0.3679, "step": 2295 }, { "epoch": 1.1398312096640741, "grad_norm": 0.4900280237197876, "learning_rate": 7.797188588934921e-06, "loss": 0.3824, "step": 2296 }, { "epoch": 1.1403276518285619, "grad_norm": 0.5844712257385254, "learning_rate": 7.794793438086578e-06, "loss": 0.3671, "step": 2297 }, { "epoch": 1.1408240939930498, "grad_norm": 0.46553829312324524, "learning_rate": 7.792397354132661e-06, "loss": 0.3742, "step": 2298 }, { "epoch": 1.1413205361575376, "grad_norm": 0.5591110587120056, "learning_rate": 7.790000337873162e-06, "loss": 0.4013, "step": 2299 }, { "epoch": 1.1418169783220256, "grad_norm": 0.5824955105781555, "learning_rate": 7.78760239010838e-06, "loss": 0.4197, "step": 2300 }, { "epoch": 1.1423134204865133, "grad_norm": 0.5374048948287964, "learning_rate": 7.78520351163892e-06, "loss": 0.3837, "step": 2301 }, { "epoch": 1.142809862651001, "grad_norm": 0.47077277302742004, "learning_rate": 7.782803703265707e-06, "loss": 0.3602, "step": 2302 }, { "epoch": 1.143306304815489, "grad_norm": 0.564742386341095, "learning_rate": 7.780402965789968e-06, "loss": 0.437, "step": 2303 }, { "epoch": 1.1438027469799767, "grad_norm": 0.6398131251335144, "learning_rate": 7.778001300013248e-06, "loss": 0.4312, "step": 2304 }, { "epoch": 1.1442991891444647, "grad_norm": 0.47649499773979187, "learning_rate": 7.775598706737395e-06, "loss": 0.3575, "step": 2305 }, { "epoch": 1.1447956313089525, "grad_norm": 0.5230289101600647, "learning_rate": 7.77319518676457e-06, "loss": 0.4032, "step": 2306 }, { "epoch": 1.1452920734734404, "grad_norm": 0.5899373888969421, "learning_rate": 7.770790740897245e-06, "loss": 0.3945, "step": 2307 }, { "epoch": 1.1457885156379282, "grad_norm": 0.552971363067627, "learning_rate": 7.768385369938196e-06, "loss": 0.4042, "step": 2308 }, { "epoch": 1.1462849578024161, "grad_norm": 0.49716320633888245, "learning_rate": 7.765979074690512e-06, "loss": 0.3627, "step": 2309 }, { "epoch": 1.1467813999669039, "grad_norm": 0.5839441418647766, "learning_rate": 7.763571855957592e-06, "loss": 0.4105, "step": 2310 }, { "epoch": 1.1472778421313916, "grad_norm": 0.5220968723297119, "learning_rate": 7.761163714543137e-06, "loss": 0.4271, "step": 2311 }, { "epoch": 1.1477742842958796, "grad_norm": 0.47109493613243103, "learning_rate": 7.758754651251163e-06, "loss": 0.3841, "step": 2312 }, { "epoch": 1.1482707264603673, "grad_norm": 0.6111109256744385, "learning_rate": 7.75634466688599e-06, "loss": 0.4434, "step": 2313 }, { "epoch": 1.1487671686248553, "grad_norm": 0.5164154171943665, "learning_rate": 7.753933762252246e-06, "loss": 0.3407, "step": 2314 }, { "epoch": 1.149263610789343, "grad_norm": 0.5145156979560852, "learning_rate": 7.751521938154867e-06, "loss": 0.3569, "step": 2315 }, { "epoch": 1.1497600529538308, "grad_norm": 0.5358943939208984, "learning_rate": 7.749109195399093e-06, "loss": 0.4417, "step": 2316 }, { "epoch": 1.1502564951183187, "grad_norm": 0.4913126230239868, "learning_rate": 7.746695534790477e-06, "loss": 0.3356, "step": 2317 }, { "epoch": 1.1507529372828065, "grad_norm": 0.5036699175834656, "learning_rate": 7.744280957134872e-06, "loss": 0.4233, "step": 2318 }, { "epoch": 1.1512493794472944, "grad_norm": 0.47827887535095215, "learning_rate": 7.741865463238442e-06, "loss": 0.3371, "step": 2319 }, { "epoch": 1.1517458216117822, "grad_norm": 0.5525048971176147, "learning_rate": 7.739449053907653e-06, "loss": 0.4087, "step": 2320 }, { "epoch": 1.1522422637762701, "grad_norm": 0.5267645120620728, "learning_rate": 7.737031729949279e-06, "loss": 0.4183, "step": 2321 }, { "epoch": 1.152738705940758, "grad_norm": 0.5268141031265259, "learning_rate": 7.7346134921704e-06, "loss": 0.387, "step": 2322 }, { "epoch": 1.1532351481052459, "grad_norm": 0.5098434686660767, "learning_rate": 7.732194341378397e-06, "loss": 0.3799, "step": 2323 }, { "epoch": 1.1537315902697336, "grad_norm": 0.4360140264034271, "learning_rate": 7.72977427838096e-06, "loss": 0.3518, "step": 2324 }, { "epoch": 1.1542280324342213, "grad_norm": 0.6150738000869751, "learning_rate": 7.727353303986084e-06, "loss": 0.4512, "step": 2325 }, { "epoch": 1.1547244745987093, "grad_norm": 0.42270705103874207, "learning_rate": 7.724931419002063e-06, "loss": 0.3249, "step": 2326 }, { "epoch": 1.155220916763197, "grad_norm": 0.4908112585544586, "learning_rate": 7.722508624237503e-06, "loss": 0.3762, "step": 2327 }, { "epoch": 1.155717358927685, "grad_norm": 0.610887348651886, "learning_rate": 7.720084920501306e-06, "loss": 0.4362, "step": 2328 }, { "epoch": 1.1562138010921728, "grad_norm": 0.5379638671875, "learning_rate": 7.717660308602681e-06, "loss": 0.3898, "step": 2329 }, { "epoch": 1.1567102432566605, "grad_norm": 0.5278009176254272, "learning_rate": 7.715234789351144e-06, "loss": 0.3732, "step": 2330 }, { "epoch": 1.1572066854211485, "grad_norm": 0.4772145450115204, "learning_rate": 7.712808363556504e-06, "loss": 0.3912, "step": 2331 }, { "epoch": 1.1577031275856362, "grad_norm": 0.4805733561515808, "learning_rate": 7.710381032028882e-06, "loss": 0.3694, "step": 2332 }, { "epoch": 1.1581995697501242, "grad_norm": 0.5250303149223328, "learning_rate": 7.707952795578698e-06, "loss": 0.3904, "step": 2333 }, { "epoch": 1.158696011914612, "grad_norm": 0.45386064052581787, "learning_rate": 7.705523655016674e-06, "loss": 0.3547, "step": 2334 }, { "epoch": 1.1591924540790999, "grad_norm": 0.4877268075942993, "learning_rate": 7.703093611153833e-06, "loss": 0.3571, "step": 2335 }, { "epoch": 1.1596888962435876, "grad_norm": 0.5304108262062073, "learning_rate": 7.700662664801501e-06, "loss": 0.4196, "step": 2336 }, { "epoch": 1.1601853384080756, "grad_norm": 0.6019991636276245, "learning_rate": 7.698230816771307e-06, "loss": 0.3781, "step": 2337 }, { "epoch": 1.1606817805725633, "grad_norm": 0.503665566444397, "learning_rate": 7.695798067875174e-06, "loss": 0.4052, "step": 2338 }, { "epoch": 1.161178222737051, "grad_norm": 0.4661159813404083, "learning_rate": 7.693364418925335e-06, "loss": 0.3908, "step": 2339 }, { "epoch": 1.161674664901539, "grad_norm": 0.46968352794647217, "learning_rate": 7.690929870734319e-06, "loss": 0.3723, "step": 2340 }, { "epoch": 1.1621711070660268, "grad_norm": 0.5554460287094116, "learning_rate": 7.688494424114954e-06, "loss": 0.4821, "step": 2341 }, { "epoch": 1.1626675492305147, "grad_norm": 0.5310655832290649, "learning_rate": 7.686058079880371e-06, "loss": 0.4496, "step": 2342 }, { "epoch": 1.1631639913950025, "grad_norm": 0.4366506338119507, "learning_rate": 7.683620838843997e-06, "loss": 0.3613, "step": 2343 }, { "epoch": 1.1636604335594902, "grad_norm": 0.49211230874061584, "learning_rate": 7.681182701819563e-06, "loss": 0.4546, "step": 2344 }, { "epoch": 1.1641568757239782, "grad_norm": 0.4518333077430725, "learning_rate": 7.678743669621094e-06, "loss": 0.3652, "step": 2345 }, { "epoch": 1.164653317888466, "grad_norm": 0.5196146965026855, "learning_rate": 7.676303743062917e-06, "loss": 0.4079, "step": 2346 }, { "epoch": 1.165149760052954, "grad_norm": 0.4470931887626648, "learning_rate": 7.67386292295966e-06, "loss": 0.3125, "step": 2347 }, { "epoch": 1.1656462022174416, "grad_norm": 0.5917523503303528, "learning_rate": 7.671421210126245e-06, "loss": 0.3809, "step": 2348 }, { "epoch": 1.1661426443819294, "grad_norm": 0.5357183814048767, "learning_rate": 7.668978605377892e-06, "loss": 0.4172, "step": 2349 }, { "epoch": 1.1666390865464173, "grad_norm": 0.48682188987731934, "learning_rate": 7.666535109530121e-06, "loss": 0.3577, "step": 2350 }, { "epoch": 1.167135528710905, "grad_norm": 0.6545096635818481, "learning_rate": 7.66409072339875e-06, "loss": 0.4383, "step": 2351 }, { "epoch": 1.167631970875393, "grad_norm": 0.4411161243915558, "learning_rate": 7.661645447799893e-06, "loss": 0.3415, "step": 2352 }, { "epoch": 1.1681284130398808, "grad_norm": 0.5509361624717712, "learning_rate": 7.65919928354996e-06, "loss": 0.4267, "step": 2353 }, { "epoch": 1.1686248552043688, "grad_norm": 0.45296987891197205, "learning_rate": 7.656752231465659e-06, "loss": 0.3892, "step": 2354 }, { "epoch": 1.1691212973688565, "grad_norm": 0.5119287371635437, "learning_rate": 7.654304292363993e-06, "loss": 0.4274, "step": 2355 }, { "epoch": 1.1696177395333445, "grad_norm": 0.49846914410591125, "learning_rate": 7.651855467062265e-06, "loss": 0.3515, "step": 2356 }, { "epoch": 1.1701141816978322, "grad_norm": 0.5010349750518799, "learning_rate": 7.649405756378072e-06, "loss": 0.4513, "step": 2357 }, { "epoch": 1.17061062386232, "grad_norm": 0.4543551206588745, "learning_rate": 7.646955161129302e-06, "loss": 0.3873, "step": 2358 }, { "epoch": 1.171107066026808, "grad_norm": 0.4804386496543884, "learning_rate": 7.644503682134143e-06, "loss": 0.3833, "step": 2359 }, { "epoch": 1.1716035081912957, "grad_norm": 0.5031589865684509, "learning_rate": 7.642051320211082e-06, "loss": 0.3945, "step": 2360 }, { "epoch": 1.1720999503557836, "grad_norm": 0.47198060154914856, "learning_rate": 7.639598076178887e-06, "loss": 0.3895, "step": 2361 }, { "epoch": 1.1725963925202714, "grad_norm": 0.5085179805755615, "learning_rate": 7.637143950856638e-06, "loss": 0.4261, "step": 2362 }, { "epoch": 1.1730928346847591, "grad_norm": 0.42634278535842896, "learning_rate": 7.634688945063696e-06, "loss": 0.3506, "step": 2363 }, { "epoch": 1.173589276849247, "grad_norm": 0.551628828048706, "learning_rate": 7.632233059619723e-06, "loss": 0.4142, "step": 2364 }, { "epoch": 1.1740857190137348, "grad_norm": 0.5333375334739685, "learning_rate": 7.629776295344672e-06, "loss": 0.4585, "step": 2365 }, { "epoch": 1.1745821611782228, "grad_norm": 0.585602343082428, "learning_rate": 7.627318653058789e-06, "loss": 0.4056, "step": 2366 }, { "epoch": 1.1750786033427105, "grad_norm": 0.507634162902832, "learning_rate": 7.624860133582612e-06, "loss": 0.3866, "step": 2367 }, { "epoch": 1.1755750455071985, "grad_norm": 0.5014358162879944, "learning_rate": 7.622400737736978e-06, "loss": 0.3865, "step": 2368 }, { "epoch": 1.1760714876716862, "grad_norm": 0.524798572063446, "learning_rate": 7.61994046634301e-06, "loss": 0.3435, "step": 2369 }, { "epoch": 1.1765679298361742, "grad_norm": 0.5248203873634338, "learning_rate": 7.6174793202221275e-06, "loss": 0.3968, "step": 2370 }, { "epoch": 1.177064372000662, "grad_norm": 0.5110960006713867, "learning_rate": 7.615017300196038e-06, "loss": 0.4079, "step": 2371 }, { "epoch": 1.1775608141651497, "grad_norm": 0.48564788699150085, "learning_rate": 7.6125544070867456e-06, "loss": 0.3679, "step": 2372 }, { "epoch": 1.1780572563296376, "grad_norm": 0.5156721472740173, "learning_rate": 7.610090641716541e-06, "loss": 0.4197, "step": 2373 }, { "epoch": 1.1785536984941254, "grad_norm": 0.47410547733306885, "learning_rate": 7.607626004908009e-06, "loss": 0.4062, "step": 2374 }, { "epoch": 1.1790501406586134, "grad_norm": 0.5164138078689575, "learning_rate": 7.605160497484027e-06, "loss": 0.4223, "step": 2375 }, { "epoch": 1.179546582823101, "grad_norm": 0.4530256688594818, "learning_rate": 7.602694120267757e-06, "loss": 0.3201, "step": 2376 }, { "epoch": 1.1800430249875888, "grad_norm": 0.5823356509208679, "learning_rate": 7.600226874082659e-06, "loss": 0.3912, "step": 2377 }, { "epoch": 1.1805394671520768, "grad_norm": 0.476347953081131, "learning_rate": 7.597758759752476e-06, "loss": 0.3677, "step": 2378 }, { "epoch": 1.1810359093165645, "grad_norm": 0.5618307590484619, "learning_rate": 7.595289778101249e-06, "loss": 0.4362, "step": 2379 }, { "epoch": 1.1815323514810525, "grad_norm": 0.5034103989601135, "learning_rate": 7.592819929953299e-06, "loss": 0.4458, "step": 2380 }, { "epoch": 1.1820287936455403, "grad_norm": 0.49801507592201233, "learning_rate": 7.590349216133245e-06, "loss": 0.3508, "step": 2381 }, { "epoch": 1.1825252358100282, "grad_norm": 0.5116190314292908, "learning_rate": 7.587877637465989e-06, "loss": 0.3532, "step": 2382 }, { "epoch": 1.183021677974516, "grad_norm": 0.4639187753200531, "learning_rate": 7.5854051947767235e-06, "loss": 0.3625, "step": 2383 }, { "epoch": 1.183518120139004, "grad_norm": 0.4660295248031616, "learning_rate": 7.582931888890933e-06, "loss": 0.3917, "step": 2384 }, { "epoch": 1.1840145623034917, "grad_norm": 0.540460467338562, "learning_rate": 7.580457720634383e-06, "loss": 0.4045, "step": 2385 }, { "epoch": 1.1845110044679794, "grad_norm": 0.46129047870635986, "learning_rate": 7.577982690833135e-06, "loss": 0.3943, "step": 2386 }, { "epoch": 1.1850074466324674, "grad_norm": 0.5270177721977234, "learning_rate": 7.575506800313529e-06, "loss": 0.3846, "step": 2387 }, { "epoch": 1.1855038887969551, "grad_norm": 0.4920385479927063, "learning_rate": 7.573030049902204e-06, "loss": 0.3981, "step": 2388 }, { "epoch": 1.186000330961443, "grad_norm": 0.5357401371002197, "learning_rate": 7.570552440426075e-06, "loss": 0.3934, "step": 2389 }, { "epoch": 1.1864967731259308, "grad_norm": 0.5329555869102478, "learning_rate": 7.56807397271235e-06, "loss": 0.3823, "step": 2390 }, { "epoch": 1.1869932152904186, "grad_norm": 0.5002598166465759, "learning_rate": 7.565594647588521e-06, "loss": 0.3935, "step": 2391 }, { "epoch": 1.1874896574549065, "grad_norm": 0.5231428742408752, "learning_rate": 7.563114465882369e-06, "loss": 0.3319, "step": 2392 }, { "epoch": 1.1879860996193943, "grad_norm": 0.5176430344581604, "learning_rate": 7.5606334284219586e-06, "loss": 0.3733, "step": 2393 }, { "epoch": 1.1884825417838822, "grad_norm": 0.5265229344367981, "learning_rate": 7.558151536035641e-06, "loss": 0.4344, "step": 2394 }, { "epoch": 1.18897898394837, "grad_norm": 0.5091320872306824, "learning_rate": 7.555668789552051e-06, "loss": 0.3501, "step": 2395 }, { "epoch": 1.189475426112858, "grad_norm": 0.6120239496231079, "learning_rate": 7.553185189800112e-06, "loss": 0.3549, "step": 2396 }, { "epoch": 1.1899718682773457, "grad_norm": 0.5510028600692749, "learning_rate": 7.550700737609031e-06, "loss": 0.3856, "step": 2397 }, { "epoch": 1.1904683104418334, "grad_norm": 0.47388842701911926, "learning_rate": 7.548215433808297e-06, "loss": 0.354, "step": 2398 }, { "epoch": 1.1909647526063214, "grad_norm": 0.6440820097923279, "learning_rate": 7.545729279227687e-06, "loss": 0.4766, "step": 2399 }, { "epoch": 1.1914611947708091, "grad_norm": 0.5103074908256531, "learning_rate": 7.543242274697258e-06, "loss": 0.3353, "step": 2400 }, { "epoch": 1.191957636935297, "grad_norm": 0.5963773131370544, "learning_rate": 7.540754421047356e-06, "loss": 0.4699, "step": 2401 }, { "epoch": 1.1924540790997848, "grad_norm": 0.4876772463321686, "learning_rate": 7.538265719108606e-06, "loss": 0.3696, "step": 2402 }, { "epoch": 1.1929505212642728, "grad_norm": 0.564170241355896, "learning_rate": 7.5357761697119195e-06, "loss": 0.4394, "step": 2403 }, { "epoch": 1.1934469634287606, "grad_norm": 0.5147236585617065, "learning_rate": 7.533285773688488e-06, "loss": 0.3698, "step": 2404 }, { "epoch": 1.1939434055932483, "grad_norm": 0.5328168869018555, "learning_rate": 7.53079453186979e-06, "loss": 0.4058, "step": 2405 }, { "epoch": 1.1944398477577363, "grad_norm": 0.5710572600364685, "learning_rate": 7.528302445087577e-06, "loss": 0.4344, "step": 2406 }, { "epoch": 1.194936289922224, "grad_norm": 0.516255795955658, "learning_rate": 7.525809514173896e-06, "loss": 0.3883, "step": 2407 }, { "epoch": 1.195432732086712, "grad_norm": 0.49333807826042175, "learning_rate": 7.523315739961065e-06, "loss": 0.3831, "step": 2408 }, { "epoch": 1.1959291742511997, "grad_norm": 0.5510305762290955, "learning_rate": 7.5208211232816864e-06, "loss": 0.4038, "step": 2409 }, { "epoch": 1.1964256164156875, "grad_norm": 0.4461895525455475, "learning_rate": 7.518325664968649e-06, "loss": 0.3602, "step": 2410 }, { "epoch": 1.1969220585801754, "grad_norm": 0.49438929557800293, "learning_rate": 7.515829365855116e-06, "loss": 0.388, "step": 2411 }, { "epoch": 1.1974185007446632, "grad_norm": 0.4937930703163147, "learning_rate": 7.513332226774535e-06, "loss": 0.3905, "step": 2412 }, { "epoch": 1.1979149429091511, "grad_norm": 0.47071120142936707, "learning_rate": 7.51083424856063e-06, "loss": 0.3985, "step": 2413 }, { "epoch": 1.1984113850736389, "grad_norm": 0.4600197970867157, "learning_rate": 7.508335432047412e-06, "loss": 0.3793, "step": 2414 }, { "epoch": 1.1989078272381268, "grad_norm": 0.47093021869659424, "learning_rate": 7.505835778069166e-06, "loss": 0.3763, "step": 2415 }, { "epoch": 1.1994042694026146, "grad_norm": 0.5042209029197693, "learning_rate": 7.503335287460456e-06, "loss": 0.3929, "step": 2416 }, { "epoch": 1.1999007115671025, "grad_norm": 0.48855090141296387, "learning_rate": 7.500833961056133e-06, "loss": 0.3633, "step": 2417 }, { "epoch": 1.2003971537315903, "grad_norm": 0.5310403108596802, "learning_rate": 7.498331799691318e-06, "loss": 0.4193, "step": 2418 }, { "epoch": 1.200893595896078, "grad_norm": 0.5342064499855042, "learning_rate": 7.495828804201417e-06, "loss": 0.4198, "step": 2419 }, { "epoch": 1.201390038060566, "grad_norm": 0.517898678779602, "learning_rate": 7.493324975422112e-06, "loss": 0.3607, "step": 2420 }, { "epoch": 1.2018864802250537, "grad_norm": 0.49077755212783813, "learning_rate": 7.4908203141893594e-06, "loss": 0.3694, "step": 2421 }, { "epoch": 1.2023829223895417, "grad_norm": 0.47027212381362915, "learning_rate": 7.488314821339403e-06, "loss": 0.3937, "step": 2422 }, { "epoch": 1.2028793645540294, "grad_norm": 0.5497079491615295, "learning_rate": 7.485808497708757e-06, "loss": 0.4104, "step": 2423 }, { "epoch": 1.2033758067185172, "grad_norm": 0.5950957536697388, "learning_rate": 7.483301344134213e-06, "loss": 0.4055, "step": 2424 }, { "epoch": 1.2038722488830051, "grad_norm": 0.5146173238754272, "learning_rate": 7.480793361452842e-06, "loss": 0.3739, "step": 2425 }, { "epoch": 1.204368691047493, "grad_norm": 0.5368160009384155, "learning_rate": 7.478284550501992e-06, "loss": 0.3874, "step": 2426 }, { "epoch": 1.2048651332119809, "grad_norm": 0.5868874788284302, "learning_rate": 7.475774912119287e-06, "loss": 0.4084, "step": 2427 }, { "epoch": 1.2053615753764686, "grad_norm": 0.482779860496521, "learning_rate": 7.473264447142626e-06, "loss": 0.346, "step": 2428 }, { "epoch": 1.2058580175409566, "grad_norm": 0.6309033036231995, "learning_rate": 7.470753156410188e-06, "loss": 0.4263, "step": 2429 }, { "epoch": 1.2063544597054443, "grad_norm": 0.5498918294906616, "learning_rate": 7.46824104076042e-06, "loss": 0.3747, "step": 2430 }, { "epoch": 1.2068509018699323, "grad_norm": 0.4737209677696228, "learning_rate": 7.465728101032052e-06, "loss": 0.337, "step": 2431 }, { "epoch": 1.20734734403442, "grad_norm": 0.5845704078674316, "learning_rate": 7.4632143380640875e-06, "loss": 0.4037, "step": 2432 }, { "epoch": 1.2078437861989078, "grad_norm": 0.4938165545463562, "learning_rate": 7.460699752695801e-06, "loss": 0.4209, "step": 2433 }, { "epoch": 1.2083402283633957, "grad_norm": 0.5489631295204163, "learning_rate": 7.458184345766744e-06, "loss": 0.3523, "step": 2434 }, { "epoch": 1.2088366705278835, "grad_norm": 0.6433979272842407, "learning_rate": 7.455668118116746e-06, "loss": 0.3824, "step": 2435 }, { "epoch": 1.2093331126923714, "grad_norm": 0.4681095480918884, "learning_rate": 7.453151070585903e-06, "loss": 0.396, "step": 2436 }, { "epoch": 1.2098295548568592, "grad_norm": 0.6159718632698059, "learning_rate": 7.45063320401459e-06, "loss": 0.3713, "step": 2437 }, { "epoch": 1.210325997021347, "grad_norm": 0.6487888097763062, "learning_rate": 7.448114519243456e-06, "loss": 0.3923, "step": 2438 }, { "epoch": 1.2108224391858349, "grad_norm": 0.5053868889808655, "learning_rate": 7.445595017113418e-06, "loss": 0.3888, "step": 2439 }, { "epoch": 1.2113188813503226, "grad_norm": 0.5467628240585327, "learning_rate": 7.4430746984656736e-06, "loss": 0.3681, "step": 2440 }, { "epoch": 1.2118153235148106, "grad_norm": 0.6380398869514465, "learning_rate": 7.440553564141686e-06, "loss": 0.4679, "step": 2441 }, { "epoch": 1.2123117656792983, "grad_norm": 0.45091187953948975, "learning_rate": 7.438031614983195e-06, "loss": 0.3358, "step": 2442 }, { "epoch": 1.2128082078437863, "grad_norm": 0.7301981449127197, "learning_rate": 7.4355088518322076e-06, "loss": 0.4635, "step": 2443 }, { "epoch": 1.213304650008274, "grad_norm": 0.5031082630157471, "learning_rate": 7.432985275531009e-06, "loss": 0.3754, "step": 2444 }, { "epoch": 1.213801092172762, "grad_norm": 0.5093189477920532, "learning_rate": 7.430460886922152e-06, "loss": 0.3616, "step": 2445 }, { "epoch": 1.2142975343372497, "grad_norm": 0.6535641551017761, "learning_rate": 7.427935686848461e-06, "loss": 0.426, "step": 2446 }, { "epoch": 1.2147939765017375, "grad_norm": 0.48377081751823425, "learning_rate": 7.425409676153032e-06, "loss": 0.3689, "step": 2447 }, { "epoch": 1.2152904186662254, "grad_norm": 0.5694271922111511, "learning_rate": 7.42288285567923e-06, "loss": 0.4039, "step": 2448 }, { "epoch": 1.2157868608307132, "grad_norm": 0.5919223427772522, "learning_rate": 7.420355226270693e-06, "loss": 0.4418, "step": 2449 }, { "epoch": 1.2162833029952012, "grad_norm": 0.519576370716095, "learning_rate": 7.417826788771327e-06, "loss": 0.4167, "step": 2450 }, { "epoch": 1.216779745159689, "grad_norm": 0.5768771171569824, "learning_rate": 7.415297544025311e-06, "loss": 0.3601, "step": 2451 }, { "epoch": 1.2172761873241766, "grad_norm": 0.6436270475387573, "learning_rate": 7.412767492877089e-06, "loss": 0.3966, "step": 2452 }, { "epoch": 1.2177726294886646, "grad_norm": 0.48601192235946655, "learning_rate": 7.410236636171376e-06, "loss": 0.3844, "step": 2453 }, { "epoch": 1.2182690716531523, "grad_norm": 0.6192067861557007, "learning_rate": 7.407704974753157e-06, "loss": 0.4341, "step": 2454 }, { "epoch": 1.2187655138176403, "grad_norm": 0.4221775531768799, "learning_rate": 7.405172509467685e-06, "loss": 0.3134, "step": 2455 }, { "epoch": 1.219261955982128, "grad_norm": 0.5270872712135315, "learning_rate": 7.402639241160479e-06, "loss": 0.4591, "step": 2456 }, { "epoch": 1.2197583981466158, "grad_norm": 0.5916522741317749, "learning_rate": 7.400105170677333e-06, "loss": 0.4636, "step": 2457 }, { "epoch": 1.2202548403111038, "grad_norm": 0.4490011930465698, "learning_rate": 7.3975702988643e-06, "loss": 0.3652, "step": 2458 }, { "epoch": 1.2207512824755915, "grad_norm": 0.568234384059906, "learning_rate": 7.395034626567709e-06, "loss": 0.4057, "step": 2459 }, { "epoch": 1.2212477246400795, "grad_norm": 0.5205076932907104, "learning_rate": 7.392498154634147e-06, "loss": 0.397, "step": 2460 }, { "epoch": 1.2217441668045672, "grad_norm": 0.5033110976219177, "learning_rate": 7.3899608839104775e-06, "loss": 0.4399, "step": 2461 }, { "epoch": 1.2222406089690552, "grad_norm": 0.45437678694725037, "learning_rate": 7.3874228152438236e-06, "loss": 0.3811, "step": 2462 }, { "epoch": 1.222737051133543, "grad_norm": 0.4731200933456421, "learning_rate": 7.3848839494815775e-06, "loss": 0.3948, "step": 2463 }, { "epoch": 1.2232334932980309, "grad_norm": 0.43281009793281555, "learning_rate": 7.382344287471398e-06, "loss": 0.3516, "step": 2464 }, { "epoch": 1.2237299354625186, "grad_norm": 0.5193945169448853, "learning_rate": 7.379803830061211e-06, "loss": 0.4482, "step": 2465 }, { "epoch": 1.2242263776270064, "grad_norm": 0.48088014125823975, "learning_rate": 7.377262578099204e-06, "loss": 0.3872, "step": 2466 }, { "epoch": 1.2247228197914943, "grad_norm": 0.4448457658290863, "learning_rate": 7.374720532433832e-06, "loss": 0.3614, "step": 2467 }, { "epoch": 1.225219261955982, "grad_norm": 0.5600183606147766, "learning_rate": 7.372177693913817e-06, "loss": 0.4415, "step": 2468 }, { "epoch": 1.22571570412047, "grad_norm": 0.4941699504852295, "learning_rate": 7.36963406338814e-06, "loss": 0.3935, "step": 2469 }, { "epoch": 1.2262121462849578, "grad_norm": 0.46634677052497864, "learning_rate": 7.3670896417060555e-06, "loss": 0.365, "step": 2470 }, { "epoch": 1.2267085884494455, "grad_norm": 0.5530996322631836, "learning_rate": 7.364544429717071e-06, "loss": 0.4155, "step": 2471 }, { "epoch": 1.2272050306139335, "grad_norm": 0.49604812264442444, "learning_rate": 7.3619984282709665e-06, "loss": 0.3567, "step": 2472 }, { "epoch": 1.2277014727784212, "grad_norm": 0.4739789366722107, "learning_rate": 7.359451638217783e-06, "loss": 0.3625, "step": 2473 }, { "epoch": 1.2281979149429092, "grad_norm": 0.4561573565006256, "learning_rate": 7.356904060407823e-06, "loss": 0.3322, "step": 2474 }, { "epoch": 1.228694357107397, "grad_norm": 0.5460416674613953, "learning_rate": 7.354355695691655e-06, "loss": 0.4285, "step": 2475 }, { "epoch": 1.229190799271885, "grad_norm": 0.5139216780662537, "learning_rate": 7.3518065449201095e-06, "loss": 0.3328, "step": 2476 }, { "epoch": 1.2296872414363726, "grad_norm": 0.47846096754074097, "learning_rate": 7.349256608944275e-06, "loss": 0.4316, "step": 2477 }, { "epoch": 1.2301836836008606, "grad_norm": 0.5051803588867188, "learning_rate": 7.346705888615509e-06, "loss": 0.4108, "step": 2478 }, { "epoch": 1.2306801257653484, "grad_norm": 0.5413897633552551, "learning_rate": 7.344154384785426e-06, "loss": 0.3431, "step": 2479 }, { "epoch": 1.231176567929836, "grad_norm": 0.4524216055870056, "learning_rate": 7.341602098305904e-06, "loss": 0.4088, "step": 2480 }, { "epoch": 1.231673010094324, "grad_norm": 0.44711753726005554, "learning_rate": 7.339049030029084e-06, "loss": 0.3612, "step": 2481 }, { "epoch": 1.2321694522588118, "grad_norm": 0.5546899437904358, "learning_rate": 7.336495180807364e-06, "loss": 0.3865, "step": 2482 }, { "epoch": 1.2326658944232998, "grad_norm": 0.5081660747528076, "learning_rate": 7.333940551493406e-06, "loss": 0.4225, "step": 2483 }, { "epoch": 1.2331623365877875, "grad_norm": 0.43425291776657104, "learning_rate": 7.331385142940131e-06, "loss": 0.3764, "step": 2484 }, { "epoch": 1.2336587787522753, "grad_norm": 0.47655460238456726, "learning_rate": 7.32882895600072e-06, "loss": 0.3605, "step": 2485 }, { "epoch": 1.2341552209167632, "grad_norm": 0.5094716548919678, "learning_rate": 7.326271991528614e-06, "loss": 0.3699, "step": 2486 }, { "epoch": 1.234651663081251, "grad_norm": 0.49266523122787476, "learning_rate": 7.323714250377515e-06, "loss": 0.393, "step": 2487 }, { "epoch": 1.235148105245739, "grad_norm": 0.49343177676200867, "learning_rate": 7.321155733401382e-06, "loss": 0.3821, "step": 2488 }, { "epoch": 1.2356445474102267, "grad_norm": 0.5281352996826172, "learning_rate": 7.318596441454437e-06, "loss": 0.3712, "step": 2489 }, { "epoch": 1.2361409895747146, "grad_norm": 0.5805754661560059, "learning_rate": 7.316036375391156e-06, "loss": 0.4633, "step": 2490 }, { "epoch": 1.2366374317392024, "grad_norm": 0.42948412895202637, "learning_rate": 7.313475536066275e-06, "loss": 0.3504, "step": 2491 }, { "epoch": 1.2371338739036903, "grad_norm": 0.5082016587257385, "learning_rate": 7.31091392433479e-06, "loss": 0.3709, "step": 2492 }, { "epoch": 1.237630316068178, "grad_norm": 0.5121543407440186, "learning_rate": 7.3083515410519516e-06, "loss": 0.4019, "step": 2493 }, { "epoch": 1.2381267582326658, "grad_norm": 0.45897120237350464, "learning_rate": 7.305788387073272e-06, "loss": 0.4156, "step": 2494 }, { "epoch": 1.2386232003971538, "grad_norm": 0.4920487403869629, "learning_rate": 7.303224463254517e-06, "loss": 0.3503, "step": 2495 }, { "epoch": 1.2391196425616415, "grad_norm": 0.48511648178100586, "learning_rate": 7.3006597704517115e-06, "loss": 0.3757, "step": 2496 }, { "epoch": 1.2396160847261295, "grad_norm": 0.44919106364250183, "learning_rate": 7.298094309521138e-06, "loss": 0.3424, "step": 2497 }, { "epoch": 1.2401125268906172, "grad_norm": 0.5298908352851868, "learning_rate": 7.295528081319334e-06, "loss": 0.3929, "step": 2498 }, { "epoch": 1.240608969055105, "grad_norm": 0.5385826826095581, "learning_rate": 7.292961086703091e-06, "loss": 0.4263, "step": 2499 }, { "epoch": 1.241105411219593, "grad_norm": 0.4378737211227417, "learning_rate": 7.290393326529463e-06, "loss": 0.3497, "step": 2500 }, { "epoch": 1.2416018533840807, "grad_norm": 0.5024944543838501, "learning_rate": 7.28782480165575e-06, "loss": 0.3938, "step": 2501 }, { "epoch": 1.2420982955485687, "grad_norm": 0.5205275416374207, "learning_rate": 7.285255512939516e-06, "loss": 0.3714, "step": 2502 }, { "epoch": 1.2425947377130564, "grad_norm": 0.4623008668422699, "learning_rate": 7.2826854612385756e-06, "loss": 0.424, "step": 2503 }, { "epoch": 1.2430911798775444, "grad_norm": 0.49147793650627136, "learning_rate": 7.280114647411001e-06, "loss": 0.4159, "step": 2504 }, { "epoch": 1.243587622042032, "grad_norm": 0.5057128071784973, "learning_rate": 7.2775430723151155e-06, "loss": 0.3929, "step": 2505 }, { "epoch": 1.2440840642065198, "grad_norm": 0.5281651616096497, "learning_rate": 7.274970736809497e-06, "loss": 0.3975, "step": 2506 }, { "epoch": 1.2445805063710078, "grad_norm": 0.47953200340270996, "learning_rate": 7.272397641752982e-06, "loss": 0.3114, "step": 2507 }, { "epoch": 1.2450769485354956, "grad_norm": 0.542560338973999, "learning_rate": 7.269823788004653e-06, "loss": 0.3844, "step": 2508 }, { "epoch": 1.2455733906999835, "grad_norm": 0.4981084167957306, "learning_rate": 7.267249176423852e-06, "loss": 0.4056, "step": 2509 }, { "epoch": 1.2460698328644713, "grad_norm": 0.5392075777053833, "learning_rate": 7.264673807870172e-06, "loss": 0.3982, "step": 2510 }, { "epoch": 1.2465662750289592, "grad_norm": 0.5061963200569153, "learning_rate": 7.262097683203456e-06, "loss": 0.3937, "step": 2511 }, { "epoch": 1.247062717193447, "grad_norm": 0.5159602761268616, "learning_rate": 7.259520803283806e-06, "loss": 0.3735, "step": 2512 }, { "epoch": 1.2475591593579347, "grad_norm": 0.5299279689788818, "learning_rate": 7.2569431689715695e-06, "loss": 0.3902, "step": 2513 }, { "epoch": 1.2480556015224227, "grad_norm": 0.49996834993362427, "learning_rate": 7.25436478112735e-06, "loss": 0.3892, "step": 2514 }, { "epoch": 1.2485520436869104, "grad_norm": 0.4792983829975128, "learning_rate": 7.251785640611999e-06, "loss": 0.396, "step": 2515 }, { "epoch": 1.2490484858513984, "grad_norm": 0.557754397392273, "learning_rate": 7.249205748286623e-06, "loss": 0.4898, "step": 2516 }, { "epoch": 1.2495449280158861, "grad_norm": 0.5108155608177185, "learning_rate": 7.246625105012579e-06, "loss": 0.3667, "step": 2517 }, { "epoch": 1.2500413701803739, "grad_norm": 0.4918431341648102, "learning_rate": 7.244043711651472e-06, "loss": 0.4122, "step": 2518 }, { "epoch": 1.2505378123448618, "grad_norm": 0.41424164175987244, "learning_rate": 7.241461569065158e-06, "loss": 0.3113, "step": 2519 }, { "epoch": 1.2510342545093498, "grad_norm": 0.536949872970581, "learning_rate": 7.238878678115746e-06, "loss": 0.3376, "step": 2520 }, { "epoch": 1.2515306966738375, "grad_norm": 0.49436327815055847, "learning_rate": 7.2362950396655925e-06, "loss": 0.3669, "step": 2521 }, { "epoch": 1.2520271388383253, "grad_norm": 0.5080093741416931, "learning_rate": 7.233710654577306e-06, "loss": 0.3856, "step": 2522 }, { "epoch": 1.2525235810028132, "grad_norm": 0.42266377806663513, "learning_rate": 7.231125523713739e-06, "loss": 0.3595, "step": 2523 }, { "epoch": 1.253020023167301, "grad_norm": 0.5349867343902588, "learning_rate": 7.228539647938e-06, "loss": 0.4609, "step": 2524 }, { "epoch": 1.253516465331789, "grad_norm": 0.48479899764060974, "learning_rate": 7.225953028113439e-06, "loss": 0.3659, "step": 2525 }, { "epoch": 1.2540129074962767, "grad_norm": 0.5605447292327881, "learning_rate": 7.223365665103662e-06, "loss": 0.3546, "step": 2526 }, { "epoch": 1.2545093496607644, "grad_norm": 0.5551738142967224, "learning_rate": 7.220777559772515e-06, "loss": 0.3961, "step": 2527 }, { "epoch": 1.2550057918252524, "grad_norm": 0.5253196954727173, "learning_rate": 7.2181887129841e-06, "loss": 0.3497, "step": 2528 }, { "epoch": 1.2555022339897401, "grad_norm": 0.5588341951370239, "learning_rate": 7.215599125602759e-06, "loss": 0.3977, "step": 2529 }, { "epoch": 1.255998676154228, "grad_norm": 0.49951350688934326, "learning_rate": 7.2130087984930885e-06, "loss": 0.3812, "step": 2530 }, { "epoch": 1.2564951183187159, "grad_norm": 0.624385416507721, "learning_rate": 7.210417732519926e-06, "loss": 0.4022, "step": 2531 }, { "epoch": 1.2569915604832036, "grad_norm": 0.5165929794311523, "learning_rate": 7.207825928548358e-06, "loss": 0.356, "step": 2532 }, { "epoch": 1.2574880026476916, "grad_norm": 0.5480858683586121, "learning_rate": 7.2052333874437175e-06, "loss": 0.3921, "step": 2533 }, { "epoch": 1.2579844448121793, "grad_norm": 0.5685137510299683, "learning_rate": 7.202640110071584e-06, "loss": 0.3517, "step": 2534 }, { "epoch": 1.2584808869766673, "grad_norm": 0.5701687932014465, "learning_rate": 7.200046097297782e-06, "loss": 0.421, "step": 2535 }, { "epoch": 1.258977329141155, "grad_norm": 0.48400962352752686, "learning_rate": 7.197451349988382e-06, "loss": 0.3766, "step": 2536 }, { "epoch": 1.259473771305643, "grad_norm": 0.6711210012435913, "learning_rate": 7.194855869009701e-06, "loss": 0.4251, "step": 2537 }, { "epoch": 1.2599702134701307, "grad_norm": 0.5574687719345093, "learning_rate": 7.192259655228298e-06, "loss": 0.3994, "step": 2538 }, { "epoch": 1.2604666556346187, "grad_norm": 0.49496883153915405, "learning_rate": 7.189662709510977e-06, "loss": 0.4195, "step": 2539 }, { "epoch": 1.2609630977991064, "grad_norm": 0.5330886244773865, "learning_rate": 7.1870650327247895e-06, "loss": 0.3582, "step": 2540 }, { "epoch": 1.2614595399635942, "grad_norm": 0.6304511427879333, "learning_rate": 7.1844666257370296e-06, "loss": 0.4127, "step": 2541 }, { "epoch": 1.2619559821280821, "grad_norm": 0.5566669702529907, "learning_rate": 7.181867489415233e-06, "loss": 0.3842, "step": 2542 }, { "epoch": 1.2624524242925699, "grad_norm": 0.4574280381202698, "learning_rate": 7.179267624627182e-06, "loss": 0.3337, "step": 2543 }, { "epoch": 1.2629488664570578, "grad_norm": 0.5164908170700073, "learning_rate": 7.1766670322409005e-06, "loss": 0.3529, "step": 2544 }, { "epoch": 1.2634453086215456, "grad_norm": 0.5841267108917236, "learning_rate": 7.1740657131246545e-06, "loss": 0.3887, "step": 2545 }, { "epoch": 1.2639417507860333, "grad_norm": 0.5440463423728943, "learning_rate": 7.171463668146957e-06, "loss": 0.4319, "step": 2546 }, { "epoch": 1.2644381929505213, "grad_norm": 0.49000343680381775, "learning_rate": 7.168860898176555e-06, "loss": 0.3968, "step": 2547 }, { "epoch": 1.264934635115009, "grad_norm": 0.5002177953720093, "learning_rate": 7.166257404082446e-06, "loss": 0.3958, "step": 2548 }, { "epoch": 1.265431077279497, "grad_norm": 0.6062672138214111, "learning_rate": 7.163653186733867e-06, "loss": 0.4151, "step": 2549 }, { "epoch": 1.2659275194439847, "grad_norm": 0.5013561844825745, "learning_rate": 7.161048247000292e-06, "loss": 0.3797, "step": 2550 }, { "epoch": 1.2664239616084725, "grad_norm": 0.4410761296749115, "learning_rate": 7.158442585751442e-06, "loss": 0.3907, "step": 2551 }, { "epoch": 1.2669204037729604, "grad_norm": 0.618492066860199, "learning_rate": 7.155836203857276e-06, "loss": 0.4185, "step": 2552 }, { "epoch": 1.2674168459374484, "grad_norm": 0.47494566440582275, "learning_rate": 7.153229102187994e-06, "loss": 0.3184, "step": 2553 }, { "epoch": 1.2679132881019362, "grad_norm": 0.517630934715271, "learning_rate": 7.150621281614036e-06, "loss": 0.408, "step": 2554 }, { "epoch": 1.268409730266424, "grad_norm": 0.4979529082775116, "learning_rate": 7.148012743006083e-06, "loss": 0.4051, "step": 2555 }, { "epoch": 1.2689061724309119, "grad_norm": 0.49566522240638733, "learning_rate": 7.145403487235057e-06, "loss": 0.386, "step": 2556 }, { "epoch": 1.2694026145953996, "grad_norm": 0.4883257746696472, "learning_rate": 7.142793515172112e-06, "loss": 0.3334, "step": 2557 }, { "epoch": 1.2698990567598876, "grad_norm": 0.5583348274230957, "learning_rate": 7.140182827688651e-06, "loss": 0.4169, "step": 2558 }, { "epoch": 1.2703954989243753, "grad_norm": 0.48189777135849, "learning_rate": 7.137571425656311e-06, "loss": 0.3784, "step": 2559 }, { "epoch": 1.270891941088863, "grad_norm": 0.4559665322303772, "learning_rate": 7.1349593099469676e-06, "loss": 0.3373, "step": 2560 }, { "epoch": 1.271388383253351, "grad_norm": 0.5081077814102173, "learning_rate": 7.132346481432737e-06, "loss": 0.3765, "step": 2561 }, { "epoch": 1.2718848254178388, "grad_norm": 0.5796273350715637, "learning_rate": 7.129732940985969e-06, "loss": 0.4331, "step": 2562 }, { "epoch": 1.2723812675823267, "grad_norm": 0.4955868422985077, "learning_rate": 7.127118689479256e-06, "loss": 0.4038, "step": 2563 }, { "epoch": 1.2728777097468145, "grad_norm": 0.49203819036483765, "learning_rate": 7.124503727785424e-06, "loss": 0.3883, "step": 2564 }, { "epoch": 1.2733741519113022, "grad_norm": 0.6160573959350586, "learning_rate": 7.121888056777538e-06, "loss": 0.4341, "step": 2565 }, { "epoch": 1.2738705940757902, "grad_norm": 0.43339842557907104, "learning_rate": 7.1192716773289e-06, "loss": 0.3423, "step": 2566 }, { "epoch": 1.2743670362402781, "grad_norm": 0.5944221615791321, "learning_rate": 7.116654590313045e-06, "loss": 0.4062, "step": 2567 }, { "epoch": 1.2748634784047659, "grad_norm": 0.5640813112258911, "learning_rate": 7.114036796603752e-06, "loss": 0.3957, "step": 2568 }, { "epoch": 1.2753599205692536, "grad_norm": 0.47151803970336914, "learning_rate": 7.11141829707503e-06, "loss": 0.3941, "step": 2569 }, { "epoch": 1.2758563627337416, "grad_norm": 0.5323421955108643, "learning_rate": 7.108799092601122e-06, "loss": 0.4036, "step": 2570 }, { "epoch": 1.2763528048982293, "grad_norm": 0.5974299311637878, "learning_rate": 7.106179184056512e-06, "loss": 0.3922, "step": 2571 }, { "epoch": 1.2768492470627173, "grad_norm": 0.47996801137924194, "learning_rate": 7.103558572315914e-06, "loss": 0.3906, "step": 2572 }, { "epoch": 1.277345689227205, "grad_norm": 0.5089288353919983, "learning_rate": 7.100937258254281e-06, "loss": 0.3573, "step": 2573 }, { "epoch": 1.2778421313916928, "grad_norm": 0.633018434047699, "learning_rate": 7.098315242746797e-06, "loss": 0.4225, "step": 2574 }, { "epoch": 1.2783385735561807, "grad_norm": 0.49534544348716736, "learning_rate": 7.095692526668882e-06, "loss": 0.3308, "step": 2575 }, { "epoch": 1.2788350157206685, "grad_norm": 0.5344131588935852, "learning_rate": 7.093069110896194e-06, "loss": 0.3863, "step": 2576 }, { "epoch": 1.2793314578851565, "grad_norm": 0.5656041502952576, "learning_rate": 7.090444996304613e-06, "loss": 0.4013, "step": 2577 }, { "epoch": 1.2798279000496442, "grad_norm": 0.5493302941322327, "learning_rate": 7.087820183770264e-06, "loss": 0.4033, "step": 2578 }, { "epoch": 1.280324342214132, "grad_norm": 0.5400651097297668, "learning_rate": 7.0851946741694975e-06, "loss": 0.3524, "step": 2579 }, { "epoch": 1.28082078437862, "grad_norm": 0.5209118723869324, "learning_rate": 7.082568468378905e-06, "loss": 0.3975, "step": 2580 }, { "epoch": 1.2813172265431076, "grad_norm": 0.5289517045021057, "learning_rate": 7.079941567275299e-06, "loss": 0.4097, "step": 2581 }, { "epoch": 1.2818136687075956, "grad_norm": 0.5361064076423645, "learning_rate": 7.077313971735735e-06, "loss": 0.3248, "step": 2582 }, { "epoch": 1.2823101108720834, "grad_norm": 0.46760058403015137, "learning_rate": 7.074685682637493e-06, "loss": 0.3829, "step": 2583 }, { "epoch": 1.2828065530365713, "grad_norm": 0.45419180393218994, "learning_rate": 7.07205670085809e-06, "loss": 0.408, "step": 2584 }, { "epoch": 1.283302995201059, "grad_norm": 0.5577008128166199, "learning_rate": 7.069427027275268e-06, "loss": 0.3569, "step": 2585 }, { "epoch": 1.283799437365547, "grad_norm": 0.5130093097686768, "learning_rate": 7.0667966627670085e-06, "loss": 0.4054, "step": 2586 }, { "epoch": 1.2842958795300348, "grad_norm": 0.5372724533081055, "learning_rate": 7.064165608211513e-06, "loss": 0.3915, "step": 2587 }, { "epoch": 1.2847923216945225, "grad_norm": 0.5205749869346619, "learning_rate": 7.061533864487222e-06, "loss": 0.3785, "step": 2588 }, { "epoch": 1.2852887638590105, "grad_norm": 0.5225046873092651, "learning_rate": 7.058901432472805e-06, "loss": 0.388, "step": 2589 }, { "epoch": 1.2857852060234982, "grad_norm": 0.48210909962654114, "learning_rate": 7.056268313047155e-06, "loss": 0.363, "step": 2590 }, { "epoch": 1.2862816481879862, "grad_norm": 0.5682415962219238, "learning_rate": 7.053634507089402e-06, "loss": 0.4172, "step": 2591 }, { "epoch": 1.286778090352474, "grad_norm": 0.44178253412246704, "learning_rate": 7.051000015478903e-06, "loss": 0.3376, "step": 2592 }, { "epoch": 1.2872745325169617, "grad_norm": 0.5266363024711609, "learning_rate": 7.048364839095242e-06, "loss": 0.3778, "step": 2593 }, { "epoch": 1.2877709746814496, "grad_norm": 0.4572821259498596, "learning_rate": 7.045728978818231e-06, "loss": 0.3846, "step": 2594 }, { "epoch": 1.2882674168459374, "grad_norm": 0.5146253705024719, "learning_rate": 7.043092435527916e-06, "loss": 0.4346, "step": 2595 }, { "epoch": 1.2887638590104253, "grad_norm": 0.46208274364471436, "learning_rate": 7.040455210104564e-06, "loss": 0.3303, "step": 2596 }, { "epoch": 1.289260301174913, "grad_norm": 0.46202826499938965, "learning_rate": 7.037817303428674e-06, "loss": 0.3648, "step": 2597 }, { "epoch": 1.2897567433394008, "grad_norm": 0.4679618179798126, "learning_rate": 7.0351787163809695e-06, "loss": 0.3632, "step": 2598 }, { "epoch": 1.2902531855038888, "grad_norm": 0.5751457810401917, "learning_rate": 7.032539449842407e-06, "loss": 0.4464, "step": 2599 }, { "epoch": 1.2907496276683768, "grad_norm": 0.463943749666214, "learning_rate": 7.029899504694162e-06, "loss": 0.3554, "step": 2600 }, { "epoch": 1.2912460698328645, "grad_norm": 0.5370602011680603, "learning_rate": 7.0272588818176425e-06, "loss": 0.3671, "step": 2601 }, { "epoch": 1.2917425119973522, "grad_norm": 0.4601109027862549, "learning_rate": 7.0246175820944815e-06, "loss": 0.303, "step": 2602 }, { "epoch": 1.2922389541618402, "grad_norm": 0.6105486154556274, "learning_rate": 7.021975606406534e-06, "loss": 0.4935, "step": 2603 }, { "epoch": 1.292735396326328, "grad_norm": 0.4353608787059784, "learning_rate": 7.019332955635887e-06, "loss": 0.3947, "step": 2604 }, { "epoch": 1.293231838490816, "grad_norm": 0.4892846643924713, "learning_rate": 7.016689630664848e-06, "loss": 0.365, "step": 2605 }, { "epoch": 1.2937282806553037, "grad_norm": 0.6245917677879333, "learning_rate": 7.014045632375952e-06, "loss": 0.4368, "step": 2606 }, { "epoch": 1.2942247228197914, "grad_norm": 0.4518301784992218, "learning_rate": 7.011400961651958e-06, "loss": 0.3556, "step": 2607 }, { "epoch": 1.2947211649842794, "grad_norm": 0.5734021067619324, "learning_rate": 7.00875561937585e-06, "loss": 0.3831, "step": 2608 }, { "epoch": 1.295217607148767, "grad_norm": 0.4963547885417938, "learning_rate": 7.006109606430836e-06, "loss": 0.3581, "step": 2609 }, { "epoch": 1.295714049313255, "grad_norm": 0.5054352283477783, "learning_rate": 7.003462923700346e-06, "loss": 0.3767, "step": 2610 }, { "epoch": 1.2962104914777428, "grad_norm": 0.6206742525100708, "learning_rate": 7.000815572068038e-06, "loss": 0.4601, "step": 2611 }, { "epoch": 1.2967069336422306, "grad_norm": 0.5653295516967773, "learning_rate": 6.998167552417789e-06, "loss": 0.4332, "step": 2612 }, { "epoch": 1.2972033758067185, "grad_norm": 0.4292779266834259, "learning_rate": 6.995518865633703e-06, "loss": 0.3379, "step": 2613 }, { "epoch": 1.2976998179712065, "grad_norm": 0.5319264531135559, "learning_rate": 6.992869512600101e-06, "loss": 0.4, "step": 2614 }, { "epoch": 1.2981962601356942, "grad_norm": 0.5727825164794922, "learning_rate": 6.990219494201532e-06, "loss": 0.3937, "step": 2615 }, { "epoch": 1.298692702300182, "grad_norm": 0.5172706842422485, "learning_rate": 6.9875688113227656e-06, "loss": 0.3656, "step": 2616 }, { "epoch": 1.29918914446467, "grad_norm": 0.5072450637817383, "learning_rate": 6.984917464848793e-06, "loss": 0.3619, "step": 2617 }, { "epoch": 1.2996855866291577, "grad_norm": 0.47954562306404114, "learning_rate": 6.982265455664825e-06, "loss": 0.3537, "step": 2618 }, { "epoch": 1.3001820287936456, "grad_norm": 0.5356285572052002, "learning_rate": 6.979612784656298e-06, "loss": 0.3666, "step": 2619 }, { "epoch": 1.3006784709581334, "grad_norm": 0.524986207485199, "learning_rate": 6.9769594527088625e-06, "loss": 0.4318, "step": 2620 }, { "epoch": 1.3011749131226211, "grad_norm": 0.5057681798934937, "learning_rate": 6.974305460708398e-06, "loss": 0.3965, "step": 2621 }, { "epoch": 1.301671355287109, "grad_norm": 0.458638072013855, "learning_rate": 6.9716508095409985e-06, "loss": 0.336, "step": 2622 }, { "epoch": 1.3021677974515968, "grad_norm": 0.4980895519256592, "learning_rate": 6.968995500092981e-06, "loss": 0.3689, "step": 2623 }, { "epoch": 1.3026642396160848, "grad_norm": 0.5421167612075806, "learning_rate": 6.966339533250879e-06, "loss": 0.4003, "step": 2624 }, { "epoch": 1.3031606817805725, "grad_norm": 0.5021947026252747, "learning_rate": 6.96368290990145e-06, "loss": 0.4336, "step": 2625 }, { "epoch": 1.3036571239450603, "grad_norm": 0.5392422676086426, "learning_rate": 6.961025630931667e-06, "loss": 0.3504, "step": 2626 }, { "epoch": 1.3041535661095482, "grad_norm": 0.48228132724761963, "learning_rate": 6.958367697228725e-06, "loss": 0.3552, "step": 2627 }, { "epoch": 1.3046500082740362, "grad_norm": 0.529038667678833, "learning_rate": 6.955709109680032e-06, "loss": 0.4036, "step": 2628 }, { "epoch": 1.305146450438524, "grad_norm": 0.5629602074623108, "learning_rate": 6.9530498691732205e-06, "loss": 0.4763, "step": 2629 }, { "epoch": 1.3056428926030117, "grad_norm": 0.46715018153190613, "learning_rate": 6.9503899765961406e-06, "loss": 0.4167, "step": 2630 }, { "epoch": 1.3061393347674997, "grad_norm": 0.5063503980636597, "learning_rate": 6.947729432836854e-06, "loss": 0.3349, "step": 2631 }, { "epoch": 1.3066357769319874, "grad_norm": 0.5404956340789795, "learning_rate": 6.945068238783648e-06, "loss": 0.3748, "step": 2632 }, { "epoch": 1.3071322190964754, "grad_norm": 0.5558984875679016, "learning_rate": 6.942406395325021e-06, "loss": 0.3986, "step": 2633 }, { "epoch": 1.307628661260963, "grad_norm": 0.5531495809555054, "learning_rate": 6.9397439033496894e-06, "loss": 0.3688, "step": 2634 }, { "epoch": 1.3081251034254509, "grad_norm": 0.5168246030807495, "learning_rate": 6.937080763746587e-06, "loss": 0.4213, "step": 2635 }, { "epoch": 1.3086215455899388, "grad_norm": 0.49358895421028137, "learning_rate": 6.9344169774048675e-06, "loss": 0.428, "step": 2636 }, { "epoch": 1.3091179877544266, "grad_norm": 0.5075780749320984, "learning_rate": 6.9317525452138915e-06, "loss": 0.3794, "step": 2637 }, { "epoch": 1.3096144299189145, "grad_norm": 0.4621506333351135, "learning_rate": 6.929087468063242e-06, "loss": 0.3428, "step": 2638 }, { "epoch": 1.3101108720834023, "grad_norm": 0.5629824995994568, "learning_rate": 6.9264217468427175e-06, "loss": 0.4369, "step": 2639 }, { "epoch": 1.31060731424789, "grad_norm": 0.47517815232276917, "learning_rate": 6.92375538244233e-06, "loss": 0.4783, "step": 2640 }, { "epoch": 1.311103756412378, "grad_norm": 0.42532482743263245, "learning_rate": 6.921088375752304e-06, "loss": 0.3461, "step": 2641 }, { "epoch": 1.3116001985768657, "grad_norm": 0.5003303289413452, "learning_rate": 6.918420727663084e-06, "loss": 0.4066, "step": 2642 }, { "epoch": 1.3120966407413537, "grad_norm": 0.5094057321548462, "learning_rate": 6.91575243906532e-06, "loss": 0.3642, "step": 2643 }, { "epoch": 1.3125930829058414, "grad_norm": 0.4947139620780945, "learning_rate": 6.913083510849884e-06, "loss": 0.3598, "step": 2644 }, { "epoch": 1.3130895250703294, "grad_norm": 0.5364342927932739, "learning_rate": 6.910413943907859e-06, "loss": 0.4175, "step": 2645 }, { "epoch": 1.3135859672348171, "grad_norm": 0.5234874486923218, "learning_rate": 6.907743739130539e-06, "loss": 0.3747, "step": 2646 }, { "epoch": 1.314082409399305, "grad_norm": 0.4738132059574127, "learning_rate": 6.905072897409436e-06, "loss": 0.323, "step": 2647 }, { "epoch": 1.3145788515637928, "grad_norm": 0.49420440196990967, "learning_rate": 6.902401419636269e-06, "loss": 0.4374, "step": 2648 }, { "epoch": 1.3150752937282806, "grad_norm": 0.5411880612373352, "learning_rate": 6.899729306702973e-06, "loss": 0.4069, "step": 2649 }, { "epoch": 1.3155717358927685, "grad_norm": 0.4490676820278168, "learning_rate": 6.897056559501693e-06, "loss": 0.388, "step": 2650 }, { "epoch": 1.3160681780572563, "grad_norm": 0.4830697774887085, "learning_rate": 6.894383178924787e-06, "loss": 0.3845, "step": 2651 }, { "epoch": 1.3165646202217443, "grad_norm": 0.4967210292816162, "learning_rate": 6.891709165864824e-06, "loss": 0.4058, "step": 2652 }, { "epoch": 1.317061062386232, "grad_norm": 0.6337066292762756, "learning_rate": 6.889034521214583e-06, "loss": 0.4355, "step": 2653 }, { "epoch": 1.3175575045507197, "grad_norm": 0.47389525175094604, "learning_rate": 6.886359245867057e-06, "loss": 0.3056, "step": 2654 }, { "epoch": 1.3180539467152077, "grad_norm": 0.518921434879303, "learning_rate": 6.883683340715448e-06, "loss": 0.3806, "step": 2655 }, { "epoch": 1.3185503888796954, "grad_norm": 0.5232374668121338, "learning_rate": 6.881006806653167e-06, "loss": 0.3693, "step": 2656 }, { "epoch": 1.3190468310441834, "grad_norm": 0.49684056639671326, "learning_rate": 6.878329644573835e-06, "loss": 0.4198, "step": 2657 }, { "epoch": 1.3195432732086712, "grad_norm": 0.4420150816440582, "learning_rate": 6.875651855371287e-06, "loss": 0.3595, "step": 2658 }, { "epoch": 1.320039715373159, "grad_norm": 0.5068520307540894, "learning_rate": 6.872973439939561e-06, "loss": 0.3801, "step": 2659 }, { "epoch": 1.3205361575376469, "grad_norm": 0.5186331272125244, "learning_rate": 6.870294399172908e-06, "loss": 0.454, "step": 2660 }, { "epoch": 1.3210325997021348, "grad_norm": 0.4768770635128021, "learning_rate": 6.867614733965786e-06, "loss": 0.3522, "step": 2661 }, { "epoch": 1.3215290418666226, "grad_norm": 0.5254129767417908, "learning_rate": 6.864934445212864e-06, "loss": 0.3642, "step": 2662 }, { "epoch": 1.3220254840311103, "grad_norm": 0.5266954302787781, "learning_rate": 6.862253533809017e-06, "loss": 0.3985, "step": 2663 }, { "epoch": 1.3225219261955983, "grad_norm": 0.49741318821907043, "learning_rate": 6.859572000649328e-06, "loss": 0.4387, "step": 2664 }, { "epoch": 1.323018368360086, "grad_norm": 0.472665399312973, "learning_rate": 6.856889846629089e-06, "loss": 0.3314, "step": 2665 }, { "epoch": 1.323514810524574, "grad_norm": 0.5662683844566345, "learning_rate": 6.854207072643797e-06, "loss": 0.443, "step": 2666 }, { "epoch": 1.3240112526890617, "grad_norm": 0.43486666679382324, "learning_rate": 6.851523679589158e-06, "loss": 0.3308, "step": 2667 }, { "epoch": 1.3245076948535495, "grad_norm": 0.5351995825767517, "learning_rate": 6.848839668361085e-06, "loss": 0.4116, "step": 2668 }, { "epoch": 1.3250041370180374, "grad_norm": 0.49185919761657715, "learning_rate": 6.846155039855693e-06, "loss": 0.3874, "step": 2669 }, { "epoch": 1.3255005791825252, "grad_norm": 0.4895707666873932, "learning_rate": 6.843469794969311e-06, "loss": 0.4186, "step": 2670 }, { "epoch": 1.3259970213470131, "grad_norm": 0.4924766421318054, "learning_rate": 6.840783934598467e-06, "loss": 0.4014, "step": 2671 }, { "epoch": 1.3264934635115009, "grad_norm": 0.4641261100769043, "learning_rate": 6.838097459639896e-06, "loss": 0.3435, "step": 2672 }, { "epoch": 1.3269899056759886, "grad_norm": 0.5019619464874268, "learning_rate": 6.8354103709905415e-06, "loss": 0.3771, "step": 2673 }, { "epoch": 1.3274863478404766, "grad_norm": 0.5261362791061401, "learning_rate": 6.8327226695475464e-06, "loss": 0.4269, "step": 2674 }, { "epoch": 1.3279827900049646, "grad_norm": 0.5166534781455994, "learning_rate": 6.830034356208264e-06, "loss": 0.351, "step": 2675 }, { "epoch": 1.3284792321694523, "grad_norm": 0.4781677722930908, "learning_rate": 6.827345431870247e-06, "loss": 0.366, "step": 2676 }, { "epoch": 1.32897567433394, "grad_norm": 0.5418263077735901, "learning_rate": 6.824655897431254e-06, "loss": 0.4007, "step": 2677 }, { "epoch": 1.329472116498428, "grad_norm": 0.4324428141117096, "learning_rate": 6.821965753789248e-06, "loss": 0.3642, "step": 2678 }, { "epoch": 1.3299685586629157, "grad_norm": 0.47847265005111694, "learning_rate": 6.819275001842397e-06, "loss": 0.4052, "step": 2679 }, { "epoch": 1.3304650008274037, "grad_norm": 0.5223816633224487, "learning_rate": 6.8165836424890665e-06, "loss": 0.3753, "step": 2680 }, { "epoch": 1.3309614429918915, "grad_norm": 0.5560182929039001, "learning_rate": 6.813891676627831e-06, "loss": 0.3504, "step": 2681 }, { "epoch": 1.3314578851563792, "grad_norm": 0.510246217250824, "learning_rate": 6.811199105157462e-06, "loss": 0.3881, "step": 2682 }, { "epoch": 1.3319543273208672, "grad_norm": 0.4874461889266968, "learning_rate": 6.808505928976939e-06, "loss": 0.3614, "step": 2683 }, { "epoch": 1.332450769485355, "grad_norm": 0.5534042119979858, "learning_rate": 6.805812148985438e-06, "loss": 0.3811, "step": 2684 }, { "epoch": 1.3329472116498429, "grad_norm": 0.4764583706855774, "learning_rate": 6.803117766082339e-06, "loss": 0.3838, "step": 2685 }, { "epoch": 1.3334436538143306, "grad_norm": 0.551478922367096, "learning_rate": 6.800422781167224e-06, "loss": 0.4049, "step": 2686 }, { "epoch": 1.3339400959788184, "grad_norm": 0.4327585697174072, "learning_rate": 6.797727195139876e-06, "loss": 0.3378, "step": 2687 }, { "epoch": 1.3344365381433063, "grad_norm": 0.59022057056427, "learning_rate": 6.795031008900277e-06, "loss": 0.4051, "step": 2688 }, { "epoch": 1.334932980307794, "grad_norm": 0.44919130206108093, "learning_rate": 6.792334223348609e-06, "loss": 0.3592, "step": 2689 }, { "epoch": 1.335429422472282, "grad_norm": 0.5613179206848145, "learning_rate": 6.78963683938526e-06, "loss": 0.4249, "step": 2690 }, { "epoch": 1.3359258646367698, "grad_norm": 0.5308902263641357, "learning_rate": 6.786938857910806e-06, "loss": 0.3713, "step": 2691 }, { "epoch": 1.3364223068012577, "grad_norm": 0.4483213722705841, "learning_rate": 6.784240279826035e-06, "loss": 0.3689, "step": 2692 }, { "epoch": 1.3369187489657455, "grad_norm": 0.4629286825656891, "learning_rate": 6.781541106031928e-06, "loss": 0.3434, "step": 2693 }, { "epoch": 1.3374151911302334, "grad_norm": 0.5304310917854309, "learning_rate": 6.7788413374296665e-06, "loss": 0.3608, "step": 2694 }, { "epoch": 1.3379116332947212, "grad_norm": 0.517655074596405, "learning_rate": 6.776140974920627e-06, "loss": 0.3481, "step": 2695 }, { "epoch": 1.338408075459209, "grad_norm": 0.4859730005264282, "learning_rate": 6.77344001940639e-06, "loss": 0.3611, "step": 2696 }, { "epoch": 1.3389045176236969, "grad_norm": 0.5542461276054382, "learning_rate": 6.770738471788729e-06, "loss": 0.3392, "step": 2697 }, { "epoch": 1.3394009597881846, "grad_norm": 0.5361152291297913, "learning_rate": 6.7680363329696184e-06, "loss": 0.4305, "step": 2698 }, { "epoch": 1.3398974019526726, "grad_norm": 0.4350699484348297, "learning_rate": 6.7653336038512294e-06, "loss": 0.3894, "step": 2699 }, { "epoch": 1.3403938441171603, "grad_norm": 0.5138221979141235, "learning_rate": 6.762630285335929e-06, "loss": 0.3439, "step": 2700 }, { "epoch": 1.340890286281648, "grad_norm": 0.5303753614425659, "learning_rate": 6.759926378326281e-06, "loss": 0.4547, "step": 2701 }, { "epoch": 1.341386728446136, "grad_norm": 0.5009359121322632, "learning_rate": 6.757221883725048e-06, "loss": 0.4315, "step": 2702 }, { "epoch": 1.3418831706106238, "grad_norm": 0.45971187949180603, "learning_rate": 6.754516802435187e-06, "loss": 0.315, "step": 2703 }, { "epoch": 1.3423796127751118, "grad_norm": 0.5412535071372986, "learning_rate": 6.751811135359851e-06, "loss": 0.36, "step": 2704 }, { "epoch": 1.3428760549395995, "grad_norm": 0.5032132863998413, "learning_rate": 6.7491048834023884e-06, "loss": 0.4247, "step": 2705 }, { "epoch": 1.3433724971040872, "grad_norm": 0.4411214590072632, "learning_rate": 6.746398047466343e-06, "loss": 0.3627, "step": 2706 }, { "epoch": 1.3438689392685752, "grad_norm": 0.567470133304596, "learning_rate": 6.7436906284554545e-06, "loss": 0.4258, "step": 2707 }, { "epoch": 1.3443653814330632, "grad_norm": 0.4781873822212219, "learning_rate": 6.740982627273655e-06, "loss": 0.4402, "step": 2708 }, { "epoch": 1.344861823597551, "grad_norm": 0.48282018303871155, "learning_rate": 6.738274044825074e-06, "loss": 0.3881, "step": 2709 }, { "epoch": 1.3453582657620387, "grad_norm": 0.40556085109710693, "learning_rate": 6.735564882014032e-06, "loss": 0.3459, "step": 2710 }, { "epoch": 1.3458547079265266, "grad_norm": 0.5869510769844055, "learning_rate": 6.732855139745047e-06, "loss": 0.4646, "step": 2711 }, { "epoch": 1.3463511500910144, "grad_norm": 0.43314552307128906, "learning_rate": 6.730144818922828e-06, "loss": 0.3535, "step": 2712 }, { "epoch": 1.3468475922555023, "grad_norm": 0.5107969045639038, "learning_rate": 6.727433920452275e-06, "loss": 0.3994, "step": 2713 }, { "epoch": 1.34734403441999, "grad_norm": 0.43179866671562195, "learning_rate": 6.724722445238487e-06, "loss": 0.3498, "step": 2714 }, { "epoch": 1.3478404765844778, "grad_norm": 0.5693528056144714, "learning_rate": 6.722010394186748e-06, "loss": 0.426, "step": 2715 }, { "epoch": 1.3483369187489658, "grad_norm": 0.5210204720497131, "learning_rate": 6.719297768202541e-06, "loss": 0.4583, "step": 2716 }, { "epoch": 1.3488333609134535, "grad_norm": 0.4550480842590332, "learning_rate": 6.716584568191538e-06, "loss": 0.3431, "step": 2717 }, { "epoch": 1.3493298030779415, "grad_norm": 0.5839453935623169, "learning_rate": 6.713870795059601e-06, "loss": 0.4073, "step": 2718 }, { "epoch": 1.3498262452424292, "grad_norm": 0.44118374586105347, "learning_rate": 6.711156449712786e-06, "loss": 0.343, "step": 2719 }, { "epoch": 1.350322687406917, "grad_norm": 0.5169638991355896, "learning_rate": 6.70844153305734e-06, "loss": 0.4258, "step": 2720 }, { "epoch": 1.350819129571405, "grad_norm": 0.47168341279029846, "learning_rate": 6.705726045999697e-06, "loss": 0.3357, "step": 2721 }, { "epoch": 1.351315571735893, "grad_norm": 0.5323677659034729, "learning_rate": 6.703009989446487e-06, "loss": 0.4003, "step": 2722 }, { "epoch": 1.3518120139003806, "grad_norm": 0.5143638849258423, "learning_rate": 6.700293364304528e-06, "loss": 0.4593, "step": 2723 }, { "epoch": 1.3523084560648684, "grad_norm": 0.5503333806991577, "learning_rate": 6.697576171480824e-06, "loss": 0.4253, "step": 2724 }, { "epoch": 1.3528048982293563, "grad_norm": 0.5424492955207825, "learning_rate": 6.6948584118825745e-06, "loss": 0.3613, "step": 2725 }, { "epoch": 1.353301340393844, "grad_norm": 0.5604655742645264, "learning_rate": 6.692140086417165e-06, "loss": 0.4341, "step": 2726 }, { "epoch": 1.353797782558332, "grad_norm": 0.49838685989379883, "learning_rate": 6.689421195992172e-06, "loss": 0.3378, "step": 2727 }, { "epoch": 1.3542942247228198, "grad_norm": 0.6124210953712463, "learning_rate": 6.686701741515355e-06, "loss": 0.4436, "step": 2728 }, { "epoch": 1.3547906668873075, "grad_norm": 0.46880024671554565, "learning_rate": 6.683981723894672e-06, "loss": 0.3848, "step": 2729 }, { "epoch": 1.3552871090517955, "grad_norm": 0.4371373951435089, "learning_rate": 6.681261144038257e-06, "loss": 0.3728, "step": 2730 }, { "epoch": 1.3557835512162832, "grad_norm": 0.4742935299873352, "learning_rate": 6.678540002854441e-06, "loss": 0.3811, "step": 2731 }, { "epoch": 1.3562799933807712, "grad_norm": 0.552609920501709, "learning_rate": 6.675818301251737e-06, "loss": 0.3935, "step": 2732 }, { "epoch": 1.356776435545259, "grad_norm": 0.4575135409832001, "learning_rate": 6.6730960401388504e-06, "loss": 0.3568, "step": 2733 }, { "epoch": 1.3572728777097467, "grad_norm": 0.48211154341697693, "learning_rate": 6.670373220424666e-06, "loss": 0.3608, "step": 2734 }, { "epoch": 1.3577693198742347, "grad_norm": 0.49773144721984863, "learning_rate": 6.6676498430182646e-06, "loss": 0.3816, "step": 2735 }, { "epoch": 1.3582657620387226, "grad_norm": 0.4984967112541199, "learning_rate": 6.664925908828902e-06, "loss": 0.3999, "step": 2736 }, { "epoch": 1.3587622042032104, "grad_norm": 0.4871288239955902, "learning_rate": 6.66220141876603e-06, "loss": 0.3882, "step": 2737 }, { "epoch": 1.359258646367698, "grad_norm": 0.4693106412887573, "learning_rate": 6.6594763737392794e-06, "loss": 0.3843, "step": 2738 }, { "epoch": 1.359755088532186, "grad_norm": 0.4136201739311218, "learning_rate": 6.656750774658471e-06, "loss": 0.3383, "step": 2739 }, { "epoch": 1.3602515306966738, "grad_norm": 0.4403086006641388, "learning_rate": 6.6540246224336045e-06, "loss": 0.4218, "step": 2740 }, { "epoch": 1.3607479728611618, "grad_norm": 0.4776681661605835, "learning_rate": 6.651297917974872e-06, "loss": 0.4199, "step": 2741 }, { "epoch": 1.3612444150256495, "grad_norm": 0.5287438035011292, "learning_rate": 6.648570662192646e-06, "loss": 0.4347, "step": 2742 }, { "epoch": 1.3617408571901373, "grad_norm": 0.4766487777233124, "learning_rate": 6.64584285599748e-06, "loss": 0.3573, "step": 2743 }, { "epoch": 1.3622372993546252, "grad_norm": 0.43281152844429016, "learning_rate": 6.643114500300116e-06, "loss": 0.3798, "step": 2744 }, { "epoch": 1.362733741519113, "grad_norm": 0.48735666275024414, "learning_rate": 6.640385596011478e-06, "loss": 0.3864, "step": 2745 }, { "epoch": 1.363230183683601, "grad_norm": 0.4591360092163086, "learning_rate": 6.637656144042672e-06, "loss": 0.4116, "step": 2746 }, { "epoch": 1.3637266258480887, "grad_norm": 0.4336644113063812, "learning_rate": 6.6349261453049895e-06, "loss": 0.3671, "step": 2747 }, { "epoch": 1.3642230680125764, "grad_norm": 0.5373453497886658, "learning_rate": 6.632195600709901e-06, "loss": 0.4012, "step": 2748 }, { "epoch": 1.3647195101770644, "grad_norm": 0.4664751887321472, "learning_rate": 6.629464511169062e-06, "loss": 0.3822, "step": 2749 }, { "epoch": 1.3652159523415521, "grad_norm": 0.4118252098560333, "learning_rate": 6.626732877594311e-06, "loss": 0.3493, "step": 2750 }, { "epoch": 1.36571239450604, "grad_norm": 0.5561785697937012, "learning_rate": 6.624000700897662e-06, "loss": 0.4472, "step": 2751 }, { "epoch": 1.3662088366705278, "grad_norm": 0.4447516202926636, "learning_rate": 6.6212679819913185e-06, "loss": 0.3874, "step": 2752 }, { "epoch": 1.3667052788350158, "grad_norm": 0.4899289608001709, "learning_rate": 6.618534721787658e-06, "loss": 0.4044, "step": 2753 }, { "epoch": 1.3672017209995035, "grad_norm": 0.4762173593044281, "learning_rate": 6.615800921199245e-06, "loss": 0.3538, "step": 2754 }, { "epoch": 1.3676981631639915, "grad_norm": 0.5324550867080688, "learning_rate": 6.613066581138819e-06, "loss": 0.3662, "step": 2755 }, { "epoch": 1.3681946053284793, "grad_norm": 0.48513466119766235, "learning_rate": 6.610331702519299e-06, "loss": 0.3714, "step": 2756 }, { "epoch": 1.368691047492967, "grad_norm": 0.5750452280044556, "learning_rate": 6.6075962862537934e-06, "loss": 0.4639, "step": 2757 }, { "epoch": 1.369187489657455, "grad_norm": 0.5113834738731384, "learning_rate": 6.6048603332555796e-06, "loss": 0.4119, "step": 2758 }, { "epoch": 1.3696839318219427, "grad_norm": 0.44084444642066956, "learning_rate": 6.602123844438117e-06, "loss": 0.3821, "step": 2759 }, { "epoch": 1.3701803739864307, "grad_norm": 0.5076250433921814, "learning_rate": 6.5993868207150465e-06, "loss": 0.3741, "step": 2760 }, { "epoch": 1.3706768161509184, "grad_norm": 0.587080717086792, "learning_rate": 6.596649263000187e-06, "loss": 0.4021, "step": 2761 }, { "epoch": 1.3711732583154062, "grad_norm": 0.4872446060180664, "learning_rate": 6.593911172207532e-06, "loss": 0.3664, "step": 2762 }, { "epoch": 1.3716697004798941, "grad_norm": 0.6175119876861572, "learning_rate": 6.591172549251255e-06, "loss": 0.4315, "step": 2763 }, { "epoch": 1.3721661426443819, "grad_norm": 0.4485227167606354, "learning_rate": 6.588433395045711e-06, "loss": 0.3769, "step": 2764 }, { "epoch": 1.3726625848088698, "grad_norm": 0.5071824789047241, "learning_rate": 6.5856937105054285e-06, "loss": 0.3898, "step": 2765 }, { "epoch": 1.3731590269733576, "grad_norm": 0.4832008481025696, "learning_rate": 6.582953496545112e-06, "loss": 0.3518, "step": 2766 }, { "epoch": 1.3736554691378453, "grad_norm": 0.45649027824401855, "learning_rate": 6.580212754079644e-06, "loss": 0.3409, "step": 2767 }, { "epoch": 1.3741519113023333, "grad_norm": 0.5058864951133728, "learning_rate": 6.5774714840240875e-06, "loss": 0.4231, "step": 2768 }, { "epoch": 1.3746483534668212, "grad_norm": 0.5127871036529541, "learning_rate": 6.574729687293675e-06, "loss": 0.3828, "step": 2769 }, { "epoch": 1.375144795631309, "grad_norm": 0.6408306956291199, "learning_rate": 6.571987364803819e-06, "loss": 0.4004, "step": 2770 }, { "epoch": 1.3756412377957967, "grad_norm": 0.4833792746067047, "learning_rate": 6.569244517470105e-06, "loss": 0.3923, "step": 2771 }, { "epoch": 1.3761376799602847, "grad_norm": 0.5229908227920532, "learning_rate": 6.5665011462082975e-06, "loss": 0.3883, "step": 2772 }, { "epoch": 1.3766341221247724, "grad_norm": 0.5205209851264954, "learning_rate": 6.5637572519343305e-06, "loss": 0.3489, "step": 2773 }, { "epoch": 1.3771305642892604, "grad_norm": 0.5053859353065491, "learning_rate": 6.56101283556432e-06, "loss": 0.4143, "step": 2774 }, { "epoch": 1.3776270064537481, "grad_norm": 0.5421066284179688, "learning_rate": 6.5582678980145476e-06, "loss": 0.4381, "step": 2775 }, { "epoch": 1.3781234486182359, "grad_norm": 0.5556256771087646, "learning_rate": 6.555522440201477e-06, "loss": 0.3833, "step": 2776 }, { "epoch": 1.3786198907827238, "grad_norm": 0.5119280219078064, "learning_rate": 6.55277646304174e-06, "loss": 0.3848, "step": 2777 }, { "epoch": 1.3791163329472116, "grad_norm": 0.5392611622810364, "learning_rate": 6.550029967452145e-06, "loss": 0.3522, "step": 2778 }, { "epoch": 1.3796127751116996, "grad_norm": 0.549945592880249, "learning_rate": 6.547282954349669e-06, "loss": 0.4163, "step": 2779 }, { "epoch": 1.3801092172761873, "grad_norm": 0.45233944058418274, "learning_rate": 6.544535424651468e-06, "loss": 0.3996, "step": 2780 }, { "epoch": 1.380605659440675, "grad_norm": 0.4954111576080322, "learning_rate": 6.541787379274869e-06, "loss": 0.3526, "step": 2781 }, { "epoch": 1.381102101605163, "grad_norm": 0.5243040919303894, "learning_rate": 6.539038819137364e-06, "loss": 0.395, "step": 2782 }, { "epoch": 1.381598543769651, "grad_norm": 0.4543014168739319, "learning_rate": 6.53628974515663e-06, "loss": 0.3528, "step": 2783 }, { "epoch": 1.3820949859341387, "grad_norm": 0.4736253321170807, "learning_rate": 6.533540158250502e-06, "loss": 0.3692, "step": 2784 }, { "epoch": 1.3825914280986265, "grad_norm": 0.5128673315048218, "learning_rate": 6.530790059336995e-06, "loss": 0.4032, "step": 2785 }, { "epoch": 1.3830878702631144, "grad_norm": 0.5038619041442871, "learning_rate": 6.528039449334291e-06, "loss": 0.3842, "step": 2786 }, { "epoch": 1.3835843124276022, "grad_norm": 0.5529479384422302, "learning_rate": 6.525288329160745e-06, "loss": 0.4218, "step": 2787 }, { "epoch": 1.3840807545920901, "grad_norm": 0.5285229682922363, "learning_rate": 6.522536699734881e-06, "loss": 0.3559, "step": 2788 }, { "epoch": 1.3845771967565779, "grad_norm": 0.48902979493141174, "learning_rate": 6.519784561975393e-06, "loss": 0.3408, "step": 2789 }, { "epoch": 1.3850736389210656, "grad_norm": 0.591895341873169, "learning_rate": 6.5170319168011455e-06, "loss": 0.3996, "step": 2790 }, { "epoch": 1.3855700810855536, "grad_norm": 0.5082195997238159, "learning_rate": 6.514278765131172e-06, "loss": 0.384, "step": 2791 }, { "epoch": 1.3860665232500413, "grad_norm": 0.4560558497905731, "learning_rate": 6.511525107884674e-06, "loss": 0.3678, "step": 2792 }, { "epoch": 1.3865629654145293, "grad_norm": 0.5929627418518066, "learning_rate": 6.5087709459810245e-06, "loss": 0.4098, "step": 2793 }, { "epoch": 1.387059407579017, "grad_norm": 0.43811362981796265, "learning_rate": 6.506016280339762e-06, "loss": 0.3395, "step": 2794 }, { "epoch": 1.3875558497435048, "grad_norm": 0.4511667490005493, "learning_rate": 6.503261111880593e-06, "loss": 0.3496, "step": 2795 }, { "epoch": 1.3880522919079927, "grad_norm": 0.5709704756736755, "learning_rate": 6.500505441523396e-06, "loss": 0.419, "step": 2796 }, { "epoch": 1.3885487340724805, "grad_norm": 0.5601177215576172, "learning_rate": 6.497749270188214e-06, "loss": 0.4164, "step": 2797 }, { "epoch": 1.3890451762369684, "grad_norm": 0.48739394545555115, "learning_rate": 6.494992598795258e-06, "loss": 0.3721, "step": 2798 }, { "epoch": 1.3895416184014562, "grad_norm": 0.47468990087509155, "learning_rate": 6.492235428264903e-06, "loss": 0.3637, "step": 2799 }, { "epoch": 1.3900380605659441, "grad_norm": 0.4754701256752014, "learning_rate": 6.489477759517697e-06, "loss": 0.3412, "step": 2800 }, { "epoch": 1.3905345027304319, "grad_norm": 0.5304427146911621, "learning_rate": 6.486719593474347e-06, "loss": 0.4177, "step": 2801 }, { "epoch": 1.3910309448949199, "grad_norm": 0.5400927066802979, "learning_rate": 6.483960931055735e-06, "loss": 0.3724, "step": 2802 }, { "epoch": 1.3915273870594076, "grad_norm": 0.4609144926071167, "learning_rate": 6.481201773182896e-06, "loss": 0.346, "step": 2803 }, { "epoch": 1.3920238292238953, "grad_norm": 0.4711148738861084, "learning_rate": 6.478442120777044e-06, "loss": 0.388, "step": 2804 }, { "epoch": 1.3925202713883833, "grad_norm": 0.4655759632587433, "learning_rate": 6.4756819747595486e-06, "loss": 0.4183, "step": 2805 }, { "epoch": 1.393016713552871, "grad_norm": 0.44808632135391235, "learning_rate": 6.472921336051949e-06, "loss": 0.3636, "step": 2806 }, { "epoch": 1.393513155717359, "grad_norm": 0.4535768926143646, "learning_rate": 6.4701602055759475e-06, "loss": 0.3528, "step": 2807 }, { "epoch": 1.3940095978818468, "grad_norm": 0.46228909492492676, "learning_rate": 6.4673985842534094e-06, "loss": 0.3821, "step": 2808 }, { "epoch": 1.3945060400463345, "grad_norm": 0.464330792427063, "learning_rate": 6.464636473006367e-06, "loss": 0.3586, "step": 2809 }, { "epoch": 1.3950024822108225, "grad_norm": 0.5083336234092712, "learning_rate": 6.461873872757012e-06, "loss": 0.4474, "step": 2810 }, { "epoch": 1.3954989243753102, "grad_norm": 0.4818829298019409, "learning_rate": 6.4591107844277015e-06, "loss": 0.3507, "step": 2811 }, { "epoch": 1.3959953665397982, "grad_norm": 0.42485108971595764, "learning_rate": 6.456347208940956e-06, "loss": 0.3396, "step": 2812 }, { "epoch": 1.396491808704286, "grad_norm": 0.5351386070251465, "learning_rate": 6.453583147219462e-06, "loss": 0.4463, "step": 2813 }, { "epoch": 1.3969882508687737, "grad_norm": 0.4698607623577118, "learning_rate": 6.45081860018606e-06, "loss": 0.3446, "step": 2814 }, { "epoch": 1.3974846930332616, "grad_norm": 0.47580668330192566, "learning_rate": 6.448053568763757e-06, "loss": 0.3897, "step": 2815 }, { "epoch": 1.3979811351977496, "grad_norm": 0.4962351620197296, "learning_rate": 6.445288053875724e-06, "loss": 0.3977, "step": 2816 }, { "epoch": 1.3984775773622373, "grad_norm": 0.49214980006217957, "learning_rate": 6.442522056445292e-06, "loss": 0.3807, "step": 2817 }, { "epoch": 1.398974019526725, "grad_norm": 0.48073169589042664, "learning_rate": 6.43975557739595e-06, "loss": 0.3402, "step": 2818 }, { "epoch": 1.399470461691213, "grad_norm": 0.5077465176582336, "learning_rate": 6.43698861765135e-06, "loss": 0.431, "step": 2819 }, { "epoch": 1.3999669038557008, "grad_norm": 0.4314229190349579, "learning_rate": 6.434221178135306e-06, "loss": 0.3792, "step": 2820 }, { "epoch": 1.4004633460201887, "grad_norm": 0.5104011297225952, "learning_rate": 6.431453259771792e-06, "loss": 0.4036, "step": 2821 }, { "epoch": 1.4009597881846765, "grad_norm": 0.4612751007080078, "learning_rate": 6.428684863484937e-06, "loss": 0.4186, "step": 2822 }, { "epoch": 1.4014562303491642, "grad_norm": 0.4490303099155426, "learning_rate": 6.425915990199038e-06, "loss": 0.3841, "step": 2823 }, { "epoch": 1.4019526725136522, "grad_norm": 0.6193149089813232, "learning_rate": 6.423146640838543e-06, "loss": 0.4274, "step": 2824 }, { "epoch": 1.40244911467814, "grad_norm": 0.44031643867492676, "learning_rate": 6.4203768163280645e-06, "loss": 0.3392, "step": 2825 }, { "epoch": 1.402945556842628, "grad_norm": 0.42441651225090027, "learning_rate": 6.417606517592371e-06, "loss": 0.3955, "step": 2826 }, { "epoch": 1.4034419990071156, "grad_norm": 0.49026504158973694, "learning_rate": 6.414835745556387e-06, "loss": 0.4554, "step": 2827 }, { "epoch": 1.4039384411716034, "grad_norm": 0.4730052053928375, "learning_rate": 6.412064501145203e-06, "loss": 0.3553, "step": 2828 }, { "epoch": 1.4044348833360913, "grad_norm": 0.5197017192840576, "learning_rate": 6.409292785284058e-06, "loss": 0.343, "step": 2829 }, { "epoch": 1.4049313255005793, "grad_norm": 0.46997949481010437, "learning_rate": 6.406520598898357e-06, "loss": 0.4095, "step": 2830 }, { "epoch": 1.405427767665067, "grad_norm": 0.5439953804016113, "learning_rate": 6.403747942913654e-06, "loss": 0.4239, "step": 2831 }, { "epoch": 1.4059242098295548, "grad_norm": 0.4454316794872284, "learning_rate": 6.400974818255665e-06, "loss": 0.3338, "step": 2832 }, { "epoch": 1.4064206519940428, "grad_norm": 0.4830251634120941, "learning_rate": 6.398201225850259e-06, "loss": 0.3711, "step": 2833 }, { "epoch": 1.4069170941585305, "grad_norm": 0.5741102695465088, "learning_rate": 6.395427166623466e-06, "loss": 0.4629, "step": 2834 }, { "epoch": 1.4074135363230185, "grad_norm": 0.4791935682296753, "learning_rate": 6.392652641501467e-06, "loss": 0.3922, "step": 2835 }, { "epoch": 1.4079099784875062, "grad_norm": 0.414103627204895, "learning_rate": 6.389877651410601e-06, "loss": 0.2882, "step": 2836 }, { "epoch": 1.408406420651994, "grad_norm": 0.5518940687179565, "learning_rate": 6.387102197277364e-06, "loss": 0.3964, "step": 2837 }, { "epoch": 1.408902862816482, "grad_norm": 0.5828872323036194, "learning_rate": 6.3843262800284e-06, "loss": 0.436, "step": 2838 }, { "epoch": 1.4093993049809697, "grad_norm": 0.4893738031387329, "learning_rate": 6.381549900590517e-06, "loss": 0.3886, "step": 2839 }, { "epoch": 1.4098957471454576, "grad_norm": 0.5797652006149292, "learning_rate": 6.378773059890669e-06, "loss": 0.4111, "step": 2840 }, { "epoch": 1.4103921893099454, "grad_norm": 0.4726372957229614, "learning_rate": 6.375995758855971e-06, "loss": 0.3426, "step": 2841 }, { "epoch": 1.410888631474433, "grad_norm": 0.4854039251804352, "learning_rate": 6.3732179984136855e-06, "loss": 0.4091, "step": 2842 }, { "epoch": 1.411385073638921, "grad_norm": 0.5758674740791321, "learning_rate": 6.370439779491233e-06, "loss": 0.4559, "step": 2843 }, { "epoch": 1.411881515803409, "grad_norm": 0.49533143639564514, "learning_rate": 6.367661103016183e-06, "loss": 0.3597, "step": 2844 }, { "epoch": 1.4123779579678968, "grad_norm": 0.44342777132987976, "learning_rate": 6.3648819699162634e-06, "loss": 0.3319, "step": 2845 }, { "epoch": 1.4128744001323845, "grad_norm": 0.5894331932067871, "learning_rate": 6.362102381119349e-06, "loss": 0.3887, "step": 2846 }, { "epoch": 1.4133708422968725, "grad_norm": 0.5347595810890198, "learning_rate": 6.359322337553471e-06, "loss": 0.4, "step": 2847 }, { "epoch": 1.4138672844613602, "grad_norm": 0.5221450924873352, "learning_rate": 6.356541840146806e-06, "loss": 0.4043, "step": 2848 }, { "epoch": 1.4143637266258482, "grad_norm": 0.585406482219696, "learning_rate": 6.35376088982769e-06, "loss": 0.3791, "step": 2849 }, { "epoch": 1.414860168790336, "grad_norm": 0.5057156085968018, "learning_rate": 6.350979487524607e-06, "loss": 0.3738, "step": 2850 }, { "epoch": 1.4153566109548237, "grad_norm": 0.5443475246429443, "learning_rate": 6.34819763416619e-06, "loss": 0.3813, "step": 2851 }, { "epoch": 1.4158530531193116, "grad_norm": 0.5209202766418457, "learning_rate": 6.345415330681226e-06, "loss": 0.4113, "step": 2852 }, { "epoch": 1.4163494952837994, "grad_norm": 0.41802358627319336, "learning_rate": 6.342632577998648e-06, "loss": 0.3363, "step": 2853 }, { "epoch": 1.4168459374482874, "grad_norm": 0.565624475479126, "learning_rate": 6.3398493770475445e-06, "loss": 0.3706, "step": 2854 }, { "epoch": 1.417342379612775, "grad_norm": 0.5760032534599304, "learning_rate": 6.337065728757148e-06, "loss": 0.4281, "step": 2855 }, { "epoch": 1.4178388217772628, "grad_norm": 0.4728202819824219, "learning_rate": 6.334281634056845e-06, "loss": 0.3701, "step": 2856 }, { "epoch": 1.4183352639417508, "grad_norm": 0.47691452503204346, "learning_rate": 6.3314970938761664e-06, "loss": 0.4089, "step": 2857 }, { "epoch": 1.4188317061062385, "grad_norm": 0.47185376286506653, "learning_rate": 6.328712109144798e-06, "loss": 0.3716, "step": 2858 }, { "epoch": 1.4193281482707265, "grad_norm": 0.5213720798492432, "learning_rate": 6.325926680792567e-06, "loss": 0.3835, "step": 2859 }, { "epoch": 1.4198245904352143, "grad_norm": 0.5074344277381897, "learning_rate": 6.323140809749456e-06, "loss": 0.365, "step": 2860 }, { "epoch": 1.4203210325997022, "grad_norm": 0.5486474633216858, "learning_rate": 6.320354496945588e-06, "loss": 0.4248, "step": 2861 }, { "epoch": 1.42081747476419, "grad_norm": 0.42449137568473816, "learning_rate": 6.31756774331124e-06, "loss": 0.3666, "step": 2862 }, { "epoch": 1.421313916928678, "grad_norm": 0.490147203207016, "learning_rate": 6.3147805497768314e-06, "loss": 0.3971, "step": 2863 }, { "epoch": 1.4218103590931657, "grad_norm": 0.4968741238117218, "learning_rate": 6.311992917272931e-06, "loss": 0.4082, "step": 2864 }, { "epoch": 1.4223068012576534, "grad_norm": 0.5345895290374756, "learning_rate": 6.309204846730254e-06, "loss": 0.3632, "step": 2865 }, { "epoch": 1.4228032434221414, "grad_norm": 0.4970369040966034, "learning_rate": 6.30641633907966e-06, "loss": 0.413, "step": 2866 }, { "epoch": 1.4232996855866291, "grad_norm": 0.5387256145477295, "learning_rate": 6.303627395252156e-06, "loss": 0.402, "step": 2867 }, { "epoch": 1.423796127751117, "grad_norm": 0.4603298008441925, "learning_rate": 6.3008380161788965e-06, "loss": 0.3646, "step": 2868 }, { "epoch": 1.4242925699156048, "grad_norm": 0.46968552470207214, "learning_rate": 6.298048202791179e-06, "loss": 0.3739, "step": 2869 }, { "epoch": 1.4247890120800926, "grad_norm": 0.5228338241577148, "learning_rate": 6.295257956020444e-06, "loss": 0.3983, "step": 2870 }, { "epoch": 1.4252854542445805, "grad_norm": 0.4620840847492218, "learning_rate": 6.2924672767982834e-06, "loss": 0.3696, "step": 2871 }, { "epoch": 1.4257818964090683, "grad_norm": 0.4990810453891754, "learning_rate": 6.2896761660564245e-06, "loss": 0.3967, "step": 2872 }, { "epoch": 1.4262783385735562, "grad_norm": 0.5816195011138916, "learning_rate": 6.286884624726746e-06, "loss": 0.3934, "step": 2873 }, { "epoch": 1.426774780738044, "grad_norm": 0.39196062088012695, "learning_rate": 6.284092653741264e-06, "loss": 0.3051, "step": 2874 }, { "epoch": 1.4272712229025317, "grad_norm": 0.4675210118293762, "learning_rate": 6.281300254032148e-06, "loss": 0.4307, "step": 2875 }, { "epoch": 1.4277676650670197, "grad_norm": 0.4462066888809204, "learning_rate": 6.278507426531698e-06, "loss": 0.3552, "step": 2876 }, { "epoch": 1.4282641072315077, "grad_norm": 0.501668393611908, "learning_rate": 6.275714172172368e-06, "loss": 0.3975, "step": 2877 }, { "epoch": 1.4287605493959954, "grad_norm": 0.6058095097541809, "learning_rate": 6.272920491886748e-06, "loss": 0.441, "step": 2878 }, { "epoch": 1.4292569915604831, "grad_norm": 0.42943528294563293, "learning_rate": 6.270126386607571e-06, "loss": 0.3755, "step": 2879 }, { "epoch": 1.429753433724971, "grad_norm": 0.4892215430736542, "learning_rate": 6.267331857267716e-06, "loss": 0.4559, "step": 2880 }, { "epoch": 1.4302498758894588, "grad_norm": 0.48291677236557007, "learning_rate": 6.264536904800196e-06, "loss": 0.3637, "step": 2881 }, { "epoch": 1.4307463180539468, "grad_norm": 0.4481963515281677, "learning_rate": 6.261741530138172e-06, "loss": 0.3915, "step": 2882 }, { "epoch": 1.4312427602184346, "grad_norm": 0.5227588415145874, "learning_rate": 6.258945734214942e-06, "loss": 0.4168, "step": 2883 }, { "epoch": 1.4317392023829223, "grad_norm": 0.4905730187892914, "learning_rate": 6.25614951796395e-06, "loss": 0.366, "step": 2884 }, { "epoch": 1.4322356445474103, "grad_norm": 0.4647907614707947, "learning_rate": 6.2533528823187725e-06, "loss": 0.3802, "step": 2885 }, { "epoch": 1.432732086711898, "grad_norm": 0.5532459020614624, "learning_rate": 6.250555828213133e-06, "loss": 0.3957, "step": 2886 }, { "epoch": 1.433228528876386, "grad_norm": 0.4839721620082855, "learning_rate": 6.24775835658089e-06, "loss": 0.4111, "step": 2887 }, { "epoch": 1.4337249710408737, "grad_norm": 0.5069684982299805, "learning_rate": 6.244960468356044e-06, "loss": 0.3493, "step": 2888 }, { "epoch": 1.4342214132053615, "grad_norm": 0.47172829508781433, "learning_rate": 6.242162164472734e-06, "loss": 0.3473, "step": 2889 }, { "epoch": 1.4347178553698494, "grad_norm": 0.5650798082351685, "learning_rate": 6.239363445865237e-06, "loss": 0.4192, "step": 2890 }, { "epoch": 1.4352142975343374, "grad_norm": 0.5625591278076172, "learning_rate": 6.236564313467969e-06, "loss": 0.4396, "step": 2891 }, { "epoch": 1.4357107396988251, "grad_norm": 0.41707801818847656, "learning_rate": 6.233764768215485e-06, "loss": 0.3316, "step": 2892 }, { "epoch": 1.4362071818633129, "grad_norm": 0.49326252937316895, "learning_rate": 6.230964811042477e-06, "loss": 0.3956, "step": 2893 }, { "epoch": 1.4367036240278008, "grad_norm": 0.5657305121421814, "learning_rate": 6.228164442883775e-06, "loss": 0.4037, "step": 2894 }, { "epoch": 1.4372000661922886, "grad_norm": 0.534703254699707, "learning_rate": 6.225363664674345e-06, "loss": 0.4034, "step": 2895 }, { "epoch": 1.4376965083567765, "grad_norm": 0.48682406544685364, "learning_rate": 6.22256247734929e-06, "loss": 0.4239, "step": 2896 }, { "epoch": 1.4381929505212643, "grad_norm": 0.5691820383071899, "learning_rate": 6.2197608818438515e-06, "loss": 0.4136, "step": 2897 }, { "epoch": 1.438689392685752, "grad_norm": 0.5135661959648132, "learning_rate": 6.216958879093405e-06, "loss": 0.38, "step": 2898 }, { "epoch": 1.43918583485024, "grad_norm": 0.5430982112884521, "learning_rate": 6.214156470033467e-06, "loss": 0.3901, "step": 2899 }, { "epoch": 1.4396822770147277, "grad_norm": 0.5424976944923401, "learning_rate": 6.211353655599679e-06, "loss": 0.3939, "step": 2900 }, { "epoch": 1.4401787191792157, "grad_norm": 0.48734551668167114, "learning_rate": 6.208550436727831e-06, "loss": 0.3542, "step": 2901 }, { "epoch": 1.4406751613437034, "grad_norm": 0.5290714502334595, "learning_rate": 6.2057468143538365e-06, "loss": 0.4059, "step": 2902 }, { "epoch": 1.4411716035081912, "grad_norm": 0.49247005581855774, "learning_rate": 6.202942789413753e-06, "loss": 0.3858, "step": 2903 }, { "epoch": 1.4416680456726791, "grad_norm": 0.43769606947898865, "learning_rate": 6.200138362843765e-06, "loss": 0.4222, "step": 2904 }, { "epoch": 1.442164487837167, "grad_norm": 0.4773324429988861, "learning_rate": 6.197333535580196e-06, "loss": 0.3593, "step": 2905 }, { "epoch": 1.4426609300016549, "grad_norm": 0.5412654876708984, "learning_rate": 6.194528308559501e-06, "loss": 0.42, "step": 2906 }, { "epoch": 1.4431573721661426, "grad_norm": 0.537624180316925, "learning_rate": 6.191722682718269e-06, "loss": 0.4036, "step": 2907 }, { "epoch": 1.4436538143306306, "grad_norm": 0.5187153220176697, "learning_rate": 6.188916658993223e-06, "loss": 0.3519, "step": 2908 }, { "epoch": 1.4441502564951183, "grad_norm": 0.5137400031089783, "learning_rate": 6.186110238321217e-06, "loss": 0.4192, "step": 2909 }, { "epoch": 1.4446466986596063, "grad_norm": 0.4776400029659271, "learning_rate": 6.18330342163924e-06, "loss": 0.4295, "step": 2910 }, { "epoch": 1.445143140824094, "grad_norm": 0.4566693902015686, "learning_rate": 6.1804962098844105e-06, "loss": 0.3381, "step": 2911 }, { "epoch": 1.4456395829885818, "grad_norm": 0.5937601327896118, "learning_rate": 6.177688603993981e-06, "loss": 0.3991, "step": 2912 }, { "epoch": 1.4461360251530697, "grad_norm": 0.46261394023895264, "learning_rate": 6.174880604905334e-06, "loss": 0.3667, "step": 2913 }, { "epoch": 1.4466324673175575, "grad_norm": 0.4723789691925049, "learning_rate": 6.1720722135559844e-06, "loss": 0.3659, "step": 2914 }, { "epoch": 1.4471289094820454, "grad_norm": 0.5394299626350403, "learning_rate": 6.1692634308835766e-06, "loss": 0.3522, "step": 2915 }, { "epoch": 1.4476253516465332, "grad_norm": 0.5595528483390808, "learning_rate": 6.16645425782589e-06, "loss": 0.416, "step": 2916 }, { "epoch": 1.448121793811021, "grad_norm": 0.4655613899230957, "learning_rate": 6.163644695320829e-06, "loss": 0.3828, "step": 2917 }, { "epoch": 1.4486182359755089, "grad_norm": 0.5312438011169434, "learning_rate": 6.160834744306429e-06, "loss": 0.3869, "step": 2918 }, { "epoch": 1.4491146781399966, "grad_norm": 0.5271704196929932, "learning_rate": 6.158024405720859e-06, "loss": 0.4361, "step": 2919 }, { "epoch": 1.4496111203044846, "grad_norm": 0.5149099826812744, "learning_rate": 6.155213680502412e-06, "loss": 0.3881, "step": 2920 }, { "epoch": 1.4501075624689723, "grad_norm": 0.4816376864910126, "learning_rate": 6.1524025695895155e-06, "loss": 0.3503, "step": 2921 }, { "epoch": 1.45060400463346, "grad_norm": 0.46625208854675293, "learning_rate": 6.14959107392072e-06, "loss": 0.3314, "step": 2922 }, { "epoch": 1.451100446797948, "grad_norm": 0.6022461652755737, "learning_rate": 6.146779194434711e-06, "loss": 0.4118, "step": 2923 }, { "epoch": 1.451596888962436, "grad_norm": 0.46962815523147583, "learning_rate": 6.143966932070295e-06, "loss": 0.3531, "step": 2924 }, { "epoch": 1.4520933311269237, "grad_norm": 0.46993646025657654, "learning_rate": 6.141154287766413e-06, "loss": 0.3935, "step": 2925 }, { "epoch": 1.4525897732914115, "grad_norm": 0.5146974921226501, "learning_rate": 6.138341262462129e-06, "loss": 0.4532, "step": 2926 }, { "epoch": 1.4530862154558994, "grad_norm": 0.4820963740348816, "learning_rate": 6.135527857096635e-06, "loss": 0.3619, "step": 2927 }, { "epoch": 1.4535826576203872, "grad_norm": 0.5173943638801575, "learning_rate": 6.132714072609251e-06, "loss": 0.4148, "step": 2928 }, { "epoch": 1.4540790997848752, "grad_norm": 0.49436283111572266, "learning_rate": 6.1298999099394256e-06, "loss": 0.4068, "step": 2929 }, { "epoch": 1.454575541949363, "grad_norm": 0.4709935784339905, "learning_rate": 6.1270853700267275e-06, "loss": 0.4036, "step": 2930 }, { "epoch": 1.4550719841138506, "grad_norm": 0.4436509311199188, "learning_rate": 6.124270453810858e-06, "loss": 0.3752, "step": 2931 }, { "epoch": 1.4555684262783386, "grad_norm": 0.5008065104484558, "learning_rate": 6.1214551622316385e-06, "loss": 0.3592, "step": 2932 }, { "epoch": 1.4560648684428263, "grad_norm": 0.5230710506439209, "learning_rate": 6.118639496229021e-06, "loss": 0.4258, "step": 2933 }, { "epoch": 1.4565613106073143, "grad_norm": 0.4568028450012207, "learning_rate": 6.115823456743079e-06, "loss": 0.3566, "step": 2934 }, { "epoch": 1.457057752771802, "grad_norm": 0.4865848422050476, "learning_rate": 6.11300704471401e-06, "loss": 0.3805, "step": 2935 }, { "epoch": 1.4575541949362898, "grad_norm": 0.505322277545929, "learning_rate": 6.11019026108214e-06, "loss": 0.4121, "step": 2936 }, { "epoch": 1.4580506371007778, "grad_norm": 0.4240327477455139, "learning_rate": 6.107373106787914e-06, "loss": 0.3361, "step": 2937 }, { "epoch": 1.4585470792652657, "grad_norm": 0.5059981346130371, "learning_rate": 6.104555582771904e-06, "loss": 0.4183, "step": 2938 }, { "epoch": 1.4590435214297535, "grad_norm": 0.4331313967704773, "learning_rate": 6.101737689974805e-06, "loss": 0.3186, "step": 2939 }, { "epoch": 1.4595399635942412, "grad_norm": 0.4777265787124634, "learning_rate": 6.098919429337436e-06, "loss": 0.4062, "step": 2940 }, { "epoch": 1.4600364057587292, "grad_norm": 0.4617581069469452, "learning_rate": 6.0961008018007365e-06, "loss": 0.411, "step": 2941 }, { "epoch": 1.460532847923217, "grad_norm": 0.42255645990371704, "learning_rate": 6.09328180830577e-06, "loss": 0.3667, "step": 2942 }, { "epoch": 1.4610292900877049, "grad_norm": 0.49780309200286865, "learning_rate": 6.090462449793721e-06, "loss": 0.3974, "step": 2943 }, { "epoch": 1.4615257322521926, "grad_norm": 0.5568789839744568, "learning_rate": 6.0876427272058955e-06, "loss": 0.4413, "step": 2944 }, { "epoch": 1.4620221744166804, "grad_norm": 0.43995797634124756, "learning_rate": 6.084822641483725e-06, "loss": 0.3214, "step": 2945 }, { "epoch": 1.4625186165811683, "grad_norm": 0.4635447561740875, "learning_rate": 6.082002193568759e-06, "loss": 0.4276, "step": 2946 }, { "epoch": 1.463015058745656, "grad_norm": 0.4522685110569, "learning_rate": 6.079181384402667e-06, "loss": 0.3643, "step": 2947 }, { "epoch": 1.463511500910144, "grad_norm": 0.4401547312736511, "learning_rate": 6.076360214927242e-06, "loss": 0.3934, "step": 2948 }, { "epoch": 1.4640079430746318, "grad_norm": 0.46827369928359985, "learning_rate": 6.0735386860843944e-06, "loss": 0.3807, "step": 2949 }, { "epoch": 1.4645043852391195, "grad_norm": 0.46283575892448425, "learning_rate": 6.070716798816157e-06, "loss": 0.3577, "step": 2950 }, { "epoch": 1.4650008274036075, "grad_norm": 0.4747432768344879, "learning_rate": 6.0678945540646815e-06, "loss": 0.3807, "step": 2951 }, { "epoch": 1.4654972695680955, "grad_norm": 0.5184155106544495, "learning_rate": 6.065071952772238e-06, "loss": 0.4314, "step": 2952 }, { "epoch": 1.4659937117325832, "grad_norm": 0.4701470732688904, "learning_rate": 6.062248995881216e-06, "loss": 0.3812, "step": 2953 }, { "epoch": 1.466490153897071, "grad_norm": 0.4981103837490082, "learning_rate": 6.0594256843341235e-06, "loss": 0.3974, "step": 2954 }, { "epoch": 1.466986596061559, "grad_norm": 0.5772714018821716, "learning_rate": 6.056602019073591e-06, "loss": 0.3868, "step": 2955 }, { "epoch": 1.4674830382260466, "grad_norm": 0.4372249245643616, "learning_rate": 6.05377800104236e-06, "loss": 0.3648, "step": 2956 }, { "epoch": 1.4679794803905346, "grad_norm": 0.5097983479499817, "learning_rate": 6.050953631183295e-06, "loss": 0.4186, "step": 2957 }, { "epoch": 1.4684759225550224, "grad_norm": 0.4681105315685272, "learning_rate": 6.048128910439374e-06, "loss": 0.3746, "step": 2958 }, { "epoch": 1.46897236471951, "grad_norm": 0.4437284767627716, "learning_rate": 6.045303839753699e-06, "loss": 0.3436, "step": 2959 }, { "epoch": 1.469468806883998, "grad_norm": 0.48966360092163086, "learning_rate": 6.042478420069481e-06, "loss": 0.4161, "step": 2960 }, { "epoch": 1.4699652490484858, "grad_norm": 0.4446941018104553, "learning_rate": 6.03965265233005e-06, "loss": 0.3795, "step": 2961 }, { "epoch": 1.4704616912129738, "grad_norm": 0.49069198966026306, "learning_rate": 6.036826537478856e-06, "loss": 0.3774, "step": 2962 }, { "epoch": 1.4709581333774615, "grad_norm": 0.4817504286766052, "learning_rate": 6.0340000764594595e-06, "loss": 0.4258, "step": 2963 }, { "epoch": 1.4714545755419493, "grad_norm": 0.49521809816360474, "learning_rate": 6.031173270215541e-06, "loss": 0.4108, "step": 2964 }, { "epoch": 1.4719510177064372, "grad_norm": 0.4771311581134796, "learning_rate": 6.028346119690893e-06, "loss": 0.3815, "step": 2965 }, { "epoch": 1.472447459870925, "grad_norm": 0.5284591913223267, "learning_rate": 6.025518625829425e-06, "loss": 0.4175, "step": 2966 }, { "epoch": 1.472943902035413, "grad_norm": 0.5405148267745972, "learning_rate": 6.022690789575159e-06, "loss": 0.3788, "step": 2967 }, { "epoch": 1.4734403441999007, "grad_norm": 0.46371397376060486, "learning_rate": 6.019862611872234e-06, "loss": 0.377, "step": 2968 }, { "epoch": 1.4739367863643886, "grad_norm": 0.5047737956047058, "learning_rate": 6.017034093664901e-06, "loss": 0.3586, "step": 2969 }, { "epoch": 1.4744332285288764, "grad_norm": 0.46036145091056824, "learning_rate": 6.014205235897526e-06, "loss": 0.3766, "step": 2970 }, { "epoch": 1.4749296706933643, "grad_norm": 0.5083185434341431, "learning_rate": 6.011376039514587e-06, "loss": 0.3991, "step": 2971 }, { "epoch": 1.475426112857852, "grad_norm": 0.4420025050640106, "learning_rate": 6.008546505460677e-06, "loss": 0.361, "step": 2972 }, { "epoch": 1.4759225550223398, "grad_norm": 0.5310854315757751, "learning_rate": 6.005716634680499e-06, "loss": 0.4122, "step": 2973 }, { "epoch": 1.4764189971868278, "grad_norm": 0.5636497139930725, "learning_rate": 6.002886428118869e-06, "loss": 0.412, "step": 2974 }, { "epoch": 1.4769154393513155, "grad_norm": 0.4647793173789978, "learning_rate": 6.000055886720719e-06, "loss": 0.3876, "step": 2975 }, { "epoch": 1.4774118815158035, "grad_norm": 0.45848941802978516, "learning_rate": 5.997225011431089e-06, "loss": 0.3492, "step": 2976 }, { "epoch": 1.4779083236802912, "grad_norm": 0.4959879219532013, "learning_rate": 5.994393803195129e-06, "loss": 0.3687, "step": 2977 }, { "epoch": 1.478404765844779, "grad_norm": 0.49131181836128235, "learning_rate": 5.991562262958105e-06, "loss": 0.3667, "step": 2978 }, { "epoch": 1.478901208009267, "grad_norm": 0.43077629804611206, "learning_rate": 5.9887303916653916e-06, "loss": 0.3344, "step": 2979 }, { "epoch": 1.4793976501737547, "grad_norm": 0.5275565385818481, "learning_rate": 5.985898190262471e-06, "loss": 0.4025, "step": 2980 }, { "epoch": 1.4798940923382427, "grad_norm": 0.41156449913978577, "learning_rate": 5.983065659694942e-06, "loss": 0.345, "step": 2981 }, { "epoch": 1.4803905345027304, "grad_norm": 0.457209050655365, "learning_rate": 5.980232800908507e-06, "loss": 0.3614, "step": 2982 }, { "epoch": 1.4808869766672181, "grad_norm": 0.4768245816230774, "learning_rate": 5.97739961484898e-06, "loss": 0.4151, "step": 2983 }, { "epoch": 1.481383418831706, "grad_norm": 0.4744529128074646, "learning_rate": 5.974566102462286e-06, "loss": 0.3895, "step": 2984 }, { "epoch": 1.481879860996194, "grad_norm": 0.4613817036151886, "learning_rate": 5.971732264694458e-06, "loss": 0.4068, "step": 2985 }, { "epoch": 1.4823763031606818, "grad_norm": 0.4628003239631653, "learning_rate": 5.9688981024916355e-06, "loss": 0.3581, "step": 2986 }, { "epoch": 1.4828727453251696, "grad_norm": 0.4954255223274231, "learning_rate": 5.966063616800072e-06, "loss": 0.4095, "step": 2987 }, { "epoch": 1.4833691874896575, "grad_norm": 0.48800069093704224, "learning_rate": 5.9632288085661215e-06, "loss": 0.4417, "step": 2988 }, { "epoch": 1.4838656296541453, "grad_norm": 0.44811126589775085, "learning_rate": 5.960393678736252e-06, "loss": 0.3278, "step": 2989 }, { "epoch": 1.4843620718186332, "grad_norm": 0.4542417526245117, "learning_rate": 5.9575582282570356e-06, "loss": 0.3774, "step": 2990 }, { "epoch": 1.484858513983121, "grad_norm": 0.47343775629997253, "learning_rate": 5.95472245807515e-06, "loss": 0.4498, "step": 2991 }, { "epoch": 1.4853549561476087, "grad_norm": 0.5164527893066406, "learning_rate": 5.951886369137384e-06, "loss": 0.4311, "step": 2992 }, { "epoch": 1.4858513983120967, "grad_norm": 0.5103790760040283, "learning_rate": 5.94904996239063e-06, "loss": 0.3781, "step": 2993 }, { "epoch": 1.4863478404765844, "grad_norm": 0.4897097051143646, "learning_rate": 5.946213238781889e-06, "loss": 0.3934, "step": 2994 }, { "epoch": 1.4868442826410724, "grad_norm": 0.43809816241264343, "learning_rate": 5.943376199258264e-06, "loss": 0.3877, "step": 2995 }, { "epoch": 1.4873407248055601, "grad_norm": 0.4456304907798767, "learning_rate": 5.9405388447669655e-06, "loss": 0.3993, "step": 2996 }, { "epoch": 1.4878371669700479, "grad_norm": 0.4723958373069763, "learning_rate": 5.9377011762553075e-06, "loss": 0.4252, "step": 2997 }, { "epoch": 1.4883336091345358, "grad_norm": 0.4823512136936188, "learning_rate": 5.9348631946707135e-06, "loss": 0.3784, "step": 2998 }, { "epoch": 1.4888300512990238, "grad_norm": 0.4729604721069336, "learning_rate": 5.932024900960707e-06, "loss": 0.3376, "step": 2999 }, { "epoch": 1.4893264934635115, "grad_norm": 0.4803103804588318, "learning_rate": 5.929186296072915e-06, "loss": 0.4151, "step": 3000 }, { "epoch": 1.4898229356279993, "grad_norm": 0.47063660621643066, "learning_rate": 5.926347380955074e-06, "loss": 0.4335, "step": 3001 }, { "epoch": 1.4903193777924872, "grad_norm": 0.4588666260242462, "learning_rate": 5.9235081565550205e-06, "loss": 0.3728, "step": 3002 }, { "epoch": 1.490815819956975, "grad_norm": 0.48250502347946167, "learning_rate": 5.920668623820692e-06, "loss": 0.3885, "step": 3003 }, { "epoch": 1.491312262121463, "grad_norm": 0.5572172403335571, "learning_rate": 5.917828783700132e-06, "loss": 0.4302, "step": 3004 }, { "epoch": 1.4918087042859507, "grad_norm": 0.4415736496448517, "learning_rate": 5.914988637141488e-06, "loss": 0.3267, "step": 3005 }, { "epoch": 1.4923051464504384, "grad_norm": 0.48094260692596436, "learning_rate": 5.912148185093004e-06, "loss": 0.4011, "step": 3006 }, { "epoch": 1.4928015886149264, "grad_norm": 0.4380779266357422, "learning_rate": 5.909307428503033e-06, "loss": 0.369, "step": 3007 }, { "epoch": 1.4932980307794141, "grad_norm": 0.4610111713409424, "learning_rate": 5.906466368320025e-06, "loss": 0.3472, "step": 3008 }, { "epoch": 1.493794472943902, "grad_norm": 0.519202709197998, "learning_rate": 5.903625005492532e-06, "loss": 0.4052, "step": 3009 }, { "epoch": 1.4942909151083899, "grad_norm": 0.5138774514198303, "learning_rate": 5.9007833409692094e-06, "loss": 0.3791, "step": 3010 }, { "epoch": 1.4947873572728776, "grad_norm": 0.5479790568351746, "learning_rate": 5.897941375698812e-06, "loss": 0.3418, "step": 3011 }, { "epoch": 1.4952837994373656, "grad_norm": 0.5075105428695679, "learning_rate": 5.895099110630193e-06, "loss": 0.4337, "step": 3012 }, { "epoch": 1.4957802416018535, "grad_norm": 0.43285059928894043, "learning_rate": 5.892256546712311e-06, "loss": 0.3353, "step": 3013 }, { "epoch": 1.4962766837663413, "grad_norm": 0.47480833530426025, "learning_rate": 5.889413684894215e-06, "loss": 0.352, "step": 3014 }, { "epoch": 1.496773125930829, "grad_norm": 0.48740699887275696, "learning_rate": 5.886570526125064e-06, "loss": 0.4186, "step": 3015 }, { "epoch": 1.497269568095317, "grad_norm": 0.45613187551498413, "learning_rate": 5.883727071354109e-06, "loss": 0.4115, "step": 3016 }, { "epoch": 1.4977660102598047, "grad_norm": 0.48340392112731934, "learning_rate": 5.880883321530702e-06, "loss": 0.4189, "step": 3017 }, { "epoch": 1.4982624524242927, "grad_norm": 0.4165792763233185, "learning_rate": 5.878039277604298e-06, "loss": 0.2943, "step": 3018 }, { "epoch": 1.4987588945887804, "grad_norm": 0.48501506447792053, "learning_rate": 5.875194940524442e-06, "loss": 0.3627, "step": 3019 }, { "epoch": 1.4992553367532682, "grad_norm": 0.5334231853485107, "learning_rate": 5.872350311240782e-06, "loss": 0.3976, "step": 3020 }, { "epoch": 1.4997517789177561, "grad_norm": 0.43618085980415344, "learning_rate": 5.869505390703062e-06, "loss": 0.3375, "step": 3021 }, { "epoch": 1.5002482210822439, "grad_norm": 0.46870240569114685, "learning_rate": 5.866660179861125e-06, "loss": 0.3802, "step": 3022 }, { "epoch": 1.5007446632467318, "grad_norm": 0.5335794687271118, "learning_rate": 5.8638146796649065e-06, "loss": 0.4051, "step": 3023 }, { "epoch": 1.5012411054112196, "grad_norm": 0.4826413691043854, "learning_rate": 5.860968891064445e-06, "loss": 0.4411, "step": 3024 }, { "epoch": 1.5017375475757073, "grad_norm": 0.48097482323646545, "learning_rate": 5.858122815009869e-06, "loss": 0.3642, "step": 3025 }, { "epoch": 1.5022339897401953, "grad_norm": 0.47042325139045715, "learning_rate": 5.8552764524514095e-06, "loss": 0.3209, "step": 3026 }, { "epoch": 1.5027304319046833, "grad_norm": 0.5223779678344727, "learning_rate": 5.852429804339386e-06, "loss": 0.3961, "step": 3027 }, { "epoch": 1.503226874069171, "grad_norm": 0.4718024730682373, "learning_rate": 5.84958287162422e-06, "loss": 0.3423, "step": 3028 }, { "epoch": 1.5037233162336587, "grad_norm": 0.4953024983406067, "learning_rate": 5.846735655256423e-06, "loss": 0.3928, "step": 3029 }, { "epoch": 1.5042197583981465, "grad_norm": 0.4952942430973053, "learning_rate": 5.843888156186604e-06, "loss": 0.473, "step": 3030 }, { "epoch": 1.5047162005626344, "grad_norm": 0.45386067032814026, "learning_rate": 5.841040375365464e-06, "loss": 0.3509, "step": 3031 }, { "epoch": 1.5052126427271224, "grad_norm": 0.5119835138320923, "learning_rate": 5.838192313743802e-06, "loss": 0.3327, "step": 3032 }, { "epoch": 1.5057090848916101, "grad_norm": 0.505625307559967, "learning_rate": 5.835343972272507e-06, "loss": 0.3944, "step": 3033 }, { "epoch": 1.506205527056098, "grad_norm": 0.49416452646255493, "learning_rate": 5.832495351902563e-06, "loss": 0.3685, "step": 3034 }, { "epoch": 1.5067019692205859, "grad_norm": 0.49477770924568176, "learning_rate": 5.829646453585047e-06, "loss": 0.3817, "step": 3035 }, { "epoch": 1.5071984113850736, "grad_norm": 0.5470396876335144, "learning_rate": 5.826797278271128e-06, "loss": 0.3503, "step": 3036 }, { "epoch": 1.5076948535495616, "grad_norm": 0.46110785007476807, "learning_rate": 5.8239478269120706e-06, "loss": 0.3391, "step": 3037 }, { "epoch": 1.5081912957140493, "grad_norm": 0.5033400654792786, "learning_rate": 5.821098100459226e-06, "loss": 0.4074, "step": 3038 }, { "epoch": 1.508687737878537, "grad_norm": 0.4320400059223175, "learning_rate": 5.818248099864042e-06, "loss": 0.372, "step": 3039 }, { "epoch": 1.509184180043025, "grad_norm": 0.48859524726867676, "learning_rate": 5.815397826078056e-06, "loss": 0.3915, "step": 3040 }, { "epoch": 1.509680622207513, "grad_norm": 0.49501124024391174, "learning_rate": 5.812547280052899e-06, "loss": 0.399, "step": 3041 }, { "epoch": 1.5101770643720007, "grad_norm": 0.502048909664154, "learning_rate": 5.809696462740287e-06, "loss": 0.4085, "step": 3042 }, { "epoch": 1.5106735065364885, "grad_norm": 0.48589378595352173, "learning_rate": 5.806845375092033e-06, "loss": 0.4096, "step": 3043 }, { "epoch": 1.5111699487009762, "grad_norm": 0.4405030608177185, "learning_rate": 5.803994018060038e-06, "loss": 0.3387, "step": 3044 }, { "epoch": 1.5116663908654642, "grad_norm": 0.6253395080566406, "learning_rate": 5.801142392596291e-06, "loss": 0.4076, "step": 3045 }, { "epoch": 1.5121628330299521, "grad_norm": 0.5044987201690674, "learning_rate": 5.798290499652873e-06, "loss": 0.3992, "step": 3046 }, { "epoch": 1.5126592751944399, "grad_norm": 0.47477829456329346, "learning_rate": 5.795438340181954e-06, "loss": 0.4028, "step": 3047 }, { "epoch": 1.5131557173589276, "grad_norm": 0.479840487241745, "learning_rate": 5.79258591513579e-06, "loss": 0.3632, "step": 3048 }, { "epoch": 1.5136521595234154, "grad_norm": 0.52299565076828, "learning_rate": 5.789733225466732e-06, "loss": 0.4004, "step": 3049 }, { "epoch": 1.5141486016879033, "grad_norm": 0.5175360441207886, "learning_rate": 5.786880272127213e-06, "loss": 0.3653, "step": 3050 }, { "epoch": 1.5146450438523913, "grad_norm": 0.5081112384796143, "learning_rate": 5.784027056069757e-06, "loss": 0.3812, "step": 3051 }, { "epoch": 1.515141486016879, "grad_norm": 0.46721237897872925, "learning_rate": 5.781173578246978e-06, "loss": 0.3728, "step": 3052 }, { "epoch": 1.5156379281813668, "grad_norm": 0.45284268260002136, "learning_rate": 5.77831983961157e-06, "loss": 0.3565, "step": 3053 }, { "epoch": 1.5161343703458547, "grad_norm": 0.4983207881450653, "learning_rate": 5.775465841116323e-06, "loss": 0.3783, "step": 3054 }, { "epoch": 1.5166308125103427, "grad_norm": 0.46817994117736816, "learning_rate": 5.772611583714106e-06, "loss": 0.3807, "step": 3055 }, { "epoch": 1.5171272546748304, "grad_norm": 0.44561418890953064, "learning_rate": 5.769757068357878e-06, "loss": 0.3585, "step": 3056 }, { "epoch": 1.5176236968393182, "grad_norm": 0.5194921493530273, "learning_rate": 5.766902296000689e-06, "loss": 0.3975, "step": 3057 }, { "epoch": 1.518120139003806, "grad_norm": 0.4625793695449829, "learning_rate": 5.7640472675956664e-06, "loss": 0.353, "step": 3058 }, { "epoch": 1.518616581168294, "grad_norm": 0.5132588744163513, "learning_rate": 5.761191984096026e-06, "loss": 0.3989, "step": 3059 }, { "epoch": 1.5191130233327819, "grad_norm": 0.4505656957626343, "learning_rate": 5.758336446455069e-06, "loss": 0.3861, "step": 3060 }, { "epoch": 1.5196094654972696, "grad_norm": 0.45143836736679077, "learning_rate": 5.755480655626185e-06, "loss": 0.3825, "step": 3061 }, { "epoch": 1.5201059076617573, "grad_norm": 0.49614834785461426, "learning_rate": 5.752624612562841e-06, "loss": 0.3986, "step": 3062 }, { "epoch": 1.520602349826245, "grad_norm": 0.4966813027858734, "learning_rate": 5.749768318218595e-06, "loss": 0.3878, "step": 3063 }, { "epoch": 1.521098791990733, "grad_norm": 0.515092670917511, "learning_rate": 5.746911773547084e-06, "loss": 0.4039, "step": 3064 }, { "epoch": 1.521595234155221, "grad_norm": 0.5030320882797241, "learning_rate": 5.744054979502035e-06, "loss": 0.3272, "step": 3065 }, { "epoch": 1.5220916763197088, "grad_norm": 0.5252659320831299, "learning_rate": 5.741197937037248e-06, "loss": 0.4462, "step": 3066 }, { "epoch": 1.5225881184841965, "grad_norm": 0.4515500068664551, "learning_rate": 5.738340647106615e-06, "loss": 0.3885, "step": 3067 }, { "epoch": 1.5230845606486845, "grad_norm": 0.40014195442199707, "learning_rate": 5.735483110664107e-06, "loss": 0.3118, "step": 3068 }, { "epoch": 1.5235810028131722, "grad_norm": 0.6451682448387146, "learning_rate": 5.732625328663777e-06, "loss": 0.4328, "step": 3069 }, { "epoch": 1.5240774449776602, "grad_norm": 0.49760860204696655, "learning_rate": 5.729767302059763e-06, "loss": 0.3864, "step": 3070 }, { "epoch": 1.524573887142148, "grad_norm": 0.5186985731124878, "learning_rate": 5.726909031806279e-06, "loss": 0.3648, "step": 3071 }, { "epoch": 1.5250703293066357, "grad_norm": 0.6119673848152161, "learning_rate": 5.724050518857627e-06, "loss": 0.451, "step": 3072 }, { "epoch": 1.5255667714711236, "grad_norm": 0.39990460872650146, "learning_rate": 5.721191764168183e-06, "loss": 0.3079, "step": 3073 }, { "epoch": 1.5260632136356116, "grad_norm": 0.5348159074783325, "learning_rate": 5.718332768692413e-06, "loss": 0.4345, "step": 3074 }, { "epoch": 1.5265596558000993, "grad_norm": 0.5287999510765076, "learning_rate": 5.715473533384853e-06, "loss": 0.3834, "step": 3075 }, { "epoch": 1.527056097964587, "grad_norm": 0.45896318554878235, "learning_rate": 5.712614059200126e-06, "loss": 0.3666, "step": 3076 }, { "epoch": 1.5275525401290748, "grad_norm": 0.48067134618759155, "learning_rate": 5.709754347092933e-06, "loss": 0.3914, "step": 3077 }, { "epoch": 1.5280489822935628, "grad_norm": 0.5323483943939209, "learning_rate": 5.706894398018053e-06, "loss": 0.3585, "step": 3078 }, { "epoch": 1.5285454244580507, "grad_norm": 0.49580636620521545, "learning_rate": 5.704034212930346e-06, "loss": 0.3483, "step": 3079 }, { "epoch": 1.5290418666225385, "grad_norm": 0.4701765179634094, "learning_rate": 5.7011737927847484e-06, "loss": 0.376, "step": 3080 }, { "epoch": 1.5295383087870262, "grad_norm": 0.5424783825874329, "learning_rate": 5.69831313853628e-06, "loss": 0.4346, "step": 3081 }, { "epoch": 1.5300347509515142, "grad_norm": 0.4667849838733673, "learning_rate": 5.695452251140034e-06, "loss": 0.3332, "step": 3082 }, { "epoch": 1.530531193116002, "grad_norm": 0.40911057591438293, "learning_rate": 5.692591131551182e-06, "loss": 0.3498, "step": 3083 }, { "epoch": 1.53102763528049, "grad_norm": 0.4778205156326294, "learning_rate": 5.689729780724974e-06, "loss": 0.3516, "step": 3084 }, { "epoch": 1.5315240774449776, "grad_norm": 0.44697850942611694, "learning_rate": 5.68686819961674e-06, "loss": 0.3517, "step": 3085 }, { "epoch": 1.5320205196094654, "grad_norm": 0.44414448738098145, "learning_rate": 5.6840063891818795e-06, "loss": 0.3835, "step": 3086 }, { "epoch": 1.5325169617739534, "grad_norm": 0.439646452665329, "learning_rate": 5.681144350375877e-06, "loss": 0.3652, "step": 3087 }, { "epoch": 1.5330134039384413, "grad_norm": 0.5450682640075684, "learning_rate": 5.678282084154289e-06, "loss": 0.4355, "step": 3088 }, { "epoch": 1.533509846102929, "grad_norm": 0.495455801486969, "learning_rate": 5.675419591472747e-06, "loss": 0.3897, "step": 3089 }, { "epoch": 1.5340062882674168, "grad_norm": 0.4796082079410553, "learning_rate": 5.672556873286961e-06, "loss": 0.385, "step": 3090 }, { "epoch": 1.5345027304319045, "grad_norm": 0.5416748523712158, "learning_rate": 5.669693930552714e-06, "loss": 0.3871, "step": 3091 }, { "epoch": 1.5349991725963925, "grad_norm": 0.527045726776123, "learning_rate": 5.6668307642258655e-06, "loss": 0.4099, "step": 3092 }, { "epoch": 1.5354956147608805, "grad_norm": 0.4805630147457123, "learning_rate": 5.663967375262348e-06, "loss": 0.3906, "step": 3093 }, { "epoch": 1.5359920569253682, "grad_norm": 0.4742708206176758, "learning_rate": 5.6611037646181684e-06, "loss": 0.351, "step": 3094 }, { "epoch": 1.536488499089856, "grad_norm": 0.5174568891525269, "learning_rate": 5.65823993324941e-06, "loss": 0.3479, "step": 3095 }, { "epoch": 1.5369849412543437, "grad_norm": 0.4991834759712219, "learning_rate": 5.655375882112228e-06, "loss": 0.3816, "step": 3096 }, { "epoch": 1.5374813834188317, "grad_norm": 0.500106155872345, "learning_rate": 5.652511612162851e-06, "loss": 0.3749, "step": 3097 }, { "epoch": 1.5379778255833196, "grad_norm": 0.6360189318656921, "learning_rate": 5.649647124357582e-06, "loss": 0.4385, "step": 3098 }, { "epoch": 1.5384742677478074, "grad_norm": 0.4853738844394684, "learning_rate": 5.646782419652793e-06, "loss": 0.3731, "step": 3099 }, { "epoch": 1.5389707099122951, "grad_norm": 0.46360230445861816, "learning_rate": 5.643917499004934e-06, "loss": 0.3671, "step": 3100 }, { "epoch": 1.539467152076783, "grad_norm": 0.5919646620750427, "learning_rate": 5.641052363370523e-06, "loss": 0.4333, "step": 3101 }, { "epoch": 1.539963594241271, "grad_norm": 0.5400118827819824, "learning_rate": 5.63818701370615e-06, "loss": 0.3772, "step": 3102 }, { "epoch": 1.5404600364057588, "grad_norm": 0.46896079182624817, "learning_rate": 5.635321450968476e-06, "loss": 0.4233, "step": 3103 }, { "epoch": 1.5409564785702465, "grad_norm": 0.5062649846076965, "learning_rate": 5.63245567611424e-06, "loss": 0.4023, "step": 3104 }, { "epoch": 1.5414529207347343, "grad_norm": 0.5128011107444763, "learning_rate": 5.629589690100241e-06, "loss": 0.3191, "step": 3105 }, { "epoch": 1.5419493628992222, "grad_norm": 0.4402811825275421, "learning_rate": 5.626723493883357e-06, "loss": 0.4032, "step": 3106 }, { "epoch": 1.5424458050637102, "grad_norm": 0.480471670627594, "learning_rate": 5.623857088420531e-06, "loss": 0.4142, "step": 3107 }, { "epoch": 1.542942247228198, "grad_norm": 0.6123782992362976, "learning_rate": 5.620990474668779e-06, "loss": 0.4144, "step": 3108 }, { "epoch": 1.5434386893926857, "grad_norm": 0.4794464111328125, "learning_rate": 5.618123653585184e-06, "loss": 0.3765, "step": 3109 }, { "epoch": 1.5439351315571734, "grad_norm": 0.48603540658950806, "learning_rate": 5.615256626126903e-06, "loss": 0.3819, "step": 3110 }, { "epoch": 1.5444315737216614, "grad_norm": 0.535900354385376, "learning_rate": 5.612389393251154e-06, "loss": 0.3556, "step": 3111 }, { "epoch": 1.5449280158861494, "grad_norm": 0.48744410276412964, "learning_rate": 5.609521955915231e-06, "loss": 0.3945, "step": 3112 }, { "epoch": 1.545424458050637, "grad_norm": 0.4886934757232666, "learning_rate": 5.606654315076494e-06, "loss": 0.3764, "step": 3113 }, { "epoch": 1.5459209002151248, "grad_norm": 0.4352734386920929, "learning_rate": 5.6037864716923675e-06, "loss": 0.3291, "step": 3114 }, { "epoch": 1.5464173423796128, "grad_norm": 0.4770011901855469, "learning_rate": 5.60091842672035e-06, "loss": 0.3847, "step": 3115 }, { "epoch": 1.5469137845441008, "grad_norm": 0.5553373098373413, "learning_rate": 5.5980501811179996e-06, "loss": 0.4576, "step": 3116 }, { "epoch": 1.5474102267085885, "grad_norm": 0.5688639879226685, "learning_rate": 5.595181735842951e-06, "loss": 0.3515, "step": 3117 }, { "epoch": 1.5479066688730763, "grad_norm": 0.49748489260673523, "learning_rate": 5.592313091852894e-06, "loss": 0.3425, "step": 3118 }, { "epoch": 1.548403111037564, "grad_norm": 0.5785113573074341, "learning_rate": 5.589444250105595e-06, "loss": 0.3944, "step": 3119 }, { "epoch": 1.548899553202052, "grad_norm": 0.4883442223072052, "learning_rate": 5.58657521155888e-06, "loss": 0.3413, "step": 3120 }, { "epoch": 1.54939599536654, "grad_norm": 0.5131576061248779, "learning_rate": 5.583705977170646e-06, "loss": 0.3773, "step": 3121 }, { "epoch": 1.5498924375310277, "grad_norm": 0.493145227432251, "learning_rate": 5.580836547898849e-06, "loss": 0.4214, "step": 3122 }, { "epoch": 1.5503888796955154, "grad_norm": 0.44078317284584045, "learning_rate": 5.577966924701516e-06, "loss": 0.3516, "step": 3123 }, { "epoch": 1.5508853218600032, "grad_norm": 0.5537282824516296, "learning_rate": 5.575097108536735e-06, "loss": 0.3953, "step": 3124 }, { "epoch": 1.5513817640244911, "grad_norm": 0.48393455147743225, "learning_rate": 5.572227100362658e-06, "loss": 0.33, "step": 3125 }, { "epoch": 1.551878206188979, "grad_norm": 0.5270490646362305, "learning_rate": 5.569356901137506e-06, "loss": 0.4209, "step": 3126 }, { "epoch": 1.5523746483534668, "grad_norm": 0.45230087637901306, "learning_rate": 5.566486511819558e-06, "loss": 0.3378, "step": 3127 }, { "epoch": 1.5528710905179546, "grad_norm": 0.4904930293560028, "learning_rate": 5.563615933367161e-06, "loss": 0.4011, "step": 3128 }, { "epoch": 1.5533675326824425, "grad_norm": 0.45227932929992676, "learning_rate": 5.560745166738722e-06, "loss": 0.3631, "step": 3129 }, { "epoch": 1.5538639748469303, "grad_norm": 0.5291053652763367, "learning_rate": 5.557874212892711e-06, "loss": 0.3534, "step": 3130 }, { "epoch": 1.5543604170114182, "grad_norm": 0.5014159083366394, "learning_rate": 5.555003072787664e-06, "loss": 0.3953, "step": 3131 }, { "epoch": 1.554856859175906, "grad_norm": 0.5214545130729675, "learning_rate": 5.552131747382174e-06, "loss": 0.391, "step": 3132 }, { "epoch": 1.5553533013403937, "grad_norm": 0.44628822803497314, "learning_rate": 5.5492602376349e-06, "loss": 0.376, "step": 3133 }, { "epoch": 1.5558497435048817, "grad_norm": 0.5342276692390442, "learning_rate": 5.5463885445045605e-06, "loss": 0.4392, "step": 3134 }, { "epoch": 1.5563461856693697, "grad_norm": 0.46319812536239624, "learning_rate": 5.543516668949935e-06, "loss": 0.4066, "step": 3135 }, { "epoch": 1.5568426278338574, "grad_norm": 0.5062613487243652, "learning_rate": 5.540644611929869e-06, "loss": 0.4159, "step": 3136 }, { "epoch": 1.5573390699983451, "grad_norm": 0.4963188171386719, "learning_rate": 5.5377723744032585e-06, "loss": 0.3909, "step": 3137 }, { "epoch": 1.557835512162833, "grad_norm": 0.46758949756622314, "learning_rate": 5.534899957329067e-06, "loss": 0.3604, "step": 3138 }, { "epoch": 1.5583319543273209, "grad_norm": 0.4890781044960022, "learning_rate": 5.53202736166632e-06, "loss": 0.3723, "step": 3139 }, { "epoch": 1.5588283964918088, "grad_norm": 0.6552484035491943, "learning_rate": 5.529154588374096e-06, "loss": 0.4114, "step": 3140 }, { "epoch": 1.5593248386562966, "grad_norm": 0.44689908623695374, "learning_rate": 5.526281638411537e-06, "loss": 0.3527, "step": 3141 }, { "epoch": 1.5598212808207843, "grad_norm": 0.4889706075191498, "learning_rate": 5.523408512737841e-06, "loss": 0.3641, "step": 3142 }, { "epoch": 1.5603177229852723, "grad_norm": 0.5054059028625488, "learning_rate": 5.520535212312268e-06, "loss": 0.3845, "step": 3143 }, { "epoch": 1.56081416514976, "grad_norm": 0.4834757149219513, "learning_rate": 5.5176617380941355e-06, "loss": 0.4012, "step": 3144 }, { "epoch": 1.561310607314248, "grad_norm": 0.506269097328186, "learning_rate": 5.514788091042819e-06, "loss": 0.3832, "step": 3145 }, { "epoch": 1.5618070494787357, "grad_norm": 0.5095431804656982, "learning_rate": 5.511914272117748e-06, "loss": 0.3948, "step": 3146 }, { "epoch": 1.5623034916432235, "grad_norm": 0.45850038528442383, "learning_rate": 5.5090402822784175e-06, "loss": 0.4085, "step": 3147 }, { "epoch": 1.5627999338077114, "grad_norm": 0.5302203893661499, "learning_rate": 5.506166122484369e-06, "loss": 0.4098, "step": 3148 }, { "epoch": 1.5632963759721994, "grad_norm": 0.4651108682155609, "learning_rate": 5.503291793695211e-06, "loss": 0.3624, "step": 3149 }, { "epoch": 1.5637928181366871, "grad_norm": 0.5264981389045715, "learning_rate": 5.500417296870599e-06, "loss": 0.3839, "step": 3150 }, { "epoch": 1.5642892603011749, "grad_norm": 0.4615473449230194, "learning_rate": 5.497542632970255e-06, "loss": 0.346, "step": 3151 }, { "epoch": 1.5647857024656626, "grad_norm": 0.49167826771736145, "learning_rate": 5.494667802953947e-06, "loss": 0.3624, "step": 3152 }, { "epoch": 1.5652821446301506, "grad_norm": 0.5363531708717346, "learning_rate": 5.4917928077815034e-06, "loss": 0.39, "step": 3153 }, { "epoch": 1.5657785867946385, "grad_norm": 0.46570929884910583, "learning_rate": 5.488917648412809e-06, "loss": 0.3627, "step": 3154 }, { "epoch": 1.5662750289591263, "grad_norm": 0.5407665371894836, "learning_rate": 5.486042325807799e-06, "loss": 0.414, "step": 3155 }, { "epoch": 1.566771471123614, "grad_norm": 0.41429904103279114, "learning_rate": 5.483166840926467e-06, "loss": 0.3641, "step": 3156 }, { "epoch": 1.5672679132881018, "grad_norm": 0.43161582946777344, "learning_rate": 5.480291194728857e-06, "loss": 0.3798, "step": 3157 }, { "epoch": 1.5677643554525897, "grad_norm": 0.5863943099975586, "learning_rate": 5.477415388175071e-06, "loss": 0.3795, "step": 3158 }, { "epoch": 1.5682607976170777, "grad_norm": 0.5776811242103577, "learning_rate": 5.474539422225263e-06, "loss": 0.3763, "step": 3159 }, { "epoch": 1.5687572397815654, "grad_norm": 0.4625649154186249, "learning_rate": 5.47166329783964e-06, "loss": 0.4096, "step": 3160 }, { "epoch": 1.5692536819460532, "grad_norm": 0.5359938740730286, "learning_rate": 5.4687870159784595e-06, "loss": 0.3458, "step": 3161 }, { "epoch": 1.5697501241105412, "grad_norm": 0.5243372321128845, "learning_rate": 5.465910577602037e-06, "loss": 0.3626, "step": 3162 }, { "epoch": 1.5702465662750291, "grad_norm": 0.4140010178089142, "learning_rate": 5.463033983670733e-06, "loss": 0.3422, "step": 3163 }, { "epoch": 1.5707430084395169, "grad_norm": 0.46343332529067993, "learning_rate": 5.4601572351449695e-06, "loss": 0.3906, "step": 3164 }, { "epoch": 1.5712394506040046, "grad_norm": 0.44875162839889526, "learning_rate": 5.457280332985209e-06, "loss": 0.3383, "step": 3165 }, { "epoch": 1.5717358927684923, "grad_norm": 0.5776742696762085, "learning_rate": 5.454403278151974e-06, "loss": 0.4136, "step": 3166 }, { "epoch": 1.5722323349329803, "grad_norm": 0.5127443671226501, "learning_rate": 5.451526071605835e-06, "loss": 0.4085, "step": 3167 }, { "epoch": 1.5727287770974683, "grad_norm": 0.4499658942222595, "learning_rate": 5.44864871430741e-06, "loss": 0.3479, "step": 3168 }, { "epoch": 1.573225219261956, "grad_norm": 0.5491783618927002, "learning_rate": 5.445771207217377e-06, "loss": 0.4118, "step": 3169 }, { "epoch": 1.5737216614264438, "grad_norm": 0.46042826771736145, "learning_rate": 5.4428935512964505e-06, "loss": 0.3515, "step": 3170 }, { "epoch": 1.5742181035909315, "grad_norm": 0.47793740034103394, "learning_rate": 5.440015747505406e-06, "loss": 0.3763, "step": 3171 }, { "epoch": 1.5747145457554195, "grad_norm": 0.4990359842777252, "learning_rate": 5.437137796805062e-06, "loss": 0.3822, "step": 3172 }, { "epoch": 1.5752109879199074, "grad_norm": 0.47266799211502075, "learning_rate": 5.434259700156288e-06, "loss": 0.363, "step": 3173 }, { "epoch": 1.5757074300843952, "grad_norm": 0.512065589427948, "learning_rate": 5.431381458520002e-06, "loss": 0.4207, "step": 3174 }, { "epoch": 1.576203872248883, "grad_norm": 0.438357949256897, "learning_rate": 5.428503072857172e-06, "loss": 0.3115, "step": 3175 }, { "epoch": 1.5767003144133709, "grad_norm": 0.559398889541626, "learning_rate": 5.425624544128813e-06, "loss": 0.4128, "step": 3176 }, { "epoch": 1.5771967565778586, "grad_norm": 0.4496886432170868, "learning_rate": 5.422745873295985e-06, "loss": 0.3693, "step": 3177 }, { "epoch": 1.5776931987423466, "grad_norm": 0.48408564925193787, "learning_rate": 5.4198670613198e-06, "loss": 0.3985, "step": 3178 }, { "epoch": 1.5781896409068343, "grad_norm": 0.4493131637573242, "learning_rate": 5.416988109161414e-06, "loss": 0.3901, "step": 3179 }, { "epoch": 1.578686083071322, "grad_norm": 0.5167300701141357, "learning_rate": 5.414109017782033e-06, "loss": 0.4295, "step": 3180 }, { "epoch": 1.57918252523581, "grad_norm": 0.4617859423160553, "learning_rate": 5.411229788142905e-06, "loss": 0.3551, "step": 3181 }, { "epoch": 1.579678967400298, "grad_norm": 0.594233512878418, "learning_rate": 5.408350421205326e-06, "loss": 0.4324, "step": 3182 }, { "epoch": 1.5801754095647857, "grad_norm": 0.45740756392478943, "learning_rate": 5.405470917930641e-06, "loss": 0.3569, "step": 3183 }, { "epoch": 1.5806718517292735, "grad_norm": 0.4776029884815216, "learning_rate": 5.4025912792802374e-06, "loss": 0.3979, "step": 3184 }, { "epoch": 1.5811682938937612, "grad_norm": 0.47764289379119873, "learning_rate": 5.3997115062155455e-06, "loss": 0.3852, "step": 3185 }, { "epoch": 1.5816647360582492, "grad_norm": 0.49383458495140076, "learning_rate": 5.396831599698048e-06, "loss": 0.354, "step": 3186 }, { "epoch": 1.5821611782227372, "grad_norm": 0.5115252137184143, "learning_rate": 5.393951560689262e-06, "loss": 0.3727, "step": 3187 }, { "epoch": 1.582657620387225, "grad_norm": 0.49272486567497253, "learning_rate": 5.39107139015076e-06, "loss": 0.3544, "step": 3188 }, { "epoch": 1.5831540625517126, "grad_norm": 0.42440924048423767, "learning_rate": 5.388191089044146e-06, "loss": 0.3101, "step": 3189 }, { "epoch": 1.5836505047162006, "grad_norm": 0.5175232291221619, "learning_rate": 5.385310658331079e-06, "loss": 0.4434, "step": 3190 }, { "epoch": 1.5841469468806884, "grad_norm": 0.5152369141578674, "learning_rate": 5.382430098973256e-06, "loss": 0.4072, "step": 3191 }, { "epoch": 1.5846433890451763, "grad_norm": 0.42499566078186035, "learning_rate": 5.379549411932417e-06, "loss": 0.3655, "step": 3192 }, { "epoch": 1.585139831209664, "grad_norm": 0.5180062055587769, "learning_rate": 5.376668598170344e-06, "loss": 0.3497, "step": 3193 }, { "epoch": 1.5856362733741518, "grad_norm": 0.4908702075481415, "learning_rate": 5.373787658648864e-06, "loss": 0.342, "step": 3194 }, { "epoch": 1.5861327155386398, "grad_norm": 0.5416204333305359, "learning_rate": 5.370906594329844e-06, "loss": 0.4201, "step": 3195 }, { "epoch": 1.5866291577031277, "grad_norm": 0.4603067934513092, "learning_rate": 5.368025406175191e-06, "loss": 0.3807, "step": 3196 }, { "epoch": 1.5871255998676155, "grad_norm": 0.450633704662323, "learning_rate": 5.365144095146858e-06, "loss": 0.3538, "step": 3197 }, { "epoch": 1.5876220420321032, "grad_norm": 0.4563947916030884, "learning_rate": 5.362262662206837e-06, "loss": 0.4154, "step": 3198 }, { "epoch": 1.588118484196591, "grad_norm": 0.4779997169971466, "learning_rate": 5.359381108317159e-06, "loss": 0.3833, "step": 3199 }, { "epoch": 1.588614926361079, "grad_norm": 0.49796611070632935, "learning_rate": 5.3564994344398944e-06, "loss": 0.397, "step": 3200 }, { "epoch": 1.589111368525567, "grad_norm": 0.4531799554824829, "learning_rate": 5.35361764153716e-06, "loss": 0.3815, "step": 3201 }, { "epoch": 1.5896078106900546, "grad_norm": 0.5081040263175964, "learning_rate": 5.350735730571104e-06, "loss": 0.3806, "step": 3202 }, { "epoch": 1.5901042528545424, "grad_norm": 0.5132181644439697, "learning_rate": 5.347853702503921e-06, "loss": 0.375, "step": 3203 }, { "epoch": 1.5906006950190301, "grad_norm": 0.48645976185798645, "learning_rate": 5.344971558297841e-06, "loss": 0.3813, "step": 3204 }, { "epoch": 1.591097137183518, "grad_norm": 0.4522711932659149, "learning_rate": 5.342089298915133e-06, "loss": 0.3812, "step": 3205 }, { "epoch": 1.591593579348006, "grad_norm": 0.4656631350517273, "learning_rate": 5.339206925318106e-06, "loss": 0.3647, "step": 3206 }, { "epoch": 1.5920900215124938, "grad_norm": 0.5515118837356567, "learning_rate": 5.336324438469104e-06, "loss": 0.4403, "step": 3207 }, { "epoch": 1.5925864636769815, "grad_norm": 0.4974838197231293, "learning_rate": 5.333441839330515e-06, "loss": 0.3245, "step": 3208 }, { "epoch": 1.5930829058414695, "grad_norm": 0.4909175932407379, "learning_rate": 5.330559128864757e-06, "loss": 0.3965, "step": 3209 }, { "epoch": 1.5935793480059575, "grad_norm": 0.4910305142402649, "learning_rate": 5.327676308034292e-06, "loss": 0.3704, "step": 3210 }, { "epoch": 1.5940757901704452, "grad_norm": 0.48343533277511597, "learning_rate": 5.324793377801611e-06, "loss": 0.4217, "step": 3211 }, { "epoch": 1.594572232334933, "grad_norm": 0.5127272009849548, "learning_rate": 5.321910339129251e-06, "loss": 0.3942, "step": 3212 }, { "epoch": 1.5950686744994207, "grad_norm": 0.47474390268325806, "learning_rate": 5.3190271929797755e-06, "loss": 0.3665, "step": 3213 }, { "epoch": 1.5955651166639087, "grad_norm": 0.4463144838809967, "learning_rate": 5.316143940315792e-06, "loss": 0.3902, "step": 3214 }, { "epoch": 1.5960615588283966, "grad_norm": 0.4966031312942505, "learning_rate": 5.313260582099938e-06, "loss": 0.3661, "step": 3215 }, { "epoch": 1.5965580009928844, "grad_norm": 0.45408904552459717, "learning_rate": 5.310377119294892e-06, "loss": 0.4179, "step": 3216 }, { "epoch": 1.597054443157372, "grad_norm": 0.37644094228744507, "learning_rate": 5.307493552863359e-06, "loss": 0.3524, "step": 3217 }, { "epoch": 1.5975508853218598, "grad_norm": 0.49868059158325195, "learning_rate": 5.304609883768088e-06, "loss": 0.4445, "step": 3218 }, { "epoch": 1.5980473274863478, "grad_norm": 0.4731038510799408, "learning_rate": 5.3017261129718545e-06, "loss": 0.3414, "step": 3219 }, { "epoch": 1.5985437696508358, "grad_norm": 0.48283442854881287, "learning_rate": 5.298842241437473e-06, "loss": 0.356, "step": 3220 }, { "epoch": 1.5990402118153235, "grad_norm": 0.4784277677536011, "learning_rate": 5.295958270127787e-06, "loss": 0.3954, "step": 3221 }, { "epoch": 1.5995366539798113, "grad_norm": 0.4838782548904419, "learning_rate": 5.293074200005679e-06, "loss": 0.3635, "step": 3222 }, { "epoch": 1.6000330961442992, "grad_norm": 0.530946671962738, "learning_rate": 5.290190032034063e-06, "loss": 0.4004, "step": 3223 }, { "epoch": 1.6005295383087872, "grad_norm": 0.44455212354660034, "learning_rate": 5.287305767175881e-06, "loss": 0.3278, "step": 3224 }, { "epoch": 1.601025980473275, "grad_norm": 0.49492162466049194, "learning_rate": 5.284421406394112e-06, "loss": 0.4378, "step": 3225 }, { "epoch": 1.6015224226377627, "grad_norm": 0.4368066191673279, "learning_rate": 5.281536950651765e-06, "loss": 0.3453, "step": 3226 }, { "epoch": 1.6020188648022504, "grad_norm": 0.4652442932128906, "learning_rate": 5.2786524009118836e-06, "loss": 0.3623, "step": 3227 }, { "epoch": 1.6025153069667384, "grad_norm": 0.46745243668556213, "learning_rate": 5.2757677581375375e-06, "loss": 0.3916, "step": 3228 }, { "epoch": 1.6030117491312263, "grad_norm": 0.49264010787010193, "learning_rate": 5.2728830232918315e-06, "loss": 0.3671, "step": 3229 }, { "epoch": 1.603508191295714, "grad_norm": 0.4626066982746124, "learning_rate": 5.269998197337901e-06, "loss": 0.406, "step": 3230 }, { "epoch": 1.6040046334602018, "grad_norm": 0.47666990756988525, "learning_rate": 5.267113281238912e-06, "loss": 0.3784, "step": 3231 }, { "epoch": 1.6045010756246896, "grad_norm": 0.47205018997192383, "learning_rate": 5.264228275958056e-06, "loss": 0.3735, "step": 3232 }, { "epoch": 1.6049975177891775, "grad_norm": 0.47830599546432495, "learning_rate": 5.261343182458562e-06, "loss": 0.3891, "step": 3233 }, { "epoch": 1.6054939599536655, "grad_norm": 0.44003450870513916, "learning_rate": 5.25845800170368e-06, "loss": 0.3341, "step": 3234 }, { "epoch": 1.6059904021181532, "grad_norm": 0.5325899124145508, "learning_rate": 5.255572734656697e-06, "loss": 0.4301, "step": 3235 }, { "epoch": 1.606486844282641, "grad_norm": 0.5137802362442017, "learning_rate": 5.252687382280924e-06, "loss": 0.386, "step": 3236 }, { "epoch": 1.606983286447129, "grad_norm": 0.5232200026512146, "learning_rate": 5.249801945539701e-06, "loss": 0.3549, "step": 3237 }, { "epoch": 1.6074797286116167, "grad_norm": 0.4936312735080719, "learning_rate": 5.246916425396398e-06, "loss": 0.3807, "step": 3238 }, { "epoch": 1.6079761707761047, "grad_norm": 0.477393239736557, "learning_rate": 5.244030822814411e-06, "loss": 0.4077, "step": 3239 }, { "epoch": 1.6084726129405924, "grad_norm": 0.4841611385345459, "learning_rate": 5.241145138757167e-06, "loss": 0.3841, "step": 3240 }, { "epoch": 1.6089690551050801, "grad_norm": 0.5191798210144043, "learning_rate": 5.238259374188113e-06, "loss": 0.3858, "step": 3241 }, { "epoch": 1.6094654972695681, "grad_norm": 0.5226397514343262, "learning_rate": 5.23537353007073e-06, "loss": 0.4141, "step": 3242 }, { "epoch": 1.609961939434056, "grad_norm": 0.47367024421691895, "learning_rate": 5.232487607368522e-06, "loss": 0.3868, "step": 3243 }, { "epoch": 1.6104583815985438, "grad_norm": 0.47281619906425476, "learning_rate": 5.229601607045021e-06, "loss": 0.3616, "step": 3244 }, { "epoch": 1.6109548237630316, "grad_norm": 0.5763335227966309, "learning_rate": 5.226715530063782e-06, "loss": 0.3972, "step": 3245 }, { "epoch": 1.6114512659275193, "grad_norm": 0.44838786125183105, "learning_rate": 5.223829377388392e-06, "loss": 0.3991, "step": 3246 }, { "epoch": 1.6119477080920073, "grad_norm": 0.5315632224082947, "learning_rate": 5.220943149982455e-06, "loss": 0.3911, "step": 3247 }, { "epoch": 1.6124441502564952, "grad_norm": 0.4569682478904724, "learning_rate": 5.218056848809604e-06, "loss": 0.3661, "step": 3248 }, { "epoch": 1.612940592420983, "grad_norm": 0.48278701305389404, "learning_rate": 5.2151704748335e-06, "loss": 0.3917, "step": 3249 }, { "epoch": 1.6134370345854707, "grad_norm": 0.44123363494873047, "learning_rate": 5.21228402901782e-06, "loss": 0.3063, "step": 3250 }, { "epoch": 1.6139334767499587, "grad_norm": 0.5838345289230347, "learning_rate": 5.2093975123262745e-06, "loss": 0.3858, "step": 3251 }, { "epoch": 1.6144299189144464, "grad_norm": 0.5130561590194702, "learning_rate": 5.20651092572259e-06, "loss": 0.4201, "step": 3252 }, { "epoch": 1.6149263610789344, "grad_norm": 0.4248834252357483, "learning_rate": 5.2036242701705185e-06, "loss": 0.3781, "step": 3253 }, { "epoch": 1.6154228032434221, "grad_norm": 0.4875808358192444, "learning_rate": 5.200737546633839e-06, "loss": 0.3884, "step": 3254 }, { "epoch": 1.6159192454079099, "grad_norm": 0.4986831545829773, "learning_rate": 5.197850756076348e-06, "loss": 0.3076, "step": 3255 }, { "epoch": 1.6164156875723978, "grad_norm": 0.6829372644424438, "learning_rate": 5.1949638994618666e-06, "loss": 0.4334, "step": 3256 }, { "epoch": 1.6169121297368858, "grad_norm": 0.5124260187149048, "learning_rate": 5.192076977754239e-06, "loss": 0.3444, "step": 3257 }, { "epoch": 1.6174085719013735, "grad_norm": 0.6076661944389343, "learning_rate": 5.189189991917328e-06, "loss": 0.4092, "step": 3258 }, { "epoch": 1.6179050140658613, "grad_norm": 0.4898504912853241, "learning_rate": 5.186302942915021e-06, "loss": 0.3828, "step": 3259 }, { "epoch": 1.618401456230349, "grad_norm": 0.45823344588279724, "learning_rate": 5.1834158317112245e-06, "loss": 0.3278, "step": 3260 }, { "epoch": 1.618897898394837, "grad_norm": 0.5591729879379272, "learning_rate": 5.180528659269867e-06, "loss": 0.4541, "step": 3261 }, { "epoch": 1.619394340559325, "grad_norm": 0.4719594120979309, "learning_rate": 5.177641426554896e-06, "loss": 0.3226, "step": 3262 }, { "epoch": 1.6198907827238127, "grad_norm": 0.40528565645217896, "learning_rate": 5.174754134530281e-06, "loss": 0.3465, "step": 3263 }, { "epoch": 1.6203872248883004, "grad_norm": 0.6003198027610779, "learning_rate": 5.1718667841600115e-06, "loss": 0.47, "step": 3264 }, { "epoch": 1.6208836670527882, "grad_norm": 0.5381267070770264, "learning_rate": 5.168979376408092e-06, "loss": 0.3797, "step": 3265 }, { "epoch": 1.6213801092172762, "grad_norm": 0.4585323631763458, "learning_rate": 5.166091912238552e-06, "loss": 0.3448, "step": 3266 }, { "epoch": 1.6218765513817641, "grad_norm": 0.4642663598060608, "learning_rate": 5.163204392615436e-06, "loss": 0.3785, "step": 3267 }, { "epoch": 1.6223729935462519, "grad_norm": 0.5034817457199097, "learning_rate": 5.16031681850281e-06, "loss": 0.4182, "step": 3268 }, { "epoch": 1.6228694357107396, "grad_norm": 0.5670713782310486, "learning_rate": 5.157429190864755e-06, "loss": 0.4072, "step": 3269 }, { "epoch": 1.6233658778752276, "grad_norm": 0.5305248498916626, "learning_rate": 5.154541510665372e-06, "loss": 0.4229, "step": 3270 }, { "epoch": 1.6238623200397155, "grad_norm": 0.46445971727371216, "learning_rate": 5.151653778868778e-06, "loss": 0.3827, "step": 3271 }, { "epoch": 1.6243587622042033, "grad_norm": 0.45501312613487244, "learning_rate": 5.14876599643911e-06, "loss": 0.3863, "step": 3272 }, { "epoch": 1.624855204368691, "grad_norm": 0.5102412104606628, "learning_rate": 5.145878164340518e-06, "loss": 0.3395, "step": 3273 }, { "epoch": 1.6253516465331788, "grad_norm": 0.5766253471374512, "learning_rate": 5.142990283537174e-06, "loss": 0.4497, "step": 3274 }, { "epoch": 1.6258480886976667, "grad_norm": 0.4392862021923065, "learning_rate": 5.140102354993258e-06, "loss": 0.3347, "step": 3275 }, { "epoch": 1.6263445308621547, "grad_norm": 0.5618142485618591, "learning_rate": 5.137214379672975e-06, "loss": 0.4048, "step": 3276 }, { "epoch": 1.6268409730266424, "grad_norm": 0.5115261673927307, "learning_rate": 5.134326358540538e-06, "loss": 0.3965, "step": 3277 }, { "epoch": 1.6273374151911302, "grad_norm": 0.4378219246864319, "learning_rate": 5.131438292560181e-06, "loss": 0.3781, "step": 3278 }, { "epoch": 1.627833857355618, "grad_norm": 0.44587045907974243, "learning_rate": 5.128550182696153e-06, "loss": 0.4006, "step": 3279 }, { "epoch": 1.6283302995201059, "grad_norm": 0.46051618456840515, "learning_rate": 5.12566202991271e-06, "loss": 0.3622, "step": 3280 }, { "epoch": 1.6288267416845938, "grad_norm": 0.4615706503391266, "learning_rate": 5.1227738351741326e-06, "loss": 0.3826, "step": 3281 }, { "epoch": 1.6293231838490816, "grad_norm": 0.5428385138511658, "learning_rate": 5.119885599444707e-06, "loss": 0.402, "step": 3282 }, { "epoch": 1.6298196260135693, "grad_norm": 0.4642165005207062, "learning_rate": 5.1169973236887394e-06, "loss": 0.4078, "step": 3283 }, { "epoch": 1.6303160681780573, "grad_norm": 0.47966358065605164, "learning_rate": 5.1141090088705436e-06, "loss": 0.3757, "step": 3284 }, { "epoch": 1.630812510342545, "grad_norm": 0.4830467104911804, "learning_rate": 5.111220655954452e-06, "loss": 0.3602, "step": 3285 }, { "epoch": 1.631308952507033, "grad_norm": 0.4650741517543793, "learning_rate": 5.108332265904805e-06, "loss": 0.3903, "step": 3286 }, { "epoch": 1.6318053946715207, "grad_norm": 0.4933299720287323, "learning_rate": 5.105443839685961e-06, "loss": 0.4771, "step": 3287 }, { "epoch": 1.6323018368360085, "grad_norm": 0.3871297538280487, "learning_rate": 5.102555378262283e-06, "loss": 0.345, "step": 3288 }, { "epoch": 1.6327982790004965, "grad_norm": 0.4716993570327759, "learning_rate": 5.099666882598152e-06, "loss": 0.3658, "step": 3289 }, { "epoch": 1.6332947211649844, "grad_norm": 0.4662562608718872, "learning_rate": 5.096778353657957e-06, "loss": 0.3557, "step": 3290 }, { "epoch": 1.6337911633294722, "grad_norm": 0.4417601227760315, "learning_rate": 5.093889792406101e-06, "loss": 0.3706, "step": 3291 }, { "epoch": 1.63428760549396, "grad_norm": 0.4981406629085541, "learning_rate": 5.091001199806994e-06, "loss": 0.4381, "step": 3292 }, { "epoch": 1.6347840476584476, "grad_norm": 0.38990846276283264, "learning_rate": 5.08811257682506e-06, "loss": 0.3043, "step": 3293 }, { "epoch": 1.6352804898229356, "grad_norm": 0.45619863271713257, "learning_rate": 5.085223924424733e-06, "loss": 0.3733, "step": 3294 }, { "epoch": 1.6357769319874236, "grad_norm": 0.43957844376564026, "learning_rate": 5.082335243570452e-06, "loss": 0.3936, "step": 3295 }, { "epoch": 1.6362733741519113, "grad_norm": 0.5176735520362854, "learning_rate": 5.079446535226673e-06, "loss": 0.4507, "step": 3296 }, { "epoch": 1.636769816316399, "grad_norm": 0.43426719307899475, "learning_rate": 5.076557800357853e-06, "loss": 0.329, "step": 3297 }, { "epoch": 1.637266258480887, "grad_norm": 0.43967878818511963, "learning_rate": 5.073669039928466e-06, "loss": 0.3818, "step": 3298 }, { "epoch": 1.6377627006453748, "grad_norm": 0.4521893262863159, "learning_rate": 5.0707802549029875e-06, "loss": 0.3734, "step": 3299 }, { "epoch": 1.6382591428098627, "grad_norm": 0.45867466926574707, "learning_rate": 5.067891446245905e-06, "loss": 0.3501, "step": 3300 }, { "epoch": 1.6387555849743505, "grad_norm": 0.4736049175262451, "learning_rate": 5.0650026149217135e-06, "loss": 0.3865, "step": 3301 }, { "epoch": 1.6392520271388382, "grad_norm": 0.4883287847042084, "learning_rate": 5.062113761894918e-06, "loss": 0.3562, "step": 3302 }, { "epoch": 1.6397484693033262, "grad_norm": 0.5141215920448303, "learning_rate": 5.059224888130023e-06, "loss": 0.3442, "step": 3303 }, { "epoch": 1.6402449114678141, "grad_norm": 0.4474670886993408, "learning_rate": 5.056335994591549e-06, "loss": 0.3456, "step": 3304 }, { "epoch": 1.640741353632302, "grad_norm": 0.49964386224746704, "learning_rate": 5.0534470822440176e-06, "loss": 0.4111, "step": 3305 }, { "epoch": 1.6412377957967896, "grad_norm": 0.45779678225517273, "learning_rate": 5.050558152051957e-06, "loss": 0.3956, "step": 3306 }, { "epoch": 1.6417342379612774, "grad_norm": 0.5284752249717712, "learning_rate": 5.047669204979906e-06, "loss": 0.4565, "step": 3307 }, { "epoch": 1.6422306801257653, "grad_norm": 0.46585357189178467, "learning_rate": 5.0447802419924e-06, "loss": 0.3941, "step": 3308 }, { "epoch": 1.6427271222902533, "grad_norm": 0.4334530532360077, "learning_rate": 5.0418912640539895e-06, "loss": 0.3766, "step": 3309 }, { "epoch": 1.643223564454741, "grad_norm": 0.4782232642173767, "learning_rate": 5.039002272129224e-06, "loss": 0.3775, "step": 3310 }, { "epoch": 1.6437200066192288, "grad_norm": 0.4471273720264435, "learning_rate": 5.036113267182661e-06, "loss": 0.3282, "step": 3311 }, { "epoch": 1.6442164487837165, "grad_norm": 0.4692699611186981, "learning_rate": 5.033224250178859e-06, "loss": 0.3878, "step": 3312 }, { "epoch": 1.6447128909482045, "grad_norm": 0.46200665831565857, "learning_rate": 5.030335222082383e-06, "loss": 0.3719, "step": 3313 }, { "epoch": 1.6452093331126925, "grad_norm": 0.4313834011554718, "learning_rate": 5.0274461838578e-06, "loss": 0.3271, "step": 3314 }, { "epoch": 1.6457057752771802, "grad_norm": 0.5446719527244568, "learning_rate": 5.024557136469682e-06, "loss": 0.4259, "step": 3315 }, { "epoch": 1.646202217441668, "grad_norm": 0.43932852149009705, "learning_rate": 5.021668080882605e-06, "loss": 0.2966, "step": 3316 }, { "epoch": 1.646698659606156, "grad_norm": 0.4691688120365143, "learning_rate": 5.018779018061143e-06, "loss": 0.4181, "step": 3317 }, { "epoch": 1.6471951017706439, "grad_norm": 0.4752158522605896, "learning_rate": 5.015889948969879e-06, "loss": 0.469, "step": 3318 }, { "epoch": 1.6476915439351316, "grad_norm": 0.48617076873779297, "learning_rate": 5.013000874573392e-06, "loss": 0.4043, "step": 3319 }, { "epoch": 1.6481879860996194, "grad_norm": 0.47643086314201355, "learning_rate": 5.0101117958362665e-06, "loss": 0.3722, "step": 3320 }, { "epoch": 1.648684428264107, "grad_norm": 0.4993745684623718, "learning_rate": 5.007222713723086e-06, "loss": 0.4078, "step": 3321 }, { "epoch": 1.649180870428595, "grad_norm": 0.4261152446269989, "learning_rate": 5.00433362919844e-06, "loss": 0.3318, "step": 3322 }, { "epoch": 1.649677312593083, "grad_norm": 0.49728214740753174, "learning_rate": 5.001444543226912e-06, "loss": 0.3999, "step": 3323 }, { "epoch": 1.6501737547575708, "grad_norm": 0.5511311292648315, "learning_rate": 4.99855545677309e-06, "loss": 0.3884, "step": 3324 }, { "epoch": 1.6506701969220585, "grad_norm": 0.4987035393714905, "learning_rate": 4.995666370801563e-06, "loss": 0.4098, "step": 3325 }, { "epoch": 1.6511666390865463, "grad_norm": 0.4124128520488739, "learning_rate": 4.9927772862769136e-06, "loss": 0.3534, "step": 3326 }, { "epoch": 1.6516630812510342, "grad_norm": 0.45523500442504883, "learning_rate": 4.989888204163735e-06, "loss": 0.3318, "step": 3327 }, { "epoch": 1.6521595234155222, "grad_norm": 0.5035392045974731, "learning_rate": 4.98699912542661e-06, "loss": 0.3812, "step": 3328 }, { "epoch": 1.65265596558001, "grad_norm": 0.5590910911560059, "learning_rate": 4.9841100510301234e-06, "loss": 0.3875, "step": 3329 }, { "epoch": 1.6531524077444977, "grad_norm": 0.5117429494857788, "learning_rate": 4.981220981938858e-06, "loss": 0.3674, "step": 3330 }, { "epoch": 1.6536488499089856, "grad_norm": 0.46769988536834717, "learning_rate": 4.978331919117398e-06, "loss": 0.3886, "step": 3331 }, { "epoch": 1.6541452920734736, "grad_norm": 0.5443318486213684, "learning_rate": 4.975442863530319e-06, "loss": 0.4131, "step": 3332 }, { "epoch": 1.6546417342379613, "grad_norm": 0.4369748532772064, "learning_rate": 4.9725538161422005e-06, "loss": 0.3312, "step": 3333 }, { "epoch": 1.655138176402449, "grad_norm": 0.4593949317932129, "learning_rate": 4.969664777917619e-06, "loss": 0.3696, "step": 3334 }, { "epoch": 1.6556346185669368, "grad_norm": 0.4848286211490631, "learning_rate": 4.966775749821143e-06, "loss": 0.4213, "step": 3335 }, { "epoch": 1.6561310607314248, "grad_norm": 0.4402492642402649, "learning_rate": 4.963886732817342e-06, "loss": 0.3814, "step": 3336 }, { "epoch": 1.6566275028959128, "grad_norm": 0.4461955726146698, "learning_rate": 4.9609977278707765e-06, "loss": 0.3692, "step": 3337 }, { "epoch": 1.6571239450604005, "grad_norm": 0.4975982904434204, "learning_rate": 4.958108735946012e-06, "loss": 0.3867, "step": 3338 }, { "epoch": 1.6576203872248882, "grad_norm": 0.4683343470096588, "learning_rate": 4.955219758007601e-06, "loss": 0.3242, "step": 3339 }, { "epoch": 1.658116829389376, "grad_norm": 0.4632796347141266, "learning_rate": 4.9523307950200976e-06, "loss": 0.3706, "step": 3340 }, { "epoch": 1.658613271553864, "grad_norm": 0.44400903582572937, "learning_rate": 4.949441847948043e-06, "loss": 0.3483, "step": 3341 }, { "epoch": 1.659109713718352, "grad_norm": 0.47217321395874023, "learning_rate": 4.946552917755983e-06, "loss": 0.37, "step": 3342 }, { "epoch": 1.6596061558828397, "grad_norm": 0.4998815953731537, "learning_rate": 4.943664005408453e-06, "loss": 0.3689, "step": 3343 }, { "epoch": 1.6601025980473274, "grad_norm": 0.5315583348274231, "learning_rate": 4.9407751118699784e-06, "loss": 0.3589, "step": 3344 }, { "epoch": 1.6605990402118154, "grad_norm": 0.5151258707046509, "learning_rate": 4.937886238105084e-06, "loss": 0.3897, "step": 3345 }, { "epoch": 1.6610954823763031, "grad_norm": 0.500183641910553, "learning_rate": 4.934997385078287e-06, "loss": 0.4018, "step": 3346 }, { "epoch": 1.661591924540791, "grad_norm": 0.4629913568496704, "learning_rate": 4.932108553754097e-06, "loss": 0.3413, "step": 3347 }, { "epoch": 1.6620883667052788, "grad_norm": 0.45401978492736816, "learning_rate": 4.929219745097015e-06, "loss": 0.3964, "step": 3348 }, { "epoch": 1.6625848088697666, "grad_norm": 0.5363314747810364, "learning_rate": 4.9263309600715356e-06, "loss": 0.3721, "step": 3349 }, { "epoch": 1.6630812510342545, "grad_norm": 0.48557570576667786, "learning_rate": 4.923442199642148e-06, "loss": 0.3886, "step": 3350 }, { "epoch": 1.6635776931987425, "grad_norm": 0.4495919644832611, "learning_rate": 4.92055346477333e-06, "loss": 0.3648, "step": 3351 }, { "epoch": 1.6640741353632302, "grad_norm": 0.3992302119731903, "learning_rate": 4.917664756429548e-06, "loss": 0.3664, "step": 3352 }, { "epoch": 1.664570577527718, "grad_norm": 0.484470397233963, "learning_rate": 4.914776075575268e-06, "loss": 0.4318, "step": 3353 }, { "epoch": 1.6650670196922057, "grad_norm": 0.4794333279132843, "learning_rate": 4.91188742317494e-06, "loss": 0.3808, "step": 3354 }, { "epoch": 1.6655634618566937, "grad_norm": 0.44626641273498535, "learning_rate": 4.9089988001930064e-06, "loss": 0.3732, "step": 3355 }, { "epoch": 1.6660599040211816, "grad_norm": 0.48163336515426636, "learning_rate": 4.9061102075939e-06, "loss": 0.4496, "step": 3356 }, { "epoch": 1.6665563461856694, "grad_norm": 0.41721126437187195, "learning_rate": 4.903221646342044e-06, "loss": 0.3334, "step": 3357 }, { "epoch": 1.6670527883501571, "grad_norm": 0.498501718044281, "learning_rate": 4.9003331174018494e-06, "loss": 0.423, "step": 3358 }, { "epoch": 1.667549230514645, "grad_norm": 0.4606717824935913, "learning_rate": 4.897444621737717e-06, "loss": 0.3552, "step": 3359 }, { "epoch": 1.6680456726791328, "grad_norm": 0.46971961855888367, "learning_rate": 4.894556160314041e-06, "loss": 0.3948, "step": 3360 }, { "epoch": 1.6685421148436208, "grad_norm": 0.47304481267929077, "learning_rate": 4.8916677340951965e-06, "loss": 0.3506, "step": 3361 }, { "epoch": 1.6690385570081085, "grad_norm": 0.5421172380447388, "learning_rate": 4.888779344045549e-06, "loss": 0.3976, "step": 3362 }, { "epoch": 1.6695349991725963, "grad_norm": 0.504268229007721, "learning_rate": 4.885890991129458e-06, "loss": 0.3628, "step": 3363 }, { "epoch": 1.6700314413370843, "grad_norm": 0.442213237285614, "learning_rate": 4.883002676311262e-06, "loss": 0.4089, "step": 3364 }, { "epoch": 1.6705278835015722, "grad_norm": 0.4718155264854431, "learning_rate": 4.880114400555294e-06, "loss": 0.3807, "step": 3365 }, { "epoch": 1.67102432566606, "grad_norm": 0.517512321472168, "learning_rate": 4.87722616482587e-06, "loss": 0.3923, "step": 3366 }, { "epoch": 1.6715207678305477, "grad_norm": 0.5367029309272766, "learning_rate": 4.87433797008729e-06, "loss": 0.4165, "step": 3367 }, { "epoch": 1.6720172099950354, "grad_norm": 0.4579204022884369, "learning_rate": 4.871449817303849e-06, "loss": 0.3091, "step": 3368 }, { "epoch": 1.6725136521595234, "grad_norm": 0.48689740896224976, "learning_rate": 4.86856170743982e-06, "loss": 0.3727, "step": 3369 }, { "epoch": 1.6730100943240114, "grad_norm": 0.6039444804191589, "learning_rate": 4.865673641459463e-06, "loss": 0.4563, "step": 3370 }, { "epoch": 1.6735065364884991, "grad_norm": 0.46140626072883606, "learning_rate": 4.862785620327028e-06, "loss": 0.3529, "step": 3371 }, { "epoch": 1.6740029786529869, "grad_norm": 0.5206184983253479, "learning_rate": 4.859897645006743e-06, "loss": 0.3547, "step": 3372 }, { "epoch": 1.6744994208174746, "grad_norm": 0.5317440032958984, "learning_rate": 4.8570097164628285e-06, "loss": 0.3241, "step": 3373 }, { "epoch": 1.6749958629819626, "grad_norm": 0.5644713044166565, "learning_rate": 4.854121835659482e-06, "loss": 0.3609, "step": 3374 }, { "epoch": 1.6754923051464505, "grad_norm": 0.48168453574180603, "learning_rate": 4.851234003560891e-06, "loss": 0.3505, "step": 3375 }, { "epoch": 1.6759887473109383, "grad_norm": 0.5196191668510437, "learning_rate": 4.848346221131223e-06, "loss": 0.4731, "step": 3376 }, { "epoch": 1.676485189475426, "grad_norm": 0.42718011140823364, "learning_rate": 4.845458489334631e-06, "loss": 0.3173, "step": 3377 }, { "epoch": 1.676981631639914, "grad_norm": 0.5030355453491211, "learning_rate": 4.842570809135246e-06, "loss": 0.4037, "step": 3378 }, { "epoch": 1.677478073804402, "grad_norm": 0.5747137665748596, "learning_rate": 4.839683181497192e-06, "loss": 0.4356, "step": 3379 }, { "epoch": 1.6779745159688897, "grad_norm": 0.4632101058959961, "learning_rate": 4.8367956073845655e-06, "loss": 0.3536, "step": 3380 }, { "epoch": 1.6784709581333774, "grad_norm": 0.4569934904575348, "learning_rate": 4.83390808776145e-06, "loss": 0.3583, "step": 3381 }, { "epoch": 1.6789674002978652, "grad_norm": 0.47134143114089966, "learning_rate": 4.831020623591909e-06, "loss": 0.3467, "step": 3382 }, { "epoch": 1.6794638424623531, "grad_norm": 0.523152232170105, "learning_rate": 4.828133215839991e-06, "loss": 0.4442, "step": 3383 }, { "epoch": 1.679960284626841, "grad_norm": 0.4402737319469452, "learning_rate": 4.82524586546972e-06, "loss": 0.4118, "step": 3384 }, { "epoch": 1.6804567267913288, "grad_norm": 0.501845121383667, "learning_rate": 4.822358573445106e-06, "loss": 0.4057, "step": 3385 }, { "epoch": 1.6809531689558166, "grad_norm": 0.4034756124019623, "learning_rate": 4.819471340730135e-06, "loss": 0.2933, "step": 3386 }, { "epoch": 1.6814496111203043, "grad_norm": 0.5599865913391113, "learning_rate": 4.816584168288776e-06, "loss": 0.4721, "step": 3387 }, { "epoch": 1.6819460532847923, "grad_norm": 0.489084929227829, "learning_rate": 4.81369705708498e-06, "loss": 0.3606, "step": 3388 }, { "epoch": 1.6824424954492803, "grad_norm": 0.5469633936882019, "learning_rate": 4.810810008082672e-06, "loss": 0.3606, "step": 3389 }, { "epoch": 1.682938937613768, "grad_norm": 0.5145878195762634, "learning_rate": 4.8079230222457616e-06, "loss": 0.3988, "step": 3390 }, { "epoch": 1.6834353797782557, "grad_norm": 0.5243152976036072, "learning_rate": 4.805036100538134e-06, "loss": 0.4041, "step": 3391 }, { "epoch": 1.6839318219427437, "grad_norm": 0.49982747435569763, "learning_rate": 4.802149243923655e-06, "loss": 0.394, "step": 3392 }, { "epoch": 1.6844282641072315, "grad_norm": 0.4969489574432373, "learning_rate": 4.799262453366162e-06, "loss": 0.3253, "step": 3393 }, { "epoch": 1.6849247062717194, "grad_norm": 0.4925042390823364, "learning_rate": 4.796375729829483e-06, "loss": 0.3441, "step": 3394 }, { "epoch": 1.6854211484362072, "grad_norm": 0.49405813217163086, "learning_rate": 4.793489074277412e-06, "loss": 0.363, "step": 3395 }, { "epoch": 1.685917590600695, "grad_norm": 0.4856064021587372, "learning_rate": 4.790602487673728e-06, "loss": 0.4019, "step": 3396 }, { "epoch": 1.6864140327651829, "grad_norm": 0.466412216424942, "learning_rate": 4.7877159709821805e-06, "loss": 0.3441, "step": 3397 }, { "epoch": 1.6869104749296708, "grad_norm": 0.45958203077316284, "learning_rate": 4.784829525166502e-06, "loss": 0.4019, "step": 3398 }, { "epoch": 1.6874069170941586, "grad_norm": 0.4753476679325104, "learning_rate": 4.781943151190397e-06, "loss": 0.3663, "step": 3399 }, { "epoch": 1.6879033592586463, "grad_norm": 0.4747154414653778, "learning_rate": 4.779056850017546e-06, "loss": 0.3511, "step": 3400 }, { "epoch": 1.688399801423134, "grad_norm": 0.47744736075401306, "learning_rate": 4.77617062261161e-06, "loss": 0.4402, "step": 3401 }, { "epoch": 1.688896243587622, "grad_norm": 0.4614260196685791, "learning_rate": 4.773284469936219e-06, "loss": 0.3326, "step": 3402 }, { "epoch": 1.68939268575211, "grad_norm": 0.5508041381835938, "learning_rate": 4.7703983929549816e-06, "loss": 0.3845, "step": 3403 }, { "epoch": 1.6898891279165977, "grad_norm": 0.44307199120521545, "learning_rate": 4.767512392631479e-06, "loss": 0.3532, "step": 3404 }, { "epoch": 1.6903855700810855, "grad_norm": 0.49262484908103943, "learning_rate": 4.764626469929272e-06, "loss": 0.4018, "step": 3405 }, { "epoch": 1.6908820122455734, "grad_norm": 0.48813900351524353, "learning_rate": 4.7617406258118895e-06, "loss": 0.3633, "step": 3406 }, { "epoch": 1.6913784544100612, "grad_norm": 0.47447600960731506, "learning_rate": 4.758854861242837e-06, "loss": 0.4099, "step": 3407 }, { "epoch": 1.6918748965745491, "grad_norm": 0.47608426213264465, "learning_rate": 4.755969177185589e-06, "loss": 0.3723, "step": 3408 }, { "epoch": 1.692371338739037, "grad_norm": 0.4613323211669922, "learning_rate": 4.753083574603603e-06, "loss": 0.4037, "step": 3409 }, { "epoch": 1.6928677809035246, "grad_norm": 0.47232237458229065, "learning_rate": 4.7501980544602995e-06, "loss": 0.3234, "step": 3410 }, { "epoch": 1.6933642230680126, "grad_norm": 0.45090043544769287, "learning_rate": 4.747312617719079e-06, "loss": 0.3479, "step": 3411 }, { "epoch": 1.6938606652325006, "grad_norm": 0.48787692189216614, "learning_rate": 4.744427265343304e-06, "loss": 0.4271, "step": 3412 }, { "epoch": 1.6943571073969883, "grad_norm": 0.4699952006340027, "learning_rate": 4.741541998296321e-06, "loss": 0.3493, "step": 3413 }, { "epoch": 1.694853549561476, "grad_norm": 0.4613645076751709, "learning_rate": 4.738656817541441e-06, "loss": 0.3984, "step": 3414 }, { "epoch": 1.6953499917259638, "grad_norm": 0.44380414485931396, "learning_rate": 4.735771724041945e-06, "loss": 0.3364, "step": 3415 }, { "epoch": 1.6958464338904518, "grad_norm": 0.5417414307594299, "learning_rate": 4.732886718761091e-06, "loss": 0.3998, "step": 3416 }, { "epoch": 1.6963428760549397, "grad_norm": 0.5100188255310059, "learning_rate": 4.730001802662101e-06, "loss": 0.3991, "step": 3417 }, { "epoch": 1.6968393182194275, "grad_norm": 0.4463158845901489, "learning_rate": 4.72711697670817e-06, "loss": 0.4012, "step": 3418 }, { "epoch": 1.6973357603839152, "grad_norm": 0.5250662565231323, "learning_rate": 4.724232241862464e-06, "loss": 0.3943, "step": 3419 }, { "epoch": 1.697832202548403, "grad_norm": 0.4802435338497162, "learning_rate": 4.721347599088118e-06, "loss": 0.3477, "step": 3420 }, { "epoch": 1.698328644712891, "grad_norm": 0.4270705282688141, "learning_rate": 4.7184630493482355e-06, "loss": 0.3457, "step": 3421 }, { "epoch": 1.6988250868773789, "grad_norm": 0.5473836660385132, "learning_rate": 4.71557859360589e-06, "loss": 0.3872, "step": 3422 }, { "epoch": 1.6993215290418666, "grad_norm": 0.4777112901210785, "learning_rate": 4.71269423282412e-06, "loss": 0.3744, "step": 3423 }, { "epoch": 1.6998179712063544, "grad_norm": 0.48616889119148254, "learning_rate": 4.709809967965939e-06, "loss": 0.371, "step": 3424 }, { "epoch": 1.7003144133708423, "grad_norm": 0.4861191511154175, "learning_rate": 4.706925799994322e-06, "loss": 0.3392, "step": 3425 }, { "epoch": 1.7008108555353303, "grad_norm": 0.5250683426856995, "learning_rate": 4.704041729872215e-06, "loss": 0.4146, "step": 3426 }, { "epoch": 1.701307297699818, "grad_norm": 0.43050655722618103, "learning_rate": 4.701157758562528e-06, "loss": 0.3266, "step": 3427 }, { "epoch": 1.7018037398643058, "grad_norm": 0.48748868703842163, "learning_rate": 4.698273887028147e-06, "loss": 0.3822, "step": 3428 }, { "epoch": 1.7023001820287935, "grad_norm": 0.5188548564910889, "learning_rate": 4.695390116231915e-06, "loss": 0.379, "step": 3429 }, { "epoch": 1.7027966241932815, "grad_norm": 0.488048255443573, "learning_rate": 4.692506447136641e-06, "loss": 0.3801, "step": 3430 }, { "epoch": 1.7032930663577694, "grad_norm": 0.4238170087337494, "learning_rate": 4.68962288070511e-06, "loss": 0.3387, "step": 3431 }, { "epoch": 1.7037895085222572, "grad_norm": 0.4667433500289917, "learning_rate": 4.686739417900063e-06, "loss": 0.3648, "step": 3432 }, { "epoch": 1.704285950686745, "grad_norm": 0.5427445769309998, "learning_rate": 4.68385605968421e-06, "loss": 0.3766, "step": 3433 }, { "epoch": 1.7047823928512327, "grad_norm": 0.5224066376686096, "learning_rate": 4.680972807020226e-06, "loss": 0.3904, "step": 3434 }, { "epoch": 1.7052788350157206, "grad_norm": 0.4896109104156494, "learning_rate": 4.67808966087075e-06, "loss": 0.4231, "step": 3435 }, { "epoch": 1.7057752771802086, "grad_norm": 0.40136653184890747, "learning_rate": 4.67520662219839e-06, "loss": 0.2951, "step": 3436 }, { "epoch": 1.7062717193446963, "grad_norm": 0.5384687185287476, "learning_rate": 4.672323691965711e-06, "loss": 0.3969, "step": 3437 }, { "epoch": 1.706768161509184, "grad_norm": 0.44101256132125854, "learning_rate": 4.669440871135243e-06, "loss": 0.3583, "step": 3438 }, { "epoch": 1.707264603673672, "grad_norm": 0.4485785663127899, "learning_rate": 4.666558160669486e-06, "loss": 0.324, "step": 3439 }, { "epoch": 1.70776104583816, "grad_norm": 0.49563363194465637, "learning_rate": 4.663675561530897e-06, "loss": 0.4274, "step": 3440 }, { "epoch": 1.7082574880026478, "grad_norm": 0.44446855783462524, "learning_rate": 4.660793074681895e-06, "loss": 0.3998, "step": 3441 }, { "epoch": 1.7087539301671355, "grad_norm": 0.47398316860198975, "learning_rate": 4.657910701084869e-06, "loss": 0.3528, "step": 3442 }, { "epoch": 1.7092503723316232, "grad_norm": 0.4716496169567108, "learning_rate": 4.655028441702161e-06, "loss": 0.409, "step": 3443 }, { "epoch": 1.7097468144961112, "grad_norm": 0.5033398866653442, "learning_rate": 4.6521462974960805e-06, "loss": 0.4092, "step": 3444 }, { "epoch": 1.7102432566605992, "grad_norm": 0.4234597384929657, "learning_rate": 4.649264269428896e-06, "loss": 0.3524, "step": 3445 }, { "epoch": 1.710739698825087, "grad_norm": 0.4175933599472046, "learning_rate": 4.6463823584628415e-06, "loss": 0.3605, "step": 3446 }, { "epoch": 1.7112361409895747, "grad_norm": 0.45022451877593994, "learning_rate": 4.643500565560106e-06, "loss": 0.334, "step": 3447 }, { "epoch": 1.7117325831540624, "grad_norm": 0.47362908720970154, "learning_rate": 4.640618891682844e-06, "loss": 0.4067, "step": 3448 }, { "epoch": 1.7122290253185504, "grad_norm": 0.4669371545314789, "learning_rate": 4.637737337793164e-06, "loss": 0.388, "step": 3449 }, { "epoch": 1.7127254674830383, "grad_norm": 0.43639075756073, "learning_rate": 4.634855904853143e-06, "loss": 0.3887, "step": 3450 }, { "epoch": 1.713221909647526, "grad_norm": 0.4635430574417114, "learning_rate": 4.63197459382481e-06, "loss": 0.3231, "step": 3451 }, { "epoch": 1.7137183518120138, "grad_norm": 0.4942784607410431, "learning_rate": 4.629093405670159e-06, "loss": 0.4139, "step": 3452 }, { "epoch": 1.7142147939765018, "grad_norm": 0.4729786515235901, "learning_rate": 4.626212341351137e-06, "loss": 0.4042, "step": 3453 }, { "epoch": 1.7147112361409895, "grad_norm": 0.4509636163711548, "learning_rate": 4.623331401829658e-06, "loss": 0.3611, "step": 3454 }, { "epoch": 1.7152076783054775, "grad_norm": 0.44507747888565063, "learning_rate": 4.6204505880675856e-06, "loss": 0.393, "step": 3455 }, { "epoch": 1.7157041204699652, "grad_norm": 0.45023876428604126, "learning_rate": 4.617569901026745e-06, "loss": 0.4213, "step": 3456 }, { "epoch": 1.716200562634453, "grad_norm": 0.4686136543750763, "learning_rate": 4.614689341668922e-06, "loss": 0.3509, "step": 3457 }, { "epoch": 1.716697004798941, "grad_norm": 0.4392837882041931, "learning_rate": 4.611808910955855e-06, "loss": 0.3743, "step": 3458 }, { "epoch": 1.717193446963429, "grad_norm": 0.5073841214179993, "learning_rate": 4.608928609849244e-06, "loss": 0.4314, "step": 3459 }, { "epoch": 1.7176898891279166, "grad_norm": 0.4887455999851227, "learning_rate": 4.606048439310738e-06, "loss": 0.3539, "step": 3460 }, { "epoch": 1.7181863312924044, "grad_norm": 0.5185580849647522, "learning_rate": 4.603168400301954e-06, "loss": 0.3918, "step": 3461 }, { "epoch": 1.7186827734568921, "grad_norm": 0.44850194454193115, "learning_rate": 4.600288493784455e-06, "loss": 0.3341, "step": 3462 }, { "epoch": 1.71917921562138, "grad_norm": 0.4217210114002228, "learning_rate": 4.597408720719765e-06, "loss": 0.396, "step": 3463 }, { "epoch": 1.719675657785868, "grad_norm": 0.4635525643825531, "learning_rate": 4.5945290820693585e-06, "loss": 0.4279, "step": 3464 }, { "epoch": 1.7201720999503558, "grad_norm": 0.44298022985458374, "learning_rate": 4.591649578794675e-06, "loss": 0.3916, "step": 3465 }, { "epoch": 1.7206685421148435, "grad_norm": 0.46849745512008667, "learning_rate": 4.588770211857096e-06, "loss": 0.3783, "step": 3466 }, { "epoch": 1.7211649842793315, "grad_norm": 0.4466189444065094, "learning_rate": 4.58589098221797e-06, "loss": 0.3893, "step": 3467 }, { "epoch": 1.7216614264438193, "grad_norm": 0.4149203896522522, "learning_rate": 4.583011890838586e-06, "loss": 0.375, "step": 3468 }, { "epoch": 1.7221578686083072, "grad_norm": 0.46251457929611206, "learning_rate": 4.580132938680202e-06, "loss": 0.3458, "step": 3469 }, { "epoch": 1.722654310772795, "grad_norm": 0.42749884724617004, "learning_rate": 4.577254126704017e-06, "loss": 0.3711, "step": 3470 }, { "epoch": 1.7231507529372827, "grad_norm": 0.46695011854171753, "learning_rate": 4.574375455871188e-06, "loss": 0.3947, "step": 3471 }, { "epoch": 1.7236471951017707, "grad_norm": 0.4962231516838074, "learning_rate": 4.571496927142829e-06, "loss": 0.411, "step": 3472 }, { "epoch": 1.7241436372662586, "grad_norm": 0.47511547803878784, "learning_rate": 4.56861854148e-06, "loss": 0.3306, "step": 3473 }, { "epoch": 1.7246400794307464, "grad_norm": 0.5320813655853271, "learning_rate": 4.565740299843714e-06, "loss": 0.3888, "step": 3474 }, { "epoch": 1.7251365215952341, "grad_norm": 0.4422111511230469, "learning_rate": 4.562862203194939e-06, "loss": 0.289, "step": 3475 }, { "epoch": 1.7256329637597219, "grad_norm": 0.4613319933414459, "learning_rate": 4.559984252494595e-06, "loss": 0.4451, "step": 3476 }, { "epoch": 1.7261294059242098, "grad_norm": 0.4377672076225281, "learning_rate": 4.55710644870355e-06, "loss": 0.3922, "step": 3477 }, { "epoch": 1.7266258480886978, "grad_norm": 0.44796860218048096, "learning_rate": 4.554228792782626e-06, "loss": 0.4154, "step": 3478 }, { "epoch": 1.7271222902531855, "grad_norm": 0.426013320684433, "learning_rate": 4.551351285692589e-06, "loss": 0.4045, "step": 3479 }, { "epoch": 1.7276187324176733, "grad_norm": 0.4654417932033539, "learning_rate": 4.548473928394167e-06, "loss": 0.4048, "step": 3480 }, { "epoch": 1.728115174582161, "grad_norm": 0.4524926543235779, "learning_rate": 4.545596721848027e-06, "loss": 0.3475, "step": 3481 }, { "epoch": 1.728611616746649, "grad_norm": 0.501118540763855, "learning_rate": 4.542719667014792e-06, "loss": 0.3838, "step": 3482 }, { "epoch": 1.729108058911137, "grad_norm": 0.45011305809020996, "learning_rate": 4.539842764855032e-06, "loss": 0.3763, "step": 3483 }, { "epoch": 1.7296045010756247, "grad_norm": 0.4322308897972107, "learning_rate": 4.5369660163292674e-06, "loss": 0.3444, "step": 3484 }, { "epoch": 1.7301009432401124, "grad_norm": 0.48286911845207214, "learning_rate": 4.534089422397965e-06, "loss": 0.4134, "step": 3485 }, { "epoch": 1.7305973854046004, "grad_norm": 0.5231637954711914, "learning_rate": 4.5312129840215405e-06, "loss": 0.3635, "step": 3486 }, { "epoch": 1.7310938275690884, "grad_norm": 0.4705500304698944, "learning_rate": 4.528336702160361e-06, "loss": 0.364, "step": 3487 }, { "epoch": 1.731590269733576, "grad_norm": 0.4489648938179016, "learning_rate": 4.5254605777747376e-06, "loss": 0.3555, "step": 3488 }, { "epoch": 1.7320867118980638, "grad_norm": 0.5398720502853394, "learning_rate": 4.5225846118249295e-06, "loss": 0.4115, "step": 3489 }, { "epoch": 1.7325831540625516, "grad_norm": 0.5135363936424255, "learning_rate": 4.519708805271144e-06, "loss": 0.3991, "step": 3490 }, { "epoch": 1.7330795962270396, "grad_norm": 0.46335577964782715, "learning_rate": 4.5168331590735345e-06, "loss": 0.3383, "step": 3491 }, { "epoch": 1.7335760383915275, "grad_norm": 0.4915655255317688, "learning_rate": 4.513957674192203e-06, "loss": 0.3825, "step": 3492 }, { "epoch": 1.7340724805560153, "grad_norm": 0.47002729773521423, "learning_rate": 4.511082351587194e-06, "loss": 0.3135, "step": 3493 }, { "epoch": 1.734568922720503, "grad_norm": 0.4883146286010742, "learning_rate": 4.5082071922184965e-06, "loss": 0.3413, "step": 3494 }, { "epoch": 1.7350653648849907, "grad_norm": 0.5414867997169495, "learning_rate": 4.505332197046055e-06, "loss": 0.4174, "step": 3495 }, { "epoch": 1.7355618070494787, "grad_norm": 0.4822658598423004, "learning_rate": 4.5024573670297475e-06, "loss": 0.3675, "step": 3496 }, { "epoch": 1.7360582492139667, "grad_norm": 0.577713668346405, "learning_rate": 4.499582703129402e-06, "loss": 0.4093, "step": 3497 }, { "epoch": 1.7365546913784544, "grad_norm": 0.4429943561553955, "learning_rate": 4.49670820630479e-06, "loss": 0.3671, "step": 3498 }, { "epoch": 1.7370511335429422, "grad_norm": 0.3899818956851959, "learning_rate": 4.493833877515632e-06, "loss": 0.2858, "step": 3499 }, { "epoch": 1.7375475757074301, "grad_norm": 0.6617653369903564, "learning_rate": 4.490959717721586e-06, "loss": 0.4738, "step": 3500 }, { "epoch": 1.7380440178719179, "grad_norm": 0.47387367486953735, "learning_rate": 4.4880857278822524e-06, "loss": 0.3671, "step": 3501 }, { "epoch": 1.7385404600364058, "grad_norm": 0.5058864951133728, "learning_rate": 4.485211908957183e-06, "loss": 0.4036, "step": 3502 }, { "epoch": 1.7390369022008936, "grad_norm": 0.5139012336730957, "learning_rate": 4.482338261905866e-06, "loss": 0.388, "step": 3503 }, { "epoch": 1.7395333443653813, "grad_norm": 0.4867737293243408, "learning_rate": 4.4794647876877335e-06, "loss": 0.3307, "step": 3504 }, { "epoch": 1.7400297865298693, "grad_norm": 0.5286304950714111, "learning_rate": 4.476591487262161e-06, "loss": 0.3888, "step": 3505 }, { "epoch": 1.7405262286943572, "grad_norm": 0.5182462930679321, "learning_rate": 4.473718361588465e-06, "loss": 0.3787, "step": 3506 }, { "epoch": 1.741022670858845, "grad_norm": 0.4065949320793152, "learning_rate": 4.470845411625906e-06, "loss": 0.3752, "step": 3507 }, { "epoch": 1.7415191130233327, "grad_norm": 0.471300333738327, "learning_rate": 4.467972638333682e-06, "loss": 0.3478, "step": 3508 }, { "epoch": 1.7420155551878205, "grad_norm": 0.5261066555976868, "learning_rate": 4.465100042670933e-06, "loss": 0.4322, "step": 3509 }, { "epoch": 1.7425119973523084, "grad_norm": 0.4511087238788605, "learning_rate": 4.462227625596743e-06, "loss": 0.3156, "step": 3510 }, { "epoch": 1.7430084395167964, "grad_norm": 0.5843825340270996, "learning_rate": 4.459355388070134e-06, "loss": 0.3625, "step": 3511 }, { "epoch": 1.7435048816812841, "grad_norm": 0.4734066128730774, "learning_rate": 4.456483331050064e-06, "loss": 0.3647, "step": 3512 }, { "epoch": 1.744001323845772, "grad_norm": 0.44752904772758484, "learning_rate": 4.453611455495441e-06, "loss": 0.3633, "step": 3513 }, { "epoch": 1.7444977660102599, "grad_norm": 0.5302708745002747, "learning_rate": 4.450739762365101e-06, "loss": 0.4072, "step": 3514 }, { "epoch": 1.7449942081747476, "grad_norm": 0.582202672958374, "learning_rate": 4.447868252617828e-06, "loss": 0.3889, "step": 3515 }, { "epoch": 1.7454906503392356, "grad_norm": 0.45629945397377014, "learning_rate": 4.444996927212337e-06, "loss": 0.2987, "step": 3516 }, { "epoch": 1.7459870925037233, "grad_norm": 0.4640304744243622, "learning_rate": 4.44212578710729e-06, "loss": 0.4143, "step": 3517 }, { "epoch": 1.746483534668211, "grad_norm": 0.4853842258453369, "learning_rate": 4.439254833261281e-06, "loss": 0.3801, "step": 3518 }, { "epoch": 1.746979976832699, "grad_norm": 0.5042877793312073, "learning_rate": 4.436384066632842e-06, "loss": 0.373, "step": 3519 }, { "epoch": 1.747476418997187, "grad_norm": 0.5045114755630493, "learning_rate": 4.433513488180443e-06, "loss": 0.3804, "step": 3520 }, { "epoch": 1.7479728611616747, "grad_norm": 0.404839426279068, "learning_rate": 4.4306430988624945e-06, "loss": 0.3679, "step": 3521 }, { "epoch": 1.7484693033261625, "grad_norm": 0.4290003776550293, "learning_rate": 4.427772899637343e-06, "loss": 0.3804, "step": 3522 }, { "epoch": 1.7489657454906502, "grad_norm": 0.550048828125, "learning_rate": 4.424902891463269e-06, "loss": 0.3974, "step": 3523 }, { "epoch": 1.7494621876551382, "grad_norm": 0.5163791179656982, "learning_rate": 4.422033075298485e-06, "loss": 0.3793, "step": 3524 }, { "epoch": 1.7499586298196261, "grad_norm": 0.46551281213760376, "learning_rate": 4.419163452101153e-06, "loss": 0.4233, "step": 3525 }, { "epoch": 1.7504550719841139, "grad_norm": 0.42707791924476624, "learning_rate": 4.416294022829356e-06, "loss": 0.3519, "step": 3526 }, { "epoch": 1.7509515141486016, "grad_norm": 0.4707711637020111, "learning_rate": 4.41342478844112e-06, "loss": 0.383, "step": 3527 }, { "epoch": 1.7514479563130894, "grad_norm": 0.4860548973083496, "learning_rate": 4.410555749894407e-06, "loss": 0.3912, "step": 3528 }, { "epoch": 1.7519443984775773, "grad_norm": 0.5088922381401062, "learning_rate": 4.407686908147107e-06, "loss": 0.3731, "step": 3529 }, { "epoch": 1.7524408406420653, "grad_norm": 0.5249324440956116, "learning_rate": 4.404818264157052e-06, "loss": 0.3337, "step": 3530 }, { "epoch": 1.752937282806553, "grad_norm": 0.5744071006774902, "learning_rate": 4.4019498188819996e-06, "loss": 0.4223, "step": 3531 }, { "epoch": 1.7534337249710408, "grad_norm": 0.43603500723838806, "learning_rate": 4.399081573279651e-06, "loss": 0.372, "step": 3532 }, { "epoch": 1.7539301671355287, "grad_norm": 0.415213942527771, "learning_rate": 4.396213528307633e-06, "loss": 0.3444, "step": 3533 }, { "epoch": 1.7544266093000167, "grad_norm": 0.48429110646247864, "learning_rate": 4.393345684923508e-06, "loss": 0.3592, "step": 3534 }, { "epoch": 1.7549230514645044, "grad_norm": 0.487858384847641, "learning_rate": 4.3904780440847695e-06, "loss": 0.3763, "step": 3535 }, { "epoch": 1.7554194936289922, "grad_norm": 0.5198289155960083, "learning_rate": 4.387610606748847e-06, "loss": 0.3755, "step": 3536 }, { "epoch": 1.75591593579348, "grad_norm": 0.4122050702571869, "learning_rate": 4.384743373873099e-06, "loss": 0.3674, "step": 3537 }, { "epoch": 1.756412377957968, "grad_norm": 0.49817100167274475, "learning_rate": 4.3818763464148165e-06, "loss": 0.4188, "step": 3538 }, { "epoch": 1.7569088201224559, "grad_norm": 0.4611572027206421, "learning_rate": 4.379009525331222e-06, "loss": 0.3418, "step": 3539 }, { "epoch": 1.7574052622869436, "grad_norm": 0.4816409647464752, "learning_rate": 4.37614291157947e-06, "loss": 0.3311, "step": 3540 }, { "epoch": 1.7579017044514313, "grad_norm": 0.49158650636672974, "learning_rate": 4.373276506116645e-06, "loss": 0.4042, "step": 3541 }, { "epoch": 1.758398146615919, "grad_norm": 0.4580959677696228, "learning_rate": 4.370410309899759e-06, "loss": 0.3748, "step": 3542 }, { "epoch": 1.758894588780407, "grad_norm": 0.4720197319984436, "learning_rate": 4.367544323885762e-06, "loss": 0.4003, "step": 3543 }, { "epoch": 1.759391030944895, "grad_norm": 0.43943893909454346, "learning_rate": 4.364678549031525e-06, "loss": 0.4261, "step": 3544 }, { "epoch": 1.7598874731093828, "grad_norm": 0.4236045181751251, "learning_rate": 4.3618129862938525e-06, "loss": 0.3236, "step": 3545 }, { "epoch": 1.7603839152738705, "grad_norm": 0.47550883889198303, "learning_rate": 4.358947636629478e-06, "loss": 0.3542, "step": 3546 }, { "epoch": 1.7608803574383585, "grad_norm": 0.41946107149124146, "learning_rate": 4.3560825009950665e-06, "loss": 0.3219, "step": 3547 }, { "epoch": 1.7613767996028464, "grad_norm": 0.4828420579433441, "learning_rate": 4.353217580347208e-06, "loss": 0.3992, "step": 3548 }, { "epoch": 1.7618732417673342, "grad_norm": 0.46132877469062805, "learning_rate": 4.3503528756424204e-06, "loss": 0.4191, "step": 3549 }, { "epoch": 1.762369683931822, "grad_norm": 0.4435800611972809, "learning_rate": 4.3474883878371496e-06, "loss": 0.3982, "step": 3550 }, { "epoch": 1.7628661260963097, "grad_norm": 0.4107528030872345, "learning_rate": 4.3446241178877735e-06, "loss": 0.402, "step": 3551 }, { "epoch": 1.7633625682607976, "grad_norm": 0.4894763231277466, "learning_rate": 4.341760066750591e-06, "loss": 0.372, "step": 3552 }, { "epoch": 1.7638590104252856, "grad_norm": 0.4782402813434601, "learning_rate": 4.338896235381832e-06, "loss": 0.4246, "step": 3553 }, { "epoch": 1.7643554525897733, "grad_norm": 0.47849252820014954, "learning_rate": 4.336032624737653e-06, "loss": 0.3562, "step": 3554 }, { "epoch": 1.764851894754261, "grad_norm": 0.46692416071891785, "learning_rate": 4.333169235774136e-06, "loss": 0.3835, "step": 3555 }, { "epoch": 1.7653483369187488, "grad_norm": 0.401881605386734, "learning_rate": 4.330306069447287e-06, "loss": 0.3078, "step": 3556 }, { "epoch": 1.7658447790832368, "grad_norm": 0.4690534472465515, "learning_rate": 4.327443126713039e-06, "loss": 0.3879, "step": 3557 }, { "epoch": 1.7663412212477247, "grad_norm": 0.47688475251197815, "learning_rate": 4.324580408527254e-06, "loss": 0.3923, "step": 3558 }, { "epoch": 1.7668376634122125, "grad_norm": 0.46162113547325134, "learning_rate": 4.321717915845713e-06, "loss": 0.3909, "step": 3559 }, { "epoch": 1.7673341055767002, "grad_norm": 0.4447939097881317, "learning_rate": 4.318855649624124e-06, "loss": 0.3638, "step": 3560 }, { "epoch": 1.7678305477411882, "grad_norm": 0.45476993918418884, "learning_rate": 4.315993610818121e-06, "loss": 0.3715, "step": 3561 }, { "epoch": 1.768326989905676, "grad_norm": 0.39025524258613586, "learning_rate": 4.3131318003832625e-06, "loss": 0.3133, "step": 3562 }, { "epoch": 1.768823432070164, "grad_norm": 0.5029804706573486, "learning_rate": 4.310270219275028e-06, "loss": 0.3792, "step": 3563 }, { "epoch": 1.7693198742346516, "grad_norm": 0.4173735976219177, "learning_rate": 4.307408868448822e-06, "loss": 0.3609, "step": 3564 }, { "epoch": 1.7698163163991394, "grad_norm": 0.4659776985645294, "learning_rate": 4.304547748859967e-06, "loss": 0.3875, "step": 3565 }, { "epoch": 1.7703127585636274, "grad_norm": 0.4837808907032013, "learning_rate": 4.301686861463722e-06, "loss": 0.3945, "step": 3566 }, { "epoch": 1.7708092007281153, "grad_norm": 0.39287564158439636, "learning_rate": 4.298826207215254e-06, "loss": 0.3221, "step": 3567 }, { "epoch": 1.771305642892603, "grad_norm": 0.5008443593978882, "learning_rate": 4.2959657870696555e-06, "loss": 0.3861, "step": 3568 }, { "epoch": 1.7718020850570908, "grad_norm": 0.546540379524231, "learning_rate": 4.293105601981948e-06, "loss": 0.4052, "step": 3569 }, { "epoch": 1.7722985272215785, "grad_norm": 0.46248146891593933, "learning_rate": 4.290245652907069e-06, "loss": 0.3837, "step": 3570 }, { "epoch": 1.7727949693860665, "grad_norm": 0.46027982234954834, "learning_rate": 4.287385940799876e-06, "loss": 0.3766, "step": 3571 }, { "epoch": 1.7732914115505545, "grad_norm": 0.4666723608970642, "learning_rate": 4.284526466615148e-06, "loss": 0.3465, "step": 3572 }, { "epoch": 1.7737878537150422, "grad_norm": 0.5692270398139954, "learning_rate": 4.281667231307588e-06, "loss": 0.4825, "step": 3573 }, { "epoch": 1.77428429587953, "grad_norm": 0.43290844559669495, "learning_rate": 4.278808235831818e-06, "loss": 0.3531, "step": 3574 }, { "epoch": 1.774780738044018, "grad_norm": 0.5107525587081909, "learning_rate": 4.2759494811423755e-06, "loss": 0.3895, "step": 3575 }, { "epoch": 1.7752771802085057, "grad_norm": 0.538622260093689, "learning_rate": 4.2730909681937224e-06, "loss": 0.3943, "step": 3576 }, { "epoch": 1.7757736223729936, "grad_norm": 0.4644205570220947, "learning_rate": 4.2702326979402385e-06, "loss": 0.3334, "step": 3577 }, { "epoch": 1.7762700645374814, "grad_norm": 0.4430505931377411, "learning_rate": 4.267374671336224e-06, "loss": 0.3728, "step": 3578 }, { "epoch": 1.7767665067019691, "grad_norm": 0.536597490310669, "learning_rate": 4.264516889335894e-06, "loss": 0.4195, "step": 3579 }, { "epoch": 1.777262948866457, "grad_norm": 0.3857428729534149, "learning_rate": 4.261659352893386e-06, "loss": 0.362, "step": 3580 }, { "epoch": 1.777759391030945, "grad_norm": 0.4755401611328125, "learning_rate": 4.258802062962754e-06, "loss": 0.3966, "step": 3581 }, { "epoch": 1.7782558331954328, "grad_norm": 0.47735631465911865, "learning_rate": 4.255945020497968e-06, "loss": 0.2968, "step": 3582 }, { "epoch": 1.7787522753599205, "grad_norm": 0.5246385335922241, "learning_rate": 4.253088226452915e-06, "loss": 0.431, "step": 3583 }, { "epoch": 1.7792487175244083, "grad_norm": 0.41932398080825806, "learning_rate": 4.250231681781406e-06, "loss": 0.3351, "step": 3584 }, { "epoch": 1.7797451596888962, "grad_norm": 0.4543163776397705, "learning_rate": 4.24737538743716e-06, "loss": 0.415, "step": 3585 }, { "epoch": 1.7802416018533842, "grad_norm": 0.4370085597038269, "learning_rate": 4.244519344373817e-06, "loss": 0.3589, "step": 3586 }, { "epoch": 1.780738044017872, "grad_norm": 0.4708434045314789, "learning_rate": 4.241663553544931e-06, "loss": 0.4169, "step": 3587 }, { "epoch": 1.7812344861823597, "grad_norm": 0.4372048079967499, "learning_rate": 4.2388080159039755e-06, "loss": 0.3333, "step": 3588 }, { "epoch": 1.7817309283468474, "grad_norm": 0.5062905550003052, "learning_rate": 4.235952732404336e-06, "loss": 0.4624, "step": 3589 }, { "epoch": 1.7822273705113354, "grad_norm": 0.4664192795753479, "learning_rate": 4.233097703999313e-06, "loss": 0.3358, "step": 3590 }, { "epoch": 1.7827238126758234, "grad_norm": 0.4664001762866974, "learning_rate": 4.230242931642121e-06, "loss": 0.4191, "step": 3591 }, { "epoch": 1.783220254840311, "grad_norm": 0.46705612540245056, "learning_rate": 4.2273884162858955e-06, "loss": 0.3699, "step": 3592 }, { "epoch": 1.7837166970047988, "grad_norm": 0.4151996672153473, "learning_rate": 4.224534158883679e-06, "loss": 0.3352, "step": 3593 }, { "epoch": 1.7842131391692868, "grad_norm": 0.487211674451828, "learning_rate": 4.22168016038843e-06, "loss": 0.4014, "step": 3594 }, { "epoch": 1.7847095813337748, "grad_norm": 0.4516749680042267, "learning_rate": 4.2188264217530235e-06, "loss": 0.3474, "step": 3595 }, { "epoch": 1.7852060234982625, "grad_norm": 0.5159856081008911, "learning_rate": 4.2159729439302435e-06, "loss": 0.4005, "step": 3596 }, { "epoch": 1.7857024656627503, "grad_norm": 0.4700857698917389, "learning_rate": 4.213119727872789e-06, "loss": 0.3425, "step": 3597 }, { "epoch": 1.786198907827238, "grad_norm": 0.5404385328292847, "learning_rate": 4.210266774533269e-06, "loss": 0.3842, "step": 3598 }, { "epoch": 1.786695349991726, "grad_norm": 0.531535804271698, "learning_rate": 4.207414084864211e-06, "loss": 0.3808, "step": 3599 }, { "epoch": 1.787191792156214, "grad_norm": 0.5046094655990601, "learning_rate": 4.204561659818049e-06, "loss": 0.3606, "step": 3600 }, { "epoch": 1.7876882343207017, "grad_norm": 0.4752775728702545, "learning_rate": 4.2017095003471294e-06, "loss": 0.3422, "step": 3601 }, { "epoch": 1.7881846764851894, "grad_norm": 0.4748501479625702, "learning_rate": 4.19885760740371e-06, "loss": 0.3792, "step": 3602 }, { "epoch": 1.7886811186496772, "grad_norm": 0.4885747730731964, "learning_rate": 4.196005981939963e-06, "loss": 0.3895, "step": 3603 }, { "epoch": 1.7891775608141651, "grad_norm": 0.5025051236152649, "learning_rate": 4.193154624907968e-06, "loss": 0.3427, "step": 3604 }, { "epoch": 1.789674002978653, "grad_norm": 0.46571409702301025, "learning_rate": 4.1903035372597155e-06, "loss": 0.3702, "step": 3605 }, { "epoch": 1.7901704451431408, "grad_norm": 0.47951647639274597, "learning_rate": 4.1874527199471025e-06, "loss": 0.3843, "step": 3606 }, { "epoch": 1.7906668873076286, "grad_norm": 0.4713567793369293, "learning_rate": 4.184602173921945e-06, "loss": 0.3668, "step": 3607 }, { "epoch": 1.7911633294721165, "grad_norm": 0.5357673168182373, "learning_rate": 4.181751900135959e-06, "loss": 0.4442, "step": 3608 }, { "epoch": 1.7916597716366043, "grad_norm": 0.435021311044693, "learning_rate": 4.178901899540775e-06, "loss": 0.354, "step": 3609 }, { "epoch": 1.7921562138010922, "grad_norm": 0.4818534553050995, "learning_rate": 4.17605217308793e-06, "loss": 0.4233, "step": 3610 }, { "epoch": 1.79265265596558, "grad_norm": 0.43191054463386536, "learning_rate": 4.173202721728873e-06, "loss": 0.34, "step": 3611 }, { "epoch": 1.7931490981300677, "grad_norm": 0.5029119253158569, "learning_rate": 4.170353546414955e-06, "loss": 0.394, "step": 3612 }, { "epoch": 1.7936455402945557, "grad_norm": 0.5248994827270508, "learning_rate": 4.167504648097438e-06, "loss": 0.3637, "step": 3613 }, { "epoch": 1.7941419824590437, "grad_norm": 0.46209439635276794, "learning_rate": 4.164656027727495e-06, "loss": 0.3617, "step": 3614 }, { "epoch": 1.7946384246235314, "grad_norm": 0.4959416389465332, "learning_rate": 4.161807686256199e-06, "loss": 0.397, "step": 3615 }, { "epoch": 1.7951348667880191, "grad_norm": 0.47974586486816406, "learning_rate": 4.158959624634537e-06, "loss": 0.4234, "step": 3616 }, { "epoch": 1.7956313089525069, "grad_norm": 0.524192750453949, "learning_rate": 4.156111843813397e-06, "loss": 0.392, "step": 3617 }, { "epoch": 1.7961277511169949, "grad_norm": 0.47121357917785645, "learning_rate": 4.153264344743578e-06, "loss": 0.3636, "step": 3618 }, { "epoch": 1.7966241932814828, "grad_norm": 0.48762795329093933, "learning_rate": 4.150417128375782e-06, "loss": 0.3953, "step": 3619 }, { "epoch": 1.7971206354459706, "grad_norm": 0.5085958242416382, "learning_rate": 4.147570195660614e-06, "loss": 0.3692, "step": 3620 }, { "epoch": 1.7976170776104583, "grad_norm": 0.4934573173522949, "learning_rate": 4.144723547548592e-06, "loss": 0.3487, "step": 3621 }, { "epoch": 1.7981135197749463, "grad_norm": 0.5353735685348511, "learning_rate": 4.141877184990133e-06, "loss": 0.4134, "step": 3622 }, { "epoch": 1.798609961939434, "grad_norm": 0.5507001876831055, "learning_rate": 4.1390311089355575e-06, "loss": 0.3891, "step": 3623 }, { "epoch": 1.799106404103922, "grad_norm": 0.5150303840637207, "learning_rate": 4.136185320335095e-06, "loss": 0.4213, "step": 3624 }, { "epoch": 1.7996028462684097, "grad_norm": 0.4204881191253662, "learning_rate": 4.133339820138876e-06, "loss": 0.3347, "step": 3625 }, { "epoch": 1.8000992884328975, "grad_norm": 0.4865262508392334, "learning_rate": 4.130494609296939e-06, "loss": 0.3957, "step": 3626 }, { "epoch": 1.8005957305973854, "grad_norm": 0.4690217077732086, "learning_rate": 4.12764968875922e-06, "loss": 0.3886, "step": 3627 }, { "epoch": 1.8010921727618734, "grad_norm": 0.5038434267044067, "learning_rate": 4.124805059475559e-06, "loss": 0.3682, "step": 3628 }, { "epoch": 1.8015886149263611, "grad_norm": 0.48325932025909424, "learning_rate": 4.1219607223957026e-06, "loss": 0.3389, "step": 3629 }, { "epoch": 1.8020850570908489, "grad_norm": 0.5063427686691284, "learning_rate": 4.119116678469298e-06, "loss": 0.3749, "step": 3630 }, { "epoch": 1.8025814992553366, "grad_norm": 0.4781516194343567, "learning_rate": 4.116272928645893e-06, "loss": 0.3698, "step": 3631 }, { "epoch": 1.8030779414198246, "grad_norm": 0.5003901720046997, "learning_rate": 4.113429473874938e-06, "loss": 0.4051, "step": 3632 }, { "epoch": 1.8035743835843125, "grad_norm": 0.4610329270362854, "learning_rate": 4.1105863151057865e-06, "loss": 0.3776, "step": 3633 }, { "epoch": 1.8040708257488003, "grad_norm": 0.5159478187561035, "learning_rate": 4.107743453287693e-06, "loss": 0.4074, "step": 3634 }, { "epoch": 1.804567267913288, "grad_norm": 0.4805334508419037, "learning_rate": 4.1049008893698066e-06, "loss": 0.3172, "step": 3635 }, { "epoch": 1.8050637100777758, "grad_norm": 0.5726794600486755, "learning_rate": 4.102058624301189e-06, "loss": 0.4073, "step": 3636 }, { "epoch": 1.8055601522422637, "grad_norm": 0.44641825556755066, "learning_rate": 4.099216659030792e-06, "loss": 0.3597, "step": 3637 }, { "epoch": 1.8060565944067517, "grad_norm": 0.4583229720592499, "learning_rate": 4.09637499450747e-06, "loss": 0.3837, "step": 3638 }, { "epoch": 1.8065530365712394, "grad_norm": 0.5406397581100464, "learning_rate": 4.0935336316799764e-06, "loss": 0.4016, "step": 3639 }, { "epoch": 1.8070494787357272, "grad_norm": 0.4920428693294525, "learning_rate": 4.090692571496968e-06, "loss": 0.333, "step": 3640 }, { "epoch": 1.8075459209002152, "grad_norm": 0.5153690576553345, "learning_rate": 4.087851814906997e-06, "loss": 0.2889, "step": 3641 }, { "epoch": 1.8080423630647031, "grad_norm": 0.5062406659126282, "learning_rate": 4.0850113628585155e-06, "loss": 0.3889, "step": 3642 }, { "epoch": 1.8085388052291909, "grad_norm": 0.4785611629486084, "learning_rate": 4.0821712162998686e-06, "loss": 0.3597, "step": 3643 }, { "epoch": 1.8090352473936786, "grad_norm": 0.47246065735816956, "learning_rate": 4.07933137617931e-06, "loss": 0.37, "step": 3644 }, { "epoch": 1.8095316895581663, "grad_norm": 0.4699362516403198, "learning_rate": 4.076491843444982e-06, "loss": 0.3505, "step": 3645 }, { "epoch": 1.8100281317226543, "grad_norm": 0.5347246527671814, "learning_rate": 4.0736526190449264e-06, "loss": 0.3869, "step": 3646 }, { "epoch": 1.8105245738871423, "grad_norm": 0.43637967109680176, "learning_rate": 4.0708137039270855e-06, "loss": 0.3854, "step": 3647 }, { "epoch": 1.81102101605163, "grad_norm": 0.43339988589286804, "learning_rate": 4.067975099039295e-06, "loss": 0.3803, "step": 3648 }, { "epoch": 1.8115174582161178, "grad_norm": 0.45117831230163574, "learning_rate": 4.065136805329289e-06, "loss": 0.3393, "step": 3649 }, { "epoch": 1.8120139003806055, "grad_norm": 0.5122005939483643, "learning_rate": 4.0622988237446924e-06, "loss": 0.364, "step": 3650 }, { "epoch": 1.8125103425450935, "grad_norm": 0.5469076037406921, "learning_rate": 4.059461155233036e-06, "loss": 0.43, "step": 3651 }, { "epoch": 1.8130067847095814, "grad_norm": 0.44147396087646484, "learning_rate": 4.056623800741738e-06, "loss": 0.3722, "step": 3652 }, { "epoch": 1.8135032268740692, "grad_norm": 0.3742326498031616, "learning_rate": 4.053786761218113e-06, "loss": 0.2979, "step": 3653 }, { "epoch": 1.813999669038557, "grad_norm": 0.5845015048980713, "learning_rate": 4.05095003760937e-06, "loss": 0.4061, "step": 3654 }, { "epoch": 1.8144961112030449, "grad_norm": 0.5274355411529541, "learning_rate": 4.048113630862617e-06, "loss": 0.3873, "step": 3655 }, { "epoch": 1.8149925533675328, "grad_norm": 0.4251171946525574, "learning_rate": 4.045277541924851e-06, "loss": 0.3789, "step": 3656 }, { "epoch": 1.8154889955320206, "grad_norm": 0.461000919342041, "learning_rate": 4.042441771742967e-06, "loss": 0.3662, "step": 3657 }, { "epoch": 1.8159854376965083, "grad_norm": 0.503721296787262, "learning_rate": 4.039606321263748e-06, "loss": 0.3792, "step": 3658 }, { "epoch": 1.816481879860996, "grad_norm": 0.5021674036979675, "learning_rate": 4.036771191433879e-06, "loss": 0.351, "step": 3659 }, { "epoch": 1.816978322025484, "grad_norm": 0.5009273290634155, "learning_rate": 4.03393638319993e-06, "loss": 0.3779, "step": 3660 }, { "epoch": 1.817474764189972, "grad_norm": 0.45084086060523987, "learning_rate": 4.0311018975083644e-06, "loss": 0.3418, "step": 3661 }, { "epoch": 1.8179712063544597, "grad_norm": 0.5336067080497742, "learning_rate": 4.028267735305544e-06, "loss": 0.4026, "step": 3662 }, { "epoch": 1.8184676485189475, "grad_norm": 0.42041563987731934, "learning_rate": 4.025433897537715e-06, "loss": 0.3397, "step": 3663 }, { "epoch": 1.8189640906834352, "grad_norm": 0.5174136161804199, "learning_rate": 4.022600385151022e-06, "loss": 0.3755, "step": 3664 }, { "epoch": 1.8194605328479232, "grad_norm": 0.5098208785057068, "learning_rate": 4.019767199091494e-06, "loss": 0.4203, "step": 3665 }, { "epoch": 1.8199569750124112, "grad_norm": 0.5025497674942017, "learning_rate": 4.016934340305059e-06, "loss": 0.3789, "step": 3666 }, { "epoch": 1.820453417176899, "grad_norm": 0.47372788190841675, "learning_rate": 4.01410180973753e-06, "loss": 0.3769, "step": 3667 }, { "epoch": 1.8209498593413866, "grad_norm": 0.43387049436569214, "learning_rate": 4.01126960833461e-06, "loss": 0.3278, "step": 3668 }, { "epoch": 1.8214463015058746, "grad_norm": 0.5082542300224304, "learning_rate": 4.008437737041895e-06, "loss": 0.4273, "step": 3669 }, { "epoch": 1.8219427436703624, "grad_norm": 0.4108525812625885, "learning_rate": 4.005606196804872e-06, "loss": 0.3367, "step": 3670 }, { "epoch": 1.8224391858348503, "grad_norm": 0.5954563617706299, "learning_rate": 4.0027749885689126e-06, "loss": 0.3857, "step": 3671 }, { "epoch": 1.822935627999338, "grad_norm": 0.46060991287231445, "learning_rate": 3.999944113279283e-06, "loss": 0.367, "step": 3672 }, { "epoch": 1.8234320701638258, "grad_norm": 0.46486854553222656, "learning_rate": 3.9971135718811315e-06, "loss": 0.4061, "step": 3673 }, { "epoch": 1.8239285123283138, "grad_norm": 0.5048240423202515, "learning_rate": 3.994283365319503e-06, "loss": 0.3903, "step": 3674 }, { "epoch": 1.8244249544928017, "grad_norm": 0.49208056926727295, "learning_rate": 3.991453494539326e-06, "loss": 0.3778, "step": 3675 }, { "epoch": 1.8249213966572895, "grad_norm": 0.4489381015300751, "learning_rate": 3.988623960485414e-06, "loss": 0.3767, "step": 3676 }, { "epoch": 1.8254178388217772, "grad_norm": 0.46327584981918335, "learning_rate": 3.985794764102475e-06, "loss": 0.3415, "step": 3677 }, { "epoch": 1.825914280986265, "grad_norm": 0.5122634768486023, "learning_rate": 3.9829659063351e-06, "loss": 0.3755, "step": 3678 }, { "epoch": 1.826410723150753, "grad_norm": 0.5548282861709595, "learning_rate": 3.980137388127768e-06, "loss": 0.3691, "step": 3679 }, { "epoch": 1.8269071653152409, "grad_norm": 0.4591432511806488, "learning_rate": 3.977309210424841e-06, "loss": 0.366, "step": 3680 }, { "epoch": 1.8274036074797286, "grad_norm": 0.3924201428890228, "learning_rate": 3.9744813741705766e-06, "loss": 0.3539, "step": 3681 }, { "epoch": 1.8279000496442164, "grad_norm": 0.4617617428302765, "learning_rate": 3.971653880309109e-06, "loss": 0.3679, "step": 3682 }, { "epoch": 1.8283964918087043, "grad_norm": 0.5163747072219849, "learning_rate": 3.968826729784462e-06, "loss": 0.3745, "step": 3683 }, { "epoch": 1.828892933973192, "grad_norm": 0.4639892578125, "learning_rate": 3.965999923540541e-06, "loss": 0.3336, "step": 3684 }, { "epoch": 1.82938937613768, "grad_norm": 0.4633890688419342, "learning_rate": 3.963173462521146e-06, "loss": 0.4084, "step": 3685 }, { "epoch": 1.8298858183021678, "grad_norm": 0.5015590190887451, "learning_rate": 3.960347347669951e-06, "loss": 0.3742, "step": 3686 }, { "epoch": 1.8303822604666555, "grad_norm": 0.5168230533599854, "learning_rate": 3.957521579930522e-06, "loss": 0.3958, "step": 3687 }, { "epoch": 1.8308787026311435, "grad_norm": 0.462544709444046, "learning_rate": 3.954696160246302e-06, "loss": 0.3639, "step": 3688 }, { "epoch": 1.8313751447956315, "grad_norm": 0.46928802132606506, "learning_rate": 3.951871089560626e-06, "loss": 0.3868, "step": 3689 }, { "epoch": 1.8318715869601192, "grad_norm": 0.4778861403465271, "learning_rate": 3.949046368816708e-06, "loss": 0.3887, "step": 3690 }, { "epoch": 1.832368029124607, "grad_norm": 0.49696585536003113, "learning_rate": 3.94622199895764e-06, "loss": 0.4005, "step": 3691 }, { "epoch": 1.8328644712890947, "grad_norm": 0.491460919380188, "learning_rate": 3.94339798092641e-06, "loss": 0.3874, "step": 3692 }, { "epoch": 1.8333609134535827, "grad_norm": 0.3538079559803009, "learning_rate": 3.940574315665877e-06, "loss": 0.3193, "step": 3693 }, { "epoch": 1.8338573556180706, "grad_norm": 0.4409700930118561, "learning_rate": 3.937751004118786e-06, "loss": 0.4097, "step": 3694 }, { "epoch": 1.8343537977825584, "grad_norm": 0.4653623402118683, "learning_rate": 3.934928047227764e-06, "loss": 0.3782, "step": 3695 }, { "epoch": 1.834850239947046, "grad_norm": 0.4990956485271454, "learning_rate": 3.932105445935319e-06, "loss": 0.3482, "step": 3696 }, { "epoch": 1.8353466821115338, "grad_norm": 0.4845690131187439, "learning_rate": 3.929283201183844e-06, "loss": 0.3974, "step": 3697 }, { "epoch": 1.8358431242760218, "grad_norm": 0.46645331382751465, "learning_rate": 3.926461313915607e-06, "loss": 0.4028, "step": 3698 }, { "epoch": 1.8363395664405098, "grad_norm": 0.4854544401168823, "learning_rate": 3.923639785072759e-06, "loss": 0.3941, "step": 3699 }, { "epoch": 1.8368360086049975, "grad_norm": 0.4479024112224579, "learning_rate": 3.920818615597334e-06, "loss": 0.3814, "step": 3700 }, { "epoch": 1.8373324507694853, "grad_norm": 0.4708687663078308, "learning_rate": 3.9179978064312426e-06, "loss": 0.318, "step": 3701 }, { "epoch": 1.8378288929339732, "grad_norm": 0.5162968635559082, "learning_rate": 3.915177358516276e-06, "loss": 0.4259, "step": 3702 }, { "epoch": 1.8383253350984612, "grad_norm": 0.48603519797325134, "learning_rate": 3.912357272794105e-06, "loss": 0.3528, "step": 3703 }, { "epoch": 1.838821777262949, "grad_norm": 0.4575191140174866, "learning_rate": 3.909537550206281e-06, "loss": 0.3249, "step": 3704 }, { "epoch": 1.8393182194274367, "grad_norm": 0.5025515556335449, "learning_rate": 3.906718191694232e-06, "loss": 0.3971, "step": 3705 }, { "epoch": 1.8398146615919244, "grad_norm": 0.4904787242412567, "learning_rate": 3.903899198199264e-06, "loss": 0.3777, "step": 3706 }, { "epoch": 1.8403111037564124, "grad_norm": 0.4746706485748291, "learning_rate": 3.901080570662565e-06, "loss": 0.3905, "step": 3707 }, { "epoch": 1.8408075459209003, "grad_norm": 0.44443798065185547, "learning_rate": 3.898262310025196e-06, "loss": 0.3791, "step": 3708 }, { "epoch": 1.841303988085388, "grad_norm": 0.45543456077575684, "learning_rate": 3.895444417228097e-06, "loss": 0.4001, "step": 3709 }, { "epoch": 1.8418004302498758, "grad_norm": 0.3921678066253662, "learning_rate": 3.892626893212088e-06, "loss": 0.2807, "step": 3710 }, { "epoch": 1.8422968724143636, "grad_norm": 0.48836374282836914, "learning_rate": 3.889809738917862e-06, "loss": 0.4546, "step": 3711 }, { "epoch": 1.8427933145788515, "grad_norm": 0.42370644211769104, "learning_rate": 3.8869929552859915e-06, "loss": 0.3784, "step": 3712 }, { "epoch": 1.8432897567433395, "grad_norm": 0.423628032207489, "learning_rate": 3.884176543256924e-06, "loss": 0.3336, "step": 3713 }, { "epoch": 1.8437861989078272, "grad_norm": 0.4682456851005554, "learning_rate": 3.88136050377098e-06, "loss": 0.3878, "step": 3714 }, { "epoch": 1.844282641072315, "grad_norm": 0.47669345140457153, "learning_rate": 3.878544837768362e-06, "loss": 0.3931, "step": 3715 }, { "epoch": 1.844779083236803, "grad_norm": 0.47018253803253174, "learning_rate": 3.875729546189144e-06, "loss": 0.3941, "step": 3716 }, { "epoch": 1.8452755254012907, "grad_norm": 0.42130813002586365, "learning_rate": 3.872914629973273e-06, "loss": 0.3988, "step": 3717 }, { "epoch": 1.8457719675657787, "grad_norm": 0.4348177909851074, "learning_rate": 3.870100090060577e-06, "loss": 0.3364, "step": 3718 }, { "epoch": 1.8462684097302664, "grad_norm": 0.4481654763221741, "learning_rate": 3.8672859273907495e-06, "loss": 0.4096, "step": 3719 }, { "epoch": 1.8467648518947541, "grad_norm": 0.4431994557380676, "learning_rate": 3.864472142903367e-06, "loss": 0.4391, "step": 3720 }, { "epoch": 1.847261294059242, "grad_norm": 0.42713499069213867, "learning_rate": 3.861658737537872e-06, "loss": 0.3918, "step": 3721 }, { "epoch": 1.84775773622373, "grad_norm": 0.458337664604187, "learning_rate": 3.858845712233588e-06, "loss": 0.3393, "step": 3722 }, { "epoch": 1.8482541783882178, "grad_norm": 0.48151859641075134, "learning_rate": 3.8560330679297065e-06, "loss": 0.381, "step": 3723 }, { "epoch": 1.8487506205527056, "grad_norm": 0.39000263810157776, "learning_rate": 3.853220805565292e-06, "loss": 0.3129, "step": 3724 }, { "epoch": 1.8492470627171933, "grad_norm": 0.5165789723396301, "learning_rate": 3.850408926079281e-06, "loss": 0.4353, "step": 3725 }, { "epoch": 1.8497435048816813, "grad_norm": 0.43871140480041504, "learning_rate": 3.847597430410486e-06, "loss": 0.3419, "step": 3726 }, { "epoch": 1.8502399470461692, "grad_norm": 0.4919199049472809, "learning_rate": 3.844786319497589e-06, "loss": 0.3597, "step": 3727 }, { "epoch": 1.850736389210657, "grad_norm": 0.45988988876342773, "learning_rate": 3.841975594279144e-06, "loss": 0.4099, "step": 3728 }, { "epoch": 1.8512328313751447, "grad_norm": 0.49343591928482056, "learning_rate": 3.839165255693571e-06, "loss": 0.4194, "step": 3729 }, { "epoch": 1.8517292735396327, "grad_norm": 0.42829081416130066, "learning_rate": 3.836355304679173e-06, "loss": 0.3589, "step": 3730 }, { "epoch": 1.8522257157041204, "grad_norm": 0.4789847135543823, "learning_rate": 3.833545742174113e-06, "loss": 0.3346, "step": 3731 }, { "epoch": 1.8527221578686084, "grad_norm": 0.5160104036331177, "learning_rate": 3.830736569116423e-06, "loss": 0.3672, "step": 3732 }, { "epoch": 1.8532186000330961, "grad_norm": 0.48245617747306824, "learning_rate": 3.827927786444018e-06, "loss": 0.3379, "step": 3733 }, { "epoch": 1.8537150421975839, "grad_norm": 0.4738095700740814, "learning_rate": 3.825119395094668e-06, "loss": 0.3858, "step": 3734 }, { "epoch": 1.8542114843620718, "grad_norm": 0.48211973905563354, "learning_rate": 3.822311396006022e-06, "loss": 0.4031, "step": 3735 }, { "epoch": 1.8547079265265598, "grad_norm": 0.43624618649482727, "learning_rate": 3.81950379011559e-06, "loss": 0.3269, "step": 3736 }, { "epoch": 1.8552043686910475, "grad_norm": 0.46170324087142944, "learning_rate": 3.816696578360761e-06, "loss": 0.4032, "step": 3737 }, { "epoch": 1.8557008108555353, "grad_norm": 0.5118418335914612, "learning_rate": 3.8138897616787847e-06, "loss": 0.4498, "step": 3738 }, { "epoch": 1.856197253020023, "grad_norm": 0.41337326169013977, "learning_rate": 3.8110833410067795e-06, "loss": 0.3241, "step": 3739 }, { "epoch": 1.856693695184511, "grad_norm": 0.38195449113845825, "learning_rate": 3.808277317281732e-06, "loss": 0.4003, "step": 3740 }, { "epoch": 1.857190137348999, "grad_norm": 0.4197715222835541, "learning_rate": 3.805471691440501e-06, "loss": 0.4183, "step": 3741 }, { "epoch": 1.8576865795134867, "grad_norm": 0.4379337430000305, "learning_rate": 3.802666464419806e-06, "loss": 0.3477, "step": 3742 }, { "epoch": 1.8581830216779744, "grad_norm": 0.4799128770828247, "learning_rate": 3.7998616371562377e-06, "loss": 0.3021, "step": 3743 }, { "epoch": 1.8586794638424622, "grad_norm": 0.4829934537410736, "learning_rate": 3.797057210586248e-06, "loss": 0.384, "step": 3744 }, { "epoch": 1.8591759060069502, "grad_norm": 0.4464662969112396, "learning_rate": 3.7942531856461643e-06, "loss": 0.4021, "step": 3745 }, { "epoch": 1.8596723481714381, "grad_norm": 0.4609619677066803, "learning_rate": 3.7914495632721713e-06, "loss": 0.3828, "step": 3746 }, { "epoch": 1.8601687903359259, "grad_norm": 0.4475337862968445, "learning_rate": 3.788646344400321e-06, "loss": 0.3082, "step": 3747 }, { "epoch": 1.8606652325004136, "grad_norm": 0.5082082152366638, "learning_rate": 3.7858435299665354e-06, "loss": 0.3731, "step": 3748 }, { "epoch": 1.8611616746649016, "grad_norm": 0.5030298233032227, "learning_rate": 3.783041120906596e-06, "loss": 0.3929, "step": 3749 }, { "epoch": 1.8616581168293895, "grad_norm": 0.43762511014938354, "learning_rate": 3.7802391181561497e-06, "loss": 0.3647, "step": 3750 }, { "epoch": 1.8621545589938773, "grad_norm": 0.4900294244289398, "learning_rate": 3.7774375226507106e-06, "loss": 0.3895, "step": 3751 }, { "epoch": 1.862651001158365, "grad_norm": 0.4400447905063629, "learning_rate": 3.7746363353256567e-06, "loss": 0.3967, "step": 3752 }, { "epoch": 1.8631474433228528, "grad_norm": 0.43064820766448975, "learning_rate": 3.7718355571162266e-06, "loss": 0.3894, "step": 3753 }, { "epoch": 1.8636438854873407, "grad_norm": 0.5131903290748596, "learning_rate": 3.769035188957525e-06, "loss": 0.3557, "step": 3754 }, { "epoch": 1.8641403276518287, "grad_norm": 0.5066806077957153, "learning_rate": 3.766235231784515e-06, "loss": 0.3998, "step": 3755 }, { "epoch": 1.8646367698163164, "grad_norm": 0.41914162039756775, "learning_rate": 3.7634356865320327e-06, "loss": 0.3435, "step": 3756 }, { "epoch": 1.8651332119808042, "grad_norm": 0.49522680044174194, "learning_rate": 3.760636554134765e-06, "loss": 0.3885, "step": 3757 }, { "epoch": 1.865629654145292, "grad_norm": 0.45525863766670227, "learning_rate": 3.757837835527268e-06, "loss": 0.3578, "step": 3758 }, { "epoch": 1.8661260963097799, "grad_norm": 0.4840557277202606, "learning_rate": 3.7550395316439568e-06, "loss": 0.3719, "step": 3759 }, { "epoch": 1.8666225384742678, "grad_norm": 0.43183356523513794, "learning_rate": 3.7522416434191117e-06, "loss": 0.3869, "step": 3760 }, { "epoch": 1.8671189806387556, "grad_norm": 0.45994600653648376, "learning_rate": 3.7494441717868698e-06, "loss": 0.3598, "step": 3761 }, { "epoch": 1.8676154228032433, "grad_norm": 0.44417205452919006, "learning_rate": 3.746647117681228e-06, "loss": 0.357, "step": 3762 }, { "epoch": 1.8681118649677313, "grad_norm": 0.5071812272071838, "learning_rate": 3.7438504820360523e-06, "loss": 0.3983, "step": 3763 }, { "epoch": 1.8686083071322193, "grad_norm": 0.4589729607105255, "learning_rate": 3.741054265785059e-06, "loss": 0.3954, "step": 3764 }, { "epoch": 1.869104749296707, "grad_norm": 0.451973557472229, "learning_rate": 3.738258469861831e-06, "loss": 0.3452, "step": 3765 }, { "epoch": 1.8696011914611947, "grad_norm": 0.4420022666454315, "learning_rate": 3.7354630951998063e-06, "loss": 0.4049, "step": 3766 }, { "epoch": 1.8700976336256825, "grad_norm": 0.4288995563983917, "learning_rate": 3.732668142732286e-06, "loss": 0.328, "step": 3767 }, { "epoch": 1.8705940757901705, "grad_norm": 0.5115130543708801, "learning_rate": 3.7298736133924295e-06, "loss": 0.3745, "step": 3768 }, { "epoch": 1.8710905179546584, "grad_norm": 0.5285108685493469, "learning_rate": 3.727079508113254e-06, "loss": 0.3928, "step": 3769 }, { "epoch": 1.8715869601191462, "grad_norm": 0.46632644534111023, "learning_rate": 3.724285827827633e-06, "loss": 0.3404, "step": 3770 }, { "epoch": 1.872083402283634, "grad_norm": 0.46091702580451965, "learning_rate": 3.721492573468303e-06, "loss": 0.4469, "step": 3771 }, { "epoch": 1.8725798444481216, "grad_norm": 0.410129576921463, "learning_rate": 3.7186997459678553e-06, "loss": 0.3212, "step": 3772 }, { "epoch": 1.8730762866126096, "grad_norm": 0.47113510966300964, "learning_rate": 3.715907346258737e-06, "loss": 0.4093, "step": 3773 }, { "epoch": 1.8735727287770976, "grad_norm": 0.42407846450805664, "learning_rate": 3.7131153752732563e-06, "loss": 0.4009, "step": 3774 }, { "epoch": 1.8740691709415853, "grad_norm": 0.4961521625518799, "learning_rate": 3.7103238339435776e-06, "loss": 0.4334, "step": 3775 }, { "epoch": 1.874565613106073, "grad_norm": 0.4896214008331299, "learning_rate": 3.7075327232017195e-06, "loss": 0.3873, "step": 3776 }, { "epoch": 1.875062055270561, "grad_norm": 0.4033004939556122, "learning_rate": 3.7047420439795555e-06, "loss": 0.3557, "step": 3777 }, { "epoch": 1.8755584974350488, "grad_norm": 0.45615020394325256, "learning_rate": 3.701951797208822e-06, "loss": 0.3361, "step": 3778 }, { "epoch": 1.8760549395995367, "grad_norm": 0.4505302309989929, "learning_rate": 3.6991619838211048e-06, "loss": 0.3862, "step": 3779 }, { "epoch": 1.8765513817640245, "grad_norm": 0.46518397331237793, "learning_rate": 3.696372604747845e-06, "loss": 0.3599, "step": 3780 }, { "epoch": 1.8770478239285122, "grad_norm": 0.4696800410747528, "learning_rate": 3.6935836609203412e-06, "loss": 0.336, "step": 3781 }, { "epoch": 1.8775442660930002, "grad_norm": 0.49134159088134766, "learning_rate": 3.6907951532697474e-06, "loss": 0.4122, "step": 3782 }, { "epoch": 1.8780407082574881, "grad_norm": 0.4770830273628235, "learning_rate": 3.688007082727071e-06, "loss": 0.4346, "step": 3783 }, { "epoch": 1.8785371504219759, "grad_norm": 0.4018573462963104, "learning_rate": 3.6852194502231707e-06, "loss": 0.3449, "step": 3784 }, { "epoch": 1.8790335925864636, "grad_norm": 0.5642313957214355, "learning_rate": 3.682432256688761e-06, "loss": 0.4373, "step": 3785 }, { "epoch": 1.8795300347509514, "grad_norm": 0.4120226800441742, "learning_rate": 3.6796455030544133e-06, "loss": 0.3385, "step": 3786 }, { "epoch": 1.8800264769154393, "grad_norm": 0.4465112090110779, "learning_rate": 3.6768591902505467e-06, "loss": 0.3464, "step": 3787 }, { "epoch": 1.8805229190799273, "grad_norm": 0.46800968050956726, "learning_rate": 3.674073319207433e-06, "loss": 0.3913, "step": 3788 }, { "epoch": 1.881019361244415, "grad_norm": 0.45562127232551575, "learning_rate": 3.671287890855204e-06, "loss": 0.373, "step": 3789 }, { "epoch": 1.8815158034089028, "grad_norm": 0.46206143498420715, "learning_rate": 3.6685029061238344e-06, "loss": 0.4239, "step": 3790 }, { "epoch": 1.8820122455733908, "grad_norm": 0.4163494408130646, "learning_rate": 3.665718365943158e-06, "loss": 0.3246, "step": 3791 }, { "epoch": 1.8825086877378785, "grad_norm": 0.5329844951629639, "learning_rate": 3.662934271242853e-06, "loss": 0.4009, "step": 3792 }, { "epoch": 1.8830051299023665, "grad_norm": 0.5642224550247192, "learning_rate": 3.6601506229524576e-06, "loss": 0.3783, "step": 3793 }, { "epoch": 1.8835015720668542, "grad_norm": 0.4636535048484802, "learning_rate": 3.6573674220013532e-06, "loss": 0.3451, "step": 3794 }, { "epoch": 1.883998014231342, "grad_norm": 0.43775463104248047, "learning_rate": 3.654584669318777e-06, "loss": 0.3331, "step": 3795 }, { "epoch": 1.88449445639583, "grad_norm": 0.4376583993434906, "learning_rate": 3.6518023658338107e-06, "loss": 0.3694, "step": 3796 }, { "epoch": 1.8849908985603179, "grad_norm": 0.522275984287262, "learning_rate": 3.6490205124753947e-06, "loss": 0.3901, "step": 3797 }, { "epoch": 1.8854873407248056, "grad_norm": 0.4658796489238739, "learning_rate": 3.646239110172311e-06, "loss": 0.3794, "step": 3798 }, { "epoch": 1.8859837828892934, "grad_norm": 0.44675353169441223, "learning_rate": 3.6434581598531937e-06, "loss": 0.4207, "step": 3799 }, { "epoch": 1.886480225053781, "grad_norm": 0.38023504614830017, "learning_rate": 3.640677662446531e-06, "loss": 0.3855, "step": 3800 }, { "epoch": 1.886976667218269, "grad_norm": 0.4420967102050781, "learning_rate": 3.6378976188806525e-06, "loss": 0.3945, "step": 3801 }, { "epoch": 1.887473109382757, "grad_norm": 0.42175421118736267, "learning_rate": 3.6351180300837386e-06, "loss": 0.3734, "step": 3802 }, { "epoch": 1.8879695515472448, "grad_norm": 0.44844186305999756, "learning_rate": 3.632338896983817e-06, "loss": 0.3812, "step": 3803 }, { "epoch": 1.8884659937117325, "grad_norm": 0.4834895730018616, "learning_rate": 3.6295602205087687e-06, "loss": 0.3648, "step": 3804 }, { "epoch": 1.8889624358762203, "grad_norm": 0.5107371807098389, "learning_rate": 3.6267820015863153e-06, "loss": 0.444, "step": 3805 }, { "epoch": 1.8894588780407082, "grad_norm": 0.4372791647911072, "learning_rate": 3.624004241144031e-06, "loss": 0.3631, "step": 3806 }, { "epoch": 1.8899553202051962, "grad_norm": 0.4042632579803467, "learning_rate": 3.621226940109331e-06, "loss": 0.3548, "step": 3807 }, { "epoch": 1.890451762369684, "grad_norm": 0.493424654006958, "learning_rate": 3.618450099409484e-06, "loss": 0.4206, "step": 3808 }, { "epoch": 1.8909482045341717, "grad_norm": 0.458377867937088, "learning_rate": 3.6156737199716014e-06, "loss": 0.3639, "step": 3809 }, { "epoch": 1.8914446466986596, "grad_norm": 0.4268186688423157, "learning_rate": 3.612897802722639e-06, "loss": 0.3707, "step": 3810 }, { "epoch": 1.8919410888631476, "grad_norm": 0.43843787908554077, "learning_rate": 3.6101223485893995e-06, "loss": 0.3616, "step": 3811 }, { "epoch": 1.8924375310276353, "grad_norm": 0.4683363735675812, "learning_rate": 3.6073473584985346e-06, "loss": 0.4094, "step": 3812 }, { "epoch": 1.892933973192123, "grad_norm": 0.44810551404953003, "learning_rate": 3.6045728333765356e-06, "loss": 0.3872, "step": 3813 }, { "epoch": 1.8934304153566108, "grad_norm": 0.4189211130142212, "learning_rate": 3.601798774149742e-06, "loss": 0.3824, "step": 3814 }, { "epoch": 1.8939268575210988, "grad_norm": 0.5058599710464478, "learning_rate": 3.5990251817443365e-06, "loss": 0.3594, "step": 3815 }, { "epoch": 1.8944232996855868, "grad_norm": 0.475224107503891, "learning_rate": 3.596252057086348e-06, "loss": 0.3361, "step": 3816 }, { "epoch": 1.8949197418500745, "grad_norm": 0.4567880630493164, "learning_rate": 3.593479401101645e-06, "loss": 0.3917, "step": 3817 }, { "epoch": 1.8954161840145622, "grad_norm": 0.4575487971305847, "learning_rate": 3.590707214715942e-06, "loss": 0.3987, "step": 3818 }, { "epoch": 1.89591262617905, "grad_norm": 0.43293526768684387, "learning_rate": 3.5879354988547988e-06, "loss": 0.3841, "step": 3819 }, { "epoch": 1.896409068343538, "grad_norm": 0.3894137442111969, "learning_rate": 3.585164254443615e-06, "loss": 0.343, "step": 3820 }, { "epoch": 1.896905510508026, "grad_norm": 0.4473779797554016, "learning_rate": 3.582393482407632e-06, "loss": 0.3805, "step": 3821 }, { "epoch": 1.8974019526725137, "grad_norm": 0.42236071825027466, "learning_rate": 3.5796231836719363e-06, "loss": 0.379, "step": 3822 }, { "epoch": 1.8978983948370014, "grad_norm": 0.4639647901058197, "learning_rate": 3.5768533591614575e-06, "loss": 0.4427, "step": 3823 }, { "epoch": 1.8983948370014894, "grad_norm": 0.45576101541519165, "learning_rate": 3.5740840098009634e-06, "loss": 0.3751, "step": 3824 }, { "epoch": 1.898891279165977, "grad_norm": 0.42052578926086426, "learning_rate": 3.5713151365150645e-06, "loss": 0.3239, "step": 3825 }, { "epoch": 1.899387721330465, "grad_norm": 0.4445875585079193, "learning_rate": 3.5685467402282093e-06, "loss": 0.3217, "step": 3826 }, { "epoch": 1.8998841634949528, "grad_norm": 0.5150508880615234, "learning_rate": 3.565778821864695e-06, "loss": 0.4779, "step": 3827 }, { "epoch": 1.9003806056594406, "grad_norm": 0.43799686431884766, "learning_rate": 3.563011382348651e-06, "loss": 0.3517, "step": 3828 }, { "epoch": 1.9008770478239285, "grad_norm": 0.49701425433158875, "learning_rate": 3.560244422604052e-06, "loss": 0.482, "step": 3829 }, { "epoch": 1.9013734899884165, "grad_norm": 0.4346961975097656, "learning_rate": 3.557477943554709e-06, "loss": 0.3414, "step": 3830 }, { "epoch": 1.9018699321529042, "grad_norm": 0.4545556604862213, "learning_rate": 3.5547119461242766e-06, "loss": 0.3429, "step": 3831 }, { "epoch": 1.902366374317392, "grad_norm": 0.4911668598651886, "learning_rate": 3.551946431236245e-06, "loss": 0.4093, "step": 3832 }, { "epoch": 1.9028628164818797, "grad_norm": 0.4486316442489624, "learning_rate": 3.5491813998139413e-06, "loss": 0.3684, "step": 3833 }, { "epoch": 1.9033592586463677, "grad_norm": 0.46594908833503723, "learning_rate": 3.5464168527805398e-06, "loss": 0.3757, "step": 3834 }, { "epoch": 1.9038557008108556, "grad_norm": 0.4342899024486542, "learning_rate": 3.5436527910590446e-06, "loss": 0.3653, "step": 3835 }, { "epoch": 1.9043521429753434, "grad_norm": 0.527121365070343, "learning_rate": 3.5408892155723e-06, "loss": 0.36, "step": 3836 }, { "epoch": 1.9048485851398311, "grad_norm": 0.46456989645957947, "learning_rate": 3.53812612724299e-06, "loss": 0.2867, "step": 3837 }, { "epoch": 1.905345027304319, "grad_norm": 0.48705723881721497, "learning_rate": 3.535363526993635e-06, "loss": 0.4898, "step": 3838 }, { "epoch": 1.9058414694688068, "grad_norm": 0.4238540232181549, "learning_rate": 3.5326014157465922e-06, "loss": 0.3614, "step": 3839 }, { "epoch": 1.9063379116332948, "grad_norm": 0.41788801550865173, "learning_rate": 3.5298397944240524e-06, "loss": 0.4114, "step": 3840 }, { "epoch": 1.9068343537977825, "grad_norm": 0.4184514880180359, "learning_rate": 3.5270786639480512e-06, "loss": 0.3792, "step": 3841 }, { "epoch": 1.9073307959622703, "grad_norm": 0.41639941930770874, "learning_rate": 3.524318025240453e-06, "loss": 0.3828, "step": 3842 }, { "epoch": 1.9078272381267583, "grad_norm": 0.5249063372612, "learning_rate": 3.5215578792229586e-06, "loss": 0.4428, "step": 3843 }, { "epoch": 1.9083236802912462, "grad_norm": 0.37573736906051636, "learning_rate": 3.518798226817105e-06, "loss": 0.3454, "step": 3844 }, { "epoch": 1.908820122455734, "grad_norm": 0.4290904402732849, "learning_rate": 3.516039068944267e-06, "loss": 0.3908, "step": 3845 }, { "epoch": 1.9093165646202217, "grad_norm": 0.45680248737335205, "learning_rate": 3.513280406525653e-06, "loss": 0.4061, "step": 3846 }, { "epoch": 1.9098130067847094, "grad_norm": 0.41124048829078674, "learning_rate": 3.510522240482305e-06, "loss": 0.3256, "step": 3847 }, { "epoch": 1.9103094489491974, "grad_norm": 0.46241307258605957, "learning_rate": 3.507764571735097e-06, "loss": 0.4044, "step": 3848 }, { "epoch": 1.9108058911136854, "grad_norm": 0.47688406705856323, "learning_rate": 3.5050074012047443e-06, "loss": 0.3923, "step": 3849 }, { "epoch": 1.9113023332781731, "grad_norm": 0.4380684196949005, "learning_rate": 3.5022507298117873e-06, "loss": 0.3677, "step": 3850 }, { "epoch": 1.9117987754426609, "grad_norm": 0.4725552201271057, "learning_rate": 3.4994945584766048e-06, "loss": 0.4042, "step": 3851 }, { "epoch": 1.9122952176071486, "grad_norm": 0.48897215723991394, "learning_rate": 3.4967388881194083e-06, "loss": 0.3406, "step": 3852 }, { "epoch": 1.9127916597716366, "grad_norm": 0.4582853615283966, "learning_rate": 3.49398371966024e-06, "loss": 0.3806, "step": 3853 }, { "epoch": 1.9132881019361245, "grad_norm": 0.45571690797805786, "learning_rate": 3.4912290540189776e-06, "loss": 0.3736, "step": 3854 }, { "epoch": 1.9137845441006123, "grad_norm": 0.42505770921707153, "learning_rate": 3.4884748921153253e-06, "loss": 0.3847, "step": 3855 }, { "epoch": 1.9142809862651, "grad_norm": 0.5085558295249939, "learning_rate": 3.4857212348688285e-06, "loss": 0.4465, "step": 3856 }, { "epoch": 1.914777428429588, "grad_norm": 0.37981805205345154, "learning_rate": 3.4829680831988557e-06, "loss": 0.2896, "step": 3857 }, { "epoch": 1.915273870594076, "grad_norm": 0.484489381313324, "learning_rate": 3.480215438024609e-06, "loss": 0.3821, "step": 3858 }, { "epoch": 1.9157703127585637, "grad_norm": 0.4511788785457611, "learning_rate": 3.4774633002651196e-06, "loss": 0.3629, "step": 3859 }, { "epoch": 1.9162667549230514, "grad_norm": 0.4712088406085968, "learning_rate": 3.4747116708392565e-06, "loss": 0.3854, "step": 3860 }, { "epoch": 1.9167631970875392, "grad_norm": 0.38788479566574097, "learning_rate": 3.4719605506657105e-06, "loss": 0.2771, "step": 3861 }, { "epoch": 1.9172596392520271, "grad_norm": 0.4404059946537018, "learning_rate": 3.4692099406630076e-06, "loss": 0.3437, "step": 3862 }, { "epoch": 1.917756081416515, "grad_norm": 0.4929441213607788, "learning_rate": 3.466459841749499e-06, "loss": 0.3895, "step": 3863 }, { "epoch": 1.9182525235810028, "grad_norm": 0.4941202998161316, "learning_rate": 3.463710254843372e-06, "loss": 0.3624, "step": 3864 }, { "epoch": 1.9187489657454906, "grad_norm": 0.4612749218940735, "learning_rate": 3.4609611808626363e-06, "loss": 0.4041, "step": 3865 }, { "epoch": 1.9192454079099783, "grad_norm": 0.4751714766025543, "learning_rate": 3.458212620725134e-06, "loss": 0.3821, "step": 3866 }, { "epoch": 1.9197418500744663, "grad_norm": 0.48514601588249207, "learning_rate": 3.4554645753485326e-06, "loss": 0.4077, "step": 3867 }, { "epoch": 1.9202382922389543, "grad_norm": 0.4924198091030121, "learning_rate": 3.452717045650332e-06, "loss": 0.356, "step": 3868 }, { "epoch": 1.920734734403442, "grad_norm": 0.4211876094341278, "learning_rate": 3.449970032547858e-06, "loss": 0.398, "step": 3869 }, { "epoch": 1.9212311765679297, "grad_norm": 0.4463101029396057, "learning_rate": 3.4472235369582603e-06, "loss": 0.4348, "step": 3870 }, { "epoch": 1.9217276187324177, "grad_norm": 0.43971967697143555, "learning_rate": 3.4444775597985236e-06, "loss": 0.327, "step": 3871 }, { "epoch": 1.9222240608969057, "grad_norm": 0.5212010145187378, "learning_rate": 3.4417321019854533e-06, "loss": 0.3759, "step": 3872 }, { "epoch": 1.9227205030613934, "grad_norm": 0.586242139339447, "learning_rate": 3.4389871644356825e-06, "loss": 0.3678, "step": 3873 }, { "epoch": 1.9232169452258812, "grad_norm": 0.40149620175361633, "learning_rate": 3.4362427480656703e-06, "loss": 0.3645, "step": 3874 }, { "epoch": 1.923713387390369, "grad_norm": 0.5202354788780212, "learning_rate": 3.4334988537917045e-06, "loss": 0.3849, "step": 3875 }, { "epoch": 1.9242098295548569, "grad_norm": 0.5222676992416382, "learning_rate": 3.430755482529896e-06, "loss": 0.349, "step": 3876 }, { "epoch": 1.9247062717193448, "grad_norm": 0.4972096383571625, "learning_rate": 3.428012635196184e-06, "loss": 0.3659, "step": 3877 }, { "epoch": 1.9252027138838326, "grad_norm": 0.4313439130783081, "learning_rate": 3.425270312706326e-06, "loss": 0.2969, "step": 3878 }, { "epoch": 1.9256991560483203, "grad_norm": 0.46316957473754883, "learning_rate": 3.4225285159759137e-06, "loss": 0.3963, "step": 3879 }, { "epoch": 1.926195598212808, "grad_norm": 0.4909144341945648, "learning_rate": 3.419787245920357e-06, "loss": 0.4469, "step": 3880 }, { "epoch": 1.926692040377296, "grad_norm": 0.42718759179115295, "learning_rate": 3.4170465034548883e-06, "loss": 0.4019, "step": 3881 }, { "epoch": 1.927188482541784, "grad_norm": 0.46528270840644836, "learning_rate": 3.4143062894945727e-06, "loss": 0.4098, "step": 3882 }, { "epoch": 1.9276849247062717, "grad_norm": 0.40750652551651, "learning_rate": 3.41156660495429e-06, "loss": 0.2842, "step": 3883 }, { "epoch": 1.9281813668707595, "grad_norm": 0.4806942641735077, "learning_rate": 3.4088274507487455e-06, "loss": 0.3672, "step": 3884 }, { "epoch": 1.9286778090352474, "grad_norm": 0.5110036134719849, "learning_rate": 3.4060888277924697e-06, "loss": 0.3582, "step": 3885 }, { "epoch": 1.9291742511997352, "grad_norm": 0.4451708197593689, "learning_rate": 3.4033507369998143e-06, "loss": 0.3931, "step": 3886 }, { "epoch": 1.9296706933642231, "grad_norm": 0.43044352531433105, "learning_rate": 3.400613179284954e-06, "loss": 0.3281, "step": 3887 }, { "epoch": 1.9301671355287109, "grad_norm": 0.44830501079559326, "learning_rate": 3.3978761555618845e-06, "loss": 0.362, "step": 3888 }, { "epoch": 1.9306635776931986, "grad_norm": 0.4547058939933777, "learning_rate": 3.3951396667444213e-06, "loss": 0.3893, "step": 3889 }, { "epoch": 1.9311600198576866, "grad_norm": 0.46298420429229736, "learning_rate": 3.3924037137462074e-06, "loss": 0.358, "step": 3890 }, { "epoch": 1.9316564620221746, "grad_norm": 0.47441577911376953, "learning_rate": 3.389668297480702e-06, "loss": 0.3768, "step": 3891 }, { "epoch": 1.9321529041866623, "grad_norm": 0.47024184465408325, "learning_rate": 3.3869334188611848e-06, "loss": 0.3408, "step": 3892 }, { "epoch": 1.93264934635115, "grad_norm": 0.46019765734672546, "learning_rate": 3.384199078800756e-06, "loss": 0.3431, "step": 3893 }, { "epoch": 1.9331457885156378, "grad_norm": 0.45224711298942566, "learning_rate": 3.381465278212343e-06, "loss": 0.4013, "step": 3894 }, { "epoch": 1.9336422306801258, "grad_norm": 0.46122804284095764, "learning_rate": 3.3787320180086836e-06, "loss": 0.4097, "step": 3895 }, { "epoch": 1.9341386728446137, "grad_norm": 0.4277852475643158, "learning_rate": 3.375999299102338e-06, "loss": 0.3821, "step": 3896 }, { "epoch": 1.9346351150091015, "grad_norm": 0.4251877963542938, "learning_rate": 3.373267122405691e-06, "loss": 0.3604, "step": 3897 }, { "epoch": 1.9351315571735892, "grad_norm": 0.4614506661891937, "learning_rate": 3.3705354888309395e-06, "loss": 0.3341, "step": 3898 }, { "epoch": 1.9356279993380772, "grad_norm": 0.49930834770202637, "learning_rate": 3.3678043992901e-06, "loss": 0.3451, "step": 3899 }, { "epoch": 1.936124441502565, "grad_norm": 0.4797849655151367, "learning_rate": 3.3650738546950117e-06, "loss": 0.3793, "step": 3900 }, { "epoch": 1.9366208836670529, "grad_norm": 0.5283535122871399, "learning_rate": 3.3623438559573284e-06, "loss": 0.4021, "step": 3901 }, { "epoch": 1.9371173258315406, "grad_norm": 0.45285657048225403, "learning_rate": 3.3596144039885237e-06, "loss": 0.428, "step": 3902 }, { "epoch": 1.9376137679960284, "grad_norm": 0.4767164885997772, "learning_rate": 3.3568854996998864e-06, "loss": 0.4286, "step": 3903 }, { "epoch": 1.9381102101605163, "grad_norm": 0.44039687514305115, "learning_rate": 3.354157144002521e-06, "loss": 0.3013, "step": 3904 }, { "epoch": 1.9386066523250043, "grad_norm": 0.5044416785240173, "learning_rate": 3.351429337807356e-06, "loss": 0.3933, "step": 3905 }, { "epoch": 1.939103094489492, "grad_norm": 0.45567986369132996, "learning_rate": 3.3487020820251293e-06, "loss": 0.341, "step": 3906 }, { "epoch": 1.9395995366539798, "grad_norm": 0.3941975235939026, "learning_rate": 3.3459753775663963e-06, "loss": 0.3537, "step": 3907 }, { "epoch": 1.9400959788184675, "grad_norm": 0.4154919981956482, "learning_rate": 3.343249225341531e-06, "loss": 0.3924, "step": 3908 }, { "epoch": 1.9405924209829555, "grad_norm": 0.4351412057876587, "learning_rate": 3.3405236262607214e-06, "loss": 0.3651, "step": 3909 }, { "epoch": 1.9410888631474434, "grad_norm": 0.5081251859664917, "learning_rate": 3.337798581233972e-06, "loss": 0.3204, "step": 3910 }, { "epoch": 1.9415853053119312, "grad_norm": 0.46455422043800354, "learning_rate": 3.3350740911710987e-06, "loss": 0.3939, "step": 3911 }, { "epoch": 1.942081747476419, "grad_norm": 0.4831695854663849, "learning_rate": 3.3323501569817375e-06, "loss": 0.3748, "step": 3912 }, { "epoch": 1.9425781896409067, "grad_norm": 0.4916326105594635, "learning_rate": 3.3296267795753345e-06, "loss": 0.3571, "step": 3913 }, { "epoch": 1.9430746318053946, "grad_norm": 0.4878450036048889, "learning_rate": 3.3269039598611525e-06, "loss": 0.3717, "step": 3914 }, { "epoch": 1.9435710739698826, "grad_norm": 0.47554904222488403, "learning_rate": 3.324181698748263e-06, "loss": 0.42, "step": 3915 }, { "epoch": 1.9440675161343703, "grad_norm": 0.40479013323783875, "learning_rate": 3.3214599971455596e-06, "loss": 0.3479, "step": 3916 }, { "epoch": 1.944563958298858, "grad_norm": 0.42386606335639954, "learning_rate": 3.3187388559617438e-06, "loss": 0.398, "step": 3917 }, { "epoch": 1.945060400463346, "grad_norm": 0.4082321226596832, "learning_rate": 3.3160182761053306e-06, "loss": 0.3435, "step": 3918 }, { "epoch": 1.945556842627834, "grad_norm": 0.4988901615142822, "learning_rate": 3.3132982584846442e-06, "loss": 0.4231, "step": 3919 }, { "epoch": 1.9460532847923218, "grad_norm": 0.42098596692085266, "learning_rate": 3.310578804007829e-06, "loss": 0.3328, "step": 3920 }, { "epoch": 1.9465497269568095, "grad_norm": 0.4935988485813141, "learning_rate": 3.307859913582836e-06, "loss": 0.4075, "step": 3921 }, { "epoch": 1.9470461691212972, "grad_norm": 0.509168803691864, "learning_rate": 3.3051415881174263e-06, "loss": 0.369, "step": 3922 }, { "epoch": 1.9475426112857852, "grad_norm": 0.4401465356349945, "learning_rate": 3.3024238285191774e-06, "loss": 0.3372, "step": 3923 }, { "epoch": 1.9480390534502732, "grad_norm": 0.4912392795085907, "learning_rate": 3.299706635695474e-06, "loss": 0.3349, "step": 3924 }, { "epoch": 1.948535495614761, "grad_norm": 0.468997597694397, "learning_rate": 3.2969900105535148e-06, "loss": 0.369, "step": 3925 }, { "epoch": 1.9490319377792487, "grad_norm": 0.4835629165172577, "learning_rate": 3.2942739540003034e-06, "loss": 0.3769, "step": 3926 }, { "epoch": 1.9495283799437364, "grad_norm": 0.41025111079216003, "learning_rate": 3.2915584669426624e-06, "loss": 0.3103, "step": 3927 }, { "epoch": 1.9500248221082244, "grad_norm": 0.4638621509075165, "learning_rate": 3.288843550287216e-06, "loss": 0.3747, "step": 3928 }, { "epoch": 1.9505212642727123, "grad_norm": 0.37619131803512573, "learning_rate": 3.2861292049404016e-06, "loss": 0.3299, "step": 3929 }, { "epoch": 1.9510177064372, "grad_norm": 0.47496017813682556, "learning_rate": 3.2834154318084632e-06, "loss": 0.4152, "step": 3930 }, { "epoch": 1.9515141486016878, "grad_norm": 0.4273815155029297, "learning_rate": 3.2807022317974594e-06, "loss": 0.3616, "step": 3931 }, { "epoch": 1.9520105907661758, "grad_norm": 0.4611923098564148, "learning_rate": 3.277989605813252e-06, "loss": 0.3786, "step": 3932 }, { "epoch": 1.9525070329306637, "grad_norm": 0.40864941477775574, "learning_rate": 3.2752775547615147e-06, "loss": 0.3972, "step": 3933 }, { "epoch": 1.9530034750951515, "grad_norm": 0.46487849950790405, "learning_rate": 3.2725660795477242e-06, "loss": 0.3509, "step": 3934 }, { "epoch": 1.9534999172596392, "grad_norm": 0.4746097922325134, "learning_rate": 3.269855181077173e-06, "loss": 0.4254, "step": 3935 }, { "epoch": 1.953996359424127, "grad_norm": 0.4508689343929291, "learning_rate": 3.2671448602549537e-06, "loss": 0.3639, "step": 3936 }, { "epoch": 1.954492801588615, "grad_norm": 0.43319061398506165, "learning_rate": 3.2644351179859678e-06, "loss": 0.345, "step": 3937 }, { "epoch": 1.954989243753103, "grad_norm": 0.3998836874961853, "learning_rate": 3.2617259551749283e-06, "loss": 0.3705, "step": 3938 }, { "epoch": 1.9554856859175906, "grad_norm": 0.4557395875453949, "learning_rate": 3.2590173727263464e-06, "loss": 0.3803, "step": 3939 }, { "epoch": 1.9559821280820784, "grad_norm": 0.4926530122756958, "learning_rate": 3.256309371544548e-06, "loss": 0.3509, "step": 3940 }, { "epoch": 1.9564785702465661, "grad_norm": 0.5358515977859497, "learning_rate": 3.253601952533658e-06, "loss": 0.3961, "step": 3941 }, { "epoch": 1.956975012411054, "grad_norm": 0.4876275658607483, "learning_rate": 3.2508951165976132e-06, "loss": 0.3632, "step": 3942 }, { "epoch": 1.957471454575542, "grad_norm": 0.44374972581863403, "learning_rate": 3.2481888646401506e-06, "loss": 0.4189, "step": 3943 }, { "epoch": 1.9579678967400298, "grad_norm": 0.492404580116272, "learning_rate": 3.2454831975648147e-06, "loss": 0.3654, "step": 3944 }, { "epoch": 1.9584643389045175, "grad_norm": 0.44605931639671326, "learning_rate": 3.2427781162749527e-06, "loss": 0.424, "step": 3945 }, { "epoch": 1.9589607810690055, "grad_norm": 0.4569104015827179, "learning_rate": 3.2400736216737207e-06, "loss": 0.3307, "step": 3946 }, { "epoch": 1.9594572232334933, "grad_norm": 0.48981839418411255, "learning_rate": 3.2373697146640727e-06, "loss": 0.4374, "step": 3947 }, { "epoch": 1.9599536653979812, "grad_norm": 0.42025017738342285, "learning_rate": 3.2346663961487722e-06, "loss": 0.3536, "step": 3948 }, { "epoch": 1.960450107562469, "grad_norm": 0.45516565442085266, "learning_rate": 3.2319636670303815e-06, "loss": 0.3577, "step": 3949 }, { "epoch": 1.9609465497269567, "grad_norm": 0.4819066524505615, "learning_rate": 3.2292615282112715e-06, "loss": 0.3298, "step": 3950 }, { "epoch": 1.9614429918914447, "grad_norm": 0.45968595147132874, "learning_rate": 3.226559980593612e-06, "loss": 0.3621, "step": 3951 }, { "epoch": 1.9619394340559326, "grad_norm": 0.4988240897655487, "learning_rate": 3.2238590250793734e-06, "loss": 0.378, "step": 3952 }, { "epoch": 1.9624358762204204, "grad_norm": 0.5040711164474487, "learning_rate": 3.2211586625703343e-06, "loss": 0.4043, "step": 3953 }, { "epoch": 1.9629323183849081, "grad_norm": 0.3906843960285187, "learning_rate": 3.2184588939680727e-06, "loss": 0.3604, "step": 3954 }, { "epoch": 1.9634287605493959, "grad_norm": 0.41333889961242676, "learning_rate": 3.2157597201739655e-06, "loss": 0.3733, "step": 3955 }, { "epoch": 1.9639252027138838, "grad_norm": 0.4655856788158417, "learning_rate": 3.2130611420891943e-06, "loss": 0.3879, "step": 3956 }, { "epoch": 1.9644216448783718, "grad_norm": 0.46577051281929016, "learning_rate": 3.210363160614742e-06, "loss": 0.3527, "step": 3957 }, { "epoch": 1.9649180870428595, "grad_norm": 0.4502418041229248, "learning_rate": 3.207665776651392e-06, "loss": 0.3978, "step": 3958 }, { "epoch": 1.9654145292073473, "grad_norm": 0.45383399724960327, "learning_rate": 3.2049689910997255e-06, "loss": 0.3577, "step": 3959 }, { "epoch": 1.9659109713718352, "grad_norm": 0.4442475736141205, "learning_rate": 3.202272804860125e-06, "loss": 0.4167, "step": 3960 }, { "epoch": 1.966407413536323, "grad_norm": 0.4525529146194458, "learning_rate": 3.1995772188327778e-06, "loss": 0.4122, "step": 3961 }, { "epoch": 1.966903855700811, "grad_norm": 0.4665185213088989, "learning_rate": 3.196882233917663e-06, "loss": 0.3514, "step": 3962 }, { "epoch": 1.9674002978652987, "grad_norm": 0.45976176857948303, "learning_rate": 3.194187851014565e-06, "loss": 0.4124, "step": 3963 }, { "epoch": 1.9678967400297864, "grad_norm": 0.4484221041202545, "learning_rate": 3.1914940710230622e-06, "loss": 0.3085, "step": 3964 }, { "epoch": 1.9683931821942744, "grad_norm": 0.452349990606308, "learning_rate": 3.18880089484254e-06, "loss": 0.3262, "step": 3965 }, { "epoch": 1.9688896243587624, "grad_norm": 0.4944424629211426, "learning_rate": 3.186108323372172e-06, "loss": 0.3209, "step": 3966 }, { "epoch": 1.96938606652325, "grad_norm": 0.4735720753669739, "learning_rate": 3.1834163575109343e-06, "loss": 0.4084, "step": 3967 }, { "epoch": 1.9698825086877378, "grad_norm": 0.4304461181163788, "learning_rate": 3.180724998157605e-06, "loss": 0.3572, "step": 3968 }, { "epoch": 1.9703789508522256, "grad_norm": 0.47394508123397827, "learning_rate": 3.1780342462107535e-06, "loss": 0.3955, "step": 3969 }, { "epoch": 1.9708753930167136, "grad_norm": 0.46997588872909546, "learning_rate": 3.1753441025687483e-06, "loss": 0.4019, "step": 3970 }, { "epoch": 1.9713718351812015, "grad_norm": 0.44212549924850464, "learning_rate": 3.172654568129755e-06, "loss": 0.3811, "step": 3971 }, { "epoch": 1.9718682773456893, "grad_norm": 0.412061870098114, "learning_rate": 3.169965643791737e-06, "loss": 0.3668, "step": 3972 }, { "epoch": 1.972364719510177, "grad_norm": 0.3999890685081482, "learning_rate": 3.1672773304524552e-06, "loss": 0.4214, "step": 3973 }, { "epoch": 1.9728611616746647, "grad_norm": 0.47337606549263, "learning_rate": 3.1645896290094615e-06, "loss": 0.3422, "step": 3974 }, { "epoch": 1.9733576038391527, "grad_norm": 0.475965291261673, "learning_rate": 3.1619025403601043e-06, "loss": 0.3296, "step": 3975 }, { "epoch": 1.9738540460036407, "grad_norm": 0.49214160442352295, "learning_rate": 3.1592160654015346e-06, "loss": 0.3877, "step": 3976 }, { "epoch": 1.9743504881681284, "grad_norm": 0.416207879781723, "learning_rate": 3.1565302050306914e-06, "loss": 0.3712, "step": 3977 }, { "epoch": 1.9748469303326162, "grad_norm": 0.455045223236084, "learning_rate": 3.1538449601443067e-06, "loss": 0.396, "step": 3978 }, { "epoch": 1.9753433724971041, "grad_norm": 0.4864142835140228, "learning_rate": 3.151160331638917e-06, "loss": 0.4188, "step": 3979 }, { "epoch": 1.975839814661592, "grad_norm": 0.4317370653152466, "learning_rate": 3.1484763204108433e-06, "loss": 0.3613, "step": 3980 }, { "epoch": 1.9763362568260798, "grad_norm": 0.4548439383506775, "learning_rate": 3.1457929273562048e-06, "loss": 0.393, "step": 3981 }, { "epoch": 1.9768326989905676, "grad_norm": 0.38922542333602905, "learning_rate": 3.143110153370912e-06, "loss": 0.3523, "step": 3982 }, { "epoch": 1.9773291411550553, "grad_norm": 0.4398716688156128, "learning_rate": 3.1404279993506726e-06, "loss": 0.3859, "step": 3983 }, { "epoch": 1.9778255833195433, "grad_norm": 0.4570976793766022, "learning_rate": 3.137746466190985e-06, "loss": 0.3533, "step": 3984 }, { "epoch": 1.9783220254840312, "grad_norm": 0.45662063360214233, "learning_rate": 3.1350655547871384e-06, "loss": 0.3581, "step": 3985 }, { "epoch": 1.978818467648519, "grad_norm": 0.45890939235687256, "learning_rate": 3.1323852660342146e-06, "loss": 0.3668, "step": 3986 }, { "epoch": 1.9793149098130067, "grad_norm": 0.4702968895435333, "learning_rate": 3.1297056008270932e-06, "loss": 0.3975, "step": 3987 }, { "epoch": 1.9798113519774945, "grad_norm": 0.43837013840675354, "learning_rate": 3.127026560060441e-06, "loss": 0.3642, "step": 3988 }, { "epoch": 1.9803077941419824, "grad_norm": 0.46378058195114136, "learning_rate": 3.124348144628715e-06, "loss": 0.3765, "step": 3989 }, { "epoch": 1.9808042363064704, "grad_norm": 0.5019510984420776, "learning_rate": 3.121670355426165e-06, "loss": 0.3631, "step": 3990 }, { "epoch": 1.9813006784709581, "grad_norm": 0.5299546718597412, "learning_rate": 3.1189931933468345e-06, "loss": 0.4096, "step": 3991 }, { "epoch": 1.9817971206354459, "grad_norm": 0.42922502756118774, "learning_rate": 3.116316659284554e-06, "loss": 0.359, "step": 3992 }, { "epoch": 1.9822935627999339, "grad_norm": 0.3882693648338318, "learning_rate": 3.1136407541329435e-06, "loss": 0.3193, "step": 3993 }, { "epoch": 1.9827900049644216, "grad_norm": 0.46816176176071167, "learning_rate": 3.1109654787854184e-06, "loss": 0.3871, "step": 3994 }, { "epoch": 1.9832864471289096, "grad_norm": 0.49264511466026306, "learning_rate": 3.108290834135178e-06, "loss": 0.4237, "step": 3995 }, { "epoch": 1.9837828892933973, "grad_norm": 0.4677193760871887, "learning_rate": 3.105616821075216e-06, "loss": 0.3348, "step": 3996 }, { "epoch": 1.984279331457885, "grad_norm": 0.4362378418445587, "learning_rate": 3.102943440498308e-06, "loss": 0.3638, "step": 3997 }, { "epoch": 1.984775773622373, "grad_norm": 0.48469245433807373, "learning_rate": 3.1002706932970283e-06, "loss": 0.3453, "step": 3998 }, { "epoch": 1.985272215786861, "grad_norm": 0.4909285008907318, "learning_rate": 3.097598580363732e-06, "loss": 0.4314, "step": 3999 }, { "epoch": 1.9857686579513487, "grad_norm": 0.4174289107322693, "learning_rate": 3.094927102590566e-06, "loss": 0.2854, "step": 4000 }, { "epoch": 1.9862651001158365, "grad_norm": 0.4888894259929657, "learning_rate": 3.0922562608694604e-06, "loss": 0.3912, "step": 4001 }, { "epoch": 1.9867615422803242, "grad_norm": 0.4581105411052704, "learning_rate": 3.089586056092143e-06, "loss": 0.3822, "step": 4002 }, { "epoch": 1.9872579844448122, "grad_norm": 0.49039074778556824, "learning_rate": 3.086916489150118e-06, "loss": 0.3926, "step": 4003 }, { "epoch": 1.9877544266093001, "grad_norm": 0.4594200551509857, "learning_rate": 3.0842475609346833e-06, "loss": 0.3435, "step": 4004 }, { "epoch": 1.9882508687737879, "grad_norm": 0.4575875699520111, "learning_rate": 3.081579272336919e-06, "loss": 0.3568, "step": 4005 }, { "epoch": 1.9887473109382756, "grad_norm": 0.4794960021972656, "learning_rate": 3.0789116242476967e-06, "loss": 0.3743, "step": 4006 }, { "epoch": 1.9892437531027636, "grad_norm": 0.5102534890174866, "learning_rate": 3.076244617557672e-06, "loss": 0.3805, "step": 4007 }, { "epoch": 1.9897401952672513, "grad_norm": 0.46103140711784363, "learning_rate": 3.073578253157282e-06, "loss": 0.3077, "step": 4008 }, { "epoch": 1.9902366374317393, "grad_norm": 0.5118879675865173, "learning_rate": 3.070912531936759e-06, "loss": 0.4221, "step": 4009 }, { "epoch": 1.990733079596227, "grad_norm": 0.43111783266067505, "learning_rate": 3.06824745478611e-06, "loss": 0.3347, "step": 4010 }, { "epoch": 1.9912295217607148, "grad_norm": 0.48555001616477966, "learning_rate": 3.0655830225951355e-06, "loss": 0.3845, "step": 4011 }, { "epoch": 1.9917259639252027, "grad_norm": 0.491271436214447, "learning_rate": 3.062919236253412e-06, "loss": 0.3349, "step": 4012 }, { "epoch": 1.9922224060896907, "grad_norm": 0.47756364941596985, "learning_rate": 3.0602560966503114e-06, "loss": 0.3878, "step": 4013 }, { "epoch": 1.9927188482541784, "grad_norm": 0.43309107422828674, "learning_rate": 3.057593604674981e-06, "loss": 0.3929, "step": 4014 }, { "epoch": 1.9932152904186662, "grad_norm": 0.41184887290000916, "learning_rate": 3.0549317612163543e-06, "loss": 0.3505, "step": 4015 }, { "epoch": 1.993711732583154, "grad_norm": 0.4494298994541168, "learning_rate": 3.052270567163146e-06, "loss": 0.4256, "step": 4016 }, { "epoch": 1.994208174747642, "grad_norm": 0.38439974188804626, "learning_rate": 3.0496100234038615e-06, "loss": 0.3368, "step": 4017 }, { "epoch": 1.9947046169121299, "grad_norm": 0.3994872272014618, "learning_rate": 3.0469501308267803e-06, "loss": 0.3403, "step": 4018 }, { "epoch": 1.9952010590766176, "grad_norm": 0.4775511920452118, "learning_rate": 3.0442908903199692e-06, "loss": 0.4279, "step": 4019 }, { "epoch": 1.9956975012411053, "grad_norm": 0.44582968950271606, "learning_rate": 3.0416323027712767e-06, "loss": 0.371, "step": 4020 }, { "epoch": 1.996193943405593, "grad_norm": 0.4629029333591461, "learning_rate": 3.0389743690683337e-06, "loss": 0.3547, "step": 4021 }, { "epoch": 1.996690385570081, "grad_norm": 0.47807958722114563, "learning_rate": 3.036317090098552e-06, "loss": 0.3713, "step": 4022 }, { "epoch": 1.997186827734569, "grad_norm": 0.48553091287612915, "learning_rate": 3.033660466749121e-06, "loss": 0.4235, "step": 4023 }, { "epoch": 1.9976832698990568, "grad_norm": 0.4018627107143402, "learning_rate": 3.0310044999070204e-06, "loss": 0.3514, "step": 4024 }, { "epoch": 1.9981797120635445, "grad_norm": 0.5465929508209229, "learning_rate": 3.0283491904590027e-06, "loss": 0.3715, "step": 4025 }, { "epoch": 1.9986761542280325, "grad_norm": 0.4925004541873932, "learning_rate": 3.0256945392916033e-06, "loss": 0.4088, "step": 4026 }, { "epoch": 1.9991725963925204, "grad_norm": 0.4681840240955353, "learning_rate": 3.0230405472911374e-06, "loss": 0.3722, "step": 4027 }, { "epoch": 1.9996690385570082, "grad_norm": 0.4617413580417633, "learning_rate": 3.020387215343704e-06, "loss": 0.4158, "step": 4028 }, { "epoch": 2.000165480721496, "grad_norm": 0.9357897639274597, "learning_rate": 3.017734544335176e-06, "loss": 0.5474, "step": 4029 }, { "epoch": 2.0006619228859837, "grad_norm": 0.44395384192466736, "learning_rate": 3.0150825351512094e-06, "loss": 0.342, "step": 4030 }, { "epoch": 2.0011583650504714, "grad_norm": 0.45000502467155457, "learning_rate": 3.0124311886772352e-06, "loss": 0.3717, "step": 4031 }, { "epoch": 2.0016548072149596, "grad_norm": 0.4110393524169922, "learning_rate": 3.009780505798469e-06, "loss": 0.3465, "step": 4032 }, { "epoch": 2.0021512493794473, "grad_norm": 0.3823389708995819, "learning_rate": 3.007130487399901e-06, "loss": 0.3258, "step": 4033 }, { "epoch": 2.002647691543935, "grad_norm": 0.45692434906959534, "learning_rate": 3.0044811343662996e-06, "loss": 0.38, "step": 4034 }, { "epoch": 2.003144133708423, "grad_norm": 0.4153729975223541, "learning_rate": 3.0018324475822113e-06, "loss": 0.2975, "step": 4035 }, { "epoch": 2.003640575872911, "grad_norm": 0.47333085536956787, "learning_rate": 2.9991844279319636e-06, "loss": 0.3682, "step": 4036 }, { "epoch": 2.0041370180373987, "grad_norm": 0.4875671863555908, "learning_rate": 2.996537076299656e-06, "loss": 0.3279, "step": 4037 }, { "epoch": 2.0046334602018865, "grad_norm": 0.5275771617889404, "learning_rate": 2.9938903935691655e-06, "loss": 0.3105, "step": 4038 }, { "epoch": 2.0051299023663742, "grad_norm": 0.40707552433013916, "learning_rate": 2.991244380624152e-06, "loss": 0.2909, "step": 4039 }, { "epoch": 2.005626344530862, "grad_norm": 0.4915716350078583, "learning_rate": 2.9885990383480447e-06, "loss": 0.3714, "step": 4040 }, { "epoch": 2.00612278669535, "grad_norm": 0.4565674662590027, "learning_rate": 2.98595436762405e-06, "loss": 0.3332, "step": 4041 }, { "epoch": 2.006619228859838, "grad_norm": 0.4593732953071594, "learning_rate": 2.9833103693351533e-06, "loss": 0.3724, "step": 4042 }, { "epoch": 2.0071156710243256, "grad_norm": 0.46393924951553345, "learning_rate": 2.980667044364114e-06, "loss": 0.3165, "step": 4043 }, { "epoch": 2.0076121131888134, "grad_norm": 0.43123868107795715, "learning_rate": 2.9780243935934673e-06, "loss": 0.2994, "step": 4044 }, { "epoch": 2.008108555353301, "grad_norm": 0.486145555973053, "learning_rate": 2.9753824179055214e-06, "loss": 0.3019, "step": 4045 }, { "epoch": 2.0086049975177893, "grad_norm": 0.5021054148674011, "learning_rate": 2.972741118182358e-06, "loss": 0.3418, "step": 4046 }, { "epoch": 2.009101439682277, "grad_norm": 0.40502557158470154, "learning_rate": 2.970100495305839e-06, "loss": 0.281, "step": 4047 }, { "epoch": 2.009597881846765, "grad_norm": 0.4637477695941925, "learning_rate": 2.9674605501575954e-06, "loss": 0.4035, "step": 4048 }, { "epoch": 2.0100943240112525, "grad_norm": 0.5201930999755859, "learning_rate": 2.9648212836190305e-06, "loss": 0.3643, "step": 4049 }, { "epoch": 2.0105907661757407, "grad_norm": 0.3585076928138733, "learning_rate": 2.9621826965713285e-06, "loss": 0.328, "step": 4050 }, { "epoch": 2.0110872083402285, "grad_norm": 0.4371580183506012, "learning_rate": 2.959544789895438e-06, "loss": 0.3212, "step": 4051 }, { "epoch": 2.011583650504716, "grad_norm": 0.43709632754325867, "learning_rate": 2.956907564472086e-06, "loss": 0.3539, "step": 4052 }, { "epoch": 2.012080092669204, "grad_norm": 0.48389971256256104, "learning_rate": 2.9542710211817687e-06, "loss": 0.3132, "step": 4053 }, { "epoch": 2.0125765348336917, "grad_norm": 0.4883803129196167, "learning_rate": 2.95163516090476e-06, "loss": 0.3235, "step": 4054 }, { "epoch": 2.01307297699818, "grad_norm": 0.455944687128067, "learning_rate": 2.948999984521099e-06, "loss": 0.359, "step": 4055 }, { "epoch": 2.0135694191626676, "grad_norm": 0.45482969284057617, "learning_rate": 2.946365492910599e-06, "loss": 0.3934, "step": 4056 }, { "epoch": 2.0140658613271554, "grad_norm": 0.44198405742645264, "learning_rate": 2.9437316869528467e-06, "loss": 0.3698, "step": 4057 }, { "epoch": 2.014562303491643, "grad_norm": 0.425021767616272, "learning_rate": 2.9410985675271968e-06, "loss": 0.3469, "step": 4058 }, { "epoch": 2.015058745656131, "grad_norm": 0.4396118223667145, "learning_rate": 2.9384661355127798e-06, "loss": 0.3295, "step": 4059 }, { "epoch": 2.015555187820619, "grad_norm": 0.4590144157409668, "learning_rate": 2.935834391788488e-06, "loss": 0.3846, "step": 4060 }, { "epoch": 2.016051629985107, "grad_norm": 0.45408523082733154, "learning_rate": 2.9332033372329936e-06, "loss": 0.3087, "step": 4061 }, { "epoch": 2.0165480721495945, "grad_norm": 0.44869446754455566, "learning_rate": 2.930572972724733e-06, "loss": 0.294, "step": 4062 }, { "epoch": 2.0170445143140823, "grad_norm": 0.396417498588562, "learning_rate": 2.927943299141912e-06, "loss": 0.3207, "step": 4063 }, { "epoch": 2.0175409564785705, "grad_norm": 0.47626155614852905, "learning_rate": 2.9253143173625076e-06, "loss": 0.3833, "step": 4064 }, { "epoch": 2.018037398643058, "grad_norm": 0.44115763902664185, "learning_rate": 2.9226860282642668e-06, "loss": 0.3394, "step": 4065 }, { "epoch": 2.018533840807546, "grad_norm": 0.4194934070110321, "learning_rate": 2.9200584327247017e-06, "loss": 0.2704, "step": 4066 }, { "epoch": 2.0190302829720337, "grad_norm": 0.45563745498657227, "learning_rate": 2.9174315316210987e-06, "loss": 0.3606, "step": 4067 }, { "epoch": 2.0195267251365214, "grad_norm": 0.4873954951763153, "learning_rate": 2.914805325830502e-06, "loss": 0.3593, "step": 4068 }, { "epoch": 2.0200231673010096, "grad_norm": 0.4943287968635559, "learning_rate": 2.912179816229739e-06, "loss": 0.3724, "step": 4069 }, { "epoch": 2.0205196094654974, "grad_norm": 0.4719430208206177, "learning_rate": 2.909555003695389e-06, "loss": 0.342, "step": 4070 }, { "epoch": 2.021016051629985, "grad_norm": 0.5066685080528259, "learning_rate": 2.9069308891038083e-06, "loss": 0.3544, "step": 4071 }, { "epoch": 2.021512493794473, "grad_norm": 0.44336771965026855, "learning_rate": 2.9043074733311172e-06, "loss": 0.3304, "step": 4072 }, { "epoch": 2.0220089359589606, "grad_norm": 0.4804818630218506, "learning_rate": 2.901684757253203e-06, "loss": 0.3561, "step": 4073 }, { "epoch": 2.0225053781234488, "grad_norm": 0.47616446018218994, "learning_rate": 2.8990627417457216e-06, "loss": 0.3403, "step": 4074 }, { "epoch": 2.0230018202879365, "grad_norm": 0.4717146158218384, "learning_rate": 2.8964414276840858e-06, "loss": 0.335, "step": 4075 }, { "epoch": 2.0234982624524243, "grad_norm": 0.4567410945892334, "learning_rate": 2.8938208159434905e-06, "loss": 0.2847, "step": 4076 }, { "epoch": 2.023994704616912, "grad_norm": 0.5352848172187805, "learning_rate": 2.8912009073988796e-06, "loss": 0.3306, "step": 4077 }, { "epoch": 2.0244911467814, "grad_norm": 0.3879812955856323, "learning_rate": 2.888581702924972e-06, "loss": 0.2989, "step": 4078 }, { "epoch": 2.024987588945888, "grad_norm": 0.4592445492744446, "learning_rate": 2.885963203396248e-06, "loss": 0.4277, "step": 4079 }, { "epoch": 2.0254840311103757, "grad_norm": 0.46481114625930786, "learning_rate": 2.8833454096869546e-06, "loss": 0.302, "step": 4080 }, { "epoch": 2.0259804732748634, "grad_norm": 0.43593934178352356, "learning_rate": 2.8807283226711036e-06, "loss": 0.2952, "step": 4081 }, { "epoch": 2.026476915439351, "grad_norm": 0.5036976337432861, "learning_rate": 2.8781119432224646e-06, "loss": 0.3219, "step": 4082 }, { "epoch": 2.0269733576038393, "grad_norm": 0.3979686200618744, "learning_rate": 2.875496272214578e-06, "loss": 0.3519, "step": 4083 }, { "epoch": 2.027469799768327, "grad_norm": 0.40932902693748474, "learning_rate": 2.8728813105207455e-06, "loss": 0.3402, "step": 4084 }, { "epoch": 2.027966241932815, "grad_norm": 0.4122369587421417, "learning_rate": 2.8702670590140314e-06, "loss": 0.3643, "step": 4085 }, { "epoch": 2.0284626840973026, "grad_norm": 0.43136870861053467, "learning_rate": 2.867653518567265e-06, "loss": 0.3801, "step": 4086 }, { "epoch": 2.0289591262617903, "grad_norm": 0.45937681198120117, "learning_rate": 2.8650406900530316e-06, "loss": 0.3352, "step": 4087 }, { "epoch": 2.0294555684262785, "grad_norm": 0.4651472568511963, "learning_rate": 2.8624285743436904e-06, "loss": 0.2934, "step": 4088 }, { "epoch": 2.0299520105907662, "grad_norm": 0.4585302472114563, "learning_rate": 2.85981717231135e-06, "loss": 0.3202, "step": 4089 }, { "epoch": 2.030448452755254, "grad_norm": 0.4473892152309418, "learning_rate": 2.857206484827889e-06, "loss": 0.3357, "step": 4090 }, { "epoch": 2.0309448949197417, "grad_norm": 0.4385612905025482, "learning_rate": 2.8545965127649455e-06, "loss": 0.344, "step": 4091 }, { "epoch": 2.0314413370842295, "grad_norm": 0.4444335103034973, "learning_rate": 2.851987256993919e-06, "loss": 0.299, "step": 4092 }, { "epoch": 2.0319377792487177, "grad_norm": 0.4880479574203491, "learning_rate": 2.8493787183859657e-06, "loss": 0.3252, "step": 4093 }, { "epoch": 2.0324342214132054, "grad_norm": 0.4331934452056885, "learning_rate": 2.8467708978120075e-06, "loss": 0.2535, "step": 4094 }, { "epoch": 2.032930663577693, "grad_norm": 0.4383775591850281, "learning_rate": 2.844163796142725e-06, "loss": 0.3636, "step": 4095 }, { "epoch": 2.033427105742181, "grad_norm": 0.4684530794620514, "learning_rate": 2.8415574142485588e-06, "loss": 0.3595, "step": 4096 }, { "epoch": 2.033923547906669, "grad_norm": 0.40291067957878113, "learning_rate": 2.83895175299971e-06, "loss": 0.3042, "step": 4097 }, { "epoch": 2.034419990071157, "grad_norm": 0.5173322558403015, "learning_rate": 2.836346813266134e-06, "loss": 0.3621, "step": 4098 }, { "epoch": 2.0349164322356446, "grad_norm": 0.4536442458629608, "learning_rate": 2.8337425959175558e-06, "loss": 0.3123, "step": 4099 }, { "epoch": 2.0354128744001323, "grad_norm": 0.46201851963996887, "learning_rate": 2.831139101823447e-06, "loss": 0.3301, "step": 4100 }, { "epoch": 2.03590931656462, "grad_norm": 0.4614960849285126, "learning_rate": 2.8285363318530455e-06, "loss": 0.3249, "step": 4101 }, { "epoch": 2.0364057587291082, "grad_norm": 0.3876713812351227, "learning_rate": 2.825934286875346e-06, "loss": 0.3176, "step": 4102 }, { "epoch": 2.036902200893596, "grad_norm": 0.42458659410476685, "learning_rate": 2.8233329677591003e-06, "loss": 0.2841, "step": 4103 }, { "epoch": 2.0373986430580837, "grad_norm": 0.4528810679912567, "learning_rate": 2.8207323753728205e-06, "loss": 0.3635, "step": 4104 }, { "epoch": 2.0378950852225715, "grad_norm": 0.4662970304489136, "learning_rate": 2.8181325105847667e-06, "loss": 0.4044, "step": 4105 }, { "epoch": 2.038391527387059, "grad_norm": 0.4458432197570801, "learning_rate": 2.815533374262972e-06, "loss": 0.3179, "step": 4106 }, { "epoch": 2.0388879695515474, "grad_norm": 0.44410043954849243, "learning_rate": 2.8129349672752117e-06, "loss": 0.3687, "step": 4107 }, { "epoch": 2.039384411716035, "grad_norm": 0.4310927391052246, "learning_rate": 2.8103372904890234e-06, "loss": 0.3154, "step": 4108 }, { "epoch": 2.039880853880523, "grad_norm": 0.49696817994117737, "learning_rate": 2.8077403447717034e-06, "loss": 0.377, "step": 4109 }, { "epoch": 2.0403772960450106, "grad_norm": 0.4252273142337799, "learning_rate": 2.8051441309902995e-06, "loss": 0.3079, "step": 4110 }, { "epoch": 2.040873738209499, "grad_norm": 0.45310845971107483, "learning_rate": 2.802548650011619e-06, "loss": 0.3847, "step": 4111 }, { "epoch": 2.0413701803739865, "grad_norm": 0.407507061958313, "learning_rate": 2.7999539027022193e-06, "loss": 0.3129, "step": 4112 }, { "epoch": 2.0418666225384743, "grad_norm": 0.44658413529396057, "learning_rate": 2.7973598899284173e-06, "loss": 0.3264, "step": 4113 }, { "epoch": 2.042363064702962, "grad_norm": 0.44813814759254456, "learning_rate": 2.7947666125562833e-06, "loss": 0.3717, "step": 4114 }, { "epoch": 2.0428595068674498, "grad_norm": 0.421404093503952, "learning_rate": 2.7921740714516454e-06, "loss": 0.3791, "step": 4115 }, { "epoch": 2.043355949031938, "grad_norm": 0.3877864480018616, "learning_rate": 2.789582267480075e-06, "loss": 0.2907, "step": 4116 }, { "epoch": 2.0438523911964257, "grad_norm": 0.4492279589176178, "learning_rate": 2.7869912015069136e-06, "loss": 0.3847, "step": 4117 }, { "epoch": 2.0443488333609134, "grad_norm": 0.4394359588623047, "learning_rate": 2.784400874397242e-06, "loss": 0.3094, "step": 4118 }, { "epoch": 2.044845275525401, "grad_norm": 0.3913924992084503, "learning_rate": 2.781811287015902e-06, "loss": 0.322, "step": 4119 }, { "epoch": 2.045341717689889, "grad_norm": 0.4452548623085022, "learning_rate": 2.779222440227486e-06, "loss": 0.371, "step": 4120 }, { "epoch": 2.045838159854377, "grad_norm": 0.3964727818965912, "learning_rate": 2.7766343348963392e-06, "loss": 0.2968, "step": 4121 }, { "epoch": 2.046334602018865, "grad_norm": 0.468262642621994, "learning_rate": 2.7740469718865626e-06, "loss": 0.3541, "step": 4122 }, { "epoch": 2.0468310441833526, "grad_norm": 0.4033874571323395, "learning_rate": 2.7714603520620026e-06, "loss": 0.3248, "step": 4123 }, { "epoch": 2.0473274863478403, "grad_norm": 0.39411401748657227, "learning_rate": 2.7688744762862624e-06, "loss": 0.2915, "step": 4124 }, { "epoch": 2.0478239285123285, "grad_norm": 0.4659746587276459, "learning_rate": 2.7662893454226956e-06, "loss": 0.3929, "step": 4125 }, { "epoch": 2.0483203706768163, "grad_norm": 0.3982316255569458, "learning_rate": 2.763704960334408e-06, "loss": 0.385, "step": 4126 }, { "epoch": 2.048816812841304, "grad_norm": 0.39206817746162415, "learning_rate": 2.761121321884257e-06, "loss": 0.3392, "step": 4127 }, { "epoch": 2.0493132550057918, "grad_norm": 0.3957848846912384, "learning_rate": 2.758538430934843e-06, "loss": 0.3491, "step": 4128 }, { "epoch": 2.0498096971702795, "grad_norm": 0.43245354294776917, "learning_rate": 2.7559562883485314e-06, "loss": 0.3166, "step": 4129 }, { "epoch": 2.0503061393347677, "grad_norm": 0.5119901895523071, "learning_rate": 2.7533748949874227e-06, "loss": 0.397, "step": 4130 }, { "epoch": 2.0508025814992554, "grad_norm": 0.4098554849624634, "learning_rate": 2.750794251713378e-06, "loss": 0.4022, "step": 4131 }, { "epoch": 2.051299023663743, "grad_norm": 0.40266144275665283, "learning_rate": 2.7482143593880015e-06, "loss": 0.3129, "step": 4132 }, { "epoch": 2.051795465828231, "grad_norm": 0.4585058093070984, "learning_rate": 2.745635218872651e-06, "loss": 0.3521, "step": 4133 }, { "epoch": 2.0522919079927187, "grad_norm": 0.4265661835670471, "learning_rate": 2.743056831028432e-06, "loss": 0.3526, "step": 4134 }, { "epoch": 2.052788350157207, "grad_norm": 0.36517226696014404, "learning_rate": 2.7404791967161937e-06, "loss": 0.3273, "step": 4135 }, { "epoch": 2.0532847923216946, "grad_norm": 0.4529072046279907, "learning_rate": 2.7379023167965447e-06, "loss": 0.3931, "step": 4136 }, { "epoch": 2.0537812344861823, "grad_norm": 0.4473404586315155, "learning_rate": 2.7353261921298303e-06, "loss": 0.3852, "step": 4137 }, { "epoch": 2.05427767665067, "grad_norm": 0.4063047468662262, "learning_rate": 2.7327508235761513e-06, "loss": 0.3393, "step": 4138 }, { "epoch": 2.054774118815158, "grad_norm": 0.4268611669540405, "learning_rate": 2.730176211995348e-06, "loss": 0.3351, "step": 4139 }, { "epoch": 2.055270560979646, "grad_norm": 0.4590359330177307, "learning_rate": 2.7276023582470213e-06, "loss": 0.3247, "step": 4140 }, { "epoch": 2.0557670031441337, "grad_norm": 0.4481455087661743, "learning_rate": 2.725029263190504e-06, "loss": 0.3506, "step": 4141 }, { "epoch": 2.0562634453086215, "grad_norm": 0.4316229224205017, "learning_rate": 2.7224569276848866e-06, "loss": 0.3817, "step": 4142 }, { "epoch": 2.0567598874731092, "grad_norm": 0.39161285758018494, "learning_rate": 2.7198853525890003e-06, "loss": 0.3019, "step": 4143 }, { "epoch": 2.0572563296375974, "grad_norm": 0.46573883295059204, "learning_rate": 2.717314538761425e-06, "loss": 0.3454, "step": 4144 }, { "epoch": 2.057752771802085, "grad_norm": 0.4548417925834656, "learning_rate": 2.7147444870604868e-06, "loss": 0.3372, "step": 4145 }, { "epoch": 2.058249213966573, "grad_norm": 0.48485276103019714, "learning_rate": 2.712175198344251e-06, "loss": 0.3573, "step": 4146 }, { "epoch": 2.0587456561310606, "grad_norm": 0.4360620379447937, "learning_rate": 2.7096066734705406e-06, "loss": 0.3212, "step": 4147 }, { "epoch": 2.0592420982955484, "grad_norm": 0.45463165640830994, "learning_rate": 2.70703891329691e-06, "loss": 0.3377, "step": 4148 }, { "epoch": 2.0597385404600366, "grad_norm": 0.43719518184661865, "learning_rate": 2.7044719186806677e-06, "loss": 0.4111, "step": 4149 }, { "epoch": 2.0602349826245243, "grad_norm": 0.4443175792694092, "learning_rate": 2.7019056904788625e-06, "loss": 0.3849, "step": 4150 }, { "epoch": 2.060731424789012, "grad_norm": 0.4202311336994171, "learning_rate": 2.6993402295482885e-06, "loss": 0.3429, "step": 4151 }, { "epoch": 2.0612278669535, "grad_norm": 0.40373390913009644, "learning_rate": 2.6967755367454855e-06, "loss": 0.2918, "step": 4152 }, { "epoch": 2.0617243091179875, "grad_norm": 0.458944708108902, "learning_rate": 2.694211612926731e-06, "loss": 0.3935, "step": 4153 }, { "epoch": 2.0622207512824757, "grad_norm": 0.42927849292755127, "learning_rate": 2.6916484589480505e-06, "loss": 0.3459, "step": 4154 }, { "epoch": 2.0627171934469635, "grad_norm": 0.44080355763435364, "learning_rate": 2.6890860756652125e-06, "loss": 0.3383, "step": 4155 }, { "epoch": 2.063213635611451, "grad_norm": 0.4461827874183655, "learning_rate": 2.6865244639337263e-06, "loss": 0.306, "step": 4156 }, { "epoch": 2.063710077775939, "grad_norm": 0.4195191562175751, "learning_rate": 2.6839636246088446e-06, "loss": 0.3196, "step": 4157 }, { "epoch": 2.064206519940427, "grad_norm": 0.4617650508880615, "learning_rate": 2.6814035585455628e-06, "loss": 0.3759, "step": 4158 }, { "epoch": 2.064702962104915, "grad_norm": 0.33506351709365845, "learning_rate": 2.6788442665986184e-06, "loss": 0.3381, "step": 4159 }, { "epoch": 2.0651994042694026, "grad_norm": 0.4109790027141571, "learning_rate": 2.6762857496224858e-06, "loss": 0.3776, "step": 4160 }, { "epoch": 2.0656958464338904, "grad_norm": 0.4421955645084381, "learning_rate": 2.673728008471387e-06, "loss": 0.2985, "step": 4161 }, { "epoch": 2.066192288598378, "grad_norm": 0.4522530436515808, "learning_rate": 2.6711710439992812e-06, "loss": 0.3831, "step": 4162 }, { "epoch": 2.0666887307628663, "grad_norm": 0.42549633979797363, "learning_rate": 2.668614857059872e-06, "loss": 0.348, "step": 4163 }, { "epoch": 2.067185172927354, "grad_norm": 0.36245501041412354, "learning_rate": 2.666059448506596e-06, "loss": 0.3267, "step": 4164 }, { "epoch": 2.067681615091842, "grad_norm": 0.4254688322544098, "learning_rate": 2.6635048191926375e-06, "loss": 0.3842, "step": 4165 }, { "epoch": 2.0681780572563295, "grad_norm": 0.43220651149749756, "learning_rate": 2.6609509699709174e-06, "loss": 0.3754, "step": 4166 }, { "epoch": 2.0686744994208173, "grad_norm": 0.4227001667022705, "learning_rate": 2.6583979016940962e-06, "loss": 0.3223, "step": 4167 }, { "epoch": 2.0691709415853055, "grad_norm": 0.40613898634910583, "learning_rate": 2.655845615214577e-06, "loss": 0.2873, "step": 4168 }, { "epoch": 2.069667383749793, "grad_norm": 0.4398278594017029, "learning_rate": 2.6532941113844924e-06, "loss": 0.3437, "step": 4169 }, { "epoch": 2.070163825914281, "grad_norm": 0.4406682550907135, "learning_rate": 2.650743391055728e-06, "loss": 0.3157, "step": 4170 }, { "epoch": 2.0706602680787687, "grad_norm": 0.44534242153167725, "learning_rate": 2.648193455079894e-06, "loss": 0.3109, "step": 4171 }, { "epoch": 2.071156710243257, "grad_norm": 0.42897191643714905, "learning_rate": 2.6456443043083457e-06, "loss": 0.3544, "step": 4172 }, { "epoch": 2.0716531524077446, "grad_norm": 0.4573498070240021, "learning_rate": 2.643095939592177e-06, "loss": 0.3523, "step": 4173 }, { "epoch": 2.0721495945722324, "grad_norm": 0.4571079611778259, "learning_rate": 2.640548361782218e-06, "loss": 0.3309, "step": 4174 }, { "epoch": 2.07264603673672, "grad_norm": 0.4191036820411682, "learning_rate": 2.6380015717290356e-06, "loss": 0.327, "step": 4175 }, { "epoch": 2.073142478901208, "grad_norm": 0.3984687924385071, "learning_rate": 2.6354555702829293e-06, "loss": 0.2992, "step": 4176 }, { "epoch": 2.073638921065696, "grad_norm": 0.4843318462371826, "learning_rate": 2.6329103582939474e-06, "loss": 0.3687, "step": 4177 }, { "epoch": 2.0741353632301838, "grad_norm": 0.4710315465927124, "learning_rate": 2.6303659366118605e-06, "loss": 0.3269, "step": 4178 }, { "epoch": 2.0746318053946715, "grad_norm": 0.4426323175430298, "learning_rate": 2.6278223060861846e-06, "loss": 0.3098, "step": 4179 }, { "epoch": 2.0751282475591593, "grad_norm": 0.45253831148147583, "learning_rate": 2.6252794675661685e-06, "loss": 0.3492, "step": 4180 }, { "epoch": 2.075624689723647, "grad_norm": 0.3788428008556366, "learning_rate": 2.6227374219007963e-06, "loss": 0.2979, "step": 4181 }, { "epoch": 2.076121131888135, "grad_norm": 0.4585331678390503, "learning_rate": 2.620196169938791e-06, "loss": 0.2973, "step": 4182 }, { "epoch": 2.076617574052623, "grad_norm": 0.42894357442855835, "learning_rate": 2.617655712528603e-06, "loss": 0.3579, "step": 4183 }, { "epoch": 2.0771140162171107, "grad_norm": 0.4442853331565857, "learning_rate": 2.615116050518424e-06, "loss": 0.3936, "step": 4184 }, { "epoch": 2.0776104583815984, "grad_norm": 0.4234871566295624, "learning_rate": 2.6125771847561785e-06, "loss": 0.382, "step": 4185 }, { "epoch": 2.0781069005460866, "grad_norm": 0.3825097382068634, "learning_rate": 2.610039116089526e-06, "loss": 0.2412, "step": 4186 }, { "epoch": 2.0786033427105743, "grad_norm": 0.4571526050567627, "learning_rate": 2.607501845365853e-06, "loss": 0.3559, "step": 4187 }, { "epoch": 2.079099784875062, "grad_norm": 0.46988263726234436, "learning_rate": 2.604965373432294e-06, "loss": 0.2681, "step": 4188 }, { "epoch": 2.07959622703955, "grad_norm": 0.4406009912490845, "learning_rate": 2.602429701135701e-06, "loss": 0.3856, "step": 4189 }, { "epoch": 2.0800926692040376, "grad_norm": 0.3998572528362274, "learning_rate": 2.5998948293226684e-06, "loss": 0.2999, "step": 4190 }, { "epoch": 2.0805891113685258, "grad_norm": 0.4524266719818115, "learning_rate": 2.597360758839521e-06, "loss": 0.3287, "step": 4191 }, { "epoch": 2.0810855535330135, "grad_norm": 0.41160324215888977, "learning_rate": 2.5948274905323163e-06, "loss": 0.296, "step": 4192 }, { "epoch": 2.0815819956975012, "grad_norm": 0.466566264629364, "learning_rate": 2.5922950252468455e-06, "loss": 0.352, "step": 4193 }, { "epoch": 2.082078437861989, "grad_norm": 0.441525936126709, "learning_rate": 2.5897633638286256e-06, "loss": 0.2761, "step": 4194 }, { "epoch": 2.0825748800264767, "grad_norm": 0.4637373983860016, "learning_rate": 2.587232507122912e-06, "loss": 0.3674, "step": 4195 }, { "epoch": 2.083071322190965, "grad_norm": 0.43669211864471436, "learning_rate": 2.584702455974689e-06, "loss": 0.3181, "step": 4196 }, { "epoch": 2.0835677643554527, "grad_norm": 0.5066189169883728, "learning_rate": 2.5821732112286726e-06, "loss": 0.4018, "step": 4197 }, { "epoch": 2.0840642065199404, "grad_norm": 0.389910489320755, "learning_rate": 2.579644773729307e-06, "loss": 0.3211, "step": 4198 }, { "epoch": 2.084560648684428, "grad_norm": 0.47645261883735657, "learning_rate": 2.5771171443207703e-06, "loss": 0.3739, "step": 4199 }, { "epoch": 2.085057090848916, "grad_norm": 0.4483402371406555, "learning_rate": 2.574590323846971e-06, "loss": 0.3284, "step": 4200 }, { "epoch": 2.085553533013404, "grad_norm": 0.4532936215400696, "learning_rate": 2.572064313151541e-06, "loss": 0.3772, "step": 4201 }, { "epoch": 2.086049975177892, "grad_norm": 0.4389040172100067, "learning_rate": 2.5695391130778504e-06, "loss": 0.352, "step": 4202 }, { "epoch": 2.0865464173423796, "grad_norm": 0.46475687623023987, "learning_rate": 2.5670147244689926e-06, "loss": 0.3183, "step": 4203 }, { "epoch": 2.0870428595068673, "grad_norm": 0.4798741042613983, "learning_rate": 2.5644911481677937e-06, "loss": 0.3695, "step": 4204 }, { "epoch": 2.0875393016713555, "grad_norm": 0.4322796165943146, "learning_rate": 2.5619683850168087e-06, "loss": 0.3049, "step": 4205 }, { "epoch": 2.0880357438358432, "grad_norm": 0.455157071352005, "learning_rate": 2.5594464358583137e-06, "loss": 0.4215, "step": 4206 }, { "epoch": 2.088532186000331, "grad_norm": 0.4497106671333313, "learning_rate": 2.5569253015343277e-06, "loss": 0.3601, "step": 4207 }, { "epoch": 2.0890286281648187, "grad_norm": 0.48030886054039, "learning_rate": 2.5544049828865823e-06, "loss": 0.3026, "step": 4208 }, { "epoch": 2.0895250703293065, "grad_norm": 0.47945037484169006, "learning_rate": 2.5518854807565473e-06, "loss": 0.3285, "step": 4209 }, { "epoch": 2.0900215124937946, "grad_norm": 0.42838728427886963, "learning_rate": 2.5493667959854106e-06, "loss": 0.2574, "step": 4210 }, { "epoch": 2.0905179546582824, "grad_norm": 0.4652089774608612, "learning_rate": 2.5468489294141003e-06, "loss": 0.4159, "step": 4211 }, { "epoch": 2.09101439682277, "grad_norm": 0.4610602855682373, "learning_rate": 2.5443318818832574e-06, "loss": 0.3357, "step": 4212 }, { "epoch": 2.091510838987258, "grad_norm": 0.409738153219223, "learning_rate": 2.5418156542332557e-06, "loss": 0.3064, "step": 4213 }, { "epoch": 2.0920072811517456, "grad_norm": 0.4369828402996063, "learning_rate": 2.539300247304202e-06, "loss": 0.3289, "step": 4214 }, { "epoch": 2.092503723316234, "grad_norm": 0.42378318309783936, "learning_rate": 2.536785661935914e-06, "loss": 0.3527, "step": 4215 }, { "epoch": 2.0930001654807215, "grad_norm": 0.4231744110584259, "learning_rate": 2.53427189896795e-06, "loss": 0.3234, "step": 4216 }, { "epoch": 2.0934966076452093, "grad_norm": 0.4367859363555908, "learning_rate": 2.5317589592395802e-06, "loss": 0.2931, "step": 4217 }, { "epoch": 2.093993049809697, "grad_norm": 0.48603132367134094, "learning_rate": 2.5292468435898145e-06, "loss": 0.3676, "step": 4218 }, { "epoch": 2.094489491974185, "grad_norm": 0.45780715346336365, "learning_rate": 2.5267355528573745e-06, "loss": 0.3075, "step": 4219 }, { "epoch": 2.094985934138673, "grad_norm": 0.42940473556518555, "learning_rate": 2.524225087880714e-06, "loss": 0.3371, "step": 4220 }, { "epoch": 2.0954823763031607, "grad_norm": 0.43373242020606995, "learning_rate": 2.5217154494980087e-06, "loss": 0.3181, "step": 4221 }, { "epoch": 2.0959788184676484, "grad_norm": 0.47819599509239197, "learning_rate": 2.5192066385471592e-06, "loss": 0.3608, "step": 4222 }, { "epoch": 2.096475260632136, "grad_norm": 0.4177226722240448, "learning_rate": 2.5166986558657904e-06, "loss": 0.3635, "step": 4223 }, { "epoch": 2.0969717027966244, "grad_norm": 0.39384907484054565, "learning_rate": 2.5141915022912454e-06, "loss": 0.3529, "step": 4224 }, { "epoch": 2.097468144961112, "grad_norm": 0.404892235994339, "learning_rate": 2.5116851786605983e-06, "loss": 0.3255, "step": 4225 }, { "epoch": 2.0979645871256, "grad_norm": 0.4300461411476135, "learning_rate": 2.509179685810641e-06, "loss": 0.3243, "step": 4226 }, { "epoch": 2.0984610292900876, "grad_norm": 0.47251594066619873, "learning_rate": 2.5066750245778905e-06, "loss": 0.3513, "step": 4227 }, { "epoch": 2.0989574714545753, "grad_norm": 0.4752540588378906, "learning_rate": 2.504171195798584e-06, "loss": 0.3395, "step": 4228 }, { "epoch": 2.0994539136190635, "grad_norm": 0.44717586040496826, "learning_rate": 2.5016682003086812e-06, "loss": 0.364, "step": 4229 }, { "epoch": 2.0999503557835513, "grad_norm": 0.4122174084186554, "learning_rate": 2.4991660389438687e-06, "loss": 0.2788, "step": 4230 }, { "epoch": 2.100446797948039, "grad_norm": 0.4191160798072815, "learning_rate": 2.496664712539545e-06, "loss": 0.3367, "step": 4231 }, { "epoch": 2.1009432401125268, "grad_norm": 0.4385616183280945, "learning_rate": 2.494164221930836e-06, "loss": 0.3318, "step": 4232 }, { "epoch": 2.101439682277015, "grad_norm": 0.43187275528907776, "learning_rate": 2.491664567952589e-06, "loss": 0.3825, "step": 4233 }, { "epoch": 2.1019361244415027, "grad_norm": 0.42688536643981934, "learning_rate": 2.489165751439372e-06, "loss": 0.3443, "step": 4234 }, { "epoch": 2.1024325666059904, "grad_norm": 0.40727439522743225, "learning_rate": 2.486667773225468e-06, "loss": 0.3304, "step": 4235 }, { "epoch": 2.102929008770478, "grad_norm": 0.42387980222702026, "learning_rate": 2.484170634144884e-06, "loss": 0.3712, "step": 4236 }, { "epoch": 2.103425450934966, "grad_norm": 0.39082571864128113, "learning_rate": 2.481674335031352e-06, "loss": 0.3263, "step": 4237 }, { "epoch": 2.103921893099454, "grad_norm": 0.40998655557632446, "learning_rate": 2.4791788767183144e-06, "loss": 0.3518, "step": 4238 }, { "epoch": 2.104418335263942, "grad_norm": 0.47771933674812317, "learning_rate": 2.476684260038937e-06, "loss": 0.4039, "step": 4239 }, { "epoch": 2.1049147774284296, "grad_norm": 0.4138580560684204, "learning_rate": 2.474190485826106e-06, "loss": 0.3217, "step": 4240 }, { "epoch": 2.1054112195929173, "grad_norm": 0.44883599877357483, "learning_rate": 2.471697554912425e-06, "loss": 0.424, "step": 4241 }, { "epoch": 2.105907661757405, "grad_norm": 0.452683687210083, "learning_rate": 2.4692054681302135e-06, "loss": 0.3716, "step": 4242 }, { "epoch": 2.1064041039218933, "grad_norm": 0.40022513270378113, "learning_rate": 2.466714226311513e-06, "loss": 0.3303, "step": 4243 }, { "epoch": 2.106900546086381, "grad_norm": 0.45559611916542053, "learning_rate": 2.4642238302880817e-06, "loss": 0.3633, "step": 4244 }, { "epoch": 2.1073969882508687, "grad_norm": 0.4281702935695648, "learning_rate": 2.461734280891394e-06, "loss": 0.3079, "step": 4245 }, { "epoch": 2.1078934304153565, "grad_norm": 0.5064395070075989, "learning_rate": 2.4592455789526466e-06, "loss": 0.4107, "step": 4246 }, { "epoch": 2.1083898725798447, "grad_norm": 0.42595088481903076, "learning_rate": 2.4567577253027425e-06, "loss": 0.321, "step": 4247 }, { "epoch": 2.1088863147443324, "grad_norm": 0.4238038957118988, "learning_rate": 2.4542707207723158e-06, "loss": 0.3004, "step": 4248 }, { "epoch": 2.10938275690882, "grad_norm": 0.4704975485801697, "learning_rate": 2.451784566191705e-06, "loss": 0.3211, "step": 4249 }, { "epoch": 2.109879199073308, "grad_norm": 0.41944649815559387, "learning_rate": 2.4492992623909706e-06, "loss": 0.3388, "step": 4250 }, { "epoch": 2.1103756412377956, "grad_norm": 0.44968703389167786, "learning_rate": 2.4468148101998877e-06, "loss": 0.3583, "step": 4251 }, { "epoch": 2.110872083402284, "grad_norm": 0.45173677802085876, "learning_rate": 2.4443312104479487e-06, "loss": 0.3534, "step": 4252 }, { "epoch": 2.1113685255667716, "grad_norm": 0.5125005841255188, "learning_rate": 2.441848463964361e-06, "loss": 0.3662, "step": 4253 }, { "epoch": 2.1118649677312593, "grad_norm": 0.40841639041900635, "learning_rate": 2.4393665715780405e-06, "loss": 0.3233, "step": 4254 }, { "epoch": 2.112361409895747, "grad_norm": 0.42417535185813904, "learning_rate": 2.436885534117632e-06, "loss": 0.3651, "step": 4255 }, { "epoch": 2.112857852060235, "grad_norm": 0.3856864869594574, "learning_rate": 2.4344053524114796e-06, "loss": 0.3169, "step": 4256 }, { "epoch": 2.113354294224723, "grad_norm": 0.491709440946579, "learning_rate": 2.4319260272876533e-06, "loss": 0.431, "step": 4257 }, { "epoch": 2.1138507363892107, "grad_norm": 0.3921709358692169, "learning_rate": 2.429447559573926e-06, "loss": 0.3235, "step": 4258 }, { "epoch": 2.1143471785536985, "grad_norm": 0.42690205574035645, "learning_rate": 2.4269699500977987e-06, "loss": 0.3675, "step": 4259 }, { "epoch": 2.114843620718186, "grad_norm": 0.4312000572681427, "learning_rate": 2.424493199686472e-06, "loss": 0.3737, "step": 4260 }, { "epoch": 2.115340062882674, "grad_norm": 0.40005454421043396, "learning_rate": 2.4220173091668675e-06, "loss": 0.3304, "step": 4261 }, { "epoch": 2.115836505047162, "grad_norm": 0.41077256202697754, "learning_rate": 2.419542279365618e-06, "loss": 0.3385, "step": 4262 }, { "epoch": 2.11633294721165, "grad_norm": 0.47901391983032227, "learning_rate": 2.4170681111090684e-06, "loss": 0.3605, "step": 4263 }, { "epoch": 2.1168293893761376, "grad_norm": 0.45264455676078796, "learning_rate": 2.414594805223278e-06, "loss": 0.3457, "step": 4264 }, { "epoch": 2.1173258315406254, "grad_norm": 0.4205852448940277, "learning_rate": 2.4121223625340134e-06, "loss": 0.327, "step": 4265 }, { "epoch": 2.1178222737051136, "grad_norm": 0.46945562958717346, "learning_rate": 2.4096507838667564e-06, "loss": 0.2896, "step": 4266 }, { "epoch": 2.1183187158696013, "grad_norm": 0.43351614475250244, "learning_rate": 2.407180070046702e-06, "loss": 0.3221, "step": 4267 }, { "epoch": 2.118815158034089, "grad_norm": 0.44975876808166504, "learning_rate": 2.404710221898752e-06, "loss": 0.3367, "step": 4268 }, { "epoch": 2.119311600198577, "grad_norm": 0.3776879608631134, "learning_rate": 2.4022412402475235e-06, "loss": 0.2817, "step": 4269 }, { "epoch": 2.1198080423630645, "grad_norm": 0.48684757947921753, "learning_rate": 2.3997731259173423e-06, "loss": 0.3747, "step": 4270 }, { "epoch": 2.1203044845275527, "grad_norm": 0.4135708212852478, "learning_rate": 2.3973058797322453e-06, "loss": 0.3727, "step": 4271 }, { "epoch": 2.1208009266920405, "grad_norm": 0.4329470694065094, "learning_rate": 2.394839502515976e-06, "loss": 0.3326, "step": 4272 }, { "epoch": 2.121297368856528, "grad_norm": 0.43911978602409363, "learning_rate": 2.3923739950919924e-06, "loss": 0.3103, "step": 4273 }, { "epoch": 2.121793811021016, "grad_norm": 0.44936853647232056, "learning_rate": 2.3899093582834605e-06, "loss": 0.3692, "step": 4274 }, { "epoch": 2.1222902531855037, "grad_norm": 0.45969587564468384, "learning_rate": 2.3874455929132557e-06, "loss": 0.3562, "step": 4275 }, { "epoch": 2.122786695349992, "grad_norm": 0.4526992440223694, "learning_rate": 2.384982699803964e-06, "loss": 0.3291, "step": 4276 }, { "epoch": 2.1232831375144796, "grad_norm": 0.41580381989479065, "learning_rate": 2.382520679777873e-06, "loss": 0.2861, "step": 4277 }, { "epoch": 2.1237795796789674, "grad_norm": 0.4542829394340515, "learning_rate": 2.380059533656991e-06, "loss": 0.3478, "step": 4278 }, { "epoch": 2.124276021843455, "grad_norm": 0.51592618227005, "learning_rate": 2.377599262263023e-06, "loss": 0.4002, "step": 4279 }, { "epoch": 2.1247724640079433, "grad_norm": 0.3736162781715393, "learning_rate": 2.3751398664173906e-06, "loss": 0.2604, "step": 4280 }, { "epoch": 2.125268906172431, "grad_norm": 0.4371238350868225, "learning_rate": 2.372681346941213e-06, "loss": 0.3891, "step": 4281 }, { "epoch": 2.1257653483369188, "grad_norm": 0.4405156970024109, "learning_rate": 2.370223704655331e-06, "loss": 0.3734, "step": 4282 }, { "epoch": 2.1262617905014065, "grad_norm": 0.40513867139816284, "learning_rate": 2.3677669403802788e-06, "loss": 0.3068, "step": 4283 }, { "epoch": 2.1267582326658943, "grad_norm": 0.4634877145290375, "learning_rate": 2.3653110549363036e-06, "loss": 0.3954, "step": 4284 }, { "epoch": 2.1272546748303824, "grad_norm": 0.39060866832733154, "learning_rate": 2.3628560491433637e-06, "loss": 0.3103, "step": 4285 }, { "epoch": 2.12775111699487, "grad_norm": 0.4011937379837036, "learning_rate": 2.3604019238211135e-06, "loss": 0.3261, "step": 4286 }, { "epoch": 2.128247559159358, "grad_norm": 0.43441271781921387, "learning_rate": 2.3579486797889222e-06, "loss": 0.3524, "step": 4287 }, { "epoch": 2.1287440013238457, "grad_norm": 0.4696820080280304, "learning_rate": 2.3554963178658564e-06, "loss": 0.3872, "step": 4288 }, { "epoch": 2.1292404434883334, "grad_norm": 0.41573405265808105, "learning_rate": 2.3530448388707e-06, "loss": 0.4005, "step": 4289 }, { "epoch": 2.1297368856528216, "grad_norm": 0.386624813079834, "learning_rate": 2.3505942436219297e-06, "loss": 0.3257, "step": 4290 }, { "epoch": 2.1302333278173093, "grad_norm": 0.4371504783630371, "learning_rate": 2.348144532937735e-06, "loss": 0.345, "step": 4291 }, { "epoch": 2.130729769981797, "grad_norm": 0.4251922369003296, "learning_rate": 2.345695707636007e-06, "loss": 0.3819, "step": 4292 }, { "epoch": 2.131226212146285, "grad_norm": 0.4503451883792877, "learning_rate": 2.3432477685343426e-06, "loss": 0.3736, "step": 4293 }, { "epoch": 2.1317226543107726, "grad_norm": 0.4129239618778229, "learning_rate": 2.3408007164500427e-06, "loss": 0.3193, "step": 4294 }, { "epoch": 2.1322190964752608, "grad_norm": 0.4165392518043518, "learning_rate": 2.338354552200108e-06, "loss": 0.2694, "step": 4295 }, { "epoch": 2.1327155386397485, "grad_norm": 0.4440717399120331, "learning_rate": 2.3359092766012517e-06, "loss": 0.3649, "step": 4296 }, { "epoch": 2.1332119808042362, "grad_norm": 0.4262228012084961, "learning_rate": 2.33346489046988e-06, "loss": 0.3428, "step": 4297 }, { "epoch": 2.133708422968724, "grad_norm": 0.46945157647132874, "learning_rate": 2.3310213946221094e-06, "loss": 0.3691, "step": 4298 }, { "epoch": 2.134204865133212, "grad_norm": 0.42152267694473267, "learning_rate": 2.3285787898737565e-06, "loss": 0.3125, "step": 4299 }, { "epoch": 2.1347013072977, "grad_norm": 0.4469188451766968, "learning_rate": 2.32613707704034e-06, "loss": 0.3551, "step": 4300 }, { "epoch": 2.1351977494621877, "grad_norm": 0.4023858308792114, "learning_rate": 2.3236962569370843e-06, "loss": 0.3319, "step": 4301 }, { "epoch": 2.1356941916266754, "grad_norm": 0.43680617213249207, "learning_rate": 2.3212563303789082e-06, "loss": 0.3847, "step": 4302 }, { "epoch": 2.136190633791163, "grad_norm": 0.44754505157470703, "learning_rate": 2.318817298180439e-06, "loss": 0.3336, "step": 4303 }, { "epoch": 2.1366870759556513, "grad_norm": 0.4171426594257355, "learning_rate": 2.3163791611560036e-06, "loss": 0.3365, "step": 4304 }, { "epoch": 2.137183518120139, "grad_norm": 0.47023066878318787, "learning_rate": 2.3139419201196316e-06, "loss": 0.3242, "step": 4305 }, { "epoch": 2.137679960284627, "grad_norm": 0.46170347929000854, "learning_rate": 2.3115055758850476e-06, "loss": 0.2972, "step": 4306 }, { "epoch": 2.1381764024491146, "grad_norm": 0.42740753293037415, "learning_rate": 2.3090701292656808e-06, "loss": 0.3717, "step": 4307 }, { "epoch": 2.1386728446136027, "grad_norm": 0.3632584512233734, "learning_rate": 2.306635581074666e-06, "loss": 0.3357, "step": 4308 }, { "epoch": 2.1391692867780905, "grad_norm": 0.4245937764644623, "learning_rate": 2.304201932124827e-06, "loss": 0.353, "step": 4309 }, { "epoch": 2.1396657289425782, "grad_norm": 0.3412758409976959, "learning_rate": 2.3017691832286953e-06, "loss": 0.2916, "step": 4310 }, { "epoch": 2.140162171107066, "grad_norm": 0.40147268772125244, "learning_rate": 2.2993373351984994e-06, "loss": 0.3305, "step": 4311 }, { "epoch": 2.1406586132715537, "grad_norm": 0.44473862648010254, "learning_rate": 2.2969063888461697e-06, "loss": 0.3472, "step": 4312 }, { "epoch": 2.141155055436042, "grad_norm": 0.47599512338638306, "learning_rate": 2.294476344983328e-06, "loss": 0.4051, "step": 4313 }, { "epoch": 2.1416514976005296, "grad_norm": 0.38925305008888245, "learning_rate": 2.292047204421303e-06, "loss": 0.3026, "step": 4314 }, { "epoch": 2.1421479397650174, "grad_norm": 0.39799532294273376, "learning_rate": 2.2896189679711186e-06, "loss": 0.3036, "step": 4315 }, { "epoch": 2.142644381929505, "grad_norm": 0.409470796585083, "learning_rate": 2.2871916364434963e-06, "loss": 0.3728, "step": 4316 }, { "epoch": 2.143140824093993, "grad_norm": 0.4887690544128418, "learning_rate": 2.284765210648859e-06, "loss": 0.3329, "step": 4317 }, { "epoch": 2.143637266258481, "grad_norm": 0.4220064580440521, "learning_rate": 2.282339691397318e-06, "loss": 0.3344, "step": 4318 }, { "epoch": 2.144133708422969, "grad_norm": 0.43507370352745056, "learning_rate": 2.279915079498696e-06, "loss": 0.3422, "step": 4319 }, { "epoch": 2.1446301505874565, "grad_norm": 0.43894264101982117, "learning_rate": 2.277491375762499e-06, "loss": 0.3458, "step": 4320 }, { "epoch": 2.1451265927519443, "grad_norm": 0.40508148074150085, "learning_rate": 2.2750685809979378e-06, "loss": 0.303, "step": 4321 }, { "epoch": 2.145623034916432, "grad_norm": 0.4541653096675873, "learning_rate": 2.2726466960139176e-06, "loss": 0.3576, "step": 4322 }, { "epoch": 2.14611947708092, "grad_norm": 0.45842814445495605, "learning_rate": 2.270225721619041e-06, "loss": 0.3542, "step": 4323 }, { "epoch": 2.146615919245408, "grad_norm": 0.45352545380592346, "learning_rate": 2.2678056586216062e-06, "loss": 0.4393, "step": 4324 }, { "epoch": 2.1471123614098957, "grad_norm": 0.36776086688041687, "learning_rate": 2.2653865078296017e-06, "loss": 0.3223, "step": 4325 }, { "epoch": 2.1476088035743834, "grad_norm": 0.4044423997402191, "learning_rate": 2.2629682700507225e-06, "loss": 0.3211, "step": 4326 }, { "epoch": 2.1481052457388716, "grad_norm": 0.48328256607055664, "learning_rate": 2.2605509460923488e-06, "loss": 0.3198, "step": 4327 }, { "epoch": 2.1486016879033594, "grad_norm": 0.4170074164867401, "learning_rate": 2.258134536761561e-06, "loss": 0.3215, "step": 4328 }, { "epoch": 2.149098130067847, "grad_norm": 0.4501346945762634, "learning_rate": 2.2557190428651282e-06, "loss": 0.3449, "step": 4329 }, { "epoch": 2.149594572232335, "grad_norm": 0.3950890004634857, "learning_rate": 2.253304465209524e-06, "loss": 0.2956, "step": 4330 }, { "epoch": 2.1500910143968226, "grad_norm": 0.478067547082901, "learning_rate": 2.250890804600909e-06, "loss": 0.3547, "step": 4331 }, { "epoch": 2.150587456561311, "grad_norm": 0.4014423191547394, "learning_rate": 2.2484780618451357e-06, "loss": 0.2999, "step": 4332 }, { "epoch": 2.1510838987257985, "grad_norm": 0.4323996603488922, "learning_rate": 2.2460662377477554e-06, "loss": 0.3115, "step": 4333 }, { "epoch": 2.1515803408902863, "grad_norm": 0.4004603922367096, "learning_rate": 2.243655333114011e-06, "loss": 0.3727, "step": 4334 }, { "epoch": 2.152076783054774, "grad_norm": 0.4520457088947296, "learning_rate": 2.2412453487488394e-06, "loss": 0.3939, "step": 4335 }, { "epoch": 2.1525732252192618, "grad_norm": 0.43610280752182007, "learning_rate": 2.2388362854568628e-06, "loss": 0.3121, "step": 4336 }, { "epoch": 2.15306966738375, "grad_norm": 0.4656255841255188, "learning_rate": 2.236428144042411e-06, "loss": 0.3206, "step": 4337 }, { "epoch": 2.1535661095482377, "grad_norm": 0.43329688906669617, "learning_rate": 2.234020925309489e-06, "loss": 0.3232, "step": 4338 }, { "epoch": 2.1540625517127254, "grad_norm": 0.49855464696884155, "learning_rate": 2.2316146300618057e-06, "loss": 0.3424, "step": 4339 }, { "epoch": 2.154558993877213, "grad_norm": 0.4009709060192108, "learning_rate": 2.2292092591027565e-06, "loss": 0.3094, "step": 4340 }, { "epoch": 2.1550554360417014, "grad_norm": 0.4623003602027893, "learning_rate": 2.2268048132354303e-06, "loss": 0.3976, "step": 4341 }, { "epoch": 2.155551878206189, "grad_norm": 0.3907858431339264, "learning_rate": 2.224401293262607e-06, "loss": 0.2802, "step": 4342 }, { "epoch": 2.156048320370677, "grad_norm": 0.41457995772361755, "learning_rate": 2.2219986999867537e-06, "loss": 0.352, "step": 4343 }, { "epoch": 2.1565447625351646, "grad_norm": 0.4728369116783142, "learning_rate": 2.2195970342100328e-06, "loss": 0.4042, "step": 4344 }, { "epoch": 2.1570412046996523, "grad_norm": 0.35919520258903503, "learning_rate": 2.217196296734294e-06, "loss": 0.3077, "step": 4345 }, { "epoch": 2.1575376468641405, "grad_norm": 0.3938879370689392, "learning_rate": 2.21479648836108e-06, "loss": 0.3229, "step": 4346 }, { "epoch": 2.1580340890286283, "grad_norm": 0.4553435444831848, "learning_rate": 2.212397609891623e-06, "loss": 0.3368, "step": 4347 }, { "epoch": 2.158530531193116, "grad_norm": 0.42943522334098816, "learning_rate": 2.209999662126837e-06, "loss": 0.3227, "step": 4348 }, { "epoch": 2.1590269733576037, "grad_norm": 0.46576058864593506, "learning_rate": 2.20760264586734e-06, "loss": 0.3771, "step": 4349 }, { "epoch": 2.1595234155220915, "grad_norm": 0.4401981830596924, "learning_rate": 2.2052065619134243e-06, "loss": 0.3285, "step": 4350 }, { "epoch": 2.1600198576865797, "grad_norm": 0.46383804082870483, "learning_rate": 2.2028114110650796e-06, "loss": 0.3706, "step": 4351 }, { "epoch": 2.1605162998510674, "grad_norm": 0.4151746928691864, "learning_rate": 2.200417194121981e-06, "loss": 0.3513, "step": 4352 }, { "epoch": 2.161012742015555, "grad_norm": 0.4157862961292267, "learning_rate": 2.198023911883495e-06, "loss": 0.3319, "step": 4353 }, { "epoch": 2.161509184180043, "grad_norm": 0.39830923080444336, "learning_rate": 2.1956315651486694e-06, "loss": 0.3355, "step": 4354 }, { "epoch": 2.1620056263445306, "grad_norm": 0.4003085196018219, "learning_rate": 2.1932401547162436e-06, "loss": 0.3197, "step": 4355 }, { "epoch": 2.162502068509019, "grad_norm": 0.4796973764896393, "learning_rate": 2.1908496813846503e-06, "loss": 0.3734, "step": 4356 }, { "epoch": 2.1629985106735066, "grad_norm": 0.39683228731155396, "learning_rate": 2.188460145951998e-06, "loss": 0.3342, "step": 4357 }, { "epoch": 2.1634949528379943, "grad_norm": 0.41947683691978455, "learning_rate": 2.1860715492160922e-06, "loss": 0.3986, "step": 4358 }, { "epoch": 2.163991395002482, "grad_norm": 0.39974528551101685, "learning_rate": 2.1836838919744136e-06, "loss": 0.2931, "step": 4359 }, { "epoch": 2.1644878371669702, "grad_norm": 0.4382435977458954, "learning_rate": 2.1812971750241436e-06, "loss": 0.3845, "step": 4360 }, { "epoch": 2.164984279331458, "grad_norm": 0.4251062273979187, "learning_rate": 2.178911399162137e-06, "loss": 0.331, "step": 4361 }, { "epoch": 2.1654807214959457, "grad_norm": 0.45285022258758545, "learning_rate": 2.1765265651849415e-06, "loss": 0.3059, "step": 4362 }, { "epoch": 2.1659771636604335, "grad_norm": 0.4466625154018402, "learning_rate": 2.1741426738887885e-06, "loss": 0.3271, "step": 4363 }, { "epoch": 2.166473605824921, "grad_norm": 0.4767611026763916, "learning_rate": 2.1717597260695934e-06, "loss": 0.3485, "step": 4364 }, { "epoch": 2.1669700479894094, "grad_norm": 0.3946076035499573, "learning_rate": 2.1693777225229605e-06, "loss": 0.3007, "step": 4365 }, { "epoch": 2.167466490153897, "grad_norm": 0.4125174880027771, "learning_rate": 2.16699666404417e-06, "loss": 0.3766, "step": 4366 }, { "epoch": 2.167962932318385, "grad_norm": 0.4427288770675659, "learning_rate": 2.1646165514282014e-06, "loss": 0.2939, "step": 4367 }, { "epoch": 2.1684593744828726, "grad_norm": 0.4176521599292755, "learning_rate": 2.162237385469702e-06, "loss": 0.3162, "step": 4368 }, { "epoch": 2.168955816647361, "grad_norm": 0.4499415159225464, "learning_rate": 2.1598591669630135e-06, "loss": 0.3972, "step": 4369 }, { "epoch": 2.1694522588118486, "grad_norm": 0.45687296986579895, "learning_rate": 2.1574818967021595e-06, "loss": 0.3642, "step": 4370 }, { "epoch": 2.1699487009763363, "grad_norm": 0.45527738332748413, "learning_rate": 2.1551055754808436e-06, "loss": 0.3371, "step": 4371 }, { "epoch": 2.170445143140824, "grad_norm": 0.4789164960384369, "learning_rate": 2.1527302040924588e-06, "loss": 0.3194, "step": 4372 }, { "epoch": 2.170941585305312, "grad_norm": 0.4433867931365967, "learning_rate": 2.1503557833300714e-06, "loss": 0.3367, "step": 4373 }, { "epoch": 2.1714380274698, "grad_norm": 0.47083181142807007, "learning_rate": 2.14798231398644e-06, "loss": 0.3152, "step": 4374 }, { "epoch": 2.1719344696342877, "grad_norm": 0.3914371430873871, "learning_rate": 2.1456097968539996e-06, "loss": 0.2777, "step": 4375 }, { "epoch": 2.1724309117987755, "grad_norm": 0.5225337147712708, "learning_rate": 2.1432382327248724e-06, "loss": 0.3994, "step": 4376 }, { "epoch": 2.172927353963263, "grad_norm": 0.41275835037231445, "learning_rate": 2.140867622390853e-06, "loss": 0.3116, "step": 4377 }, { "epoch": 2.173423796127751, "grad_norm": 0.4064953029155731, "learning_rate": 2.1384979666434295e-06, "loss": 0.3373, "step": 4378 }, { "epoch": 2.173920238292239, "grad_norm": 0.452466756105423, "learning_rate": 2.1361292662737655e-06, "loss": 0.3581, "step": 4379 }, { "epoch": 2.174416680456727, "grad_norm": 0.45091336965560913, "learning_rate": 2.1337615220727015e-06, "loss": 0.3741, "step": 4380 }, { "epoch": 2.1749131226212146, "grad_norm": 0.4395720064640045, "learning_rate": 2.1313947348307655e-06, "loss": 0.3318, "step": 4381 }, { "epoch": 2.1754095647857024, "grad_norm": 0.4136495590209961, "learning_rate": 2.1290289053381635e-06, "loss": 0.3309, "step": 4382 }, { "epoch": 2.17590600695019, "grad_norm": 0.490142285823822, "learning_rate": 2.1266640343847826e-06, "loss": 0.4012, "step": 4383 }, { "epoch": 2.1764024491146783, "grad_norm": 0.366044819355011, "learning_rate": 2.124300122760186e-06, "loss": 0.2588, "step": 4384 }, { "epoch": 2.176898891279166, "grad_norm": 0.40190520882606506, "learning_rate": 2.1219371712536214e-06, "loss": 0.3178, "step": 4385 }, { "epoch": 2.1773953334436538, "grad_norm": 0.43896904587745667, "learning_rate": 2.119575180654014e-06, "loss": 0.3682, "step": 4386 }, { "epoch": 2.1778917756081415, "grad_norm": 0.4051996171474457, "learning_rate": 2.1172141517499676e-06, "loss": 0.3134, "step": 4387 }, { "epoch": 2.1783882177726293, "grad_norm": 0.4141786992549896, "learning_rate": 2.114854085329769e-06, "loss": 0.3273, "step": 4388 }, { "epoch": 2.1788846599371174, "grad_norm": 0.43144798278808594, "learning_rate": 2.112494982181373e-06, "loss": 0.3708, "step": 4389 }, { "epoch": 2.179381102101605, "grad_norm": 0.40625542402267456, "learning_rate": 2.110136843092428e-06, "loss": 0.3507, "step": 4390 }, { "epoch": 2.179877544266093, "grad_norm": 0.41572561860084534, "learning_rate": 2.1077796688502478e-06, "loss": 0.3047, "step": 4391 }, { "epoch": 2.1803739864305807, "grad_norm": 0.4131183922290802, "learning_rate": 2.1054234602418294e-06, "loss": 0.3538, "step": 4392 }, { "epoch": 2.180870428595069, "grad_norm": 0.42586129903793335, "learning_rate": 2.1030682180538475e-06, "loss": 0.3583, "step": 4393 }, { "epoch": 2.1813668707595566, "grad_norm": 0.37267592549324036, "learning_rate": 2.100713943072653e-06, "loss": 0.3608, "step": 4394 }, { "epoch": 2.1818633129240443, "grad_norm": 0.3717781901359558, "learning_rate": 2.0983606360842773e-06, "loss": 0.3213, "step": 4395 }, { "epoch": 2.182359755088532, "grad_norm": 0.46972596645355225, "learning_rate": 2.096008297874419e-06, "loss": 0.3906, "step": 4396 }, { "epoch": 2.18285619725302, "grad_norm": 0.3941284120082855, "learning_rate": 2.0936569292284675e-06, "loss": 0.2662, "step": 4397 }, { "epoch": 2.183352639417508, "grad_norm": 0.44333264231681824, "learning_rate": 2.091306530931475e-06, "loss": 0.2762, "step": 4398 }, { "epoch": 2.1838490815819958, "grad_norm": 0.4553477466106415, "learning_rate": 2.0889571037681807e-06, "loss": 0.3462, "step": 4399 }, { "epoch": 2.1843455237464835, "grad_norm": 0.4351314306259155, "learning_rate": 2.0866086485229875e-06, "loss": 0.3133, "step": 4400 }, { "epoch": 2.1848419659109712, "grad_norm": 0.4964914321899414, "learning_rate": 2.0842611659799868e-06, "loss": 0.3571, "step": 4401 }, { "epoch": 2.1853384080754594, "grad_norm": 0.3801079988479614, "learning_rate": 2.081914656922939e-06, "loss": 0.3027, "step": 4402 }, { "epoch": 2.185834850239947, "grad_norm": 0.4177214503288269, "learning_rate": 2.0795691221352766e-06, "loss": 0.3522, "step": 4403 }, { "epoch": 2.186331292404435, "grad_norm": 0.4137610197067261, "learning_rate": 2.0772245624001114e-06, "loss": 0.3673, "step": 4404 }, { "epoch": 2.1868277345689227, "grad_norm": 0.417184054851532, "learning_rate": 2.0748809785002285e-06, "loss": 0.3156, "step": 4405 }, { "epoch": 2.1873241767334104, "grad_norm": 0.4371083378791809, "learning_rate": 2.072538371218088e-06, "loss": 0.3426, "step": 4406 }, { "epoch": 2.1878206188978986, "grad_norm": 0.39851927757263184, "learning_rate": 2.0701967413358177e-06, "loss": 0.3628, "step": 4407 }, { "epoch": 2.1883170610623863, "grad_norm": 0.3969198167324066, "learning_rate": 2.067856089635231e-06, "loss": 0.3551, "step": 4408 }, { "epoch": 2.188813503226874, "grad_norm": 0.4470338225364685, "learning_rate": 2.065516416897804e-06, "loss": 0.3763, "step": 4409 }, { "epoch": 2.189309945391362, "grad_norm": 0.45061829686164856, "learning_rate": 2.06317772390469e-06, "loss": 0.3336, "step": 4410 }, { "epoch": 2.1898063875558496, "grad_norm": 0.4556463956832886, "learning_rate": 2.060840011436715e-06, "loss": 0.2999, "step": 4411 }, { "epoch": 2.1903028297203377, "grad_norm": 0.43682175874710083, "learning_rate": 2.058503280274379e-06, "loss": 0.3045, "step": 4412 }, { "epoch": 2.1907992718848255, "grad_norm": 0.4462187588214874, "learning_rate": 2.0561675311978533e-06, "loss": 0.3365, "step": 4413 }, { "epoch": 2.1912957140493132, "grad_norm": 0.45138922333717346, "learning_rate": 2.0538327649869793e-06, "loss": 0.367, "step": 4414 }, { "epoch": 2.191792156213801, "grad_norm": 0.43662944436073303, "learning_rate": 2.0514989824212723e-06, "loss": 0.3626, "step": 4415 }, { "epoch": 2.1922885983782887, "grad_norm": 0.39309221506118774, "learning_rate": 2.049166184279919e-06, "loss": 0.3478, "step": 4416 }, { "epoch": 2.192785040542777, "grad_norm": 0.44707146286964417, "learning_rate": 2.0468343713417773e-06, "loss": 0.3693, "step": 4417 }, { "epoch": 2.1932814827072646, "grad_norm": 0.41367337107658386, "learning_rate": 2.0445035443853765e-06, "loss": 0.382, "step": 4418 }, { "epoch": 2.1937779248717524, "grad_norm": 0.3997436463832855, "learning_rate": 2.0421737041889167e-06, "loss": 0.3373, "step": 4419 }, { "epoch": 2.19427436703624, "grad_norm": 0.40144258737564087, "learning_rate": 2.0398448515302694e-06, "loss": 0.2749, "step": 4420 }, { "epoch": 2.1947708092007283, "grad_norm": 0.42855939269065857, "learning_rate": 2.0375169871869722e-06, "loss": 0.3665, "step": 4421 }, { "epoch": 2.195267251365216, "grad_norm": 0.4647752642631531, "learning_rate": 2.0351901119362368e-06, "loss": 0.394, "step": 4422 }, { "epoch": 2.195763693529704, "grad_norm": 0.39508455991744995, "learning_rate": 2.0328642265549435e-06, "loss": 0.335, "step": 4423 }, { "epoch": 2.1962601356941915, "grad_norm": 0.397916316986084, "learning_rate": 2.0305393318196432e-06, "loss": 0.3696, "step": 4424 }, { "epoch": 2.1967565778586793, "grad_norm": 0.4299086630344391, "learning_rate": 2.0282154285065566e-06, "loss": 0.3364, "step": 4425 }, { "epoch": 2.1972530200231675, "grad_norm": 0.4065285325050354, "learning_rate": 2.0258925173915658e-06, "loss": 0.32, "step": 4426 }, { "epoch": 2.197749462187655, "grad_norm": 0.4544898271560669, "learning_rate": 2.0235705992502353e-06, "loss": 0.42, "step": 4427 }, { "epoch": 2.198245904352143, "grad_norm": 0.40495315194129944, "learning_rate": 2.021249674857785e-06, "loss": 0.3247, "step": 4428 }, { "epoch": 2.1987423465166307, "grad_norm": 0.43766799569129944, "learning_rate": 2.0189297449891123e-06, "loss": 0.3307, "step": 4429 }, { "epoch": 2.199238788681119, "grad_norm": 0.44873708486557007, "learning_rate": 2.016610810418773e-06, "loss": 0.3774, "step": 4430 }, { "epoch": 2.1997352308456066, "grad_norm": 0.4125984013080597, "learning_rate": 2.0142928719210035e-06, "loss": 0.2848, "step": 4431 }, { "epoch": 2.2002316730100944, "grad_norm": 0.459154337644577, "learning_rate": 2.011975930269696e-06, "loss": 0.354, "step": 4432 }, { "epoch": 2.200728115174582, "grad_norm": 0.3941761553287506, "learning_rate": 2.0096599862384147e-06, "loss": 0.2969, "step": 4433 }, { "epoch": 2.20122455733907, "grad_norm": 0.43152061104774475, "learning_rate": 2.0073450406003907e-06, "loss": 0.3855, "step": 4434 }, { "epoch": 2.201720999503558, "grad_norm": 0.3929770886898041, "learning_rate": 2.0050310941285226e-06, "loss": 0.3532, "step": 4435 }, { "epoch": 2.202217441668046, "grad_norm": 0.44988277554512024, "learning_rate": 2.002718147595375e-06, "loss": 0.3409, "step": 4436 }, { "epoch": 2.2027138838325335, "grad_norm": 0.407967209815979, "learning_rate": 2.0004062017731724e-06, "loss": 0.3259, "step": 4437 }, { "epoch": 2.2032103259970213, "grad_norm": 0.5060053467750549, "learning_rate": 1.9980952574338185e-06, "loss": 0.3295, "step": 4438 }, { "epoch": 2.203706768161509, "grad_norm": 0.391111820936203, "learning_rate": 1.9957853153488694e-06, "loss": 0.3451, "step": 4439 }, { "epoch": 2.204203210325997, "grad_norm": 0.4370705187320709, "learning_rate": 1.9934763762895526e-06, "loss": 0.3126, "step": 4440 }, { "epoch": 2.204699652490485, "grad_norm": 0.403546005487442, "learning_rate": 1.991168441026762e-06, "loss": 0.2901, "step": 4441 }, { "epoch": 2.2051960946549727, "grad_norm": 0.4228035509586334, "learning_rate": 1.9888615103310527e-06, "loss": 0.3883, "step": 4442 }, { "epoch": 2.2056925368194604, "grad_norm": 0.4078860282897949, "learning_rate": 1.9865555849726488e-06, "loss": 0.369, "step": 4443 }, { "epoch": 2.206188978983948, "grad_norm": 0.45260530710220337, "learning_rate": 1.9842506657214327e-06, "loss": 0.3975, "step": 4444 }, { "epoch": 2.2066854211484364, "grad_norm": 0.42439189553260803, "learning_rate": 1.9819467533469554e-06, "loss": 0.2958, "step": 4445 }, { "epoch": 2.207181863312924, "grad_norm": 0.42832687497138977, "learning_rate": 1.979643848618431e-06, "loss": 0.3761, "step": 4446 }, { "epoch": 2.207678305477412, "grad_norm": 0.45614710450172424, "learning_rate": 1.977341952304739e-06, "loss": 0.4114, "step": 4447 }, { "epoch": 2.2081747476418996, "grad_norm": 0.40144574642181396, "learning_rate": 1.9750410651744138e-06, "loss": 0.2647, "step": 4448 }, { "epoch": 2.2086711898063873, "grad_norm": 0.4113912880420685, "learning_rate": 1.9727411879956654e-06, "loss": 0.372, "step": 4449 }, { "epoch": 2.2091676319708755, "grad_norm": 0.40744513273239136, "learning_rate": 1.9704423215363594e-06, "loss": 0.3387, "step": 4450 }, { "epoch": 2.2096640741353633, "grad_norm": 0.4104136824607849, "learning_rate": 1.968144466564022e-06, "loss": 0.3537, "step": 4451 }, { "epoch": 2.210160516299851, "grad_norm": 0.39940494298934937, "learning_rate": 1.9658476238458458e-06, "loss": 0.323, "step": 4452 }, { "epoch": 2.2106569584643387, "grad_norm": 0.39433297514915466, "learning_rate": 1.9635517941486843e-06, "loss": 0.3363, "step": 4453 }, { "epoch": 2.211153400628827, "grad_norm": 0.44619613885879517, "learning_rate": 1.961256978239054e-06, "loss": 0.3367, "step": 4454 }, { "epoch": 2.2116498427933147, "grad_norm": 0.46700048446655273, "learning_rate": 1.9589631768831293e-06, "loss": 0.4033, "step": 4455 }, { "epoch": 2.2121462849578024, "grad_norm": 0.37980127334594727, "learning_rate": 1.956670390846748e-06, "loss": 0.2902, "step": 4456 }, { "epoch": 2.21264272712229, "grad_norm": 0.44534948468208313, "learning_rate": 1.9543786208954106e-06, "loss": 0.3589, "step": 4457 }, { "epoch": 2.213139169286778, "grad_norm": 0.46090003848075867, "learning_rate": 1.952087867794277e-06, "loss": 0.3247, "step": 4458 }, { "epoch": 2.213635611451266, "grad_norm": 0.40404900908470154, "learning_rate": 1.949798132308167e-06, "loss": 0.3165, "step": 4459 }, { "epoch": 2.214132053615754, "grad_norm": 0.3929890990257263, "learning_rate": 1.947509415201558e-06, "loss": 0.343, "step": 4460 }, { "epoch": 2.2146284957802416, "grad_norm": 0.4108649790287018, "learning_rate": 1.945221717238597e-06, "loss": 0.3627, "step": 4461 }, { "epoch": 2.2151249379447293, "grad_norm": 0.41246142983436584, "learning_rate": 1.942935039183078e-06, "loss": 0.3659, "step": 4462 }, { "epoch": 2.2156213801092175, "grad_norm": 0.39782601594924927, "learning_rate": 1.9406493817984632e-06, "loss": 0.3099, "step": 4463 }, { "epoch": 2.2161178222737052, "grad_norm": 0.4278465211391449, "learning_rate": 1.9383647458478718e-06, "loss": 0.3387, "step": 4464 }, { "epoch": 2.216614264438193, "grad_norm": 0.42858412861824036, "learning_rate": 1.9360811320940805e-06, "loss": 0.3631, "step": 4465 }, { "epoch": 2.2171107066026807, "grad_norm": 0.42920124530792236, "learning_rate": 1.933798541299528e-06, "loss": 0.4011, "step": 4466 }, { "epoch": 2.2176071487671685, "grad_norm": 0.4548744261264801, "learning_rate": 1.9315169742263048e-06, "loss": 0.3347, "step": 4467 }, { "epoch": 2.2181035909316567, "grad_norm": 0.45375892519950867, "learning_rate": 1.9292364316361707e-06, "loss": 0.2893, "step": 4468 }, { "epoch": 2.2186000330961444, "grad_norm": 0.4195388853549957, "learning_rate": 1.9269569142905316e-06, "loss": 0.3696, "step": 4469 }, { "epoch": 2.219096475260632, "grad_norm": 0.43051791191101074, "learning_rate": 1.9246784229504593e-06, "loss": 0.3491, "step": 4470 }, { "epoch": 2.21959291742512, "grad_norm": 0.423942893743515, "learning_rate": 1.9224009583766763e-06, "loss": 0.3409, "step": 4471 }, { "epoch": 2.2200893595896076, "grad_norm": 0.6093710660934448, "learning_rate": 1.92012452132957e-06, "loss": 0.3595, "step": 4472 }, { "epoch": 2.220585801754096, "grad_norm": 0.39890649914741516, "learning_rate": 1.917849112569181e-06, "loss": 0.3028, "step": 4473 }, { "epoch": 2.2210822439185836, "grad_norm": 0.47966083884239197, "learning_rate": 1.9155747328552027e-06, "loss": 0.4124, "step": 4474 }, { "epoch": 2.2215786860830713, "grad_norm": 0.4005206525325775, "learning_rate": 1.913301382946994e-06, "loss": 0.3391, "step": 4475 }, { "epoch": 2.222075128247559, "grad_norm": 0.4030780494213104, "learning_rate": 1.91102906360356e-06, "loss": 0.3276, "step": 4476 }, { "epoch": 2.222571570412047, "grad_norm": 0.43816637992858887, "learning_rate": 1.9087577755835694e-06, "loss": 0.3721, "step": 4477 }, { "epoch": 2.223068012576535, "grad_norm": 0.40633440017700195, "learning_rate": 1.9064875196453392e-06, "loss": 0.3841, "step": 4478 }, { "epoch": 2.2235644547410227, "grad_norm": 0.440552294254303, "learning_rate": 1.9042182965468525e-06, "loss": 0.3488, "step": 4479 }, { "epoch": 2.2240608969055105, "grad_norm": 0.43409767746925354, "learning_rate": 1.9019501070457363e-06, "loss": 0.358, "step": 4480 }, { "epoch": 2.224557339069998, "grad_norm": 0.4341605603694916, "learning_rate": 1.8996829518992793e-06, "loss": 0.3535, "step": 4481 }, { "epoch": 2.2250537812344864, "grad_norm": 0.405476838350296, "learning_rate": 1.8974168318644221e-06, "loss": 0.3005, "step": 4482 }, { "epoch": 2.225550223398974, "grad_norm": 0.4646248519420624, "learning_rate": 1.8951517476977615e-06, "loss": 0.3769, "step": 4483 }, { "epoch": 2.226046665563462, "grad_norm": 0.44415801763534546, "learning_rate": 1.892887700155549e-06, "loss": 0.3542, "step": 4484 }, { "epoch": 2.2265431077279496, "grad_norm": 0.4541037976741791, "learning_rate": 1.8906246899936853e-06, "loss": 0.2808, "step": 4485 }, { "epoch": 2.2270395498924374, "grad_norm": 0.41896188259124756, "learning_rate": 1.8883627179677287e-06, "loss": 0.3132, "step": 4486 }, { "epoch": 2.2275359920569255, "grad_norm": 0.45562857389450073, "learning_rate": 1.8861017848328917e-06, "loss": 0.3543, "step": 4487 }, { "epoch": 2.2280324342214133, "grad_norm": 0.420515775680542, "learning_rate": 1.8838418913440376e-06, "loss": 0.3258, "step": 4488 }, { "epoch": 2.228528876385901, "grad_norm": 0.4576808512210846, "learning_rate": 1.8815830382556832e-06, "loss": 0.4153, "step": 4489 }, { "epoch": 2.2290253185503888, "grad_norm": 0.45186924934387207, "learning_rate": 1.8793252263219985e-06, "loss": 0.3489, "step": 4490 }, { "epoch": 2.2295217607148765, "grad_norm": 0.42380762100219727, "learning_rate": 1.8770684562968079e-06, "loss": 0.3222, "step": 4491 }, { "epoch": 2.2300182028793647, "grad_norm": 0.44841188192367554, "learning_rate": 1.8748127289335805e-06, "loss": 0.3246, "step": 4492 }, { "epoch": 2.2305146450438524, "grad_norm": 0.4065743386745453, "learning_rate": 1.8725580449854453e-06, "loss": 0.3388, "step": 4493 }, { "epoch": 2.23101108720834, "grad_norm": 0.4238337278366089, "learning_rate": 1.87030440520518e-06, "loss": 0.3773, "step": 4494 }, { "epoch": 2.231507529372828, "grad_norm": 0.40748488903045654, "learning_rate": 1.8680518103452134e-06, "loss": 0.2971, "step": 4495 }, { "epoch": 2.232003971537316, "grad_norm": 0.45488423109054565, "learning_rate": 1.865800261157627e-06, "loss": 0.3722, "step": 4496 }, { "epoch": 2.232500413701804, "grad_norm": 0.412970632314682, "learning_rate": 1.863549758394147e-06, "loss": 0.2926, "step": 4497 }, { "epoch": 2.2329968558662916, "grad_norm": 0.3825896382331848, "learning_rate": 1.8613003028061627e-06, "loss": 0.2767, "step": 4498 }, { "epoch": 2.2334932980307793, "grad_norm": 0.44398850202560425, "learning_rate": 1.8590518951447001e-06, "loss": 0.397, "step": 4499 }, { "epoch": 2.233989740195267, "grad_norm": 0.4100760221481323, "learning_rate": 1.8568045361604453e-06, "loss": 0.287, "step": 4500 }, { "epoch": 2.2344861823597553, "grad_norm": 0.436306357383728, "learning_rate": 1.8545582266037254e-06, "loss": 0.4052, "step": 4501 }, { "epoch": 2.234982624524243, "grad_norm": 0.3919224739074707, "learning_rate": 1.8523129672245283e-06, "loss": 0.3453, "step": 4502 }, { "epoch": 2.2354790666887308, "grad_norm": 0.39826637506484985, "learning_rate": 1.8500687587724803e-06, "loss": 0.35, "step": 4503 }, { "epoch": 2.2359755088532185, "grad_norm": 0.42131495475769043, "learning_rate": 1.8478256019968637e-06, "loss": 0.3248, "step": 4504 }, { "epoch": 2.2364719510177062, "grad_norm": 0.4309840202331543, "learning_rate": 1.8455834976466069e-06, "loss": 0.3367, "step": 4505 }, { "epoch": 2.2369683931821944, "grad_norm": 0.39320582151412964, "learning_rate": 1.8433424464702882e-06, "loss": 0.3327, "step": 4506 }, { "epoch": 2.237464835346682, "grad_norm": 0.4249469041824341, "learning_rate": 1.841102449216135e-06, "loss": 0.3379, "step": 4507 }, { "epoch": 2.23796127751117, "grad_norm": 0.36070379614830017, "learning_rate": 1.8388635066320164e-06, "loss": 0.3477, "step": 4508 }, { "epoch": 2.2384577196756577, "grad_norm": 0.4114246666431427, "learning_rate": 1.8366256194654613e-06, "loss": 0.3515, "step": 4509 }, { "epoch": 2.2389541618401454, "grad_norm": 0.45044994354248047, "learning_rate": 1.8343887884636353e-06, "loss": 0.3547, "step": 4510 }, { "epoch": 2.2394506040046336, "grad_norm": 0.4055950939655304, "learning_rate": 1.8321530143733552e-06, "loss": 0.32, "step": 4511 }, { "epoch": 2.2399470461691213, "grad_norm": 0.4462197721004486, "learning_rate": 1.8299182979410867e-06, "loss": 0.3768, "step": 4512 }, { "epoch": 2.240443488333609, "grad_norm": 0.4534740447998047, "learning_rate": 1.8276846399129405e-06, "loss": 0.3318, "step": 4513 }, { "epoch": 2.240939930498097, "grad_norm": 0.4348461627960205, "learning_rate": 1.825452041034676e-06, "loss": 0.3301, "step": 4514 }, { "epoch": 2.241436372662585, "grad_norm": 0.4104790687561035, "learning_rate": 1.8232205020516925e-06, "loss": 0.3502, "step": 4515 }, { "epoch": 2.2419328148270727, "grad_norm": 0.4617813229560852, "learning_rate": 1.8209900237090461e-06, "loss": 0.3431, "step": 4516 }, { "epoch": 2.2424292569915605, "grad_norm": 0.3942581117153168, "learning_rate": 1.8187606067514284e-06, "loss": 0.2832, "step": 4517 }, { "epoch": 2.2429256991560482, "grad_norm": 0.46036264300346375, "learning_rate": 1.8165322519231832e-06, "loss": 0.3603, "step": 4518 }, { "epoch": 2.243422141320536, "grad_norm": 0.45281556248664856, "learning_rate": 1.8143049599682972e-06, "loss": 0.3722, "step": 4519 }, { "epoch": 2.243918583485024, "grad_norm": 0.45646318793296814, "learning_rate": 1.8120787316304028e-06, "loss": 0.343, "step": 4520 }, { "epoch": 2.244415025649512, "grad_norm": 0.43664637207984924, "learning_rate": 1.8098535676527785e-06, "loss": 0.3253, "step": 4521 }, { "epoch": 2.2449114678139996, "grad_norm": 0.4619198441505432, "learning_rate": 1.8076294687783424e-06, "loss": 0.38, "step": 4522 }, { "epoch": 2.2454079099784874, "grad_norm": 0.38872337341308594, "learning_rate": 1.8054064357496636e-06, "loss": 0.284, "step": 4523 }, { "epoch": 2.2459043521429756, "grad_norm": 0.5079551935195923, "learning_rate": 1.8031844693089513e-06, "loss": 0.35, "step": 4524 }, { "epoch": 2.2464007943074633, "grad_norm": 0.4524703025817871, "learning_rate": 1.8009635701980615e-06, "loss": 0.3217, "step": 4525 }, { "epoch": 2.246897236471951, "grad_norm": 0.43264123797416687, "learning_rate": 1.7987437391584894e-06, "loss": 0.4138, "step": 4526 }, { "epoch": 2.247393678636439, "grad_norm": 0.39154377579689026, "learning_rate": 1.7965249769313776e-06, "loss": 0.3171, "step": 4527 }, { "epoch": 2.2478901208009265, "grad_norm": 0.40263739228248596, "learning_rate": 1.79430728425751e-06, "loss": 0.341, "step": 4528 }, { "epoch": 2.2483865629654147, "grad_norm": 0.3941449224948883, "learning_rate": 1.7920906618773142e-06, "loss": 0.346, "step": 4529 }, { "epoch": 2.2488830051299025, "grad_norm": 0.3752302825450897, "learning_rate": 1.7898751105308605e-06, "loss": 0.33, "step": 4530 }, { "epoch": 2.24937944729439, "grad_norm": 0.4420984089374542, "learning_rate": 1.7876606309578608e-06, "loss": 0.3675, "step": 4531 }, { "epoch": 2.249875889458878, "grad_norm": 0.43281951546669006, "learning_rate": 1.7854472238976717e-06, "loss": 0.3255, "step": 4532 }, { "epoch": 2.2503723316233657, "grad_norm": 0.38242608308792114, "learning_rate": 1.7832348900892864e-06, "loss": 0.3097, "step": 4533 }, { "epoch": 2.250868773787854, "grad_norm": 0.4318788945674896, "learning_rate": 1.781023630271344e-06, "loss": 0.3928, "step": 4534 }, { "epoch": 2.2513652159523416, "grad_norm": 0.39582085609436035, "learning_rate": 1.7788134451821248e-06, "loss": 0.2959, "step": 4535 }, { "epoch": 2.2518616581168294, "grad_norm": 0.4171285927295685, "learning_rate": 1.7766043355595498e-06, "loss": 0.3355, "step": 4536 }, { "epoch": 2.252358100281317, "grad_norm": 0.39119628071784973, "learning_rate": 1.774396302141181e-06, "loss": 0.303, "step": 4537 }, { "epoch": 2.252854542445805, "grad_norm": 0.43872135877609253, "learning_rate": 1.7721893456642165e-06, "loss": 0.349, "step": 4538 }, { "epoch": 2.253350984610293, "grad_norm": 0.42840903997421265, "learning_rate": 1.7699834668655065e-06, "loss": 0.3288, "step": 4539 }, { "epoch": 2.253847426774781, "grad_norm": 0.44687387347221375, "learning_rate": 1.7677786664815278e-06, "loss": 0.3235, "step": 4540 }, { "epoch": 2.2543438689392685, "grad_norm": 0.48114320635795593, "learning_rate": 1.7655749452484067e-06, "loss": 0.3362, "step": 4541 }, { "epoch": 2.2548403111037563, "grad_norm": 0.4416487514972687, "learning_rate": 1.7633723039019018e-06, "loss": 0.3099, "step": 4542 }, { "epoch": 2.255336753268244, "grad_norm": 0.46805527806282043, "learning_rate": 1.7611707431774193e-06, "loss": 0.3695, "step": 4543 }, { "epoch": 2.255833195432732, "grad_norm": 0.39465561509132385, "learning_rate": 1.758970263810001e-06, "loss": 0.3395, "step": 4544 }, { "epoch": 2.25632963759722, "grad_norm": 0.4274292588233948, "learning_rate": 1.756770866534322e-06, "loss": 0.3402, "step": 4545 }, { "epoch": 2.2568260797617077, "grad_norm": 0.4021953344345093, "learning_rate": 1.7545725520847078e-06, "loss": 0.3662, "step": 4546 }, { "epoch": 2.2573225219261954, "grad_norm": 0.4043031632900238, "learning_rate": 1.7523753211951112e-06, "loss": 0.3633, "step": 4547 }, { "epoch": 2.2578189640906836, "grad_norm": 0.421076238155365, "learning_rate": 1.7501791745991308e-06, "loss": 0.2963, "step": 4548 }, { "epoch": 2.2583154062551714, "grad_norm": 0.41383078694343567, "learning_rate": 1.7479841130299957e-06, "loss": 0.361, "step": 4549 }, { "epoch": 2.258811848419659, "grad_norm": 0.43998613953590393, "learning_rate": 1.7457901372205832e-06, "loss": 0.36, "step": 4550 }, { "epoch": 2.259308290584147, "grad_norm": 0.4457276463508606, "learning_rate": 1.7435972479033981e-06, "loss": 0.2952, "step": 4551 }, { "epoch": 2.259804732748635, "grad_norm": 0.42193734645843506, "learning_rate": 1.7414054458105878e-06, "loss": 0.3594, "step": 4552 }, { "epoch": 2.2603011749131228, "grad_norm": 0.44577518105506897, "learning_rate": 1.7392147316739356e-06, "loss": 0.3286, "step": 4553 }, { "epoch": 2.2607976170776105, "grad_norm": 0.43403273820877075, "learning_rate": 1.7370251062248606e-06, "loss": 0.3168, "step": 4554 }, { "epoch": 2.2612940592420983, "grad_norm": 0.4554133415222168, "learning_rate": 1.734836570194422e-06, "loss": 0.2891, "step": 4555 }, { "epoch": 2.261790501406586, "grad_norm": 0.4937637746334076, "learning_rate": 1.732649124313307e-06, "loss": 0.338, "step": 4556 }, { "epoch": 2.262286943571074, "grad_norm": 0.4336298406124115, "learning_rate": 1.7304627693118508e-06, "loss": 0.3595, "step": 4557 }, { "epoch": 2.262783385735562, "grad_norm": 0.3662022054195404, "learning_rate": 1.7282775059200136e-06, "loss": 0.3214, "step": 4558 }, { "epoch": 2.2632798279000497, "grad_norm": 0.40454113483428955, "learning_rate": 1.7260933348673963e-06, "loss": 0.3151, "step": 4559 }, { "epoch": 2.2637762700645374, "grad_norm": 0.43116483092308044, "learning_rate": 1.723910256883235e-06, "loss": 0.3372, "step": 4560 }, { "epoch": 2.264272712229025, "grad_norm": 0.429593950510025, "learning_rate": 1.7217282726963996e-06, "loss": 0.3248, "step": 4561 }, { "epoch": 2.2647691543935133, "grad_norm": 0.4863281846046448, "learning_rate": 1.7195473830353971e-06, "loss": 0.3158, "step": 4562 }, { "epoch": 2.265265596558001, "grad_norm": 0.4464089870452881, "learning_rate": 1.7173675886283642e-06, "loss": 0.3242, "step": 4563 }, { "epoch": 2.265762038722489, "grad_norm": 0.39699307084083557, "learning_rate": 1.7151888902030762e-06, "loss": 0.3498, "step": 4564 }, { "epoch": 2.2662584808869766, "grad_norm": 0.3791615068912506, "learning_rate": 1.7130112884869415e-06, "loss": 0.3433, "step": 4565 }, { "epoch": 2.2667549230514643, "grad_norm": 0.4204130470752716, "learning_rate": 1.7108347842070023e-06, "loss": 0.371, "step": 4566 }, { "epoch": 2.2672513652159525, "grad_norm": 0.45214423537254333, "learning_rate": 1.7086593780899353e-06, "loss": 0.3657, "step": 4567 }, { "epoch": 2.2677478073804402, "grad_norm": 0.40628817677497864, "learning_rate": 1.7064850708620457e-06, "loss": 0.3231, "step": 4568 }, { "epoch": 2.268244249544928, "grad_norm": 0.40316957235336304, "learning_rate": 1.704311863249281e-06, "loss": 0.3278, "step": 4569 }, { "epoch": 2.2687406917094157, "grad_norm": 0.490764856338501, "learning_rate": 1.7021397559772118e-06, "loss": 0.4081, "step": 4570 }, { "epoch": 2.2692371338739035, "grad_norm": 0.42259156703948975, "learning_rate": 1.6999687497710472e-06, "loss": 0.3283, "step": 4571 }, { "epoch": 2.2697335760383917, "grad_norm": 0.36716532707214355, "learning_rate": 1.697798845355627e-06, "loss": 0.3205, "step": 4572 }, { "epoch": 2.2702300182028794, "grad_norm": 0.40500175952911377, "learning_rate": 1.6956300434554256e-06, "loss": 0.3572, "step": 4573 }, { "epoch": 2.270726460367367, "grad_norm": 0.42635005712509155, "learning_rate": 1.6934623447945431e-06, "loss": 0.3383, "step": 4574 }, { "epoch": 2.271222902531855, "grad_norm": 0.43109357357025146, "learning_rate": 1.6912957500967164e-06, "loss": 0.3202, "step": 4575 }, { "epoch": 2.271719344696343, "grad_norm": 0.4385204613208771, "learning_rate": 1.6891302600853137e-06, "loss": 0.3223, "step": 4576 }, { "epoch": 2.272215786860831, "grad_norm": 0.45578157901763916, "learning_rate": 1.6869658754833323e-06, "loss": 0.3442, "step": 4577 }, { "epoch": 2.2727122290253186, "grad_norm": 0.44964537024497986, "learning_rate": 1.684802597013404e-06, "loss": 0.3266, "step": 4578 }, { "epoch": 2.2732086711898063, "grad_norm": 0.4123380780220032, "learning_rate": 1.682640425397783e-06, "loss": 0.2901, "step": 4579 }, { "epoch": 2.273705113354294, "grad_norm": 0.4421735405921936, "learning_rate": 1.6804793613583663e-06, "loss": 0.3292, "step": 4580 }, { "epoch": 2.2742015555187822, "grad_norm": 0.37220680713653564, "learning_rate": 1.6783194056166697e-06, "loss": 0.2981, "step": 4581 }, { "epoch": 2.27469799768327, "grad_norm": 0.42912858724594116, "learning_rate": 1.676160558893845e-06, "loss": 0.335, "step": 4582 }, { "epoch": 2.2751944398477577, "grad_norm": 0.42633792757987976, "learning_rate": 1.674002821910673e-06, "loss": 0.402, "step": 4583 }, { "epoch": 2.2756908820122455, "grad_norm": 0.3913803696632385, "learning_rate": 1.671846195387563e-06, "loss": 0.3436, "step": 4584 }, { "epoch": 2.2761873241767336, "grad_norm": 0.4652297794818878, "learning_rate": 1.6696906800445562e-06, "loss": 0.333, "step": 4585 }, { "epoch": 2.2766837663412214, "grad_norm": 0.40978190302848816, "learning_rate": 1.6675362766013148e-06, "loss": 0.326, "step": 4586 }, { "epoch": 2.277180208505709, "grad_norm": 0.402926504611969, "learning_rate": 1.6653829857771432e-06, "loss": 0.3317, "step": 4587 }, { "epoch": 2.277676650670197, "grad_norm": 0.438890278339386, "learning_rate": 1.6632308082909604e-06, "loss": 0.3709, "step": 4588 }, { "epoch": 2.2781730928346846, "grad_norm": 0.4148527979850769, "learning_rate": 1.6610797448613225e-06, "loss": 0.3381, "step": 4589 }, { "epoch": 2.278669534999173, "grad_norm": 0.40374332666397095, "learning_rate": 1.6589297962064111e-06, "loss": 0.3265, "step": 4590 }, { "epoch": 2.2791659771636605, "grad_norm": 0.4662332236766815, "learning_rate": 1.6567809630440356e-06, "loss": 0.3214, "step": 4591 }, { "epoch": 2.2796624193281483, "grad_norm": 0.4132120609283447, "learning_rate": 1.6546332460916347e-06, "loss": 0.341, "step": 4592 }, { "epoch": 2.280158861492636, "grad_norm": 0.430291086435318, "learning_rate": 1.6524866460662686e-06, "loss": 0.3622, "step": 4593 }, { "epoch": 2.2806553036571238, "grad_norm": 0.4110223650932312, "learning_rate": 1.6503411636846318e-06, "loss": 0.3575, "step": 4594 }, { "epoch": 2.281151745821612, "grad_norm": 0.4546332061290741, "learning_rate": 1.648196799663041e-06, "loss": 0.3254, "step": 4595 }, { "epoch": 2.2816481879860997, "grad_norm": 0.4558647871017456, "learning_rate": 1.646053554717444e-06, "loss": 0.3967, "step": 4596 }, { "epoch": 2.2821446301505874, "grad_norm": 0.4162381887435913, "learning_rate": 1.6439114295634068e-06, "loss": 0.3101, "step": 4597 }, { "epoch": 2.282641072315075, "grad_norm": 0.4120921492576599, "learning_rate": 1.6417704249161326e-06, "loss": 0.3233, "step": 4598 }, { "epoch": 2.283137514479563, "grad_norm": 0.4449867904186249, "learning_rate": 1.63963054149044e-06, "loss": 0.3933, "step": 4599 }, { "epoch": 2.283633956644051, "grad_norm": 0.414262980222702, "learning_rate": 1.6374917800007806e-06, "loss": 0.3195, "step": 4600 }, { "epoch": 2.284130398808539, "grad_norm": 0.4111655354499817, "learning_rate": 1.6353541411612272e-06, "loss": 0.3838, "step": 4601 }, { "epoch": 2.2846268409730266, "grad_norm": 0.41771483421325684, "learning_rate": 1.6332176256854809e-06, "loss": 0.3332, "step": 4602 }, { "epoch": 2.2851232831375143, "grad_norm": 0.43784958124160767, "learning_rate": 1.6310822342868664e-06, "loss": 0.4111, "step": 4603 }, { "epoch": 2.285619725302002, "grad_norm": 0.40150508284568787, "learning_rate": 1.6289479676783305e-06, "loss": 0.2652, "step": 4604 }, { "epoch": 2.2861161674664903, "grad_norm": 0.43342024087905884, "learning_rate": 1.6268148265724476e-06, "loss": 0.3797, "step": 4605 }, { "epoch": 2.286612609630978, "grad_norm": 0.39215460419654846, "learning_rate": 1.624682811681416e-06, "loss": 0.3345, "step": 4606 }, { "epoch": 2.2871090517954658, "grad_norm": 0.41226306557655334, "learning_rate": 1.6225519237170578e-06, "loss": 0.3295, "step": 4607 }, { "epoch": 2.2876054939599535, "grad_norm": 0.44180911779403687, "learning_rate": 1.6204221633908202e-06, "loss": 0.3588, "step": 4608 }, { "epoch": 2.2881019361244417, "grad_norm": 0.4031664729118347, "learning_rate": 1.6182935314137665e-06, "loss": 0.3432, "step": 4609 }, { "epoch": 2.2885983782889294, "grad_norm": 0.40454909205436707, "learning_rate": 1.6161660284965969e-06, "loss": 0.2673, "step": 4610 }, { "epoch": 2.289094820453417, "grad_norm": 0.47645869851112366, "learning_rate": 1.6140396553496208e-06, "loss": 0.3497, "step": 4611 }, { "epoch": 2.289591262617905, "grad_norm": 0.47244659066200256, "learning_rate": 1.6119144126827784e-06, "loss": 0.3624, "step": 4612 }, { "epoch": 2.290087704782393, "grad_norm": 0.44255393743515015, "learning_rate": 1.609790301205631e-06, "loss": 0.3278, "step": 4613 }, { "epoch": 2.290584146946881, "grad_norm": 0.4377044141292572, "learning_rate": 1.607667321627361e-06, "loss": 0.3747, "step": 4614 }, { "epoch": 2.2910805891113686, "grad_norm": 0.4600472152233124, "learning_rate": 1.605545474656775e-06, "loss": 0.3275, "step": 4615 }, { "epoch": 2.2915770312758563, "grad_norm": 0.44072335958480835, "learning_rate": 1.6034247610022962e-06, "loss": 0.3216, "step": 4616 }, { "epoch": 2.292073473440344, "grad_norm": 0.39113736152648926, "learning_rate": 1.6013051813719788e-06, "loss": 0.3053, "step": 4617 }, { "epoch": 2.2925699156048323, "grad_norm": 0.44909703731536865, "learning_rate": 1.5991867364734887e-06, "loss": 0.3309, "step": 4618 }, { "epoch": 2.29306635776932, "grad_norm": 0.4481925070285797, "learning_rate": 1.5970694270141197e-06, "loss": 0.3805, "step": 4619 }, { "epoch": 2.2935627999338077, "grad_norm": 0.40864360332489014, "learning_rate": 1.5949532537007795e-06, "loss": 0.2607, "step": 4620 }, { "epoch": 2.2940592420982955, "grad_norm": 0.4149336814880371, "learning_rate": 1.5928382172400064e-06, "loss": 0.3001, "step": 4621 }, { "epoch": 2.2945556842627832, "grad_norm": 0.406631737947464, "learning_rate": 1.59072431833795e-06, "loss": 0.3211, "step": 4622 }, { "epoch": 2.2950521264272714, "grad_norm": 0.4545438885688782, "learning_rate": 1.5886115577003847e-06, "loss": 0.3319, "step": 4623 }, { "epoch": 2.295548568591759, "grad_norm": 0.398913711309433, "learning_rate": 1.5864999360327039e-06, "loss": 0.3092, "step": 4624 }, { "epoch": 2.296045010756247, "grad_norm": 0.4592550992965698, "learning_rate": 1.5843894540399201e-06, "loss": 0.3613, "step": 4625 }, { "epoch": 2.2965414529207346, "grad_norm": 0.4327591359615326, "learning_rate": 1.582280112426669e-06, "loss": 0.3284, "step": 4626 }, { "epoch": 2.2970378950852224, "grad_norm": 0.377633273601532, "learning_rate": 1.580171911897196e-06, "loss": 0.2636, "step": 4627 }, { "epoch": 2.2975343372497106, "grad_norm": 0.459224134683609, "learning_rate": 1.5780648531553794e-06, "loss": 0.3974, "step": 4628 }, { "epoch": 2.2980307794141983, "grad_norm": 0.4237324893474579, "learning_rate": 1.5759589369047035e-06, "loss": 0.3198, "step": 4629 }, { "epoch": 2.298527221578686, "grad_norm": 0.42491745948791504, "learning_rate": 1.573854163848278e-06, "loss": 0.3146, "step": 4630 }, { "epoch": 2.299023663743174, "grad_norm": 0.4257364869117737, "learning_rate": 1.5717505346888301e-06, "loss": 0.3266, "step": 4631 }, { "epoch": 2.2995201059076615, "grad_norm": 0.47055453062057495, "learning_rate": 1.5696480501287037e-06, "loss": 0.3823, "step": 4632 }, { "epoch": 2.3000165480721497, "grad_norm": 0.434597373008728, "learning_rate": 1.567546710869864e-06, "loss": 0.3165, "step": 4633 }, { "epoch": 2.3005129902366375, "grad_norm": 0.44514718651771545, "learning_rate": 1.565446517613886e-06, "loss": 0.3617, "step": 4634 }, { "epoch": 2.301009432401125, "grad_norm": 0.4669404625892639, "learning_rate": 1.56334747106197e-06, "loss": 0.3092, "step": 4635 }, { "epoch": 2.301505874565613, "grad_norm": 0.4472537636756897, "learning_rate": 1.5612495719149306e-06, "loss": 0.3117, "step": 4636 }, { "epoch": 2.3020023167301007, "grad_norm": 0.4474393129348755, "learning_rate": 1.5591528208731993e-06, "loss": 0.3315, "step": 4637 }, { "epoch": 2.302498758894589, "grad_norm": 0.4747469425201416, "learning_rate": 1.5570572186368255e-06, "loss": 0.3798, "step": 4638 }, { "epoch": 2.3029952010590766, "grad_norm": 0.36179131269454956, "learning_rate": 1.554962765905469e-06, "loss": 0.284, "step": 4639 }, { "epoch": 2.3034916432235644, "grad_norm": 0.49285393953323364, "learning_rate": 1.5528694633784175e-06, "loss": 0.3809, "step": 4640 }, { "epoch": 2.303988085388052, "grad_norm": 0.4174538552761078, "learning_rate": 1.5507773117545628e-06, "loss": 0.3127, "step": 4641 }, { "epoch": 2.3044845275525403, "grad_norm": 0.4515141546726227, "learning_rate": 1.5486863117324185e-06, "loss": 0.3594, "step": 4642 }, { "epoch": 2.304980969717028, "grad_norm": 0.4267148971557617, "learning_rate": 1.5465964640101134e-06, "loss": 0.3223, "step": 4643 }, { "epoch": 2.305477411881516, "grad_norm": 0.43061694502830505, "learning_rate": 1.5445077692853926e-06, "loss": 0.326, "step": 4644 }, { "epoch": 2.3059738540460035, "grad_norm": 0.4885096251964569, "learning_rate": 1.5424202282556106e-06, "loss": 0.4006, "step": 4645 }, { "epoch": 2.3064702962104917, "grad_norm": 0.4629594087600708, "learning_rate": 1.5403338416177428e-06, "loss": 0.3222, "step": 4646 }, { "epoch": 2.3069667383749795, "grad_norm": 0.4771069884300232, "learning_rate": 1.5382486100683768e-06, "loss": 0.3571, "step": 4647 }, { "epoch": 2.307463180539467, "grad_norm": 0.41213008761405945, "learning_rate": 1.5361645343037146e-06, "loss": 0.2837, "step": 4648 }, { "epoch": 2.307959622703955, "grad_norm": 0.4050709903240204, "learning_rate": 1.5340816150195743e-06, "loss": 0.3332, "step": 4649 }, { "epoch": 2.3084560648684427, "grad_norm": 0.44785672426223755, "learning_rate": 1.5319998529113812e-06, "loss": 0.3556, "step": 4650 }, { "epoch": 2.308952507032931, "grad_norm": 0.38182011246681213, "learning_rate": 1.5299192486741848e-06, "loss": 0.2848, "step": 4651 }, { "epoch": 2.3094489491974186, "grad_norm": 0.4338184595108032, "learning_rate": 1.5278398030026386e-06, "loss": 0.4025, "step": 4652 }, { "epoch": 2.3099453913619064, "grad_norm": 0.4409044086933136, "learning_rate": 1.5257615165910139e-06, "loss": 0.4067, "step": 4653 }, { "epoch": 2.310441833526394, "grad_norm": 0.422421395778656, "learning_rate": 1.5236843901331943e-06, "loss": 0.4099, "step": 4654 }, { "epoch": 2.310938275690882, "grad_norm": 0.4092070758342743, "learning_rate": 1.521608424322676e-06, "loss": 0.3404, "step": 4655 }, { "epoch": 2.31143471785537, "grad_norm": 0.42359551787376404, "learning_rate": 1.519533619852569e-06, "loss": 0.3605, "step": 4656 }, { "epoch": 2.3119311600198578, "grad_norm": 0.33251211047172546, "learning_rate": 1.517459977415589e-06, "loss": 0.2636, "step": 4657 }, { "epoch": 2.3124276021843455, "grad_norm": 0.43157294392585754, "learning_rate": 1.5153874977040756e-06, "loss": 0.3925, "step": 4658 }, { "epoch": 2.3129240443488333, "grad_norm": 0.42048144340515137, "learning_rate": 1.5133161814099683e-06, "loss": 0.3452, "step": 4659 }, { "epoch": 2.313420486513321, "grad_norm": 0.41880276799201965, "learning_rate": 1.511246029224826e-06, "loss": 0.3685, "step": 4660 }, { "epoch": 2.313916928677809, "grad_norm": 0.38877540826797485, "learning_rate": 1.5091770418398149e-06, "loss": 0.3682, "step": 4661 }, { "epoch": 2.314413370842297, "grad_norm": 0.3780767321586609, "learning_rate": 1.5071092199457144e-06, "loss": 0.352, "step": 4662 }, { "epoch": 2.3149098130067847, "grad_norm": 0.4801352024078369, "learning_rate": 1.5050425642329152e-06, "loss": 0.3991, "step": 4663 }, { "epoch": 2.3154062551712724, "grad_norm": 0.41431066393852234, "learning_rate": 1.5029770753914148e-06, "loss": 0.3399, "step": 4664 }, { "epoch": 2.31590269733576, "grad_norm": 0.39619165658950806, "learning_rate": 1.5009127541108247e-06, "loss": 0.353, "step": 4665 }, { "epoch": 2.3163991395002483, "grad_norm": 0.416487455368042, "learning_rate": 1.4988496010803667e-06, "loss": 0.2968, "step": 4666 }, { "epoch": 2.316895581664736, "grad_norm": 0.4518337547779083, "learning_rate": 1.4967876169888724e-06, "loss": 0.3717, "step": 4667 }, { "epoch": 2.317392023829224, "grad_norm": 0.8021722435951233, "learning_rate": 1.4947268025247774e-06, "loss": 0.3047, "step": 4668 }, { "epoch": 2.3178884659937116, "grad_norm": 0.3635554909706116, "learning_rate": 1.4926671583761381e-06, "loss": 0.3502, "step": 4669 }, { "epoch": 2.3183849081581998, "grad_norm": 0.46435701847076416, "learning_rate": 1.490608685230609e-06, "loss": 0.4305, "step": 4670 }, { "epoch": 2.3188813503226875, "grad_norm": 0.4224591851234436, "learning_rate": 1.48855138377546e-06, "loss": 0.3249, "step": 4671 }, { "epoch": 2.3193777924871752, "grad_norm": 0.388500839471817, "learning_rate": 1.486495254697568e-06, "loss": 0.2761, "step": 4672 }, { "epoch": 2.319874234651663, "grad_norm": 0.43835124373435974, "learning_rate": 1.4844402986834188e-06, "loss": 0.3196, "step": 4673 }, { "epoch": 2.320370676816151, "grad_norm": 0.45497190952301025, "learning_rate": 1.4823865164191077e-06, "loss": 0.4132, "step": 4674 }, { "epoch": 2.320867118980639, "grad_norm": 0.4097789227962494, "learning_rate": 1.480333908590334e-06, "loss": 0.3649, "step": 4675 }, { "epoch": 2.3213635611451267, "grad_norm": 0.3902345895767212, "learning_rate": 1.4782824758824088e-06, "loss": 0.3277, "step": 4676 }, { "epoch": 2.3218600033096144, "grad_norm": 0.4132847189903259, "learning_rate": 1.4762322189802502e-06, "loss": 0.295, "step": 4677 }, { "epoch": 2.322356445474102, "grad_norm": 0.4887129068374634, "learning_rate": 1.4741831385683824e-06, "loss": 0.3974, "step": 4678 }, { "epoch": 2.3228528876385903, "grad_norm": 0.418792188167572, "learning_rate": 1.4721352353309403e-06, "loss": 0.3007, "step": 4679 }, { "epoch": 2.323349329803078, "grad_norm": 0.4381471872329712, "learning_rate": 1.4700885099516577e-06, "loss": 0.3607, "step": 4680 }, { "epoch": 2.323845771967566, "grad_norm": 0.4127000570297241, "learning_rate": 1.468042963113887e-06, "loss": 0.353, "step": 4681 }, { "epoch": 2.3243422141320536, "grad_norm": 0.4173278212547302, "learning_rate": 1.4659985955005767e-06, "loss": 0.3836, "step": 4682 }, { "epoch": 2.3248386562965413, "grad_norm": 0.36011481285095215, "learning_rate": 1.4639554077942859e-06, "loss": 0.3157, "step": 4683 }, { "epoch": 2.3253350984610295, "grad_norm": 0.40642544627189636, "learning_rate": 1.4619134006771802e-06, "loss": 0.2945, "step": 4684 }, { "epoch": 2.3258315406255172, "grad_norm": 0.46933019161224365, "learning_rate": 1.4598725748310304e-06, "loss": 0.3789, "step": 4685 }, { "epoch": 2.326327982790005, "grad_norm": 0.3836582601070404, "learning_rate": 1.4578329309372136e-06, "loss": 0.3183, "step": 4686 }, { "epoch": 2.3268244249544927, "grad_norm": 0.43576887249946594, "learning_rate": 1.4557944696767078e-06, "loss": 0.3608, "step": 4687 }, { "epoch": 2.3273208671189805, "grad_norm": 0.4457939565181732, "learning_rate": 1.4537571917301051e-06, "loss": 0.3499, "step": 4688 }, { "epoch": 2.3278173092834686, "grad_norm": 0.41153064370155334, "learning_rate": 1.4517210977775936e-06, "loss": 0.308, "step": 4689 }, { "epoch": 2.3283137514479564, "grad_norm": 0.44118109345436096, "learning_rate": 1.4496861884989716e-06, "loss": 0.3059, "step": 4690 }, { "epoch": 2.328810193612444, "grad_norm": 0.42221561074256897, "learning_rate": 1.4476524645736362e-06, "loss": 0.3378, "step": 4691 }, { "epoch": 2.329306635776932, "grad_norm": 0.41236257553100586, "learning_rate": 1.4456199266805986e-06, "loss": 0.3223, "step": 4692 }, { "epoch": 2.3298030779414196, "grad_norm": 0.4876934587955475, "learning_rate": 1.443588575498463e-06, "loss": 0.3956, "step": 4693 }, { "epoch": 2.330299520105908, "grad_norm": 0.40866953134536743, "learning_rate": 1.4415584117054443e-06, "loss": 0.3617, "step": 4694 }, { "epoch": 2.3307959622703955, "grad_norm": 0.3988583981990814, "learning_rate": 1.4395294359793589e-06, "loss": 0.2965, "step": 4695 }, { "epoch": 2.3312924044348833, "grad_norm": 0.4515492916107178, "learning_rate": 1.4375016489976268e-06, "loss": 0.374, "step": 4696 }, { "epoch": 2.331788846599371, "grad_norm": 0.42344552278518677, "learning_rate": 1.4354750514372717e-06, "loss": 0.3293, "step": 4697 }, { "epoch": 2.3322852887638588, "grad_norm": 0.3670346140861511, "learning_rate": 1.4334496439749157e-06, "loss": 0.3097, "step": 4698 }, { "epoch": 2.332781730928347, "grad_norm": 0.39799100160598755, "learning_rate": 1.4314254272867933e-06, "loss": 0.3215, "step": 4699 }, { "epoch": 2.3332781730928347, "grad_norm": 0.449623167514801, "learning_rate": 1.4294024020487307e-06, "loss": 0.3671, "step": 4700 }, { "epoch": 2.3337746152573224, "grad_norm": 0.4155915379524231, "learning_rate": 1.4273805689361625e-06, "loss": 0.343, "step": 4701 }, { "epoch": 2.33427105742181, "grad_norm": 0.4114641845226288, "learning_rate": 1.4253599286241242e-06, "loss": 0.2764, "step": 4702 }, { "epoch": 2.3347674995862984, "grad_norm": 0.487522691488266, "learning_rate": 1.423340481787252e-06, "loss": 0.3173, "step": 4703 }, { "epoch": 2.335263941750786, "grad_norm": 0.42470860481262207, "learning_rate": 1.4213222290997863e-06, "loss": 0.3229, "step": 4704 }, { "epoch": 2.335760383915274, "grad_norm": 0.4463742971420288, "learning_rate": 1.4193051712355638e-06, "loss": 0.2975, "step": 4705 }, { "epoch": 2.3362568260797616, "grad_norm": 0.41798868775367737, "learning_rate": 1.4172893088680268e-06, "loss": 0.3164, "step": 4706 }, { "epoch": 2.33675326824425, "grad_norm": 0.38847479224205017, "learning_rate": 1.4152746426702169e-06, "loss": 0.3398, "step": 4707 }, { "epoch": 2.3372497104087375, "grad_norm": 0.4717247486114502, "learning_rate": 1.4132611733147767e-06, "loss": 0.3137, "step": 4708 }, { "epoch": 2.3377461525732253, "grad_norm": 0.5046491026878357, "learning_rate": 1.4112489014739477e-06, "loss": 0.3621, "step": 4709 }, { "epoch": 2.338242594737713, "grad_norm": 0.41629406809806824, "learning_rate": 1.4092378278195746e-06, "loss": 0.3294, "step": 4710 }, { "epoch": 2.3387390369022008, "grad_norm": 0.43124741315841675, "learning_rate": 1.4072279530231004e-06, "loss": 0.3984, "step": 4711 }, { "epoch": 2.339235479066689, "grad_norm": 0.3907816708087921, "learning_rate": 1.4052192777555645e-06, "loss": 0.278, "step": 4712 }, { "epoch": 2.3397319212311767, "grad_norm": 0.41972947120666504, "learning_rate": 1.4032118026876118e-06, "loss": 0.3669, "step": 4713 }, { "epoch": 2.3402283633956644, "grad_norm": 0.38748371601104736, "learning_rate": 1.4012055284894827e-06, "loss": 0.3316, "step": 4714 }, { "epoch": 2.340724805560152, "grad_norm": 0.41739657521247864, "learning_rate": 1.399200455831019e-06, "loss": 0.3925, "step": 4715 }, { "epoch": 2.34122124772464, "grad_norm": 0.39347895979881287, "learning_rate": 1.3971965853816577e-06, "loss": 0.3023, "step": 4716 }, { "epoch": 2.341717689889128, "grad_norm": 0.44396212697029114, "learning_rate": 1.3951939178104374e-06, "loss": 0.3225, "step": 4717 }, { "epoch": 2.342214132053616, "grad_norm": 0.45033806562423706, "learning_rate": 1.3931924537859948e-06, "loss": 0.3553, "step": 4718 }, { "epoch": 2.3427105742181036, "grad_norm": 0.3951842188835144, "learning_rate": 1.3911921939765643e-06, "loss": 0.2898, "step": 4719 }, { "epoch": 2.3432070163825913, "grad_norm": 0.3761337697505951, "learning_rate": 1.3891931390499802e-06, "loss": 0.3518, "step": 4720 }, { "epoch": 2.343703458547079, "grad_norm": 0.39553794264793396, "learning_rate": 1.3871952896736673e-06, "loss": 0.3258, "step": 4721 }, { "epoch": 2.3441999007115673, "grad_norm": 0.4333491325378418, "learning_rate": 1.38519864651466e-06, "loss": 0.3424, "step": 4722 }, { "epoch": 2.344696342876055, "grad_norm": 0.49229103326797485, "learning_rate": 1.3832032102395775e-06, "loss": 0.3492, "step": 4723 }, { "epoch": 2.3451927850405427, "grad_norm": 0.4482724070549011, "learning_rate": 1.3812089815146446e-06, "loss": 0.2975, "step": 4724 }, { "epoch": 2.3456892272050305, "grad_norm": 0.48818257451057434, "learning_rate": 1.3792159610056794e-06, "loss": 0.3286, "step": 4725 }, { "epoch": 2.3461856693695182, "grad_norm": 0.414328396320343, "learning_rate": 1.3772241493780975e-06, "loss": 0.3298, "step": 4726 }, { "epoch": 2.3466821115340064, "grad_norm": 0.4104086756706238, "learning_rate": 1.3752335472969113e-06, "loss": 0.343, "step": 4727 }, { "epoch": 2.347178553698494, "grad_norm": 0.44355010986328125, "learning_rate": 1.3732441554267257e-06, "loss": 0.3491, "step": 4728 }, { "epoch": 2.347674995862982, "grad_norm": 0.42705658078193665, "learning_rate": 1.37125597443175e-06, "loss": 0.3586, "step": 4729 }, { "epoch": 2.3481714380274696, "grad_norm": 0.44761499762535095, "learning_rate": 1.3692690049757783e-06, "loss": 0.353, "step": 4730 }, { "epoch": 2.348667880191958, "grad_norm": 0.39621543884277344, "learning_rate": 1.3672832477222086e-06, "loss": 0.3379, "step": 4731 }, { "epoch": 2.3491643223564456, "grad_norm": 0.5052608847618103, "learning_rate": 1.365298703334031e-06, "loss": 0.3545, "step": 4732 }, { "epoch": 2.3496607645209333, "grad_norm": 0.4000193774700165, "learning_rate": 1.3633153724738302e-06, "loss": 0.3522, "step": 4733 }, { "epoch": 2.350157206685421, "grad_norm": 0.3689286708831787, "learning_rate": 1.3613332558037883e-06, "loss": 0.3199, "step": 4734 }, { "epoch": 2.350653648849909, "grad_norm": 0.4059460163116455, "learning_rate": 1.3593523539856763e-06, "loss": 0.3221, "step": 4735 }, { "epoch": 2.351150091014397, "grad_norm": 0.4361700415611267, "learning_rate": 1.3573726676808686e-06, "loss": 0.4004, "step": 4736 }, { "epoch": 2.3516465331788847, "grad_norm": 0.4463023543357849, "learning_rate": 1.3553941975503243e-06, "loss": 0.327, "step": 4737 }, { "epoch": 2.3521429753433725, "grad_norm": 0.4456254243850708, "learning_rate": 1.3534169442546046e-06, "loss": 0.3175, "step": 4738 }, { "epoch": 2.35263941750786, "grad_norm": 0.393862783908844, "learning_rate": 1.3514409084538555e-06, "loss": 0.3064, "step": 4739 }, { "epoch": 2.3531358596723484, "grad_norm": 0.42767858505249023, "learning_rate": 1.3494660908078272e-06, "loss": 0.3761, "step": 4740 }, { "epoch": 2.353632301836836, "grad_norm": 0.4451149106025696, "learning_rate": 1.3474924919758542e-06, "loss": 0.3682, "step": 4741 }, { "epoch": 2.354128744001324, "grad_norm": 0.40329062938690186, "learning_rate": 1.3455201126168682e-06, "loss": 0.286, "step": 4742 }, { "epoch": 2.3546251861658116, "grad_norm": 0.4672844409942627, "learning_rate": 1.3435489533893937e-06, "loss": 0.3913, "step": 4743 }, { "epoch": 2.3551216283302994, "grad_norm": 0.4067973494529724, "learning_rate": 1.3415790149515461e-06, "loss": 0.3551, "step": 4744 }, { "epoch": 2.3556180704947876, "grad_norm": 0.43316665291786194, "learning_rate": 1.3396102979610377e-06, "loss": 0.3507, "step": 4745 }, { "epoch": 2.3561145126592753, "grad_norm": 0.4347943067550659, "learning_rate": 1.3376428030751643e-06, "loss": 0.3277, "step": 4746 }, { "epoch": 2.356610954823763, "grad_norm": 0.4165733754634857, "learning_rate": 1.3356765309508224e-06, "loss": 0.3394, "step": 4747 }, { "epoch": 2.357107396988251, "grad_norm": 0.47923481464385986, "learning_rate": 1.3337114822444958e-06, "loss": 0.3784, "step": 4748 }, { "epoch": 2.3576038391527385, "grad_norm": 0.43682754039764404, "learning_rate": 1.3317476576122607e-06, "loss": 0.3285, "step": 4749 }, { "epoch": 2.3581002813172267, "grad_norm": 0.3649154007434845, "learning_rate": 1.3297850577097853e-06, "loss": 0.3473, "step": 4750 }, { "epoch": 2.3585967234817145, "grad_norm": 0.3843662738800049, "learning_rate": 1.3278236831923286e-06, "loss": 0.313, "step": 4751 }, { "epoch": 2.359093165646202, "grad_norm": 0.43532857298851013, "learning_rate": 1.3258635347147407e-06, "loss": 0.3256, "step": 4752 }, { "epoch": 2.35958960781069, "grad_norm": 0.4188254773616791, "learning_rate": 1.3239046129314603e-06, "loss": 0.3124, "step": 4753 }, { "epoch": 2.3600860499751777, "grad_norm": 0.44068804383277893, "learning_rate": 1.3219469184965184e-06, "loss": 0.3817, "step": 4754 }, { "epoch": 2.360582492139666, "grad_norm": 0.41440194845199585, "learning_rate": 1.3199904520635365e-06, "loss": 0.3548, "step": 4755 }, { "epoch": 2.3610789343041536, "grad_norm": 0.3935437500476837, "learning_rate": 1.3180352142857256e-06, "loss": 0.4177, "step": 4756 }, { "epoch": 2.3615753764686414, "grad_norm": 0.3852834105491638, "learning_rate": 1.3160812058158883e-06, "loss": 0.311, "step": 4757 }, { "epoch": 2.362071818633129, "grad_norm": 0.43684306740760803, "learning_rate": 1.3141284273064099e-06, "loss": 0.3403, "step": 4758 }, { "epoch": 2.362568260797617, "grad_norm": 0.39165374636650085, "learning_rate": 1.3121768794092753e-06, "loss": 0.3226, "step": 4759 }, { "epoch": 2.363064702962105, "grad_norm": 0.40484899282455444, "learning_rate": 1.3102265627760507e-06, "loss": 0.3531, "step": 4760 }, { "epoch": 2.3635611451265928, "grad_norm": 0.39212653040885925, "learning_rate": 1.3082774780578954e-06, "loss": 0.3753, "step": 4761 }, { "epoch": 2.3640575872910805, "grad_norm": 0.4275614023208618, "learning_rate": 1.306329625905552e-06, "loss": 0.3551, "step": 4762 }, { "epoch": 2.3645540294555683, "grad_norm": 0.4036747217178345, "learning_rate": 1.3043830069693607e-06, "loss": 0.3931, "step": 4763 }, { "epoch": 2.3650504716200564, "grad_norm": 0.37943270802497864, "learning_rate": 1.3024376218992407e-06, "loss": 0.3322, "step": 4764 }, { "epoch": 2.365546913784544, "grad_norm": 0.42817193269729614, "learning_rate": 1.3004934713447047e-06, "loss": 0.3237, "step": 4765 }, { "epoch": 2.366043355949032, "grad_norm": 0.41725918650627136, "learning_rate": 1.2985505559548516e-06, "loss": 0.3194, "step": 4766 }, { "epoch": 2.3665397981135197, "grad_norm": 0.4459720253944397, "learning_rate": 1.296608876378368e-06, "loss": 0.3659, "step": 4767 }, { "epoch": 2.367036240278008, "grad_norm": 0.40066421031951904, "learning_rate": 1.2946684332635295e-06, "loss": 0.3003, "step": 4768 }, { "epoch": 2.3675326824424956, "grad_norm": 0.430555135011673, "learning_rate": 1.2927292272581925e-06, "loss": 0.33, "step": 4769 }, { "epoch": 2.3680291246069833, "grad_norm": 0.4203220009803772, "learning_rate": 1.290791259009812e-06, "loss": 0.3374, "step": 4770 }, { "epoch": 2.368525566771471, "grad_norm": 0.43084949254989624, "learning_rate": 1.2888545291654175e-06, "loss": 0.3091, "step": 4771 }, { "epoch": 2.369022008935959, "grad_norm": 0.43762367963790894, "learning_rate": 1.2869190383716323e-06, "loss": 0.314, "step": 4772 }, { "epoch": 2.369518451100447, "grad_norm": 0.45806318521499634, "learning_rate": 1.2849847872746646e-06, "loss": 0.3999, "step": 4773 }, { "epoch": 2.3700148932649348, "grad_norm": 0.3978629410266876, "learning_rate": 1.2830517765203082e-06, "loss": 0.3273, "step": 4774 }, { "epoch": 2.3705113354294225, "grad_norm": 0.39332714676856995, "learning_rate": 1.281120006753943e-06, "loss": 0.3391, "step": 4775 }, { "epoch": 2.3710077775939102, "grad_norm": 0.40848296880722046, "learning_rate": 1.2791894786205322e-06, "loss": 0.3552, "step": 4776 }, { "epoch": 2.371504219758398, "grad_norm": 0.39326179027557373, "learning_rate": 1.2772601927646305e-06, "loss": 0.3284, "step": 4777 }, { "epoch": 2.372000661922886, "grad_norm": 0.4450971186161041, "learning_rate": 1.2753321498303711e-06, "loss": 0.3395, "step": 4778 }, { "epoch": 2.372497104087374, "grad_norm": 0.42867422103881836, "learning_rate": 1.2734053504614757e-06, "loss": 0.3447, "step": 4779 }, { "epoch": 2.3729935462518617, "grad_norm": 0.4530094861984253, "learning_rate": 1.271479795301251e-06, "loss": 0.3437, "step": 4780 }, { "epoch": 2.3734899884163494, "grad_norm": 0.40537533164024353, "learning_rate": 1.2695554849925862e-06, "loss": 0.2965, "step": 4781 }, { "epoch": 2.373986430580837, "grad_norm": 0.4388904869556427, "learning_rate": 1.2676324201779593e-06, "loss": 0.3825, "step": 4782 }, { "epoch": 2.3744828727453253, "grad_norm": 0.4037007987499237, "learning_rate": 1.265710601499426e-06, "loss": 0.321, "step": 4783 }, { "epoch": 2.374979314909813, "grad_norm": 0.44492611289024353, "learning_rate": 1.2637900295986293e-06, "loss": 0.3903, "step": 4784 }, { "epoch": 2.375475757074301, "grad_norm": 0.4162834584712982, "learning_rate": 1.2618707051167983e-06, "loss": 0.3014, "step": 4785 }, { "epoch": 2.3759721992387886, "grad_norm": 0.45769307017326355, "learning_rate": 1.2599526286947427e-06, "loss": 0.3403, "step": 4786 }, { "epoch": 2.3764686414032763, "grad_norm": 0.42258816957473755, "learning_rate": 1.258035800972855e-06, "loss": 0.2777, "step": 4787 }, { "epoch": 2.3769650835677645, "grad_norm": 0.4349762499332428, "learning_rate": 1.2561202225911117e-06, "loss": 0.3185, "step": 4788 }, { "epoch": 2.3774615257322522, "grad_norm": 0.431128591299057, "learning_rate": 1.2542058941890734e-06, "loss": 0.3224, "step": 4789 }, { "epoch": 2.37795796789674, "grad_norm": 0.3996073305606842, "learning_rate": 1.2522928164058817e-06, "loss": 0.3586, "step": 4790 }, { "epoch": 2.3784544100612277, "grad_norm": 0.39067089557647705, "learning_rate": 1.2503809898802615e-06, "loss": 0.3603, "step": 4791 }, { "epoch": 2.378950852225716, "grad_norm": 0.3896176218986511, "learning_rate": 1.2484704152505205e-06, "loss": 0.3554, "step": 4792 }, { "epoch": 2.3794472943902036, "grad_norm": 0.39275914430618286, "learning_rate": 1.246561093154548e-06, "loss": 0.291, "step": 4793 }, { "epoch": 2.3799437365546914, "grad_norm": 0.45585617423057556, "learning_rate": 1.2446530242298117e-06, "loss": 0.3273, "step": 4794 }, { "epoch": 2.380440178719179, "grad_norm": 0.3971922695636749, "learning_rate": 1.2427462091133662e-06, "loss": 0.3077, "step": 4795 }, { "epoch": 2.380936620883667, "grad_norm": 0.43668943643569946, "learning_rate": 1.2408406484418455e-06, "loss": 0.3321, "step": 4796 }, { "epoch": 2.381433063048155, "grad_norm": 0.43405476212501526, "learning_rate": 1.2389363428514634e-06, "loss": 0.3585, "step": 4797 }, { "epoch": 2.381929505212643, "grad_norm": 0.37429606914520264, "learning_rate": 1.2370332929780182e-06, "loss": 0.3055, "step": 4798 }, { "epoch": 2.3824259473771305, "grad_norm": 0.4015836715698242, "learning_rate": 1.235131499456882e-06, "loss": 0.3767, "step": 4799 }, { "epoch": 2.3829223895416183, "grad_norm": 0.37390851974487305, "learning_rate": 1.233230962923017e-06, "loss": 0.3076, "step": 4800 }, { "epoch": 2.3834188317061065, "grad_norm": 0.4418709874153137, "learning_rate": 1.2313316840109573e-06, "loss": 0.3757, "step": 4801 }, { "epoch": 2.383915273870594, "grad_norm": 0.40747496485710144, "learning_rate": 1.2294336633548215e-06, "loss": 0.3282, "step": 4802 }, { "epoch": 2.384411716035082, "grad_norm": 0.37169674038887024, "learning_rate": 1.227536901588307e-06, "loss": 0.2811, "step": 4803 }, { "epoch": 2.3849081581995697, "grad_norm": 0.4351259171962738, "learning_rate": 1.2256413993446915e-06, "loss": 0.3877, "step": 4804 }, { "epoch": 2.3854046003640574, "grad_norm": 0.38984188437461853, "learning_rate": 1.2237471572568328e-06, "loss": 0.3384, "step": 4805 }, { "epoch": 2.3859010425285456, "grad_norm": 0.4114418029785156, "learning_rate": 1.2218541759571623e-06, "loss": 0.3855, "step": 4806 }, { "epoch": 2.3863974846930334, "grad_norm": 0.42697662115097046, "learning_rate": 1.2199624560777006e-06, "loss": 0.3054, "step": 4807 }, { "epoch": 2.386893926857521, "grad_norm": 0.3859255313873291, "learning_rate": 1.2180719982500383e-06, "loss": 0.3233, "step": 4808 }, { "epoch": 2.387390369022009, "grad_norm": 0.4708155393600464, "learning_rate": 1.2161828031053502e-06, "loss": 0.3674, "step": 4809 }, { "epoch": 2.3878868111864966, "grad_norm": 0.4130772054195404, "learning_rate": 1.2142948712743824e-06, "loss": 0.3179, "step": 4810 }, { "epoch": 2.388383253350985, "grad_norm": 0.4339039623737335, "learning_rate": 1.2124082033874706e-06, "loss": 0.3519, "step": 4811 }, { "epoch": 2.3888796955154725, "grad_norm": 0.4015069901943207, "learning_rate": 1.2105228000745173e-06, "loss": 0.2905, "step": 4812 }, { "epoch": 2.3893761376799603, "grad_norm": 0.44015610218048096, "learning_rate": 1.208638661965008e-06, "loss": 0.3842, "step": 4813 }, { "epoch": 2.389872579844448, "grad_norm": 0.3657735288143158, "learning_rate": 1.2067557896880066e-06, "loss": 0.3345, "step": 4814 }, { "epoch": 2.3903690220089358, "grad_norm": 0.44600313901901245, "learning_rate": 1.2048741838721523e-06, "loss": 0.3772, "step": 4815 }, { "epoch": 2.390865464173424, "grad_norm": 0.412936270236969, "learning_rate": 1.2029938451456636e-06, "loss": 0.2719, "step": 4816 }, { "epoch": 2.3913619063379117, "grad_norm": 0.45029786229133606, "learning_rate": 1.20111477413633e-06, "loss": 0.3777, "step": 4817 }, { "epoch": 2.3918583485023994, "grad_norm": 0.39951321482658386, "learning_rate": 1.1992369714715285e-06, "loss": 0.3598, "step": 4818 }, { "epoch": 2.392354790666887, "grad_norm": 0.40321165323257446, "learning_rate": 1.1973604377782017e-06, "loss": 0.3166, "step": 4819 }, { "epoch": 2.392851232831375, "grad_norm": 0.43500909209251404, "learning_rate": 1.195485173682875e-06, "loss": 0.351, "step": 4820 }, { "epoch": 2.393347674995863, "grad_norm": 0.4494200348854065, "learning_rate": 1.1936111798116474e-06, "loss": 0.3611, "step": 4821 }, { "epoch": 2.393844117160351, "grad_norm": 0.4344160854816437, "learning_rate": 1.1917384567901946e-06, "loss": 0.3591, "step": 4822 }, { "epoch": 2.3943405593248386, "grad_norm": 0.4019527733325958, "learning_rate": 1.1898670052437705e-06, "loss": 0.2678, "step": 4823 }, { "epoch": 2.3948370014893263, "grad_norm": 0.43428412079811096, "learning_rate": 1.1879968257971979e-06, "loss": 0.3577, "step": 4824 }, { "epoch": 2.3953334436538145, "grad_norm": 0.4661874771118164, "learning_rate": 1.1861279190748804e-06, "loss": 0.3478, "step": 4825 }, { "epoch": 2.3958298858183023, "grad_norm": 0.4305312931537628, "learning_rate": 1.1842602857007957e-06, "loss": 0.276, "step": 4826 }, { "epoch": 2.39632632798279, "grad_norm": 0.4232960045337677, "learning_rate": 1.1823939262984958e-06, "loss": 0.331, "step": 4827 }, { "epoch": 2.3968227701472777, "grad_norm": 0.4636482000350952, "learning_rate": 1.180528841491108e-06, "loss": 0.3951, "step": 4828 }, { "epoch": 2.397319212311766, "grad_norm": 0.4090307950973511, "learning_rate": 1.1786650319013298e-06, "loss": 0.29, "step": 4829 }, { "epoch": 2.3978156544762537, "grad_norm": 0.418504536151886, "learning_rate": 1.1768024981514426e-06, "loss": 0.3373, "step": 4830 }, { "epoch": 2.3983120966407414, "grad_norm": 0.4742494821548462, "learning_rate": 1.174941240863291e-06, "loss": 0.3636, "step": 4831 }, { "epoch": 2.398808538805229, "grad_norm": 0.4275146424770355, "learning_rate": 1.1730812606582996e-06, "loss": 0.3063, "step": 4832 }, { "epoch": 2.399304980969717, "grad_norm": 0.42652377486228943, "learning_rate": 1.1712225581574655e-06, "loss": 0.2706, "step": 4833 }, { "epoch": 2.399801423134205, "grad_norm": 0.4575517177581787, "learning_rate": 1.16936513398136e-06, "loss": 0.3595, "step": 4834 }, { "epoch": 2.400297865298693, "grad_norm": 0.41959941387176514, "learning_rate": 1.167508988750124e-06, "loss": 0.3206, "step": 4835 }, { "epoch": 2.4007943074631806, "grad_norm": 0.45792245864868164, "learning_rate": 1.1656541230834756e-06, "loss": 0.3065, "step": 4836 }, { "epoch": 2.4012907496276683, "grad_norm": 0.44934341311454773, "learning_rate": 1.1638005376007034e-06, "loss": 0.376, "step": 4837 }, { "epoch": 2.401787191792156, "grad_norm": 0.394243061542511, "learning_rate": 1.1619482329206694e-06, "loss": 0.3404, "step": 4838 }, { "epoch": 2.4022836339566442, "grad_norm": 0.39140018820762634, "learning_rate": 1.1600972096618102e-06, "loss": 0.283, "step": 4839 }, { "epoch": 2.402780076121132, "grad_norm": 0.4157099425792694, "learning_rate": 1.1582474684421262e-06, "loss": 0.3774, "step": 4840 }, { "epoch": 2.4032765182856197, "grad_norm": 0.4719199240207672, "learning_rate": 1.1563990098792028e-06, "loss": 0.3982, "step": 4841 }, { "epoch": 2.4037729604501075, "grad_norm": 0.4340226948261261, "learning_rate": 1.1545518345901851e-06, "loss": 0.3423, "step": 4842 }, { "epoch": 2.404269402614595, "grad_norm": 0.39351943135261536, "learning_rate": 1.1527059431917965e-06, "loss": 0.2967, "step": 4843 }, { "epoch": 2.4047658447790834, "grad_norm": 0.4352165460586548, "learning_rate": 1.1508613363003295e-06, "loss": 0.3378, "step": 4844 }, { "epoch": 2.405262286943571, "grad_norm": 0.39378631114959717, "learning_rate": 1.1490180145316487e-06, "loss": 0.3509, "step": 4845 }, { "epoch": 2.405758729108059, "grad_norm": 0.41207456588745117, "learning_rate": 1.1471759785011903e-06, "loss": 0.3319, "step": 4846 }, { "epoch": 2.4062551712725466, "grad_norm": 0.4485085606575012, "learning_rate": 1.1453352288239561e-06, "loss": 0.2779, "step": 4847 }, { "epoch": 2.4067516134370344, "grad_norm": 0.48085150122642517, "learning_rate": 1.143495766114528e-06, "loss": 0.3643, "step": 4848 }, { "epoch": 2.4072480556015226, "grad_norm": 0.41089245676994324, "learning_rate": 1.141657590987048e-06, "loss": 0.2992, "step": 4849 }, { "epoch": 2.4077444977660103, "grad_norm": 0.41436946392059326, "learning_rate": 1.1398207040552344e-06, "loss": 0.3327, "step": 4850 }, { "epoch": 2.408240939930498, "grad_norm": 0.42750290036201477, "learning_rate": 1.1379851059323739e-06, "loss": 0.3464, "step": 4851 }, { "epoch": 2.408737382094986, "grad_norm": 0.4028381407260895, "learning_rate": 1.1361507972313223e-06, "loss": 0.3361, "step": 4852 }, { "epoch": 2.4092338242594735, "grad_norm": 0.41227832436561584, "learning_rate": 1.1343177785645083e-06, "loss": 0.3367, "step": 4853 }, { "epoch": 2.4097302664239617, "grad_norm": 0.41934096813201904, "learning_rate": 1.1324860505439222e-06, "loss": 0.3071, "step": 4854 }, { "epoch": 2.4102267085884495, "grad_norm": 0.43453651666641235, "learning_rate": 1.1306556137811309e-06, "loss": 0.3272, "step": 4855 }, { "epoch": 2.410723150752937, "grad_norm": 0.4099292457103729, "learning_rate": 1.1288264688872674e-06, "loss": 0.3703, "step": 4856 }, { "epoch": 2.411219592917425, "grad_norm": 0.36775287985801697, "learning_rate": 1.1269986164730351e-06, "loss": 0.314, "step": 4857 }, { "epoch": 2.411716035081913, "grad_norm": 0.3969195783138275, "learning_rate": 1.1251720571487002e-06, "loss": 0.3194, "step": 4858 }, { "epoch": 2.412212477246401, "grad_norm": 0.38939616084098816, "learning_rate": 1.1233467915241037e-06, "loss": 0.3092, "step": 4859 }, { "epoch": 2.4127089194108886, "grad_norm": 0.45217904448509216, "learning_rate": 1.121522820208652e-06, "loss": 0.3564, "step": 4860 }, { "epoch": 2.4132053615753764, "grad_norm": 0.4273780286312103, "learning_rate": 1.1197001438113198e-06, "loss": 0.3159, "step": 4861 }, { "epoch": 2.4137018037398645, "grad_norm": 0.43515071272850037, "learning_rate": 1.1178787629406485e-06, "loss": 0.3701, "step": 4862 }, { "epoch": 2.4141982459043523, "grad_norm": 0.4073216915130615, "learning_rate": 1.1160586782047478e-06, "loss": 0.3369, "step": 4863 }, { "epoch": 2.41469468806884, "grad_norm": 0.47455593943595886, "learning_rate": 1.1142398902112967e-06, "loss": 0.3422, "step": 4864 }, { "epoch": 2.4151911302333278, "grad_norm": 0.40955621004104614, "learning_rate": 1.1124223995675353e-06, "loss": 0.3019, "step": 4865 }, { "epoch": 2.4156875723978155, "grad_norm": 0.3941340148448944, "learning_rate": 1.1106062068802765e-06, "loss": 0.3438, "step": 4866 }, { "epoch": 2.4161840145623037, "grad_norm": 0.44260579347610474, "learning_rate": 1.1087913127558974e-06, "loss": 0.3574, "step": 4867 }, { "epoch": 2.4166804567267914, "grad_norm": 0.4242357611656189, "learning_rate": 1.1069777178003416e-06, "loss": 0.3184, "step": 4868 }, { "epoch": 2.417176898891279, "grad_norm": 0.40991953015327454, "learning_rate": 1.1051654226191205e-06, "loss": 0.3396, "step": 4869 }, { "epoch": 2.417673341055767, "grad_norm": 0.4051346182823181, "learning_rate": 1.103354427817307e-06, "loss": 0.2941, "step": 4870 }, { "epoch": 2.4181697832202547, "grad_norm": 0.4682353436946869, "learning_rate": 1.1015447339995473e-06, "loss": 0.3333, "step": 4871 }, { "epoch": 2.418666225384743, "grad_norm": 0.4608347713947296, "learning_rate": 1.099736341770045e-06, "loss": 0.3037, "step": 4872 }, { "epoch": 2.4191626675492306, "grad_norm": 0.4434851109981537, "learning_rate": 1.0979292517325757e-06, "loss": 0.3831, "step": 4873 }, { "epoch": 2.4196591097137183, "grad_norm": 0.4081478416919708, "learning_rate": 1.0961234644904767e-06, "loss": 0.305, "step": 4874 }, { "epoch": 2.420155551878206, "grad_norm": 0.4458496570587158, "learning_rate": 1.0943189806466515e-06, "loss": 0.3712, "step": 4875 }, { "epoch": 2.420651994042694, "grad_norm": 0.45110681653022766, "learning_rate": 1.0925158008035692e-06, "loss": 0.3856, "step": 4876 }, { "epoch": 2.421148436207182, "grad_norm": 0.35769835114479065, "learning_rate": 1.0907139255632587e-06, "loss": 0.2928, "step": 4877 }, { "epoch": 2.4216448783716698, "grad_norm": 0.4645017385482788, "learning_rate": 1.0889133555273228e-06, "loss": 0.3259, "step": 4878 }, { "epoch": 2.4221413205361575, "grad_norm": 0.43230682611465454, "learning_rate": 1.0871140912969186e-06, "loss": 0.3229, "step": 4879 }, { "epoch": 2.4226377627006452, "grad_norm": 0.4492162764072418, "learning_rate": 1.0853161334727746e-06, "loss": 0.3516, "step": 4880 }, { "epoch": 2.423134204865133, "grad_norm": 0.3925682306289673, "learning_rate": 1.0835194826551754e-06, "loss": 0.3443, "step": 4881 }, { "epoch": 2.423630647029621, "grad_norm": 0.3878360092639923, "learning_rate": 1.08172413944398e-06, "loss": 0.3039, "step": 4882 }, { "epoch": 2.424127089194109, "grad_norm": 0.4314170479774475, "learning_rate": 1.0799301044385996e-06, "loss": 0.3801, "step": 4883 }, { "epoch": 2.4246235313585967, "grad_norm": 0.4437444508075714, "learning_rate": 1.0781373782380162e-06, "loss": 0.345, "step": 4884 }, { "epoch": 2.4251199735230844, "grad_norm": 0.3794514536857605, "learning_rate": 1.0763459614407717e-06, "loss": 0.3044, "step": 4885 }, { "epoch": 2.4256164156875726, "grad_norm": 0.4292650520801544, "learning_rate": 1.074555854644972e-06, "loss": 0.3711, "step": 4886 }, { "epoch": 2.4261128578520603, "grad_norm": 0.40473151206970215, "learning_rate": 1.0727670584482857e-06, "loss": 0.3089, "step": 4887 }, { "epoch": 2.426609300016548, "grad_norm": 0.4143485426902771, "learning_rate": 1.0709795734479395e-06, "loss": 0.367, "step": 4888 }, { "epoch": 2.427105742181036, "grad_norm": 0.4023550748825073, "learning_rate": 1.0691934002407323e-06, "loss": 0.3408, "step": 4889 }, { "epoch": 2.427602184345524, "grad_norm": 0.4332507252693176, "learning_rate": 1.0674085394230132e-06, "loss": 0.3629, "step": 4890 }, { "epoch": 2.4280986265100117, "grad_norm": 0.42425453662872314, "learning_rate": 1.0656249915907012e-06, "loss": 0.3009, "step": 4891 }, { "epoch": 2.4285950686744995, "grad_norm": 0.4235536456108093, "learning_rate": 1.0638427573392745e-06, "loss": 0.3052, "step": 4892 }, { "epoch": 2.4290915108389872, "grad_norm": 0.3881295919418335, "learning_rate": 1.062061837263772e-06, "loss": 0.3132, "step": 4893 }, { "epoch": 2.429587953003475, "grad_norm": 0.46212971210479736, "learning_rate": 1.0602822319587958e-06, "loss": 0.372, "step": 4894 }, { "epoch": 2.430084395167963, "grad_norm": 0.4385566711425781, "learning_rate": 1.0585039420185056e-06, "loss": 0.3435, "step": 4895 }, { "epoch": 2.430580837332451, "grad_norm": 0.4148385226726532, "learning_rate": 1.0567269680366255e-06, "loss": 0.3209, "step": 4896 }, { "epoch": 2.4310772794969386, "grad_norm": 0.455157607793808, "learning_rate": 1.0549513106064386e-06, "loss": 0.3711, "step": 4897 }, { "epoch": 2.4315737216614264, "grad_norm": 0.4562593102455139, "learning_rate": 1.0531769703207883e-06, "loss": 0.3068, "step": 4898 }, { "epoch": 2.432070163825914, "grad_norm": 0.434806764125824, "learning_rate": 1.0514039477720805e-06, "loss": 0.3196, "step": 4899 }, { "epoch": 2.4325666059904023, "grad_norm": 0.39483413100242615, "learning_rate": 1.0496322435522748e-06, "loss": 0.3052, "step": 4900 }, { "epoch": 2.43306304815489, "grad_norm": 0.39831551909446716, "learning_rate": 1.0478618582529004e-06, "loss": 0.3191, "step": 4901 }, { "epoch": 2.433559490319378, "grad_norm": 0.42775651812553406, "learning_rate": 1.0460927924650371e-06, "loss": 0.3939, "step": 4902 }, { "epoch": 2.4340559324838655, "grad_norm": 0.3414803743362427, "learning_rate": 1.0443250467793297e-06, "loss": 0.2812, "step": 4903 }, { "epoch": 2.4345523746483533, "grad_norm": 0.42315468192100525, "learning_rate": 1.0425586217859796e-06, "loss": 0.433, "step": 4904 }, { "epoch": 2.4350488168128415, "grad_norm": 0.4038551151752472, "learning_rate": 1.0407935180747496e-06, "loss": 0.2975, "step": 4905 }, { "epoch": 2.435545258977329, "grad_norm": 0.4035782516002655, "learning_rate": 1.0390297362349572e-06, "loss": 0.2973, "step": 4906 }, { "epoch": 2.436041701141817, "grad_norm": 0.47594505548477173, "learning_rate": 1.0372672768554813e-06, "loss": 0.3858, "step": 4907 }, { "epoch": 2.4365381433063047, "grad_norm": 0.46542277932167053, "learning_rate": 1.0355061405247635e-06, "loss": 0.3698, "step": 4908 }, { "epoch": 2.4370345854707924, "grad_norm": 0.3812120258808136, "learning_rate": 1.0337463278307953e-06, "loss": 0.2842, "step": 4909 }, { "epoch": 2.4375310276352806, "grad_norm": 0.42785266041755676, "learning_rate": 1.0319878393611321e-06, "loss": 0.3643, "step": 4910 }, { "epoch": 2.4380274697997684, "grad_norm": 0.4475156366825104, "learning_rate": 1.0302306757028824e-06, "loss": 0.2978, "step": 4911 }, { "epoch": 2.438523911964256, "grad_norm": 0.4024488031864166, "learning_rate": 1.0284748374427207e-06, "loss": 0.3292, "step": 4912 }, { "epoch": 2.439020354128744, "grad_norm": 0.3762087821960449, "learning_rate": 1.0267203251668689e-06, "loss": 0.3016, "step": 4913 }, { "epoch": 2.4395167962932316, "grad_norm": 0.4280141294002533, "learning_rate": 1.0249671394611134e-06, "loss": 0.3526, "step": 4914 }, { "epoch": 2.44001323845772, "grad_norm": 0.4248987138271332, "learning_rate": 1.0232152809107937e-06, "loss": 0.3377, "step": 4915 }, { "epoch": 2.4405096806222075, "grad_norm": 0.4551001787185669, "learning_rate": 1.0214647501008095e-06, "loss": 0.3409, "step": 4916 }, { "epoch": 2.4410061227866953, "grad_norm": 0.4239339530467987, "learning_rate": 1.0197155476156156e-06, "loss": 0.3203, "step": 4917 }, { "epoch": 2.441502564951183, "grad_norm": 0.39705294370651245, "learning_rate": 1.0179676740392196e-06, "loss": 0.3179, "step": 4918 }, { "epoch": 2.441999007115671, "grad_norm": 0.46548035740852356, "learning_rate": 1.0162211299551944e-06, "loss": 0.3404, "step": 4919 }, { "epoch": 2.442495449280159, "grad_norm": 0.4252835512161255, "learning_rate": 1.0144759159466594e-06, "loss": 0.2837, "step": 4920 }, { "epoch": 2.4429918914446467, "grad_norm": 0.5222649574279785, "learning_rate": 1.0127320325962953e-06, "loss": 0.4287, "step": 4921 }, { "epoch": 2.4434883336091344, "grad_norm": 0.47506436705589294, "learning_rate": 1.0109894804863378e-06, "loss": 0.3557, "step": 4922 }, { "epoch": 2.4439847757736226, "grad_norm": 0.4121977388858795, "learning_rate": 1.0092482601985775e-06, "loss": 0.2863, "step": 4923 }, { "epoch": 2.4444812179381104, "grad_norm": 0.44476863741874695, "learning_rate": 1.0075083723143614e-06, "loss": 0.3314, "step": 4924 }, { "epoch": 2.444977660102598, "grad_norm": 0.42945659160614014, "learning_rate": 1.005769817414589e-06, "loss": 0.3442, "step": 4925 }, { "epoch": 2.445474102267086, "grad_norm": 0.3876710832118988, "learning_rate": 1.0040325960797176e-06, "loss": 0.2636, "step": 4926 }, { "epoch": 2.4459705444315736, "grad_norm": 0.48572227358818054, "learning_rate": 1.0022967088897573e-06, "loss": 0.3549, "step": 4927 }, { "epoch": 2.4464669865960618, "grad_norm": 0.44838011264801025, "learning_rate": 1.0005621564242762e-06, "loss": 0.2937, "step": 4928 }, { "epoch": 2.4469634287605495, "grad_norm": 0.4109857678413391, "learning_rate": 9.988289392623895e-07, "loss": 0.3092, "step": 4929 }, { "epoch": 2.4474598709250373, "grad_norm": 0.4477539658546448, "learning_rate": 9.970970579827771e-07, "loss": 0.3819, "step": 4930 }, { "epoch": 2.447956313089525, "grad_norm": 0.4209615886211395, "learning_rate": 9.953665131636624e-07, "loss": 0.3473, "step": 4931 }, { "epoch": 2.4484527552540127, "grad_norm": 0.3907851576805115, "learning_rate": 9.936373053828297e-07, "loss": 0.3417, "step": 4932 }, { "epoch": 2.448949197418501, "grad_norm": 0.43923887610435486, "learning_rate": 9.919094352176134e-07, "loss": 0.3095, "step": 4933 }, { "epoch": 2.4494456395829887, "grad_norm": 0.44782891869544983, "learning_rate": 9.901829032449028e-07, "loss": 0.324, "step": 4934 }, { "epoch": 2.4499420817474764, "grad_norm": 0.45272982120513916, "learning_rate": 9.884577100411413e-07, "loss": 0.408, "step": 4935 }, { "epoch": 2.450438523911964, "grad_norm": 0.4352302849292755, "learning_rate": 9.867338561823215e-07, "loss": 0.3085, "step": 4936 }, { "epoch": 2.450934966076452, "grad_norm": 0.4174683392047882, "learning_rate": 9.850113422439927e-07, "loss": 0.2923, "step": 4937 }, { "epoch": 2.45143140824094, "grad_norm": 0.3798356056213379, "learning_rate": 9.832901688012554e-07, "loss": 0.2815, "step": 4938 }, { "epoch": 2.451927850405428, "grad_norm": 0.4120887517929077, "learning_rate": 9.815703364287622e-07, "loss": 0.3636, "step": 4939 }, { "epoch": 2.4524242925699156, "grad_norm": 0.3752826750278473, "learning_rate": 9.798518457007206e-07, "loss": 0.3202, "step": 4940 }, { "epoch": 2.4529207347344033, "grad_norm": 0.4152010679244995, "learning_rate": 9.781346971908833e-07, "loss": 0.3742, "step": 4941 }, { "epoch": 2.453417176898891, "grad_norm": 0.38347944617271423, "learning_rate": 9.764188914725647e-07, "loss": 0.3024, "step": 4942 }, { "epoch": 2.4539136190633792, "grad_norm": 0.46754711866378784, "learning_rate": 9.747044291186226e-07, "loss": 0.3921, "step": 4943 }, { "epoch": 2.454410061227867, "grad_norm": 0.39592596888542175, "learning_rate": 9.7299131070147e-07, "loss": 0.3175, "step": 4944 }, { "epoch": 2.4549065033923547, "grad_norm": 0.43484020233154297, "learning_rate": 9.712795367930706e-07, "loss": 0.3807, "step": 4945 }, { "epoch": 2.4554029455568425, "grad_norm": 0.34638145565986633, "learning_rate": 9.695691079649394e-07, "loss": 0.2758, "step": 4946 }, { "epoch": 2.4558993877213307, "grad_norm": 0.43307527899742126, "learning_rate": 9.678600247881431e-07, "loss": 0.3577, "step": 4947 }, { "epoch": 2.4563958298858184, "grad_norm": 0.38251182436943054, "learning_rate": 9.661522878332947e-07, "loss": 0.364, "step": 4948 }, { "epoch": 2.456892272050306, "grad_norm": 0.4067479968070984, "learning_rate": 9.64445897670566e-07, "loss": 0.3195, "step": 4949 }, { "epoch": 2.457388714214794, "grad_norm": 0.40363258123397827, "learning_rate": 9.627408548696704e-07, "loss": 0.3407, "step": 4950 }, { "epoch": 2.4578851563792816, "grad_norm": 0.4304896891117096, "learning_rate": 9.61037159999878e-07, "loss": 0.4037, "step": 4951 }, { "epoch": 2.45838159854377, "grad_norm": 0.37479662895202637, "learning_rate": 9.593348136300028e-07, "loss": 0.32, "step": 4952 }, { "epoch": 2.4588780407082576, "grad_norm": 0.35158973932266235, "learning_rate": 9.57633816328416e-07, "loss": 0.3296, "step": 4953 }, { "epoch": 2.4593744828727453, "grad_norm": 0.4482909142971039, "learning_rate": 9.559341686630319e-07, "loss": 0.4185, "step": 4954 }, { "epoch": 2.459870925037233, "grad_norm": 0.4351275563240051, "learning_rate": 9.542358712013155e-07, "loss": 0.3315, "step": 4955 }, { "epoch": 2.4603673672017212, "grad_norm": 0.4497017562389374, "learning_rate": 9.525389245102867e-07, "loss": 0.375, "step": 4956 }, { "epoch": 2.460863809366209, "grad_norm": 0.36904147267341614, "learning_rate": 9.508433291565061e-07, "loss": 0.3287, "step": 4957 }, { "epoch": 2.4613602515306967, "grad_norm": 0.4194772243499756, "learning_rate": 9.491490857060887e-07, "loss": 0.3317, "step": 4958 }, { "epoch": 2.4618566936951845, "grad_norm": 0.3785308301448822, "learning_rate": 9.474561947246935e-07, "loss": 0.3063, "step": 4959 }, { "epoch": 2.462353135859672, "grad_norm": 0.40041792392730713, "learning_rate": 9.457646567775347e-07, "loss": 0.3493, "step": 4960 }, { "epoch": 2.4628495780241604, "grad_norm": 0.3974493741989136, "learning_rate": 9.440744724293682e-07, "loss": 0.3143, "step": 4961 }, { "epoch": 2.463346020188648, "grad_norm": 0.42294904589653015, "learning_rate": 9.423856422445015e-07, "loss": 0.3463, "step": 4962 }, { "epoch": 2.463842462353136, "grad_norm": 0.4321286380290985, "learning_rate": 9.406981667867888e-07, "loss": 0.348, "step": 4963 }, { "epoch": 2.4643389045176236, "grad_norm": 0.4494534730911255, "learning_rate": 9.390120466196323e-07, "loss": 0.3036, "step": 4964 }, { "epoch": 2.4648353466821113, "grad_norm": 0.37123560905456543, "learning_rate": 9.373272823059836e-07, "loss": 0.295, "step": 4965 }, { "epoch": 2.4653317888465995, "grad_norm": 0.41119396686553955, "learning_rate": 9.356438744083368e-07, "loss": 0.3505, "step": 4966 }, { "epoch": 2.4658282310110873, "grad_norm": 0.40095254778862, "learning_rate": 9.339618234887371e-07, "loss": 0.4115, "step": 4967 }, { "epoch": 2.466324673175575, "grad_norm": 0.4015076756477356, "learning_rate": 9.322811301087753e-07, "loss": 0.3663, "step": 4968 }, { "epoch": 2.4668211153400628, "grad_norm": 0.4070340394973755, "learning_rate": 9.306017948295903e-07, "loss": 0.3198, "step": 4969 }, { "epoch": 2.4673175575045505, "grad_norm": 0.38830164074897766, "learning_rate": 9.289238182118654e-07, "loss": 0.2948, "step": 4970 }, { "epoch": 2.4678139996690387, "grad_norm": 0.505373477935791, "learning_rate": 9.272472008158323e-07, "loss": 0.3951, "step": 4971 }, { "epoch": 2.4683104418335264, "grad_norm": 0.3736472427845001, "learning_rate": 9.255719432012683e-07, "loss": 0.3064, "step": 4972 }, { "epoch": 2.468806883998014, "grad_norm": 0.4481365978717804, "learning_rate": 9.238980459274949e-07, "loss": 0.3299, "step": 4973 }, { "epoch": 2.469303326162502, "grad_norm": 0.4377484917640686, "learning_rate": 9.222255095533816e-07, "loss": 0.3319, "step": 4974 }, { "epoch": 2.4697997683269897, "grad_norm": 0.4421977400779724, "learning_rate": 9.20554334637343e-07, "loss": 0.3605, "step": 4975 }, { "epoch": 2.470296210491478, "grad_norm": 0.3772713243961334, "learning_rate": 9.188845217373399e-07, "loss": 0.3174, "step": 4976 }, { "epoch": 2.4707926526559656, "grad_norm": 0.42920979857444763, "learning_rate": 9.172160714108752e-07, "loss": 0.3437, "step": 4977 }, { "epoch": 2.4712890948204533, "grad_norm": 0.41582080721855164, "learning_rate": 9.15548984214999e-07, "loss": 0.275, "step": 4978 }, { "epoch": 2.471785536984941, "grad_norm": 0.41253283619880676, "learning_rate": 9.138832607063103e-07, "loss": 0.4013, "step": 4979 }, { "epoch": 2.4722819791494293, "grad_norm": 0.4353925287723541, "learning_rate": 9.122189014409449e-07, "loss": 0.334, "step": 4980 }, { "epoch": 2.472778421313917, "grad_norm": 0.4681949317455292, "learning_rate": 9.1055590697459e-07, "loss": 0.3662, "step": 4981 }, { "epoch": 2.4732748634784048, "grad_norm": 0.4250081181526184, "learning_rate": 9.088942778624704e-07, "loss": 0.3485, "step": 4982 }, { "epoch": 2.4737713056428925, "grad_norm": 0.40461334586143494, "learning_rate": 9.072340146593639e-07, "loss": 0.3591, "step": 4983 }, { "epoch": 2.4742677478073807, "grad_norm": 0.38682129979133606, "learning_rate": 9.055751179195832e-07, "loss": 0.317, "step": 4984 }, { "epoch": 2.4747641899718684, "grad_norm": 0.38414981961250305, "learning_rate": 9.039175881969903e-07, "loss": 0.3502, "step": 4985 }, { "epoch": 2.475260632136356, "grad_norm": 0.3873783349990845, "learning_rate": 9.022614260449897e-07, "loss": 0.3159, "step": 4986 }, { "epoch": 2.475757074300844, "grad_norm": 0.4324190616607666, "learning_rate": 9.006066320165285e-07, "loss": 0.4344, "step": 4987 }, { "epoch": 2.4762535164653316, "grad_norm": 0.4135836958885193, "learning_rate": 8.989532066640988e-07, "loss": 0.3419, "step": 4988 }, { "epoch": 2.47674995862982, "grad_norm": 0.3968730866909027, "learning_rate": 8.973011505397306e-07, "loss": 0.3473, "step": 4989 }, { "epoch": 2.4772464007943076, "grad_norm": 0.3740089237689972, "learning_rate": 8.956504641950053e-07, "loss": 0.3219, "step": 4990 }, { "epoch": 2.4777428429587953, "grad_norm": 0.43712687492370605, "learning_rate": 8.940011481810384e-07, "loss": 0.3182, "step": 4991 }, { "epoch": 2.478239285123283, "grad_norm": 0.48936817049980164, "learning_rate": 8.923532030484938e-07, "loss": 0.3722, "step": 4992 }, { "epoch": 2.478735727287771, "grad_norm": 0.4304046928882599, "learning_rate": 8.907066293475752e-07, "loss": 0.4289, "step": 4993 }, { "epoch": 2.479232169452259, "grad_norm": 0.39932650327682495, "learning_rate": 8.890614276280285e-07, "loss": 0.3403, "step": 4994 }, { "epoch": 2.4797286116167467, "grad_norm": 0.38748908042907715, "learning_rate": 8.874175984391431e-07, "loss": 0.3258, "step": 4995 }, { "epoch": 2.4802250537812345, "grad_norm": 0.4391377568244934, "learning_rate": 8.857751423297456e-07, "loss": 0.4096, "step": 4996 }, { "epoch": 2.480721495945722, "grad_norm": 0.39394158124923706, "learning_rate": 8.841340598482117e-07, "loss": 0.3342, "step": 4997 }, { "epoch": 2.48121793811021, "grad_norm": 0.3770965337753296, "learning_rate": 8.824943515424511e-07, "loss": 0.321, "step": 4998 }, { "epoch": 2.481714380274698, "grad_norm": 0.3880070447921753, "learning_rate": 8.808560179599201e-07, "loss": 0.2878, "step": 4999 }, { "epoch": 2.482210822439186, "grad_norm": 0.4300988018512726, "learning_rate": 8.792190596476102e-07, "loss": 0.3374, "step": 5000 }, { "epoch": 2.4827072646036736, "grad_norm": 0.4309059977531433, "learning_rate": 8.775834771520608e-07, "loss": 0.3545, "step": 5001 }, { "epoch": 2.4832037067681614, "grad_norm": 0.4141116440296173, "learning_rate": 8.75949271019349e-07, "loss": 0.322, "step": 5002 }, { "epoch": 2.483700148932649, "grad_norm": 0.48729193210601807, "learning_rate": 8.743164417950883e-07, "loss": 0.3327, "step": 5003 }, { "epoch": 2.4841965910971373, "grad_norm": 0.39224112033843994, "learning_rate": 8.726849900244383e-07, "loss": 0.3651, "step": 5004 }, { "epoch": 2.484693033261625, "grad_norm": 0.37295711040496826, "learning_rate": 8.710549162520954e-07, "loss": 0.3327, "step": 5005 }, { "epoch": 2.485189475426113, "grad_norm": 0.4513383209705353, "learning_rate": 8.694262210222992e-07, "loss": 0.3862, "step": 5006 }, { "epoch": 2.4856859175906005, "grad_norm": 0.41189172863960266, "learning_rate": 8.677989048788238e-07, "loss": 0.3114, "step": 5007 }, { "epoch": 2.4861823597550887, "grad_norm": 0.4466800391674042, "learning_rate": 8.661729683649867e-07, "loss": 0.3241, "step": 5008 }, { "epoch": 2.4866788019195765, "grad_norm": 0.4370972812175751, "learning_rate": 8.645484120236442e-07, "loss": 0.3173, "step": 5009 }, { "epoch": 2.487175244084064, "grad_norm": 0.4732681214809418, "learning_rate": 8.629252363971918e-07, "loss": 0.3736, "step": 5010 }, { "epoch": 2.487671686248552, "grad_norm": 0.39403584599494934, "learning_rate": 8.613034420275634e-07, "loss": 0.2917, "step": 5011 }, { "epoch": 2.4881681284130397, "grad_norm": 0.4636339843273163, "learning_rate": 8.596830294562325e-07, "loss": 0.3378, "step": 5012 }, { "epoch": 2.488664570577528, "grad_norm": 0.4840598702430725, "learning_rate": 8.580639992242113e-07, "loss": 0.3647, "step": 5013 }, { "epoch": 2.4891610127420156, "grad_norm": 0.4111151397228241, "learning_rate": 8.564463518720483e-07, "loss": 0.3544, "step": 5014 }, { "epoch": 2.4896574549065034, "grad_norm": 0.4120572805404663, "learning_rate": 8.548300879398324e-07, "loss": 0.3167, "step": 5015 }, { "epoch": 2.490153897070991, "grad_norm": 0.4743708074092865, "learning_rate": 8.532152079671913e-07, "loss": 0.3492, "step": 5016 }, { "epoch": 2.4906503392354793, "grad_norm": 0.4058235287666321, "learning_rate": 8.516017124932885e-07, "loss": 0.3224, "step": 5017 }, { "epoch": 2.491146781399967, "grad_norm": 0.40863052010536194, "learning_rate": 8.499896020568276e-07, "loss": 0.3707, "step": 5018 }, { "epoch": 2.491643223564455, "grad_norm": 0.3685472011566162, "learning_rate": 8.483788771960455e-07, "loss": 0.3258, "step": 5019 }, { "epoch": 2.4921396657289425, "grad_norm": 0.4659629166126251, "learning_rate": 8.46769538448724e-07, "loss": 0.3536, "step": 5020 }, { "epoch": 2.4926361078934303, "grad_norm": 0.41071459650993347, "learning_rate": 8.451615863521734e-07, "loss": 0.3358, "step": 5021 }, { "epoch": 2.4931325500579185, "grad_norm": 0.45929694175720215, "learning_rate": 8.435550214432486e-07, "loss": 0.282, "step": 5022 }, { "epoch": 2.493628992222406, "grad_norm": 0.43146124482154846, "learning_rate": 8.419498442583335e-07, "loss": 0.342, "step": 5023 }, { "epoch": 2.494125434386894, "grad_norm": 0.42371466755867004, "learning_rate": 8.403460553333586e-07, "loss": 0.358, "step": 5024 }, { "epoch": 2.4946218765513817, "grad_norm": 0.4173057973384857, "learning_rate": 8.387436552037814e-07, "loss": 0.3814, "step": 5025 }, { "epoch": 2.4951183187158694, "grad_norm": 0.3943523168563843, "learning_rate": 8.371426444045994e-07, "loss": 0.3266, "step": 5026 }, { "epoch": 2.4956147608803576, "grad_norm": 0.43229615688323975, "learning_rate": 8.35543023470351e-07, "loss": 0.3299, "step": 5027 }, { "epoch": 2.4961112030448454, "grad_norm": 0.4341732859611511, "learning_rate": 8.339447929351025e-07, "loss": 0.3459, "step": 5028 }, { "epoch": 2.496607645209333, "grad_norm": 0.42122527956962585, "learning_rate": 8.323479533324613e-07, "loss": 0.2797, "step": 5029 }, { "epoch": 2.497104087373821, "grad_norm": 0.47553539276123047, "learning_rate": 8.307525051955656e-07, "loss": 0.3704, "step": 5030 }, { "epoch": 2.4976005295383086, "grad_norm": 0.388159841299057, "learning_rate": 8.29158449057097e-07, "loss": 0.3069, "step": 5031 }, { "epoch": 2.4980969717027968, "grad_norm": 0.41376984119415283, "learning_rate": 8.275657854492636e-07, "loss": 0.3776, "step": 5032 }, { "epoch": 2.4985934138672845, "grad_norm": 0.42286571860313416, "learning_rate": 8.259745149038145e-07, "loss": 0.3777, "step": 5033 }, { "epoch": 2.4990898560317722, "grad_norm": 0.40391090512275696, "learning_rate": 8.243846379520309e-07, "loss": 0.3825, "step": 5034 }, { "epoch": 2.49958629819626, "grad_norm": 0.4074123203754425, "learning_rate": 8.227961551247298e-07, "loss": 0.3401, "step": 5035 }, { "epoch": 2.5000827403607477, "grad_norm": 0.47780841588974, "learning_rate": 8.212090669522632e-07, "loss": 0.3482, "step": 5036 }, { "epoch": 2.500579182525236, "grad_norm": 0.4247952699661255, "learning_rate": 8.196233739645154e-07, "loss": 0.2916, "step": 5037 }, { "epoch": 2.5010756246897237, "grad_norm": 0.39462852478027344, "learning_rate": 8.180390766909063e-07, "loss": 0.3272, "step": 5038 }, { "epoch": 2.5015720668542114, "grad_norm": 0.4666635990142822, "learning_rate": 8.164561756603901e-07, "loss": 0.4186, "step": 5039 }, { "epoch": 2.5020685090186996, "grad_norm": 0.374197393655777, "learning_rate": 8.148746714014544e-07, "loss": 0.2995, "step": 5040 }, { "epoch": 2.502564951183187, "grad_norm": 0.42155349254608154, "learning_rate": 8.132945644421203e-07, "loss": 0.3537, "step": 5041 }, { "epoch": 2.503061393347675, "grad_norm": 0.39392760396003723, "learning_rate": 8.11715855309943e-07, "loss": 0.3616, "step": 5042 }, { "epoch": 2.503557835512163, "grad_norm": 0.355061799287796, "learning_rate": 8.10138544532012e-07, "loss": 0.3657, "step": 5043 }, { "epoch": 2.5040542776766506, "grad_norm": 0.3879982829093933, "learning_rate": 8.08562632634945e-07, "loss": 0.3656, "step": 5044 }, { "epoch": 2.5045507198411388, "grad_norm": 0.4383074641227722, "learning_rate": 8.069881201448987e-07, "loss": 0.4003, "step": 5045 }, { "epoch": 2.5050471620056265, "grad_norm": 0.3911875784397125, "learning_rate": 8.054150075875589e-07, "loss": 0.3366, "step": 5046 }, { "epoch": 2.5055436041701142, "grad_norm": 0.3973695933818817, "learning_rate": 8.038432954881464e-07, "loss": 0.3671, "step": 5047 }, { "epoch": 2.506040046334602, "grad_norm": 0.43567144870758057, "learning_rate": 8.022729843714116e-07, "loss": 0.3174, "step": 5048 }, { "epoch": 2.5065364884990897, "grad_norm": 0.39143046736717224, "learning_rate": 8.007040747616379e-07, "loss": 0.3392, "step": 5049 }, { "epoch": 2.507032930663578, "grad_norm": 0.4285733699798584, "learning_rate": 7.991365671826462e-07, "loss": 0.3581, "step": 5050 }, { "epoch": 2.5075293728280657, "grad_norm": 0.39413416385650635, "learning_rate": 7.975704621577796e-07, "loss": 0.319, "step": 5051 }, { "epoch": 2.5080258149925534, "grad_norm": 0.4794873893260956, "learning_rate": 7.960057602099203e-07, "loss": 0.3843, "step": 5052 }, { "epoch": 2.508522257157041, "grad_norm": 0.4428650438785553, "learning_rate": 7.944424618614794e-07, "loss": 0.3005, "step": 5053 }, { "epoch": 2.509018699321529, "grad_norm": 0.41835644841194153, "learning_rate": 7.928805676344009e-07, "loss": 0.3422, "step": 5054 }, { "epoch": 2.509515141486017, "grad_norm": 0.40410467982292175, "learning_rate": 7.913200780501568e-07, "loss": 0.3367, "step": 5055 }, { "epoch": 2.510011583650505, "grad_norm": 0.38501936197280884, "learning_rate": 7.897609936297529e-07, "loss": 0.3014, "step": 5056 }, { "epoch": 2.5105080258149925, "grad_norm": 0.42952796816825867, "learning_rate": 7.882033148937252e-07, "loss": 0.3287, "step": 5057 }, { "epoch": 2.5110044679794803, "grad_norm": 0.41027867794036865, "learning_rate": 7.866470423621402e-07, "loss": 0.3762, "step": 5058 }, { "epoch": 2.511500910143968, "grad_norm": 0.4502750635147095, "learning_rate": 7.850921765545966e-07, "loss": 0.2945, "step": 5059 }, { "epoch": 2.511997352308456, "grad_norm": 0.39733874797821045, "learning_rate": 7.835387179902182e-07, "loss": 0.3197, "step": 5060 }, { "epoch": 2.512493794472944, "grad_norm": 0.45043399930000305, "learning_rate": 7.819866671876669e-07, "loss": 0.2954, "step": 5061 }, { "epoch": 2.5129902366374317, "grad_norm": 0.39711907505989075, "learning_rate": 7.804360246651271e-07, "loss": 0.3842, "step": 5062 }, { "epoch": 2.5134866788019194, "grad_norm": 0.3854629397392273, "learning_rate": 7.788867909403169e-07, "loss": 0.3925, "step": 5063 }, { "epoch": 2.513983120966407, "grad_norm": 0.36949700117111206, "learning_rate": 7.773389665304842e-07, "loss": 0.3119, "step": 5064 }, { "epoch": 2.5144795631308954, "grad_norm": 0.45777666568756104, "learning_rate": 7.757925519524045e-07, "loss": 0.3317, "step": 5065 }, { "epoch": 2.514976005295383, "grad_norm": 0.413135290145874, "learning_rate": 7.742475477223859e-07, "loss": 0.3342, "step": 5066 }, { "epoch": 2.515472447459871, "grad_norm": 0.4179292321205139, "learning_rate": 7.727039543562586e-07, "loss": 0.3637, "step": 5067 }, { "epoch": 2.5159688896243586, "grad_norm": 0.39852797985076904, "learning_rate": 7.711617723693921e-07, "loss": 0.327, "step": 5068 }, { "epoch": 2.5164653317888463, "grad_norm": 0.44945135712623596, "learning_rate": 7.696210022766753e-07, "loss": 0.3475, "step": 5069 }, { "epoch": 2.5169617739533345, "grad_norm": 0.40098342299461365, "learning_rate": 7.680816445925315e-07, "loss": 0.3416, "step": 5070 }, { "epoch": 2.5174582161178223, "grad_norm": 0.428720086812973, "learning_rate": 7.665436998309067e-07, "loss": 0.3461, "step": 5071 }, { "epoch": 2.51795465828231, "grad_norm": 0.431631863117218, "learning_rate": 7.650071685052835e-07, "loss": 0.338, "step": 5072 }, { "epoch": 2.518451100446798, "grad_norm": 0.3968772888183594, "learning_rate": 7.634720511286664e-07, "loss": 0.3361, "step": 5073 }, { "epoch": 2.518947542611286, "grad_norm": 0.3988153636455536, "learning_rate": 7.619383482135884e-07, "loss": 0.3389, "step": 5074 }, { "epoch": 2.5194439847757737, "grad_norm": 0.4393404424190521, "learning_rate": 7.604060602721114e-07, "loss": 0.3461, "step": 5075 }, { "epoch": 2.5199404269402614, "grad_norm": 0.4237816035747528, "learning_rate": 7.588751878158251e-07, "loss": 0.3585, "step": 5076 }, { "epoch": 2.520436869104749, "grad_norm": 0.36722514033317566, "learning_rate": 7.57345731355848e-07, "loss": 0.3193, "step": 5077 }, { "epoch": 2.5209333112692374, "grad_norm": 0.4812171757221222, "learning_rate": 7.558176914028203e-07, "loss": 0.4514, "step": 5078 }, { "epoch": 2.521429753433725, "grad_norm": 0.35984012484550476, "learning_rate": 7.542910684669153e-07, "loss": 0.2718, "step": 5079 }, { "epoch": 2.521926195598213, "grad_norm": 0.4246314764022827, "learning_rate": 7.527658630578305e-07, "loss": 0.3462, "step": 5080 }, { "epoch": 2.5224226377627006, "grad_norm": 0.40852391719818115, "learning_rate": 7.51242075684791e-07, "loss": 0.3187, "step": 5081 }, { "epoch": 2.5229190799271883, "grad_norm": 0.37064236402511597, "learning_rate": 7.49719706856547e-07, "loss": 0.2963, "step": 5082 }, { "epoch": 2.5234155220916765, "grad_norm": 0.40142834186553955, "learning_rate": 7.48198757081377e-07, "loss": 0.3857, "step": 5083 }, { "epoch": 2.5239119642561643, "grad_norm": 0.4130881130695343, "learning_rate": 7.466792268670853e-07, "loss": 0.3703, "step": 5084 }, { "epoch": 2.524408406420652, "grad_norm": 0.40295645594596863, "learning_rate": 7.451611167209999e-07, "loss": 0.2313, "step": 5085 }, { "epoch": 2.5249048485851397, "grad_norm": 0.4253508746623993, "learning_rate": 7.436444271499776e-07, "loss": 0.3457, "step": 5086 }, { "epoch": 2.5254012907496275, "grad_norm": 0.41305944323539734, "learning_rate": 7.421291586604001e-07, "loss": 0.3329, "step": 5087 }, { "epoch": 2.5258977329141157, "grad_norm": 0.4771255552768707, "learning_rate": 7.406153117581733e-07, "loss": 0.3563, "step": 5088 }, { "epoch": 2.5263941750786034, "grad_norm": 0.38309961557388306, "learning_rate": 7.391028869487316e-07, "loss": 0.346, "step": 5089 }, { "epoch": 2.526890617243091, "grad_norm": 0.37685123085975647, "learning_rate": 7.375918847370294e-07, "loss": 0.3155, "step": 5090 }, { "epoch": 2.527387059407579, "grad_norm": 0.38845428824424744, "learning_rate": 7.360823056275528e-07, "loss": 0.3623, "step": 5091 }, { "epoch": 2.5278835015720666, "grad_norm": 0.4147282838821411, "learning_rate": 7.345741501243065e-07, "loss": 0.3469, "step": 5092 }, { "epoch": 2.528379943736555, "grad_norm": 0.4102749824523926, "learning_rate": 7.330674187308234e-07, "loss": 0.3292, "step": 5093 }, { "epoch": 2.5288763859010426, "grad_norm": 0.4364238381385803, "learning_rate": 7.315621119501609e-07, "loss": 0.3599, "step": 5094 }, { "epoch": 2.5293728280655303, "grad_norm": 0.35878676176071167, "learning_rate": 7.300582302848991e-07, "loss": 0.3021, "step": 5095 }, { "epoch": 2.529869270230018, "grad_norm": 0.42601820826530457, "learning_rate": 7.285557742371446e-07, "loss": 0.3934, "step": 5096 }, { "epoch": 2.530365712394506, "grad_norm": 0.39129552245140076, "learning_rate": 7.270547443085241e-07, "loss": 0.3304, "step": 5097 }, { "epoch": 2.530862154558994, "grad_norm": 0.43485531210899353, "learning_rate": 7.255551410001938e-07, "loss": 0.3807, "step": 5098 }, { "epoch": 2.5313585967234817, "grad_norm": 0.3834298849105835, "learning_rate": 7.240569648128282e-07, "loss": 0.3343, "step": 5099 }, { "epoch": 2.5318550388879695, "grad_norm": 0.4012676179409027, "learning_rate": 7.225602162466294e-07, "loss": 0.3106, "step": 5100 }, { "epoch": 2.5323514810524577, "grad_norm": 0.42800629138946533, "learning_rate": 7.210648958013177e-07, "loss": 0.3355, "step": 5101 }, { "epoch": 2.532847923216945, "grad_norm": 0.4322647452354431, "learning_rate": 7.195710039761444e-07, "loss": 0.3757, "step": 5102 }, { "epoch": 2.533344365381433, "grad_norm": 0.3836856782436371, "learning_rate": 7.180785412698765e-07, "loss": 0.3196, "step": 5103 }, { "epoch": 2.533840807545921, "grad_norm": 0.4290366470813751, "learning_rate": 7.165875081808072e-07, "loss": 0.3842, "step": 5104 }, { "epoch": 2.5343372497104086, "grad_norm": 0.37413981556892395, "learning_rate": 7.150979052067524e-07, "loss": 0.3382, "step": 5105 }, { "epoch": 2.534833691874897, "grad_norm": 0.35967203974723816, "learning_rate": 7.136097328450497e-07, "loss": 0.342, "step": 5106 }, { "epoch": 2.5353301340393846, "grad_norm": 0.4201495349407196, "learning_rate": 7.12122991592561e-07, "loss": 0.335, "step": 5107 }, { "epoch": 2.5358265762038723, "grad_norm": 0.4186602532863617, "learning_rate": 7.106376819456651e-07, "loss": 0.3196, "step": 5108 }, { "epoch": 2.53632301836836, "grad_norm": 0.38059890270233154, "learning_rate": 7.091538044002705e-07, "loss": 0.3179, "step": 5109 }, { "epoch": 2.536819460532848, "grad_norm": 0.3883368670940399, "learning_rate": 7.076713594518014e-07, "loss": 0.3626, "step": 5110 }, { "epoch": 2.537315902697336, "grad_norm": 0.4195650815963745, "learning_rate": 7.061903475952059e-07, "loss": 0.3075, "step": 5111 }, { "epoch": 2.5378123448618237, "grad_norm": 0.43592244386672974, "learning_rate": 7.047107693249544e-07, "loss": 0.3051, "step": 5112 }, { "epoch": 2.5383087870263115, "grad_norm": 0.46090131998062134, "learning_rate": 7.032326251350375e-07, "loss": 0.3523, "step": 5113 }, { "epoch": 2.538805229190799, "grad_norm": 0.397056519985199, "learning_rate": 7.017559155189679e-07, "loss": 0.3724, "step": 5114 }, { "epoch": 2.539301671355287, "grad_norm": 0.4208011031150818, "learning_rate": 7.002806409697776e-07, "loss": 0.3363, "step": 5115 }, { "epoch": 2.539798113519775, "grad_norm": 0.4477211833000183, "learning_rate": 6.988068019800214e-07, "loss": 0.2954, "step": 5116 }, { "epoch": 2.540294555684263, "grad_norm": 0.39174774289131165, "learning_rate": 6.973343990417746e-07, "loss": 0.2973, "step": 5117 }, { "epoch": 2.5407909978487506, "grad_norm": 0.48127833008766174, "learning_rate": 6.958634326466313e-07, "loss": 0.4408, "step": 5118 }, { "epoch": 2.5412874400132384, "grad_norm": 0.3248748183250427, "learning_rate": 6.943939032857094e-07, "loss": 0.2705, "step": 5119 }, { "epoch": 2.541783882177726, "grad_norm": 0.3890293836593628, "learning_rate": 6.929258114496407e-07, "loss": 0.3499, "step": 5120 }, { "epoch": 2.5422803243422143, "grad_norm": 0.4238499104976654, "learning_rate": 6.914591576285862e-07, "loss": 0.3397, "step": 5121 }, { "epoch": 2.542776766506702, "grad_norm": 0.4143533408641815, "learning_rate": 6.899939423122181e-07, "loss": 0.3887, "step": 5122 }, { "epoch": 2.5432732086711898, "grad_norm": 0.36634400486946106, "learning_rate": 6.885301659897336e-07, "loss": 0.3141, "step": 5123 }, { "epoch": 2.5437696508356775, "grad_norm": 0.4114474952220917, "learning_rate": 6.870678291498467e-07, "loss": 0.3248, "step": 5124 }, { "epoch": 2.5442660930001653, "grad_norm": 0.4000788629055023, "learning_rate": 6.856069322807946e-07, "loss": 0.3039, "step": 5125 }, { "epoch": 2.5447625351646534, "grad_norm": 0.43089181184768677, "learning_rate": 6.841474758703276e-07, "loss": 0.3807, "step": 5126 }, { "epoch": 2.545258977329141, "grad_norm": 0.41414231061935425, "learning_rate": 6.826894604057199e-07, "loss": 0.3936, "step": 5127 }, { "epoch": 2.545755419493629, "grad_norm": 0.3763033151626587, "learning_rate": 6.812328863737632e-07, "loss": 0.2437, "step": 5128 }, { "epoch": 2.5462518616581167, "grad_norm": 0.44687846302986145, "learning_rate": 6.797777542607686e-07, "loss": 0.333, "step": 5129 }, { "epoch": 2.5467483038226044, "grad_norm": 0.44205331802368164, "learning_rate": 6.783240645525657e-07, "loss": 0.388, "step": 5130 }, { "epoch": 2.5472447459870926, "grad_norm": 0.3637385666370392, "learning_rate": 6.768718177344985e-07, "loss": 0.346, "step": 5131 }, { "epoch": 2.5477411881515803, "grad_norm": 0.39464423060417175, "learning_rate": 6.75421014291438e-07, "loss": 0.3093, "step": 5132 }, { "epoch": 2.548237630316068, "grad_norm": 0.4188234508037567, "learning_rate": 6.739716547077635e-07, "loss": 0.3237, "step": 5133 }, { "epoch": 2.5487340724805563, "grad_norm": 0.39270272850990295, "learning_rate": 6.72523739467379e-07, "loss": 0.3713, "step": 5134 }, { "epoch": 2.5492305146450436, "grad_norm": 0.35506322979927063, "learning_rate": 6.710772690537037e-07, "loss": 0.3818, "step": 5135 }, { "epoch": 2.5497269568095318, "grad_norm": 0.41362324357032776, "learning_rate": 6.696322439496744e-07, "loss": 0.3589, "step": 5136 }, { "epoch": 2.5502233989740195, "grad_norm": 0.4289000928401947, "learning_rate": 6.681886646377473e-07, "loss": 0.3167, "step": 5137 }, { "epoch": 2.5507198411385072, "grad_norm": 0.3767171800136566, "learning_rate": 6.667465315998906e-07, "loss": 0.2972, "step": 5138 }, { "epoch": 2.5512162833029954, "grad_norm": 0.41003653407096863, "learning_rate": 6.653058453175981e-07, "loss": 0.3483, "step": 5139 }, { "epoch": 2.551712725467483, "grad_norm": 0.38184353709220886, "learning_rate": 6.638666062718718e-07, "loss": 0.3188, "step": 5140 }, { "epoch": 2.552209167631971, "grad_norm": 0.4868563115596771, "learning_rate": 6.624288149432378e-07, "loss": 0.3106, "step": 5141 }, { "epoch": 2.5527056097964587, "grad_norm": 0.386392205953598, "learning_rate": 6.609924718117311e-07, "loss": 0.3361, "step": 5142 }, { "epoch": 2.5532020519609464, "grad_norm": 0.41175609827041626, "learning_rate": 6.595575773569118e-07, "loss": 0.341, "step": 5143 }, { "epoch": 2.5536984941254346, "grad_norm": 0.3797125220298767, "learning_rate": 6.581241320578519e-07, "loss": 0.3216, "step": 5144 }, { "epoch": 2.5541949362899223, "grad_norm": 0.4511738121509552, "learning_rate": 6.566921363931373e-07, "loss": 0.3812, "step": 5145 }, { "epoch": 2.55469137845441, "grad_norm": 0.44436997175216675, "learning_rate": 6.552615908408739e-07, "loss": 0.3311, "step": 5146 }, { "epoch": 2.555187820618898, "grad_norm": 0.3732278645038605, "learning_rate": 6.538324958786818e-07, "loss": 0.3159, "step": 5147 }, { "epoch": 2.5556842627833856, "grad_norm": 0.38182365894317627, "learning_rate": 6.524048519836984e-07, "loss": 0.3408, "step": 5148 }, { "epoch": 2.5561807049478737, "grad_norm": 0.4320935904979706, "learning_rate": 6.509786596325718e-07, "loss": 0.3936, "step": 5149 }, { "epoch": 2.5566771471123615, "grad_norm": 0.46527552604675293, "learning_rate": 6.495539193014727e-07, "loss": 0.3543, "step": 5150 }, { "epoch": 2.5571735892768492, "grad_norm": 0.42937833070755005, "learning_rate": 6.481306314660801e-07, "loss": 0.2927, "step": 5151 }, { "epoch": 2.557670031441337, "grad_norm": 0.4037305414676666, "learning_rate": 6.467087966015928e-07, "loss": 0.2665, "step": 5152 }, { "epoch": 2.5581664736058247, "grad_norm": 0.4507857859134674, "learning_rate": 6.452884151827222e-07, "loss": 0.4002, "step": 5153 }, { "epoch": 2.558662915770313, "grad_norm": 0.37423810362815857, "learning_rate": 6.438694876836954e-07, "loss": 0.3162, "step": 5154 }, { "epoch": 2.5591593579348006, "grad_norm": 0.45287373661994934, "learning_rate": 6.424520145782542e-07, "loss": 0.4004, "step": 5155 }, { "epoch": 2.5596558000992884, "grad_norm": 0.38775691390037537, "learning_rate": 6.410359963396534e-07, "loss": 0.3272, "step": 5156 }, { "epoch": 2.560152242263776, "grad_norm": 0.46058163046836853, "learning_rate": 6.396214334406631e-07, "loss": 0.3411, "step": 5157 }, { "epoch": 2.560648684428264, "grad_norm": 0.4420505464076996, "learning_rate": 6.382083263535677e-07, "loss": 0.2998, "step": 5158 }, { "epoch": 2.561145126592752, "grad_norm": 0.4857316315174103, "learning_rate": 6.367966755501647e-07, "loss": 0.3605, "step": 5159 }, { "epoch": 2.56164156875724, "grad_norm": 0.40420058369636536, "learning_rate": 6.35386481501768e-07, "loss": 0.3083, "step": 5160 }, { "epoch": 2.5621380109217275, "grad_norm": 0.39863115549087524, "learning_rate": 6.339777446791994e-07, "loss": 0.3462, "step": 5161 }, { "epoch": 2.5626344530862153, "grad_norm": 0.4028220772743225, "learning_rate": 6.32570465552802e-07, "loss": 0.2963, "step": 5162 }, { "epoch": 2.563130895250703, "grad_norm": 0.37805092334747314, "learning_rate": 6.311646445924246e-07, "loss": 0.2652, "step": 5163 }, { "epoch": 2.563627337415191, "grad_norm": 0.41421985626220703, "learning_rate": 6.297602822674343e-07, "loss": 0.4043, "step": 5164 }, { "epoch": 2.564123779579679, "grad_norm": 0.36172184348106384, "learning_rate": 6.283573790467091e-07, "loss": 0.34, "step": 5165 }, { "epoch": 2.5646202217441667, "grad_norm": 0.40735721588134766, "learning_rate": 6.269559353986404e-07, "loss": 0.3076, "step": 5166 }, { "epoch": 2.565116663908655, "grad_norm": 0.4130892753601074, "learning_rate": 6.255559517911336e-07, "loss": 0.3224, "step": 5167 }, { "epoch": 2.5656131060731426, "grad_norm": 0.39895138144493103, "learning_rate": 6.241574286916007e-07, "loss": 0.3626, "step": 5168 }, { "epoch": 2.5661095482376304, "grad_norm": 0.3908316195011139, "learning_rate": 6.227603665669762e-07, "loss": 0.3117, "step": 5169 }, { "epoch": 2.566605990402118, "grad_norm": 0.43168070912361145, "learning_rate": 6.21364765883698e-07, "loss": 0.3014, "step": 5170 }, { "epoch": 2.567102432566606, "grad_norm": 0.4551759958267212, "learning_rate": 6.199706271077199e-07, "loss": 0.3914, "step": 5171 }, { "epoch": 2.567598874731094, "grad_norm": 0.3706212043762207, "learning_rate": 6.185779507045053e-07, "loss": 0.3457, "step": 5172 }, { "epoch": 2.568095316895582, "grad_norm": 0.36865705251693726, "learning_rate": 6.171867371390345e-07, "loss": 0.3134, "step": 5173 }, { "epoch": 2.5685917590600695, "grad_norm": 0.37675008177757263, "learning_rate": 6.157969868757923e-07, "loss": 0.3206, "step": 5174 }, { "epoch": 2.5690882012245573, "grad_norm": 0.40548378229141235, "learning_rate": 6.144087003787807e-07, "loss": 0.3406, "step": 5175 }, { "epoch": 2.569584643389045, "grad_norm": 0.4328403174877167, "learning_rate": 6.130218781115105e-07, "loss": 0.3015, "step": 5176 }, { "epoch": 2.570081085553533, "grad_norm": 0.4392825663089752, "learning_rate": 6.116365205370034e-07, "loss": 0.3405, "step": 5177 }, { "epoch": 2.570577527718021, "grad_norm": 0.44470199942588806, "learning_rate": 6.102526281177939e-07, "loss": 0.3825, "step": 5178 }, { "epoch": 2.5710739698825087, "grad_norm": 0.41924235224723816, "learning_rate": 6.088702013159231e-07, "loss": 0.3201, "step": 5179 }, { "epoch": 2.5715704120469964, "grad_norm": 0.4132004380226135, "learning_rate": 6.0748924059295e-07, "loss": 0.3811, "step": 5180 }, { "epoch": 2.572066854211484, "grad_norm": 0.45532771944999695, "learning_rate": 6.061097464099363e-07, "loss": 0.3307, "step": 5181 }, { "epoch": 2.5725632963759724, "grad_norm": 0.4314822554588318, "learning_rate": 6.047317192274593e-07, "loss": 0.327, "step": 5182 }, { "epoch": 2.57305973854046, "grad_norm": 0.4292866289615631, "learning_rate": 6.033551595056048e-07, "loss": 0.3116, "step": 5183 }, { "epoch": 2.573556180704948, "grad_norm": 0.43706804513931274, "learning_rate": 6.019800677039677e-07, "loss": 0.364, "step": 5184 }, { "epoch": 2.5740526228694356, "grad_norm": 0.40517210960388184, "learning_rate": 6.006064442816556e-07, "loss": 0.3281, "step": 5185 }, { "epoch": 2.5745490650339233, "grad_norm": 0.4137116074562073, "learning_rate": 5.99234289697282e-07, "loss": 0.334, "step": 5186 }, { "epoch": 2.5750455071984115, "grad_norm": 0.3814205527305603, "learning_rate": 5.978636044089731e-07, "loss": 0.3716, "step": 5187 }, { "epoch": 2.5755419493628993, "grad_norm": 0.40676984190940857, "learning_rate": 5.96494388874363e-07, "loss": 0.3735, "step": 5188 }, { "epoch": 2.576038391527387, "grad_norm": 0.39299535751342773, "learning_rate": 5.951266435505959e-07, "loss": 0.2966, "step": 5189 }, { "epoch": 2.5765348336918747, "grad_norm": 0.4029751121997833, "learning_rate": 5.937603688943244e-07, "loss": 0.311, "step": 5190 }, { "epoch": 2.5770312758563625, "grad_norm": 0.45711591839790344, "learning_rate": 5.923955653617109e-07, "loss": 0.4111, "step": 5191 }, { "epoch": 2.5775277180208507, "grad_norm": 0.40312328934669495, "learning_rate": 5.910322334084273e-07, "loss": 0.3348, "step": 5192 }, { "epoch": 2.5780241601853384, "grad_norm": 0.45453891158103943, "learning_rate": 5.896703734896508e-07, "loss": 0.3884, "step": 5193 }, { "epoch": 2.578520602349826, "grad_norm": 0.4149002432823181, "learning_rate": 5.883099860600699e-07, "loss": 0.3218, "step": 5194 }, { "epoch": 2.5790170445143143, "grad_norm": 0.4122451841831207, "learning_rate": 5.869510715738824e-07, "loss": 0.3684, "step": 5195 }, { "epoch": 2.5795134866788016, "grad_norm": 0.4042988419532776, "learning_rate": 5.855936304847926e-07, "loss": 0.3173, "step": 5196 }, { "epoch": 2.58000992884329, "grad_norm": 0.36318308115005493, "learning_rate": 5.842376632460117e-07, "loss": 0.3657, "step": 5197 }, { "epoch": 2.5805063710077776, "grad_norm": 0.3918079733848572, "learning_rate": 5.828831703102616e-07, "loss": 0.3379, "step": 5198 }, { "epoch": 2.5810028131722653, "grad_norm": 0.38022321462631226, "learning_rate": 5.815301521297701e-07, "loss": 0.332, "step": 5199 }, { "epoch": 2.5814992553367535, "grad_norm": 0.37212979793548584, "learning_rate": 5.801786091562733e-07, "loss": 0.3417, "step": 5200 }, { "epoch": 2.5819956975012412, "grad_norm": 0.4178694188594818, "learning_rate": 5.788285418410161e-07, "loss": 0.37, "step": 5201 }, { "epoch": 2.582492139665729, "grad_norm": 0.43553441762924194, "learning_rate": 5.774799506347461e-07, "loss": 0.3468, "step": 5202 }, { "epoch": 2.5829885818302167, "grad_norm": 0.4040633738040924, "learning_rate": 5.76132835987725e-07, "loss": 0.2828, "step": 5203 }, { "epoch": 2.5834850239947045, "grad_norm": 0.39154455065727234, "learning_rate": 5.747871983497144e-07, "loss": 0.3653, "step": 5204 }, { "epoch": 2.5839814661591927, "grad_norm": 0.38226351141929626, "learning_rate": 5.734430381699884e-07, "loss": 0.303, "step": 5205 }, { "epoch": 2.5844779083236804, "grad_norm": 0.3849674463272095, "learning_rate": 5.721003558973243e-07, "loss": 0.353, "step": 5206 }, { "epoch": 2.584974350488168, "grad_norm": 0.4214502274990082, "learning_rate": 5.707591519800082e-07, "loss": 0.3367, "step": 5207 }, { "epoch": 2.585470792652656, "grad_norm": 0.43154409527778625, "learning_rate": 5.694194268658315e-07, "loss": 0.3492, "step": 5208 }, { "epoch": 2.5859672348171436, "grad_norm": 0.39097848534584045, "learning_rate": 5.680811810020903e-07, "loss": 0.2892, "step": 5209 }, { "epoch": 2.586463676981632, "grad_norm": 0.4162791669368744, "learning_rate": 5.667444148355916e-07, "loss": 0.3447, "step": 5210 }, { "epoch": 2.5869601191461196, "grad_norm": 0.4398520588874817, "learning_rate": 5.654091288126429e-07, "loss": 0.349, "step": 5211 }, { "epoch": 2.5874565613106073, "grad_norm": 0.4091370701789856, "learning_rate": 5.640753233790602e-07, "loss": 0.303, "step": 5212 }, { "epoch": 2.587953003475095, "grad_norm": 0.40408557653427124, "learning_rate": 5.627429989801653e-07, "loss": 0.3713, "step": 5213 }, { "epoch": 2.588449445639583, "grad_norm": 0.3722696602344513, "learning_rate": 5.614121560607849e-07, "loss": 0.3075, "step": 5214 }, { "epoch": 2.588945887804071, "grad_norm": 0.46419161558151245, "learning_rate": 5.600827950652532e-07, "loss": 0.3317, "step": 5215 }, { "epoch": 2.5894423299685587, "grad_norm": 0.4251377284526825, "learning_rate": 5.58754916437404e-07, "loss": 0.361, "step": 5216 }, { "epoch": 2.5899387721330465, "grad_norm": 0.4136216938495636, "learning_rate": 5.574285206205826e-07, "loss": 0.3076, "step": 5217 }, { "epoch": 2.590435214297534, "grad_norm": 0.45721960067749023, "learning_rate": 5.561036080576354e-07, "loss": 0.365, "step": 5218 }, { "epoch": 2.590931656462022, "grad_norm": 0.37419840693473816, "learning_rate": 5.547801791909163e-07, "loss": 0.2525, "step": 5219 }, { "epoch": 2.59142809862651, "grad_norm": 0.4519163966178894, "learning_rate": 5.534582344622785e-07, "loss": 0.3643, "step": 5220 }, { "epoch": 2.591924540790998, "grad_norm": 0.3719533383846283, "learning_rate": 5.521377743130885e-07, "loss": 0.3049, "step": 5221 }, { "epoch": 2.5924209829554856, "grad_norm": 0.41020241379737854, "learning_rate": 5.508187991842085e-07, "loss": 0.369, "step": 5222 }, { "epoch": 2.5929174251199734, "grad_norm": 0.39278265833854675, "learning_rate": 5.49501309516009e-07, "loss": 0.3386, "step": 5223 }, { "epoch": 2.593413867284461, "grad_norm": 0.4218992590904236, "learning_rate": 5.481853057483644e-07, "loss": 0.3341, "step": 5224 }, { "epoch": 2.5939103094489493, "grad_norm": 0.43890562653541565, "learning_rate": 5.468707883206525e-07, "loss": 0.3824, "step": 5225 }, { "epoch": 2.594406751613437, "grad_norm": 0.40286538004875183, "learning_rate": 5.455577576717563e-07, "loss": 0.3151, "step": 5226 }, { "epoch": 2.5949031937779248, "grad_norm": 0.427056223154068, "learning_rate": 5.442462142400589e-07, "loss": 0.3162, "step": 5227 }, { "epoch": 2.595399635942413, "grad_norm": 0.478605180978775, "learning_rate": 5.429361584634496e-07, "loss": 0.3534, "step": 5228 }, { "epoch": 2.5958960781069007, "grad_norm": 0.4250653088092804, "learning_rate": 5.416275907793212e-07, "loss": 0.2992, "step": 5229 }, { "epoch": 2.5963925202713884, "grad_norm": 0.4386827051639557, "learning_rate": 5.40320511624568e-07, "loss": 0.3714, "step": 5230 }, { "epoch": 2.596888962435876, "grad_norm": 0.3750596046447754, "learning_rate": 5.390149214355884e-07, "loss": 0.3198, "step": 5231 }, { "epoch": 2.597385404600364, "grad_norm": 0.39459678530693054, "learning_rate": 5.37710820648284e-07, "loss": 0.3083, "step": 5232 }, { "epoch": 2.597881846764852, "grad_norm": 0.46172890067100525, "learning_rate": 5.364082096980589e-07, "loss": 0.423, "step": 5233 }, { "epoch": 2.59837828892934, "grad_norm": 0.3533586859703064, "learning_rate": 5.351070890198184e-07, "loss": 0.346, "step": 5234 }, { "epoch": 2.5988747310938276, "grad_norm": 0.387643963098526, "learning_rate": 5.338074590479714e-07, "loss": 0.3636, "step": 5235 }, { "epoch": 2.5993711732583153, "grad_norm": 0.39356479048728943, "learning_rate": 5.3250932021643e-07, "loss": 0.3499, "step": 5236 }, { "epoch": 2.599867615422803, "grad_norm": 0.3974871039390564, "learning_rate": 5.312126729586065e-07, "loss": 0.3407, "step": 5237 }, { "epoch": 2.6003640575872913, "grad_norm": 0.42138174176216125, "learning_rate": 5.299175177074173e-07, "loss": 0.3185, "step": 5238 }, { "epoch": 2.600860499751779, "grad_norm": 0.44385233521461487, "learning_rate": 5.286238548952771e-07, "loss": 0.4051, "step": 5239 }, { "epoch": 2.6013569419162668, "grad_norm": 0.3779393136501312, "learning_rate": 5.273316849541088e-07, "loss": 0.2744, "step": 5240 }, { "epoch": 2.6018533840807545, "grad_norm": 0.5020843744277954, "learning_rate": 5.260410083153289e-07, "loss": 0.3953, "step": 5241 }, { "epoch": 2.6023498262452422, "grad_norm": 0.3890610337257385, "learning_rate": 5.247518254098627e-07, "loss": 0.3029, "step": 5242 }, { "epoch": 2.6028462684097304, "grad_norm": 0.4048387110233307, "learning_rate": 5.234641366681287e-07, "loss": 0.2922, "step": 5243 }, { "epoch": 2.603342710574218, "grad_norm": 0.43060633540153503, "learning_rate": 5.221779425200563e-07, "loss": 0.3722, "step": 5244 }, { "epoch": 2.603839152738706, "grad_norm": 0.4386450946331024, "learning_rate": 5.208932433950675e-07, "loss": 0.3584, "step": 5245 }, { "epoch": 2.6043355949031937, "grad_norm": 0.4278208613395691, "learning_rate": 5.196100397220893e-07, "loss": 0.3193, "step": 5246 }, { "epoch": 2.6048320370676814, "grad_norm": 0.40810051560401917, "learning_rate": 5.183283319295485e-07, "loss": 0.3234, "step": 5247 }, { "epoch": 2.6053284792321696, "grad_norm": 0.38446247577667236, "learning_rate": 5.170481204453725e-07, "loss": 0.2797, "step": 5248 }, { "epoch": 2.6058249213966573, "grad_norm": 0.48662492632865906, "learning_rate": 5.157694056969903e-07, "loss": 0.4504, "step": 5249 }, { "epoch": 2.606321363561145, "grad_norm": 0.3805690109729767, "learning_rate": 5.144921881113269e-07, "loss": 0.3014, "step": 5250 }, { "epoch": 2.606817805725633, "grad_norm": 0.46387720108032227, "learning_rate": 5.132164681148144e-07, "loss": 0.3098, "step": 5251 }, { "epoch": 2.6073142478901206, "grad_norm": 0.3925182521343231, "learning_rate": 5.119422461333784e-07, "loss": 0.2891, "step": 5252 }, { "epoch": 2.6078106900546087, "grad_norm": 0.4332398474216461, "learning_rate": 5.10669522592448e-07, "loss": 0.3903, "step": 5253 }, { "epoch": 2.6083071322190965, "grad_norm": 0.3906312584877014, "learning_rate": 5.093982979169503e-07, "loss": 0.3611, "step": 5254 }, { "epoch": 2.6088035743835842, "grad_norm": 0.3639637529850006, "learning_rate": 5.081285725313134e-07, "loss": 0.3157, "step": 5255 }, { "epoch": 2.6093000165480724, "grad_norm": 0.40434980392456055, "learning_rate": 5.068603468594646e-07, "loss": 0.3377, "step": 5256 }, { "epoch": 2.6097964587125597, "grad_norm": 0.37937048077583313, "learning_rate": 5.055936213248286e-07, "loss": 0.2959, "step": 5257 }, { "epoch": 2.610292900877048, "grad_norm": 0.4656359553337097, "learning_rate": 5.043283963503309e-07, "loss": 0.3729, "step": 5258 }, { "epoch": 2.6107893430415356, "grad_norm": 0.4575078785419464, "learning_rate": 5.030646723583959e-07, "loss": 0.3626, "step": 5259 }, { "epoch": 2.6112857852060234, "grad_norm": 0.4382689595222473, "learning_rate": 5.018024497709473e-07, "loss": 0.3104, "step": 5260 }, { "epoch": 2.6117822273705116, "grad_norm": 0.48447632789611816, "learning_rate": 5.005417290094061e-07, "loss": 0.3595, "step": 5261 }, { "epoch": 2.6122786695349993, "grad_norm": 0.3961464464664459, "learning_rate": 4.992825104946936e-07, "loss": 0.3404, "step": 5262 }, { "epoch": 2.612775111699487, "grad_norm": 0.3737049102783203, "learning_rate": 4.980247946472289e-07, "loss": 0.3148, "step": 5263 }, { "epoch": 2.613271553863975, "grad_norm": 0.41785794496536255, "learning_rate": 4.967685818869273e-07, "loss": 0.3697, "step": 5264 }, { "epoch": 2.6137679960284625, "grad_norm": 0.43333277106285095, "learning_rate": 4.955138726332054e-07, "loss": 0.322, "step": 5265 }, { "epoch": 2.6142644381929507, "grad_norm": 0.4617508351802826, "learning_rate": 4.94260667304976e-07, "loss": 0.3578, "step": 5266 }, { "epoch": 2.6147608803574385, "grad_norm": 0.41388776898384094, "learning_rate": 4.930089663206516e-07, "loss": 0.3346, "step": 5267 }, { "epoch": 2.615257322521926, "grad_norm": 0.39333394169807434, "learning_rate": 4.917587700981391e-07, "loss": 0.3256, "step": 5268 }, { "epoch": 2.615753764686414, "grad_norm": 0.42503270506858826, "learning_rate": 4.905100790548462e-07, "loss": 0.3273, "step": 5269 }, { "epoch": 2.6162502068509017, "grad_norm": 0.3632255494594574, "learning_rate": 4.892628936076766e-07, "loss": 0.276, "step": 5270 }, { "epoch": 2.61674664901539, "grad_norm": 0.4255335330963135, "learning_rate": 4.880172141730316e-07, "loss": 0.3804, "step": 5271 }, { "epoch": 2.6172430911798776, "grad_norm": 0.3983498513698578, "learning_rate": 4.867730411668103e-07, "loss": 0.3491, "step": 5272 }, { "epoch": 2.6177395333443654, "grad_norm": 0.35204508900642395, "learning_rate": 4.855303750044077e-07, "loss": 0.2741, "step": 5273 }, { "epoch": 2.618235975508853, "grad_norm": 0.4140051007270813, "learning_rate": 4.842892161007173e-07, "loss": 0.3089, "step": 5274 }, { "epoch": 2.618732417673341, "grad_norm": 0.4707716107368469, "learning_rate": 4.830495648701266e-07, "loss": 0.3659, "step": 5275 }, { "epoch": 2.619228859837829, "grad_norm": 0.3979301154613495, "learning_rate": 4.818114217265219e-07, "loss": 0.2976, "step": 5276 }, { "epoch": 2.619725302002317, "grad_norm": 0.3870999813079834, "learning_rate": 4.805747870832867e-07, "loss": 0.2729, "step": 5277 }, { "epoch": 2.6202217441668045, "grad_norm": 0.4341373145580292, "learning_rate": 4.79339661353298e-07, "loss": 0.2845, "step": 5278 }, { "epoch": 2.6207181863312923, "grad_norm": 0.48816198110580444, "learning_rate": 4.781060449489333e-07, "loss": 0.3164, "step": 5279 }, { "epoch": 2.62121462849578, "grad_norm": 0.4066382646560669, "learning_rate": 4.768739382820597e-07, "loss": 0.321, "step": 5280 }, { "epoch": 2.621711070660268, "grad_norm": 0.37434160709381104, "learning_rate": 4.7564334176404827e-07, "loss": 0.3113, "step": 5281 }, { "epoch": 2.622207512824756, "grad_norm": 0.40204283595085144, "learning_rate": 4.7441425580575904e-07, "loss": 0.3476, "step": 5282 }, { "epoch": 2.6227039549892437, "grad_norm": 0.4457896649837494, "learning_rate": 4.7318668081755116e-07, "loss": 0.3538, "step": 5283 }, { "epoch": 2.6232003971537314, "grad_norm": 0.41158390045166016, "learning_rate": 4.7196061720927835e-07, "loss": 0.3162, "step": 5284 }, { "epoch": 2.623696839318219, "grad_norm": 0.43966609239578247, "learning_rate": 4.707360653902904e-07, "loss": 0.3385, "step": 5285 }, { "epoch": 2.6241932814827074, "grad_norm": 0.43599119782447815, "learning_rate": 4.695130257694325e-07, "loss": 0.3624, "step": 5286 }, { "epoch": 2.624689723647195, "grad_norm": 0.41547057032585144, "learning_rate": 4.682914987550413e-07, "loss": 0.3143, "step": 5287 }, { "epoch": 2.625186165811683, "grad_norm": 0.4401313066482544, "learning_rate": 4.6707148475495623e-07, "loss": 0.3477, "step": 5288 }, { "epoch": 2.625682607976171, "grad_norm": 0.41044145822525024, "learning_rate": 4.6585298417650306e-07, "loss": 0.3664, "step": 5289 }, { "epoch": 2.6261790501406588, "grad_norm": 0.3978935182094574, "learning_rate": 4.6463599742650745e-07, "loss": 0.2885, "step": 5290 }, { "epoch": 2.6266754923051465, "grad_norm": 0.37075260281562805, "learning_rate": 4.6342052491128664e-07, "loss": 0.3087, "step": 5291 }, { "epoch": 2.6271719344696343, "grad_norm": 0.4228144586086273, "learning_rate": 4.622065670366571e-07, "loss": 0.3167, "step": 5292 }, { "epoch": 2.627668376634122, "grad_norm": 0.43276774883270264, "learning_rate": 4.6099412420792354e-07, "loss": 0.3226, "step": 5293 }, { "epoch": 2.62816481879861, "grad_norm": 0.4261760711669922, "learning_rate": 4.5978319682988826e-07, "loss": 0.3534, "step": 5294 }, { "epoch": 2.628661260963098, "grad_norm": 0.40756475925445557, "learning_rate": 4.5857378530684724e-07, "loss": 0.2845, "step": 5295 }, { "epoch": 2.6291577031275857, "grad_norm": 0.4431335926055908, "learning_rate": 4.573658900425909e-07, "loss": 0.3755, "step": 5296 }, { "epoch": 2.6296541452920734, "grad_norm": 0.44046398997306824, "learning_rate": 4.561595114404022e-07, "loss": 0.4084, "step": 5297 }, { "epoch": 2.630150587456561, "grad_norm": 0.38150879740715027, "learning_rate": 4.5495464990305715e-07, "loss": 0.2905, "step": 5298 }, { "epoch": 2.6306470296210493, "grad_norm": 0.3941110372543335, "learning_rate": 4.537513058328269e-07, "loss": 0.3039, "step": 5299 }, { "epoch": 2.631143471785537, "grad_norm": 0.39968931674957275, "learning_rate": 4.5254947963147553e-07, "loss": 0.2917, "step": 5300 }, { "epoch": 2.631639913950025, "grad_norm": 0.4244963824748993, "learning_rate": 4.513491717002599e-07, "loss": 0.3801, "step": 5301 }, { "epoch": 2.6321363561145126, "grad_norm": 0.48585477471351624, "learning_rate": 4.501503824399306e-07, "loss": 0.3361, "step": 5302 }, { "epoch": 2.6326327982790003, "grad_norm": 0.4123549163341522, "learning_rate": 4.4895311225073014e-07, "loss": 0.3053, "step": 5303 }, { "epoch": 2.6331292404434885, "grad_norm": 0.4248215854167938, "learning_rate": 4.4775736153239657e-07, "loss": 0.3469, "step": 5304 }, { "epoch": 2.6336256826079762, "grad_norm": 0.4050484299659729, "learning_rate": 4.465631306841556e-07, "loss": 0.3033, "step": 5305 }, { "epoch": 2.634122124772464, "grad_norm": 0.41529256105422974, "learning_rate": 4.453704201047293e-07, "loss": 0.3188, "step": 5306 }, { "epoch": 2.6346185669369517, "grad_norm": 0.3924146592617035, "learning_rate": 4.44179230192332e-07, "loss": 0.3561, "step": 5307 }, { "epoch": 2.6351150091014395, "grad_norm": 0.40374988317489624, "learning_rate": 4.429895613446694e-07, "loss": 0.3192, "step": 5308 }, { "epoch": 2.6356114512659277, "grad_norm": 0.4749533534049988, "learning_rate": 4.41801413958941e-07, "loss": 0.3565, "step": 5309 }, { "epoch": 2.6361078934304154, "grad_norm": 0.3969471752643585, "learning_rate": 4.4061478843183294e-07, "loss": 0.2989, "step": 5310 }, { "epoch": 2.636604335594903, "grad_norm": 0.42994800209999084, "learning_rate": 4.39429685159532e-07, "loss": 0.3195, "step": 5311 }, { "epoch": 2.637100777759391, "grad_norm": 0.396959125995636, "learning_rate": 4.38246104537709e-07, "loss": 0.3464, "step": 5312 }, { "epoch": 2.6375972199238786, "grad_norm": 0.4223382771015167, "learning_rate": 4.3706404696153003e-07, "loss": 0.329, "step": 5313 }, { "epoch": 2.638093662088367, "grad_norm": 0.446894109249115, "learning_rate": 4.3588351282565166e-07, "loss": 0.363, "step": 5314 }, { "epoch": 2.6385901042528546, "grad_norm": 0.39421412348747253, "learning_rate": 4.3470450252422416e-07, "loss": 0.3868, "step": 5315 }, { "epoch": 2.6390865464173423, "grad_norm": 0.4173458516597748, "learning_rate": 4.335270164508837e-07, "loss": 0.3531, "step": 5316 }, { "epoch": 2.6395829885818305, "grad_norm": 0.40163642168045044, "learning_rate": 4.3235105499876306e-07, "loss": 0.3427, "step": 5317 }, { "epoch": 2.640079430746318, "grad_norm": 0.3951260447502136, "learning_rate": 4.311766185604832e-07, "loss": 0.305, "step": 5318 }, { "epoch": 2.640575872910806, "grad_norm": 0.3862709105014801, "learning_rate": 4.3000370752815655e-07, "loss": 0.3287, "step": 5319 }, { "epoch": 2.6410723150752937, "grad_norm": 0.4405924081802368, "learning_rate": 4.2883232229338766e-07, "loss": 0.3513, "step": 5320 }, { "epoch": 2.6415687572397815, "grad_norm": 0.4212648868560791, "learning_rate": 4.27662463247267e-07, "loss": 0.3187, "step": 5321 }, { "epoch": 2.6420651994042696, "grad_norm": 0.42270082235336304, "learning_rate": 4.2649413078038215e-07, "loss": 0.3041, "step": 5322 }, { "epoch": 2.6425616415687574, "grad_norm": 0.44336944818496704, "learning_rate": 4.2532732528280497e-07, "loss": 0.3893, "step": 5323 }, { "epoch": 2.643058083733245, "grad_norm": 0.4195363223552704, "learning_rate": 4.241620471441016e-07, "loss": 0.3466, "step": 5324 }, { "epoch": 2.643554525897733, "grad_norm": 0.3751201331615448, "learning_rate": 4.2299829675332636e-07, "loss": 0.3248, "step": 5325 }, { "epoch": 2.6440509680622206, "grad_norm": 0.4204387068748474, "learning_rate": 4.2183607449902355e-07, "loss": 0.3302, "step": 5326 }, { "epoch": 2.644547410226709, "grad_norm": 0.4414141774177551, "learning_rate": 4.2067538076922874e-07, "loss": 0.3358, "step": 5327 }, { "epoch": 2.6450438523911965, "grad_norm": 0.4385027587413788, "learning_rate": 4.195162159514632e-07, "loss": 0.3551, "step": 5328 }, { "epoch": 2.6455402945556843, "grad_norm": 0.41337981820106506, "learning_rate": 4.1835858043274445e-07, "loss": 0.317, "step": 5329 }, { "epoch": 2.646036736720172, "grad_norm": 0.3755105137825012, "learning_rate": 4.172024745995729e-07, "loss": 0.3369, "step": 5330 }, { "epoch": 2.6465331788846598, "grad_norm": 0.4088797867298126, "learning_rate": 4.160478988379413e-07, "loss": 0.3217, "step": 5331 }, { "epoch": 2.647029621049148, "grad_norm": 0.4333304166793823, "learning_rate": 4.148948535333319e-07, "loss": 0.356, "step": 5332 }, { "epoch": 2.6475260632136357, "grad_norm": 0.4034639298915863, "learning_rate": 4.1374333907071406e-07, "loss": 0.3543, "step": 5333 }, { "epoch": 2.6480225053781234, "grad_norm": 0.42004266381263733, "learning_rate": 4.1259335583454854e-07, "loss": 0.3635, "step": 5334 }, { "epoch": 2.648518947542611, "grad_norm": 0.3886498510837555, "learning_rate": 4.114449042087826e-07, "loss": 0.3465, "step": 5335 }, { "epoch": 2.649015389707099, "grad_norm": 0.43245524168014526, "learning_rate": 4.102979845768523e-07, "loss": 0.3592, "step": 5336 }, { "epoch": 2.649511831871587, "grad_norm": 0.45309022068977356, "learning_rate": 4.0915259732168425e-07, "loss": 0.3503, "step": 5337 }, { "epoch": 2.650008274036075, "grad_norm": 0.4470353126525879, "learning_rate": 4.080087428256924e-07, "loss": 0.2906, "step": 5338 }, { "epoch": 2.6505047162005626, "grad_norm": 0.44541165232658386, "learning_rate": 4.068664214707768e-07, "loss": 0.3877, "step": 5339 }, { "epoch": 2.6510011583650503, "grad_norm": 0.41170617938041687, "learning_rate": 4.0572563363832864e-07, "loss": 0.2925, "step": 5340 }, { "epoch": 2.651497600529538, "grad_norm": 0.3989192545413971, "learning_rate": 4.0458637970922645e-07, "loss": 0.3345, "step": 5341 }, { "epoch": 2.6519940426940263, "grad_norm": 0.42276257276535034, "learning_rate": 4.034486600638349e-07, "loss": 0.3437, "step": 5342 }, { "epoch": 2.652490484858514, "grad_norm": 0.47697997093200684, "learning_rate": 4.02312475082009e-07, "loss": 0.2985, "step": 5343 }, { "epoch": 2.6529869270230018, "grad_norm": 0.4040757417678833, "learning_rate": 4.011778251430892e-07, "loss": 0.3288, "step": 5344 }, { "epoch": 2.6534833691874895, "grad_norm": 0.4206240475177765, "learning_rate": 4.000447106259059e-07, "loss": 0.2997, "step": 5345 }, { "epoch": 2.6539798113519772, "grad_norm": 0.39525851607322693, "learning_rate": 3.9891313190877243e-07, "loss": 0.3118, "step": 5346 }, { "epoch": 2.6544762535164654, "grad_norm": 0.40960219502449036, "learning_rate": 3.977830893694934e-07, "loss": 0.3082, "step": 5347 }, { "epoch": 2.654972695680953, "grad_norm": 0.4176686406135559, "learning_rate": 3.9665458338536023e-07, "loss": 0.3292, "step": 5348 }, { "epoch": 2.655469137845441, "grad_norm": 0.3997499346733093, "learning_rate": 3.9552761433314936e-07, "loss": 0.3362, "step": 5349 }, { "epoch": 2.655965580009929, "grad_norm": 0.37190523743629456, "learning_rate": 3.944021825891259e-07, "loss": 0.3616, "step": 5350 }, { "epoch": 2.6564620221744164, "grad_norm": 0.39750856161117554, "learning_rate": 3.932782885290393e-07, "loss": 0.309, "step": 5351 }, { "epoch": 2.6569584643389046, "grad_norm": 0.40532439947128296, "learning_rate": 3.921559325281299e-07, "loss": 0.3309, "step": 5352 }, { "epoch": 2.6574549065033923, "grad_norm": 0.43864166736602783, "learning_rate": 3.9103511496111965e-07, "loss": 0.3681, "step": 5353 }, { "epoch": 2.65795134866788, "grad_norm": 0.45571669936180115, "learning_rate": 3.899158362022193e-07, "loss": 0.3547, "step": 5354 }, { "epoch": 2.6584477908323683, "grad_norm": 0.4227082133293152, "learning_rate": 3.887980966251265e-07, "loss": 0.3421, "step": 5355 }, { "epoch": 2.658944232996856, "grad_norm": 0.39770808815956116, "learning_rate": 3.876818966030238e-07, "loss": 0.3778, "step": 5356 }, { "epoch": 2.6594406751613437, "grad_norm": 0.41290482878685, "learning_rate": 3.865672365085804e-07, "loss": 0.3491, "step": 5357 }, { "epoch": 2.6599371173258315, "grad_norm": 0.3709828555583954, "learning_rate": 3.8545411671394914e-07, "loss": 0.3226, "step": 5358 }, { "epoch": 2.6604335594903192, "grad_norm": 0.4848761260509491, "learning_rate": 3.843425375907739e-07, "loss": 0.3503, "step": 5359 }, { "epoch": 2.6609300016548074, "grad_norm": 0.4368802607059479, "learning_rate": 3.832324995101777e-07, "loss": 0.3673, "step": 5360 }, { "epoch": 2.661426443819295, "grad_norm": 0.38384750485420227, "learning_rate": 3.8212400284277364e-07, "loss": 0.2942, "step": 5361 }, { "epoch": 2.661922885983783, "grad_norm": 0.3703289330005646, "learning_rate": 3.810170479586567e-07, "loss": 0.3691, "step": 5362 }, { "epoch": 2.6624193281482706, "grad_norm": 0.41748741269111633, "learning_rate": 3.799116352274124e-07, "loss": 0.3818, "step": 5363 }, { "epoch": 2.6629157703127584, "grad_norm": 0.39551061391830444, "learning_rate": 3.788077650181049e-07, "loss": 0.2834, "step": 5364 }, { "epoch": 2.6634122124772466, "grad_norm": 0.4190051257610321, "learning_rate": 3.7770543769928724e-07, "loss": 0.3885, "step": 5365 }, { "epoch": 2.6639086546417343, "grad_norm": 0.39607056975364685, "learning_rate": 3.766046536389978e-07, "loss": 0.3248, "step": 5366 }, { "epoch": 2.664405096806222, "grad_norm": 0.37451350688934326, "learning_rate": 3.7550541320475697e-07, "loss": 0.2965, "step": 5367 }, { "epoch": 2.66490153897071, "grad_norm": 0.4252866506576538, "learning_rate": 3.744077167635729e-07, "loss": 0.3849, "step": 5368 }, { "epoch": 2.6653979811351975, "grad_norm": 0.43208128213882446, "learning_rate": 3.7331156468193353e-07, "loss": 0.3709, "step": 5369 }, { "epoch": 2.6658944232996857, "grad_norm": 0.41361191868782043, "learning_rate": 3.722169573258183e-07, "loss": 0.3219, "step": 5370 }, { "epoch": 2.6663908654641735, "grad_norm": 0.3845941722393036, "learning_rate": 3.7112389506068435e-07, "loss": 0.2872, "step": 5371 }, { "epoch": 2.666887307628661, "grad_norm": 0.46908697485923767, "learning_rate": 3.7003237825147533e-07, "loss": 0.3721, "step": 5372 }, { "epoch": 2.667383749793149, "grad_norm": 0.534870445728302, "learning_rate": 3.689424072626202e-07, "loss": 0.4049, "step": 5373 }, { "epoch": 2.6678801919576367, "grad_norm": 0.3685465157032013, "learning_rate": 3.678539824580296e-07, "loss": 0.2857, "step": 5374 }, { "epoch": 2.668376634122125, "grad_norm": 0.42399147152900696, "learning_rate": 3.6676710420110063e-07, "loss": 0.3091, "step": 5375 }, { "epoch": 2.6688730762866126, "grad_norm": 0.40354123711586, "learning_rate": 3.656817728547107e-07, "loss": 0.3004, "step": 5376 }, { "epoch": 2.6693695184511004, "grad_norm": 0.40734443068504333, "learning_rate": 3.6459798878122233e-07, "loss": 0.283, "step": 5377 }, { "epoch": 2.669865960615588, "grad_norm": 0.3915700316429138, "learning_rate": 3.635157523424826e-07, "loss": 0.3413, "step": 5378 }, { "epoch": 2.670362402780076, "grad_norm": 0.40994152426719666, "learning_rate": 3.624350638998209e-07, "loss": 0.3211, "step": 5379 }, { "epoch": 2.670858844944564, "grad_norm": 0.4409846365451813, "learning_rate": 3.613559238140496e-07, "loss": 0.3188, "step": 5380 }, { "epoch": 2.671355287109052, "grad_norm": 0.4032416343688965, "learning_rate": 3.6027833244546286e-07, "loss": 0.3889, "step": 5381 }, { "epoch": 2.6718517292735395, "grad_norm": 0.3930935263633728, "learning_rate": 3.5920229015384165e-07, "loss": 0.3746, "step": 5382 }, { "epoch": 2.6723481714380277, "grad_norm": 0.38996103405952454, "learning_rate": 3.581277972984448e-07, "loss": 0.3477, "step": 5383 }, { "epoch": 2.6728446136025155, "grad_norm": 0.4104796350002289, "learning_rate": 3.5705485423801755e-07, "loss": 0.3273, "step": 5384 }, { "epoch": 2.673341055767003, "grad_norm": 0.37324443459510803, "learning_rate": 3.559834613307861e-07, "loss": 0.2977, "step": 5385 }, { "epoch": 2.673837497931491, "grad_norm": 0.44736579060554504, "learning_rate": 3.549136189344604e-07, "loss": 0.4364, "step": 5386 }, { "epoch": 2.6743339400959787, "grad_norm": 0.4066753387451172, "learning_rate": 3.5384532740623033e-07, "loss": 0.3027, "step": 5387 }, { "epoch": 2.674830382260467, "grad_norm": 0.4133888781070709, "learning_rate": 3.5277858710277e-07, "loss": 0.3524, "step": 5388 }, { "epoch": 2.6753268244249546, "grad_norm": 0.39746853709220886, "learning_rate": 3.5171339838023453e-07, "loss": 0.3554, "step": 5389 }, { "epoch": 2.6758232665894424, "grad_norm": 0.4471295177936554, "learning_rate": 3.5064976159426224e-07, "loss": 0.3164, "step": 5390 }, { "epoch": 2.67631970875393, "grad_norm": 0.36807534098625183, "learning_rate": 3.495876770999729e-07, "loss": 0.2715, "step": 5391 }, { "epoch": 2.676816150918418, "grad_norm": 0.4565495252609253, "learning_rate": 3.4852714525196507e-07, "loss": 0.3532, "step": 5392 }, { "epoch": 2.677312593082906, "grad_norm": 0.4066723585128784, "learning_rate": 3.4746816640432556e-07, "loss": 0.3251, "step": 5393 }, { "epoch": 2.6778090352473938, "grad_norm": 0.4128762483596802, "learning_rate": 3.4641074091061545e-07, "loss": 0.3804, "step": 5394 }, { "epoch": 2.6783054774118815, "grad_norm": 0.39160338044166565, "learning_rate": 3.4535486912388115e-07, "loss": 0.3439, "step": 5395 }, { "epoch": 2.6788019195763693, "grad_norm": 0.3895488977432251, "learning_rate": 3.443005513966502e-07, "loss": 0.3074, "step": 5396 }, { "epoch": 2.679298361740857, "grad_norm": 0.4199369251728058, "learning_rate": 3.4324778808092985e-07, "loss": 0.392, "step": 5397 }, { "epoch": 2.679794803905345, "grad_norm": 0.3909336030483246, "learning_rate": 3.421965795282106e-07, "loss": 0.2999, "step": 5398 }, { "epoch": 2.680291246069833, "grad_norm": 0.415353924036026, "learning_rate": 3.411469260894601e-07, "loss": 0.3376, "step": 5399 }, { "epoch": 2.6807876882343207, "grad_norm": 0.37286052107810974, "learning_rate": 3.400988281151313e-07, "loss": 0.3018, "step": 5400 }, { "epoch": 2.6812841303988084, "grad_norm": 0.42965075373649597, "learning_rate": 3.3905228595515425e-07, "loss": 0.3868, "step": 5401 }, { "epoch": 2.681780572563296, "grad_norm": 0.39182618260383606, "learning_rate": 3.3800729995894124e-07, "loss": 0.3383, "step": 5402 }, { "epoch": 2.6822770147277843, "grad_norm": 0.43483614921569824, "learning_rate": 3.3696387047538525e-07, "loss": 0.3417, "step": 5403 }, { "epoch": 2.682773456892272, "grad_norm": 0.405405193567276, "learning_rate": 3.359219978528583e-07, "loss": 0.3442, "step": 5404 }, { "epoch": 2.68326989905676, "grad_norm": 0.43408116698265076, "learning_rate": 3.348816824392143e-07, "loss": 0.311, "step": 5405 }, { "epoch": 2.6837663412212476, "grad_norm": 0.46078187227249146, "learning_rate": 3.338429245817848e-07, "loss": 0.3402, "step": 5406 }, { "epoch": 2.6842627833857353, "grad_norm": 0.4082134962081909, "learning_rate": 3.3280572462738415e-07, "loss": 0.304, "step": 5407 }, { "epoch": 2.6847592255502235, "grad_norm": 0.42821004986763, "learning_rate": 3.3177008292230415e-07, "loss": 0.2926, "step": 5408 }, { "epoch": 2.6852556677147112, "grad_norm": 0.42237335443496704, "learning_rate": 3.307359998123194e-07, "loss": 0.3463, "step": 5409 }, { "epoch": 2.685752109879199, "grad_norm": 0.41988107562065125, "learning_rate": 3.297034756426787e-07, "loss": 0.3656, "step": 5410 }, { "epoch": 2.686248552043687, "grad_norm": 0.4328193664550781, "learning_rate": 3.286725107581179e-07, "loss": 0.3465, "step": 5411 }, { "epoch": 2.6867449942081745, "grad_norm": 0.394646018743515, "learning_rate": 3.276431055028445e-07, "loss": 0.2921, "step": 5412 }, { "epoch": 2.6872414363726627, "grad_norm": 0.4117482602596283, "learning_rate": 3.2661526022055135e-07, "loss": 0.3647, "step": 5413 }, { "epoch": 2.6877378785371504, "grad_norm": 0.43236175179481506, "learning_rate": 3.255889752544067e-07, "loss": 0.3004, "step": 5414 }, { "epoch": 2.688234320701638, "grad_norm": 0.4607480466365814, "learning_rate": 3.2456425094706034e-07, "loss": 0.355, "step": 5415 }, { "epoch": 2.6887307628661263, "grad_norm": 0.4474388659000397, "learning_rate": 3.2354108764063973e-07, "loss": 0.3507, "step": 5416 }, { "epoch": 2.689227205030614, "grad_norm": 0.4551199972629547, "learning_rate": 3.2251948567674993e-07, "loss": 0.3611, "step": 5417 }, { "epoch": 2.689723647195102, "grad_norm": 0.4252608120441437, "learning_rate": 3.214994453964776e-07, "loss": 0.3063, "step": 5418 }, { "epoch": 2.6902200893595896, "grad_norm": 0.39766666293144226, "learning_rate": 3.204809671403852e-07, "loss": 0.2976, "step": 5419 }, { "epoch": 2.6907165315240773, "grad_norm": 0.4200887382030487, "learning_rate": 3.194640512485159e-07, "loss": 0.3612, "step": 5420 }, { "epoch": 2.6912129736885655, "grad_norm": 0.378934770822525, "learning_rate": 3.184486980603907e-07, "loss": 0.3181, "step": 5421 }, { "epoch": 2.6917094158530532, "grad_norm": 0.43321555852890015, "learning_rate": 3.1743490791500577e-07, "loss": 0.4022, "step": 5422 }, { "epoch": 2.692205858017541, "grad_norm": 0.38692647218704224, "learning_rate": 3.1642268115084196e-07, "loss": 0.3329, "step": 5423 }, { "epoch": 2.6927023001820287, "grad_norm": 0.40146857500076294, "learning_rate": 3.1541201810585175e-07, "loss": 0.286, "step": 5424 }, { "epoch": 2.6931987423465165, "grad_norm": 0.4264281690120697, "learning_rate": 3.14402919117468e-07, "loss": 0.3712, "step": 5425 }, { "epoch": 2.6936951845110046, "grad_norm": 0.3650306463241577, "learning_rate": 3.133953845226029e-07, "loss": 0.2744, "step": 5426 }, { "epoch": 2.6941916266754924, "grad_norm": 0.47155314683914185, "learning_rate": 3.1238941465764337e-07, "loss": 0.3838, "step": 5427 }, { "epoch": 2.69468806883998, "grad_norm": 0.38654661178588867, "learning_rate": 3.1138500985845755e-07, "loss": 0.3444, "step": 5428 }, { "epoch": 2.695184511004468, "grad_norm": 0.4202434718608856, "learning_rate": 3.103821704603854e-07, "loss": 0.3166, "step": 5429 }, { "epoch": 2.6956809531689556, "grad_norm": 0.393587201833725, "learning_rate": 3.093808967982515e-07, "loss": 0.343, "step": 5430 }, { "epoch": 2.696177395333444, "grad_norm": 0.41841790080070496, "learning_rate": 3.08381189206351e-07, "loss": 0.2924, "step": 5431 }, { "epoch": 2.6966738374979315, "grad_norm": 0.40029358863830566, "learning_rate": 3.0738304801846144e-07, "loss": 0.3623, "step": 5432 }, { "epoch": 2.6971702796624193, "grad_norm": 0.45242542028427124, "learning_rate": 3.0638647356783236e-07, "loss": 0.4152, "step": 5433 }, { "epoch": 2.697666721826907, "grad_norm": 0.38616931438446045, "learning_rate": 3.0539146618719596e-07, "loss": 0.3765, "step": 5434 }, { "epoch": 2.6981631639913948, "grad_norm": 0.3872362971305847, "learning_rate": 3.043980262087559e-07, "loss": 0.3124, "step": 5435 }, { "epoch": 2.698659606155883, "grad_norm": 0.41213861107826233, "learning_rate": 3.0340615396419524e-07, "loss": 0.3719, "step": 5436 }, { "epoch": 2.6991560483203707, "grad_norm": 0.41493046283721924, "learning_rate": 3.0241584978467354e-07, "loss": 0.3452, "step": 5437 }, { "epoch": 2.6996524904848584, "grad_norm": 0.37400972843170166, "learning_rate": 3.0142711400082626e-07, "loss": 0.3609, "step": 5438 }, { "epoch": 2.700148932649346, "grad_norm": 0.3746316432952881, "learning_rate": 3.004399469427666e-07, "loss": 0.2861, "step": 5439 }, { "epoch": 2.700645374813834, "grad_norm": 0.37637075781822205, "learning_rate": 2.994543489400797e-07, "loss": 0.3505, "step": 5440 }, { "epoch": 2.701141816978322, "grad_norm": 0.40387675166130066, "learning_rate": 2.9847032032183366e-07, "loss": 0.3218, "step": 5441 }, { "epoch": 2.70163825914281, "grad_norm": 0.41622886061668396, "learning_rate": 2.974878614165666e-07, "loss": 0.374, "step": 5442 }, { "epoch": 2.7021347013072976, "grad_norm": 0.3589390218257904, "learning_rate": 2.965069725522951e-07, "loss": 0.3375, "step": 5443 }, { "epoch": 2.702631143471786, "grad_norm": 0.453521728515625, "learning_rate": 2.955276540565122e-07, "loss": 0.3564, "step": 5444 }, { "epoch": 2.7031275856362735, "grad_norm": 0.46870410442352295, "learning_rate": 2.945499062561846e-07, "loss": 0.2841, "step": 5445 }, { "epoch": 2.7036240278007613, "grad_norm": 0.41607415676116943, "learning_rate": 2.9357372947775684e-07, "loss": 0.3337, "step": 5446 }, { "epoch": 2.704120469965249, "grad_norm": 0.3908180296421051, "learning_rate": 2.925991240471471e-07, "loss": 0.3106, "step": 5447 }, { "epoch": 2.7046169121297368, "grad_norm": 0.4316963851451874, "learning_rate": 2.916260902897494e-07, "loss": 0.3524, "step": 5448 }, { "epoch": 2.705113354294225, "grad_norm": 0.45699816942214966, "learning_rate": 2.9065462853043345e-07, "loss": 0.3305, "step": 5449 }, { "epoch": 2.7056097964587127, "grad_norm": 0.3711526393890381, "learning_rate": 2.896847390935442e-07, "loss": 0.2825, "step": 5450 }, { "epoch": 2.7061062386232004, "grad_norm": 0.4218170940876007, "learning_rate": 2.887164223029015e-07, "loss": 0.3091, "step": 5451 }, { "epoch": 2.706602680787688, "grad_norm": 0.4424842894077301, "learning_rate": 2.8774967848179956e-07, "loss": 0.336, "step": 5452 }, { "epoch": 2.707099122952176, "grad_norm": 0.4174235761165619, "learning_rate": 2.8678450795300907e-07, "loss": 0.3641, "step": 5453 }, { "epoch": 2.707595565116664, "grad_norm": 0.3761306405067444, "learning_rate": 2.8582091103877274e-07, "loss": 0.3128, "step": 5454 }, { "epoch": 2.708092007281152, "grad_norm": 0.4222295582294464, "learning_rate": 2.848588880608094e-07, "loss": 0.3409, "step": 5455 }, { "epoch": 2.7085884494456396, "grad_norm": 0.37286752462387085, "learning_rate": 2.8389843934031327e-07, "loss": 0.285, "step": 5456 }, { "epoch": 2.7090848916101273, "grad_norm": 0.44079747796058655, "learning_rate": 2.8293956519795216e-07, "loss": 0.3713, "step": 5457 }, { "epoch": 2.709581333774615, "grad_norm": 0.3826771080493927, "learning_rate": 2.8198226595386736e-07, "loss": 0.3394, "step": 5458 }, { "epoch": 2.7100777759391033, "grad_norm": 0.5094356536865234, "learning_rate": 2.810265419276753e-07, "loss": 0.3934, "step": 5459 }, { "epoch": 2.710574218103591, "grad_norm": 0.37687331438064575, "learning_rate": 2.800723934384658e-07, "loss": 0.3454, "step": 5460 }, { "epoch": 2.7110706602680787, "grad_norm": 0.3838455080986023, "learning_rate": 2.79119820804804e-07, "loss": 0.3158, "step": 5461 }, { "epoch": 2.7115671024325665, "grad_norm": 0.43037182092666626, "learning_rate": 2.7816882434472836e-07, "loss": 0.3426, "step": 5462 }, { "epoch": 2.7120635445970542, "grad_norm": 0.44415760040283203, "learning_rate": 2.772194043757481e-07, "loss": 0.3892, "step": 5463 }, { "epoch": 2.7125599867615424, "grad_norm": 0.4604109227657318, "learning_rate": 2.762715612148525e-07, "loss": 0.3063, "step": 5464 }, { "epoch": 2.71305642892603, "grad_norm": 0.43064025044441223, "learning_rate": 2.7532529517849795e-07, "loss": 0.3806, "step": 5465 }, { "epoch": 2.713552871090518, "grad_norm": 0.43869179487228394, "learning_rate": 2.7438060658261825e-07, "loss": 0.3026, "step": 5466 }, { "epoch": 2.7140493132550056, "grad_norm": 0.4150119423866272, "learning_rate": 2.7343749574261836e-07, "loss": 0.3435, "step": 5467 }, { "epoch": 2.7145457554194934, "grad_norm": 0.40988361835479736, "learning_rate": 2.7249596297337755e-07, "loss": 0.301, "step": 5468 }, { "epoch": 2.7150421975839816, "grad_norm": 0.45916664600372314, "learning_rate": 2.715560085892494e-07, "loss": 0.3982, "step": 5469 }, { "epoch": 2.7155386397484693, "grad_norm": 0.4514376223087311, "learning_rate": 2.7061763290405606e-07, "loss": 0.3242, "step": 5470 }, { "epoch": 2.716035081912957, "grad_norm": 0.43188929557800293, "learning_rate": 2.6968083623109984e-07, "loss": 0.2869, "step": 5471 }, { "epoch": 2.7165315240774452, "grad_norm": 0.43210798501968384, "learning_rate": 2.687456188831483e-07, "loss": 0.3824, "step": 5472 }, { "epoch": 2.7170279662419325, "grad_norm": 0.4095292389392853, "learning_rate": 2.678119811724461e-07, "loss": 0.3411, "step": 5473 }, { "epoch": 2.7175244084064207, "grad_norm": 0.39188265800476074, "learning_rate": 2.6687992341070944e-07, "loss": 0.3073, "step": 5474 }, { "epoch": 2.7180208505709085, "grad_norm": 0.4033234417438507, "learning_rate": 2.6594944590912774e-07, "loss": 0.3332, "step": 5475 }, { "epoch": 2.718517292735396, "grad_norm": 0.3782966136932373, "learning_rate": 2.650205489783625e-07, "loss": 0.3248, "step": 5476 }, { "epoch": 2.7190137348998844, "grad_norm": 0.4632273316383362, "learning_rate": 2.6409323292854563e-07, "loss": 0.4386, "step": 5477 }, { "epoch": 2.719510177064372, "grad_norm": 0.3797893226146698, "learning_rate": 2.6316749806928277e-07, "loss": 0.3079, "step": 5478 }, { "epoch": 2.72000661922886, "grad_norm": 0.439754456281662, "learning_rate": 2.6224334470965284e-07, "loss": 0.3491, "step": 5479 }, { "epoch": 2.7205030613933476, "grad_norm": 0.367870956659317, "learning_rate": 2.613207731582057e-07, "loss": 0.3183, "step": 5480 }, { "epoch": 2.7209995035578354, "grad_norm": 0.41652312874794006, "learning_rate": 2.60399783722961e-07, "loss": 0.3246, "step": 5481 }, { "epoch": 2.7214959457223236, "grad_norm": 0.39525216817855835, "learning_rate": 2.594803767114146e-07, "loss": 0.3403, "step": 5482 }, { "epoch": 2.7219923878868113, "grad_norm": 0.42034703493118286, "learning_rate": 2.5856255243052964e-07, "loss": 0.3909, "step": 5483 }, { "epoch": 2.722488830051299, "grad_norm": 0.40849390625953674, "learning_rate": 2.5764631118674275e-07, "loss": 0.2807, "step": 5484 }, { "epoch": 2.722985272215787, "grad_norm": 0.42509549856185913, "learning_rate": 2.5673165328596315e-07, "loss": 0.3193, "step": 5485 }, { "epoch": 2.7234817143802745, "grad_norm": 0.44716307520866394, "learning_rate": 2.5581857903356935e-07, "loss": 0.3701, "step": 5486 }, { "epoch": 2.7239781565447627, "grad_norm": 0.37942925095558167, "learning_rate": 2.5490708873441295e-07, "loss": 0.3342, "step": 5487 }, { "epoch": 2.7244745987092505, "grad_norm": 0.3869624733924866, "learning_rate": 2.5399718269281505e-07, "loss": 0.3093, "step": 5488 }, { "epoch": 2.724971040873738, "grad_norm": 0.40599703788757324, "learning_rate": 2.5308886121256816e-07, "loss": 0.3471, "step": 5489 }, { "epoch": 2.725467483038226, "grad_norm": 0.4271185100078583, "learning_rate": 2.5218212459693636e-07, "loss": 0.3979, "step": 5490 }, { "epoch": 2.7259639252027137, "grad_norm": 0.3867637515068054, "learning_rate": 2.5127697314865475e-07, "loss": 0.3105, "step": 5491 }, { "epoch": 2.726460367367202, "grad_norm": 0.3886217176914215, "learning_rate": 2.5037340716992874e-07, "loss": 0.3428, "step": 5492 }, { "epoch": 2.7269568095316896, "grad_norm": 0.3669852018356323, "learning_rate": 2.494714269624343e-07, "loss": 0.367, "step": 5493 }, { "epoch": 2.7274532516961774, "grad_norm": 0.3700280487537384, "learning_rate": 2.485710328273194e-07, "loss": 0.3959, "step": 5494 }, { "epoch": 2.727949693860665, "grad_norm": 0.3612445890903473, "learning_rate": 2.4767222506519863e-07, "loss": 0.3448, "step": 5495 }, { "epoch": 2.728446136025153, "grad_norm": 0.37657469511032104, "learning_rate": 2.467750039761613e-07, "loss": 0.3608, "step": 5496 }, { "epoch": 2.728942578189641, "grad_norm": 0.3734438419342041, "learning_rate": 2.4587936985976445e-07, "loss": 0.3052, "step": 5497 }, { "epoch": 2.7294390203541288, "grad_norm": 0.3909023702144623, "learning_rate": 2.4498532301503563e-07, "loss": 0.3018, "step": 5498 }, { "epoch": 2.7299354625186165, "grad_norm": 0.4059731066226959, "learning_rate": 2.440928637404749e-07, "loss": 0.3408, "step": 5499 }, { "epoch": 2.7304319046831043, "grad_norm": 0.4051850438117981, "learning_rate": 2.4320199233404675e-07, "loss": 0.3404, "step": 5500 }, { "epoch": 2.730928346847592, "grad_norm": 0.42812708020210266, "learning_rate": 2.4231270909319203e-07, "loss": 0.3377, "step": 5501 }, { "epoch": 2.73142478901208, "grad_norm": 0.40564393997192383, "learning_rate": 2.4142501431481613e-07, "loss": 0.3307, "step": 5502 }, { "epoch": 2.731921231176568, "grad_norm": 0.43526867032051086, "learning_rate": 2.4053890829529804e-07, "loss": 0.3426, "step": 5503 }, { "epoch": 2.7324176733410557, "grad_norm": 0.4112488627433777, "learning_rate": 2.396543913304822e-07, "loss": 0.3234, "step": 5504 }, { "epoch": 2.732914115505544, "grad_norm": 0.4496501088142395, "learning_rate": 2.387714637156874e-07, "loss": 0.4301, "step": 5505 }, { "epoch": 2.7334105576700316, "grad_norm": 0.3873920440673828, "learning_rate": 2.3789012574569726e-07, "loss": 0.2102, "step": 5506 }, { "epoch": 2.7339069998345193, "grad_norm": 0.46437519788742065, "learning_rate": 2.3701037771476642e-07, "loss": 0.3196, "step": 5507 }, { "epoch": 2.734403441999007, "grad_norm": 0.40325406193733215, "learning_rate": 2.361322199166205e-07, "loss": 0.3725, "step": 5508 }, { "epoch": 2.734899884163495, "grad_norm": 0.37573060393333435, "learning_rate": 2.352556526444516e-07, "loss": 0.3125, "step": 5509 }, { "epoch": 2.735396326327983, "grad_norm": 0.4340779185295105, "learning_rate": 2.3438067619092176e-07, "loss": 0.3953, "step": 5510 }, { "epoch": 2.7358927684924708, "grad_norm": 0.40689072012901306, "learning_rate": 2.335072908481606e-07, "loss": 0.3216, "step": 5511 }, { "epoch": 2.7363892106569585, "grad_norm": 0.4699394106864929, "learning_rate": 2.3263549690777044e-07, "loss": 0.3313, "step": 5512 }, { "epoch": 2.7368856528214462, "grad_norm": 0.4393646717071533, "learning_rate": 2.3176529466081733e-07, "loss": 0.3082, "step": 5513 }, { "epoch": 2.737382094985934, "grad_norm": 0.4442075490951538, "learning_rate": 2.3089668439783885e-07, "loss": 0.3292, "step": 5514 }, { "epoch": 2.737878537150422, "grad_norm": 0.4245433807373047, "learning_rate": 2.3002966640884084e-07, "loss": 0.3763, "step": 5515 }, { "epoch": 2.73837497931491, "grad_norm": 0.3830384612083435, "learning_rate": 2.2916424098329614e-07, "loss": 0.332, "step": 5516 }, { "epoch": 2.7388714214793977, "grad_norm": 0.37740617990493774, "learning_rate": 2.2830040841014812e-07, "loss": 0.3075, "step": 5517 }, { "epoch": 2.7393678636438854, "grad_norm": 0.38811296224594116, "learning_rate": 2.2743816897780547e-07, "loss": 0.3321, "step": 5518 }, { "epoch": 2.739864305808373, "grad_norm": 0.3993789851665497, "learning_rate": 2.265775229741468e-07, "loss": 0.3127, "step": 5519 }, { "epoch": 2.7403607479728613, "grad_norm": 0.40702342987060547, "learning_rate": 2.2571847068651898e-07, "loss": 0.365, "step": 5520 }, { "epoch": 2.740857190137349, "grad_norm": 0.4278445243835449, "learning_rate": 2.2486101240173585e-07, "loss": 0.3147, "step": 5521 }, { "epoch": 2.741353632301837, "grad_norm": 0.4015655517578125, "learning_rate": 2.2400514840608012e-07, "loss": 0.3073, "step": 5522 }, { "epoch": 2.7418500744663246, "grad_norm": 0.4397425353527069, "learning_rate": 2.231508789853004e-07, "loss": 0.4209, "step": 5523 }, { "epoch": 2.7423465166308123, "grad_norm": 0.4194750487804413, "learning_rate": 2.222982044246158e-07, "loss": 0.3078, "step": 5524 }, { "epoch": 2.7428429587953005, "grad_norm": 0.43593141436576843, "learning_rate": 2.2144712500870913e-07, "loss": 0.3509, "step": 5525 }, { "epoch": 2.7433394009597882, "grad_norm": 0.4024130702018738, "learning_rate": 2.2059764102173364e-07, "loss": 0.2853, "step": 5526 }, { "epoch": 2.743835843124276, "grad_norm": 0.40416717529296875, "learning_rate": 2.1974975274730857e-07, "loss": 0.3035, "step": 5527 }, { "epoch": 2.7443322852887637, "grad_norm": 0.4255681037902832, "learning_rate": 2.1890346046852197e-07, "loss": 0.3282, "step": 5528 }, { "epoch": 2.7448287274532515, "grad_norm": 0.4180999994277954, "learning_rate": 2.1805876446792607e-07, "loss": 0.3285, "step": 5529 }, { "epoch": 2.7453251696177396, "grad_norm": 0.38148921728134155, "learning_rate": 2.1721566502754255e-07, "loss": 0.3093, "step": 5530 }, { "epoch": 2.7458216117822274, "grad_norm": 0.391960084438324, "learning_rate": 2.1637416242886012e-07, "loss": 0.3287, "step": 5531 }, { "epoch": 2.746318053946715, "grad_norm": 0.45747220516204834, "learning_rate": 2.1553425695283293e-07, "loss": 0.2941, "step": 5532 }, { "epoch": 2.7468144961112033, "grad_norm": 0.40619924664497375, "learning_rate": 2.1469594887988277e-07, "loss": 0.3227, "step": 5533 }, { "epoch": 2.7473109382756906, "grad_norm": 0.42584800720214844, "learning_rate": 2.1385923848989797e-07, "loss": 0.3248, "step": 5534 }, { "epoch": 2.747807380440179, "grad_norm": 0.43798989057540894, "learning_rate": 2.13024126062234e-07, "loss": 0.3612, "step": 5535 }, { "epoch": 2.7483038226046665, "grad_norm": 0.3704552948474884, "learning_rate": 2.1219061187571056e-07, "loss": 0.2937, "step": 5536 }, { "epoch": 2.7488002647691543, "grad_norm": 0.45479345321655273, "learning_rate": 2.1135869620861671e-07, "loss": 0.4091, "step": 5537 }, { "epoch": 2.7492967069336425, "grad_norm": 0.4124833047389984, "learning_rate": 2.1052837933870583e-07, "loss": 0.3479, "step": 5538 }, { "epoch": 2.74979314909813, "grad_norm": 0.3866921067237854, "learning_rate": 2.09699661543199e-07, "loss": 0.343, "step": 5539 }, { "epoch": 2.750289591262618, "grad_norm": 0.3996095657348633, "learning_rate": 2.0887254309878202e-07, "loss": 0.3495, "step": 5540 }, { "epoch": 2.7507860334271057, "grad_norm": 0.380301296710968, "learning_rate": 2.0804702428160629e-07, "loss": 0.312, "step": 5541 }, { "epoch": 2.7512824755915934, "grad_norm": 0.37395673990249634, "learning_rate": 2.072231053672924e-07, "loss": 0.3131, "step": 5542 }, { "epoch": 2.7517789177560816, "grad_norm": 0.41218167543411255, "learning_rate": 2.0640078663092256e-07, "loss": 0.3801, "step": 5543 }, { "epoch": 2.7522753599205694, "grad_norm": 0.38472992181777954, "learning_rate": 2.055800683470477e-07, "loss": 0.288, "step": 5544 }, { "epoch": 2.752771802085057, "grad_norm": 0.45680516958236694, "learning_rate": 2.0476095078968195e-07, "loss": 0.3738, "step": 5545 }, { "epoch": 2.753268244249545, "grad_norm": 0.4016171097755432, "learning_rate": 2.0394343423230824e-07, "loss": 0.3219, "step": 5546 }, { "epoch": 2.7537646864140326, "grad_norm": 0.3902214765548706, "learning_rate": 2.0312751894787208e-07, "loss": 0.3851, "step": 5547 }, { "epoch": 2.754261128578521, "grad_norm": 0.3829158842563629, "learning_rate": 2.0231320520878507e-07, "loss": 0.3223, "step": 5548 }, { "epoch": 2.7547575707430085, "grad_norm": 0.3888126015663147, "learning_rate": 2.0150049328692578e-07, "loss": 0.3839, "step": 5549 }, { "epoch": 2.7552540129074963, "grad_norm": 0.38957181572914124, "learning_rate": 2.0068938345363497e-07, "loss": 0.339, "step": 5550 }, { "epoch": 2.755750455071984, "grad_norm": 0.4039171040058136, "learning_rate": 1.9987987597972212e-07, "loss": 0.4115, "step": 5551 }, { "epoch": 2.7562468972364718, "grad_norm": 0.44173017144203186, "learning_rate": 1.9907197113545716e-07, "loss": 0.3349, "step": 5552 }, { "epoch": 2.75674333940096, "grad_norm": 0.4175202250480652, "learning_rate": 1.9826566919058043e-07, "loss": 0.2891, "step": 5553 }, { "epoch": 2.7572397815654477, "grad_norm": 0.42621278762817383, "learning_rate": 1.9746097041429212e-07, "loss": 0.3358, "step": 5554 }, { "epoch": 2.7577362237299354, "grad_norm": 0.414017915725708, "learning_rate": 1.9665787507525958e-07, "loss": 0.2931, "step": 5555 }, { "epoch": 2.758232665894423, "grad_norm": 0.38516101241111755, "learning_rate": 1.958563834416155e-07, "loss": 0.2982, "step": 5556 }, { "epoch": 2.758729108058911, "grad_norm": 0.39604052901268005, "learning_rate": 1.9505649578095532e-07, "loss": 0.3232, "step": 5557 }, { "epoch": 2.759225550223399, "grad_norm": 0.46611452102661133, "learning_rate": 1.9425821236034094e-07, "loss": 0.3229, "step": 5558 }, { "epoch": 2.759721992387887, "grad_norm": 0.40672630071640015, "learning_rate": 1.9346153344629583e-07, "loss": 0.3565, "step": 5559 }, { "epoch": 2.7602184345523746, "grad_norm": 0.3890216648578644, "learning_rate": 1.9266645930481053e-07, "loss": 0.3527, "step": 5560 }, { "epoch": 2.7607148767168623, "grad_norm": 0.3842744827270508, "learning_rate": 1.9187299020133775e-07, "loss": 0.3489, "step": 5561 }, { "epoch": 2.76121131888135, "grad_norm": 0.4090636670589447, "learning_rate": 1.910811264007967e-07, "loss": 0.3454, "step": 5562 }, { "epoch": 2.7617077610458383, "grad_norm": 0.4101029336452484, "learning_rate": 1.9029086816756804e-07, "loss": 0.3792, "step": 5563 }, { "epoch": 2.762204203210326, "grad_norm": 0.3835335969924927, "learning_rate": 1.8950221576549743e-07, "loss": 0.3552, "step": 5564 }, { "epoch": 2.7627006453748137, "grad_norm": 0.41774940490722656, "learning_rate": 1.887151694578959e-07, "loss": 0.3688, "step": 5565 }, { "epoch": 2.763197087539302, "grad_norm": 0.42919084429740906, "learning_rate": 1.8792972950753495e-07, "loss": 0.2938, "step": 5566 }, { "epoch": 2.7636935297037892, "grad_norm": 0.43168744444847107, "learning_rate": 1.8714589617665314e-07, "loss": 0.3155, "step": 5567 }, { "epoch": 2.7641899718682774, "grad_norm": 0.3771354556083679, "learning_rate": 1.8636366972694996e-07, "loss": 0.3078, "step": 5568 }, { "epoch": 2.764686414032765, "grad_norm": 0.43734851479530334, "learning_rate": 1.8558305041958992e-07, "loss": 0.3111, "step": 5569 }, { "epoch": 2.765182856197253, "grad_norm": 0.39887431263923645, "learning_rate": 1.8480403851520167e-07, "loss": 0.3501, "step": 5570 }, { "epoch": 2.765679298361741, "grad_norm": 0.4364931285381317, "learning_rate": 1.840266342738739e-07, "loss": 0.3285, "step": 5571 }, { "epoch": 2.766175740526229, "grad_norm": 0.3780038356781006, "learning_rate": 1.832508379551634e-07, "loss": 0.3388, "step": 5572 }, { "epoch": 2.7666721826907166, "grad_norm": 0.3972470760345459, "learning_rate": 1.8247664981808522e-07, "loss": 0.3672, "step": 5573 }, { "epoch": 2.7671686248552043, "grad_norm": 0.42592653632164, "learning_rate": 1.8170407012112146e-07, "loss": 0.3544, "step": 5574 }, { "epoch": 2.767665067019692, "grad_norm": 0.41457757353782654, "learning_rate": 1.8093309912221302e-07, "loss": 0.3336, "step": 5575 }, { "epoch": 2.7681615091841802, "grad_norm": 0.4397576153278351, "learning_rate": 1.8016373707876956e-07, "loss": 0.3309, "step": 5576 }, { "epoch": 2.768657951348668, "grad_norm": 0.3965109586715698, "learning_rate": 1.7939598424765726e-07, "loss": 0.334, "step": 5577 }, { "epoch": 2.7691543935131557, "grad_norm": 0.41245967149734497, "learning_rate": 1.7862984088520886e-07, "loss": 0.3765, "step": 5578 }, { "epoch": 2.7696508356776435, "grad_norm": 0.355829119682312, "learning_rate": 1.778653072472203e-07, "loss": 0.3325, "step": 5579 }, { "epoch": 2.770147277842131, "grad_norm": 0.4044354259967804, "learning_rate": 1.7710238358894683e-07, "loss": 0.3421, "step": 5580 }, { "epoch": 2.7706437200066194, "grad_norm": 0.3869410455226898, "learning_rate": 1.763410701651086e-07, "loss": 0.3159, "step": 5581 }, { "epoch": 2.771140162171107, "grad_norm": 0.40044987201690674, "learning_rate": 1.7558136722988617e-07, "loss": 0.3414, "step": 5582 }, { "epoch": 2.771636604335595, "grad_norm": 0.3946406841278076, "learning_rate": 1.7482327503692552e-07, "loss": 0.3377, "step": 5583 }, { "epoch": 2.7721330465000826, "grad_norm": 0.40170231461524963, "learning_rate": 1.7406679383933255e-07, "loss": 0.3228, "step": 5584 }, { "epoch": 2.7726294886645704, "grad_norm": 0.43349331617355347, "learning_rate": 1.7331192388967523e-07, "loss": 0.3425, "step": 5585 }, { "epoch": 2.7731259308290586, "grad_norm": 0.4213322103023529, "learning_rate": 1.7255866543998412e-07, "loss": 0.3385, "step": 5586 }, { "epoch": 2.7736223729935463, "grad_norm": 0.3882998824119568, "learning_rate": 1.7180701874175198e-07, "loss": 0.3066, "step": 5587 }, { "epoch": 2.774118815158034, "grad_norm": 0.4347441792488098, "learning_rate": 1.710569840459342e-07, "loss": 0.3501, "step": 5588 }, { "epoch": 2.774615257322522, "grad_norm": 0.4374261796474457, "learning_rate": 1.7030856160294485e-07, "loss": 0.3312, "step": 5589 }, { "epoch": 2.7751116994870095, "grad_norm": 0.45612287521362305, "learning_rate": 1.695617516626641e-07, "loss": 0.3217, "step": 5590 }, { "epoch": 2.7756081416514977, "grad_norm": 0.4140341281890869, "learning_rate": 1.6881655447442968e-07, "loss": 0.3864, "step": 5591 }, { "epoch": 2.7761045838159855, "grad_norm": 0.37536442279815674, "learning_rate": 1.680729702870437e-07, "loss": 0.3546, "step": 5592 }, { "epoch": 2.776601025980473, "grad_norm": 0.4267463684082031, "learning_rate": 1.6733099934876873e-07, "loss": 0.316, "step": 5593 }, { "epoch": 2.777097468144961, "grad_norm": 0.40759530663490295, "learning_rate": 1.6659064190732764e-07, "loss": 0.3422, "step": 5594 }, { "epoch": 2.7775939103094487, "grad_norm": 0.3903905153274536, "learning_rate": 1.6585189820990776e-07, "loss": 0.3464, "step": 5595 }, { "epoch": 2.778090352473937, "grad_norm": 0.3592281937599182, "learning_rate": 1.6511476850315344e-07, "loss": 0.2979, "step": 5596 }, { "epoch": 2.7785867946384246, "grad_norm": 0.41320571303367615, "learning_rate": 1.643792530331728e-07, "loss": 0.3541, "step": 5597 }, { "epoch": 2.7790832368029124, "grad_norm": 0.4212174415588379, "learning_rate": 1.6364535204553444e-07, "loss": 0.3165, "step": 5598 }, { "epoch": 2.7795796789674005, "grad_norm": 0.3999731242656708, "learning_rate": 1.62913065785269e-07, "loss": 0.3199, "step": 5599 }, { "epoch": 2.7800761211318883, "grad_norm": 0.41962122917175293, "learning_rate": 1.621823944968659e-07, "loss": 0.3574, "step": 5600 }, { "epoch": 2.780572563296376, "grad_norm": 0.43282949924468994, "learning_rate": 1.6145333842427612e-07, "loss": 0.3745, "step": 5601 }, { "epoch": 2.7810690054608638, "grad_norm": 0.3729257881641388, "learning_rate": 1.6072589781091274e-07, "loss": 0.2974, "step": 5602 }, { "epoch": 2.7815654476253515, "grad_norm": 0.3955931067466736, "learning_rate": 1.6000007289964815e-07, "loss": 0.3851, "step": 5603 }, { "epoch": 2.7820618897898397, "grad_norm": 0.3522312641143799, "learning_rate": 1.5927586393281458e-07, "loss": 0.2963, "step": 5604 }, { "epoch": 2.7825583319543274, "grad_norm": 0.4400804936885834, "learning_rate": 1.5855327115220698e-07, "loss": 0.3862, "step": 5605 }, { "epoch": 2.783054774118815, "grad_norm": 0.44675981998443604, "learning_rate": 1.57832294799079e-07, "loss": 0.3191, "step": 5606 }, { "epoch": 2.783551216283303, "grad_norm": 0.3684999644756317, "learning_rate": 1.5711293511414482e-07, "loss": 0.3255, "step": 5607 }, { "epoch": 2.7840476584477907, "grad_norm": 0.3327977657318115, "learning_rate": 1.5639519233757895e-07, "loss": 0.326, "step": 5608 }, { "epoch": 2.784544100612279, "grad_norm": 0.4028298258781433, "learning_rate": 1.556790667090169e-07, "loss": 0.3441, "step": 5609 }, { "epoch": 2.7850405427767666, "grad_norm": 0.40507131814956665, "learning_rate": 1.5496455846755242e-07, "loss": 0.3624, "step": 5610 }, { "epoch": 2.7855369849412543, "grad_norm": 0.5011515617370605, "learning_rate": 1.542516678517425e-07, "loss": 0.3574, "step": 5611 }, { "epoch": 2.786033427105742, "grad_norm": 0.40797507762908936, "learning_rate": 1.5354039509959894e-07, "loss": 0.379, "step": 5612 }, { "epoch": 2.78652986927023, "grad_norm": 0.42064592242240906, "learning_rate": 1.5283074044859904e-07, "loss": 0.2762, "step": 5613 }, { "epoch": 2.787026311434718, "grad_norm": 0.3897102177143097, "learning_rate": 1.5212270413567544e-07, "loss": 0.3257, "step": 5614 }, { "epoch": 2.7875227535992058, "grad_norm": 0.4186277389526367, "learning_rate": 1.514162863972235e-07, "loss": 0.3487, "step": 5615 }, { "epoch": 2.7880191957636935, "grad_norm": 0.38658642768859863, "learning_rate": 1.5071148746909569e-07, "loss": 0.3047, "step": 5616 }, { "epoch": 2.7885156379281812, "grad_norm": 0.4081628918647766, "learning_rate": 1.5000830758660656e-07, "loss": 0.3422, "step": 5617 }, { "epoch": 2.789012080092669, "grad_norm": 0.40198972821235657, "learning_rate": 1.493067469845283e-07, "loss": 0.3893, "step": 5618 }, { "epoch": 2.789508522257157, "grad_norm": 0.3986530303955078, "learning_rate": 1.486068058970913e-07, "loss": 0.3137, "step": 5619 }, { "epoch": 2.790004964421645, "grad_norm": 0.39720138907432556, "learning_rate": 1.479084845579898e-07, "loss": 0.3524, "step": 5620 }, { "epoch": 2.7905014065861327, "grad_norm": 0.38843733072280884, "learning_rate": 1.4721178320037167e-07, "loss": 0.3259, "step": 5621 }, { "epoch": 2.7909978487506204, "grad_norm": 0.382226824760437, "learning_rate": 1.4651670205684863e-07, "loss": 0.3158, "step": 5622 }, { "epoch": 2.791494290915108, "grad_norm": 0.3747442066669464, "learning_rate": 1.4582324135948734e-07, "loss": 0.3852, "step": 5623 }, { "epoch": 2.7919907330795963, "grad_norm": 0.4528321921825409, "learning_rate": 1.4513140133981752e-07, "loss": 0.3226, "step": 5624 }, { "epoch": 2.792487175244084, "grad_norm": 0.409446656703949, "learning_rate": 1.4444118222882387e-07, "loss": 0.2763, "step": 5625 }, { "epoch": 2.792983617408572, "grad_norm": 0.42727628350257874, "learning_rate": 1.4375258425695317e-07, "loss": 0.4215, "step": 5626 }, { "epoch": 2.79348005957306, "grad_norm": 0.4044087827205658, "learning_rate": 1.4306560765410925e-07, "loss": 0.318, "step": 5627 }, { "epoch": 2.7939765017375473, "grad_norm": 0.40617257356643677, "learning_rate": 1.4238025264965428e-07, "loss": 0.3713, "step": 5628 }, { "epoch": 2.7944729439020355, "grad_norm": 0.37889471650123596, "learning_rate": 1.4169651947241069e-07, "loss": 0.2916, "step": 5629 }, { "epoch": 2.7949693860665232, "grad_norm": 0.4430776834487915, "learning_rate": 1.4101440835065705e-07, "loss": 0.3369, "step": 5630 }, { "epoch": 2.795465828231011, "grad_norm": 0.41295963525772095, "learning_rate": 1.4033391951213392e-07, "loss": 0.3157, "step": 5631 }, { "epoch": 2.795962270395499, "grad_norm": 0.39726823568344116, "learning_rate": 1.3965505318403572e-07, "loss": 0.3375, "step": 5632 }, { "epoch": 2.796458712559987, "grad_norm": 0.41550153493881226, "learning_rate": 1.389778095930183e-07, "loss": 0.3694, "step": 5633 }, { "epoch": 2.7969551547244746, "grad_norm": 0.34945812821388245, "learning_rate": 1.3830218896519532e-07, "loss": 0.3506, "step": 5634 }, { "epoch": 2.7974515968889624, "grad_norm": 0.40394625067710876, "learning_rate": 1.3762819152613793e-07, "loss": 0.3345, "step": 5635 }, { "epoch": 2.79794803905345, "grad_norm": 0.40280991792678833, "learning_rate": 1.3695581750087562e-07, "loss": 0.2915, "step": 5636 }, { "epoch": 2.7984444812179383, "grad_norm": 0.4210692346096039, "learning_rate": 1.3628506711389545e-07, "loss": 0.3518, "step": 5637 }, { "epoch": 2.798940923382426, "grad_norm": 0.4187069833278656, "learning_rate": 1.3561594058914218e-07, "loss": 0.3655, "step": 5638 }, { "epoch": 2.799437365546914, "grad_norm": 0.3845495879650116, "learning_rate": 1.3494843815002047e-07, "loss": 0.2899, "step": 5639 }, { "epoch": 2.7999338077114015, "grad_norm": 0.42045947909355164, "learning_rate": 1.3428256001939034e-07, "loss": 0.3594, "step": 5640 }, { "epoch": 2.8004302498758893, "grad_norm": 0.3866070806980133, "learning_rate": 1.3361830641957118e-07, "loss": 0.319, "step": 5641 }, { "epoch": 2.8009266920403775, "grad_norm": 0.3916507661342621, "learning_rate": 1.3295567757233729e-07, "loss": 0.3283, "step": 5642 }, { "epoch": 2.801423134204865, "grad_norm": 0.4049995243549347, "learning_rate": 1.3229467369892446e-07, "loss": 0.3107, "step": 5643 }, { "epoch": 2.801919576369353, "grad_norm": 0.46600762009620667, "learning_rate": 1.3163529502002337e-07, "loss": 0.3268, "step": 5644 }, { "epoch": 2.8024160185338407, "grad_norm": 0.43006351590156555, "learning_rate": 1.3097754175578182e-07, "loss": 0.2969, "step": 5645 }, { "epoch": 2.8029124606983284, "grad_norm": 0.34895065426826477, "learning_rate": 1.303214141258069e-07, "loss": 0.2855, "step": 5646 }, { "epoch": 2.8034089028628166, "grad_norm": 0.4171791076660156, "learning_rate": 1.2966691234916119e-07, "loss": 0.3372, "step": 5647 }, { "epoch": 2.8039053450273044, "grad_norm": 0.4083437919616699, "learning_rate": 1.290140366443654e-07, "loss": 0.3283, "step": 5648 }, { "epoch": 2.804401787191792, "grad_norm": 0.3871341347694397, "learning_rate": 1.2836278722939576e-07, "loss": 0.3332, "step": 5649 }, { "epoch": 2.80489822935628, "grad_norm": 0.39147478342056274, "learning_rate": 1.2771316432168889e-07, "loss": 0.3419, "step": 5650 }, { "epoch": 2.8053946715207676, "grad_norm": 0.4568330645561218, "learning_rate": 1.270651681381341e-07, "loss": 0.3572, "step": 5651 }, { "epoch": 2.805891113685256, "grad_norm": 0.40049341320991516, "learning_rate": 1.2641879889508158e-07, "loss": 0.278, "step": 5652 }, { "epoch": 2.8063875558497435, "grad_norm": 0.4152141809463501, "learning_rate": 1.2577405680833433e-07, "loss": 0.3548, "step": 5653 }, { "epoch": 2.8068839980142313, "grad_norm": 0.4032229781150818, "learning_rate": 1.2513094209315625e-07, "loss": 0.3148, "step": 5654 }, { "epoch": 2.807380440178719, "grad_norm": 0.3819986879825592, "learning_rate": 1.24489454964265e-07, "loss": 0.3867, "step": 5655 }, { "epoch": 2.8078768823432068, "grad_norm": 0.3465961515903473, "learning_rate": 1.2384959563583542e-07, "loss": 0.3494, "step": 5656 }, { "epoch": 2.808373324507695, "grad_norm": 0.3691265881061554, "learning_rate": 1.2321136432149938e-07, "loss": 0.3873, "step": 5657 }, { "epoch": 2.8088697666721827, "grad_norm": 0.3849101662635803, "learning_rate": 1.2257476123434474e-07, "loss": 0.3492, "step": 5658 }, { "epoch": 2.8093662088366704, "grad_norm": 0.4279913902282715, "learning_rate": 1.2193978658691708e-07, "loss": 0.2941, "step": 5659 }, { "epoch": 2.8098626510011586, "grad_norm": 0.4405595660209656, "learning_rate": 1.2130644059121565e-07, "loss": 0.3163, "step": 5660 }, { "epoch": 2.8103590931656464, "grad_norm": 0.4128803610801697, "learning_rate": 1.2067472345869858e-07, "loss": 0.3149, "step": 5661 }, { "epoch": 2.810855535330134, "grad_norm": 0.40669357776641846, "learning_rate": 1.2004463540027822e-07, "loss": 0.3939, "step": 5662 }, { "epoch": 2.811351977494622, "grad_norm": 0.4245052635669708, "learning_rate": 1.1941617662632466e-07, "loss": 0.3492, "step": 5663 }, { "epoch": 2.8118484196591096, "grad_norm": 0.43527984619140625, "learning_rate": 1.1878934734666281e-07, "loss": 0.3664, "step": 5664 }, { "epoch": 2.8123448618235978, "grad_norm": 0.3805314898490906, "learning_rate": 1.1816414777057361e-07, "loss": 0.2598, "step": 5665 }, { "epoch": 2.8128413039880855, "grad_norm": 0.4063083231449127, "learning_rate": 1.1754057810679509e-07, "loss": 0.3365, "step": 5666 }, { "epoch": 2.8133377461525733, "grad_norm": 0.4397348165512085, "learning_rate": 1.1691863856351904e-07, "loss": 0.3287, "step": 5667 }, { "epoch": 2.813834188317061, "grad_norm": 0.40094977617263794, "learning_rate": 1.1629832934839491e-07, "loss": 0.3051, "step": 5668 }, { "epoch": 2.8143306304815487, "grad_norm": 0.40386635065078735, "learning_rate": 1.1567965066852704e-07, "loss": 0.3696, "step": 5669 }, { "epoch": 2.814827072646037, "grad_norm": 0.4364614188671112, "learning_rate": 1.1506260273047576e-07, "loss": 0.2757, "step": 5670 }, { "epoch": 2.8153235148105247, "grad_norm": 0.43934112787246704, "learning_rate": 1.1444718574025516e-07, "loss": 0.3131, "step": 5671 }, { "epoch": 2.8158199569750124, "grad_norm": 0.4163736402988434, "learning_rate": 1.1383339990333753e-07, "loss": 0.3473, "step": 5672 }, { "epoch": 2.8163163991395, "grad_norm": 0.42462676763534546, "learning_rate": 1.1322124542465008e-07, "loss": 0.365, "step": 5673 }, { "epoch": 2.816812841303988, "grad_norm": 0.37780794501304626, "learning_rate": 1.1261072250857264e-07, "loss": 0.3265, "step": 5674 }, { "epoch": 2.817309283468476, "grad_norm": 0.41820523142814636, "learning_rate": 1.1200183135894327e-07, "loss": 0.3332, "step": 5675 }, { "epoch": 2.817805725632964, "grad_norm": 0.4030681550502777, "learning_rate": 1.113945721790538e-07, "loss": 0.3763, "step": 5676 }, { "epoch": 2.8183021677974516, "grad_norm": 0.38245952129364014, "learning_rate": 1.1078894517165206e-07, "loss": 0.3065, "step": 5677 }, { "epoch": 2.8187986099619393, "grad_norm": 0.35790058970451355, "learning_rate": 1.1018495053894018e-07, "loss": 0.3041, "step": 5678 }, { "epoch": 2.819295052126427, "grad_norm": 0.38997283577919006, "learning_rate": 1.095825884825752e-07, "loss": 0.3503, "step": 5679 }, { "epoch": 2.8197914942909152, "grad_norm": 0.3888738751411438, "learning_rate": 1.0898185920366954e-07, "loss": 0.3462, "step": 5680 }, { "epoch": 2.820287936455403, "grad_norm": 0.39366623759269714, "learning_rate": 1.0838276290279115e-07, "loss": 0.3778, "step": 5681 }, { "epoch": 2.8207843786198907, "grad_norm": 0.40260618925094604, "learning_rate": 1.0778529977996166e-07, "loss": 0.2931, "step": 5682 }, { "epoch": 2.8212808207843785, "grad_norm": 0.41703835129737854, "learning_rate": 1.0718947003465652e-07, "loss": 0.3261, "step": 5683 }, { "epoch": 2.821777262948866, "grad_norm": 0.4332718849182129, "learning_rate": 1.0659527386580882e-07, "loss": 0.3692, "step": 5684 }, { "epoch": 2.8222737051133544, "grad_norm": 0.44160082936286926, "learning_rate": 1.0600271147180374e-07, "loss": 0.3248, "step": 5685 }, { "epoch": 2.822770147277842, "grad_norm": 0.45093557238578796, "learning_rate": 1.0541178305048139e-07, "loss": 0.3589, "step": 5686 }, { "epoch": 2.82326658944233, "grad_norm": 0.36293932795524597, "learning_rate": 1.0482248879913725e-07, "loss": 0.3024, "step": 5687 }, { "epoch": 2.823763031606818, "grad_norm": 0.39521634578704834, "learning_rate": 1.0423482891452119e-07, "loss": 0.3388, "step": 5688 }, { "epoch": 2.8242594737713054, "grad_norm": 0.38599103689193726, "learning_rate": 1.0364880359283625e-07, "loss": 0.3598, "step": 5689 }, { "epoch": 2.8247559159357936, "grad_norm": 0.3976854681968689, "learning_rate": 1.0306441302973924e-07, "loss": 0.3617, "step": 5690 }, { "epoch": 2.8252523581002813, "grad_norm": 0.3567262291908264, "learning_rate": 1.024816574203441e-07, "loss": 0.2878, "step": 5691 }, { "epoch": 2.825748800264769, "grad_norm": 0.42374634742736816, "learning_rate": 1.0190053695921631e-07, "loss": 0.3756, "step": 5692 }, { "epoch": 2.8262452424292572, "grad_norm": 0.3691122829914093, "learning_rate": 1.0132105184037677e-07, "loss": 0.3391, "step": 5693 }, { "epoch": 2.826741684593745, "grad_norm": 0.40426990389823914, "learning_rate": 1.007432022572985e-07, "loss": 0.3578, "step": 5694 }, { "epoch": 2.8272381267582327, "grad_norm": 0.38420569896698, "learning_rate": 1.001669884029105e-07, "loss": 0.3589, "step": 5695 }, { "epoch": 2.8277345689227205, "grad_norm": 0.4101555347442627, "learning_rate": 9.959241046959611e-08, "loss": 0.324, "step": 5696 }, { "epoch": 2.828231011087208, "grad_norm": 0.4496050179004669, "learning_rate": 9.90194686491891e-08, "loss": 0.3783, "step": 5697 }, { "epoch": 2.8287274532516964, "grad_norm": 0.38552477955818176, "learning_rate": 9.84481631329809e-08, "loss": 0.2735, "step": 5698 }, { "epoch": 2.829223895416184, "grad_norm": 0.43160563707351685, "learning_rate": 9.787849411171391e-08, "loss": 0.3336, "step": 5699 }, { "epoch": 2.829720337580672, "grad_norm": 0.3522651791572571, "learning_rate": 9.731046177558545e-08, "loss": 0.3532, "step": 5700 }, { "epoch": 2.8302167797451596, "grad_norm": 0.4407612979412079, "learning_rate": 9.674406631424549e-08, "loss": 0.4055, "step": 5701 }, { "epoch": 2.8307132219096474, "grad_norm": 0.41404494643211365, "learning_rate": 9.617930791679997e-08, "loss": 0.3472, "step": 5702 }, { "epoch": 2.8312096640741355, "grad_norm": 0.39883071184158325, "learning_rate": 9.561618677180418e-08, "loss": 0.2988, "step": 5703 }, { "epoch": 2.8317061062386233, "grad_norm": 0.39975976943969727, "learning_rate": 9.505470306726994e-08, "loss": 0.3552, "step": 5704 }, { "epoch": 2.832202548403111, "grad_norm": 0.3969367742538452, "learning_rate": 9.449485699066174e-08, "loss": 0.3697, "step": 5705 }, { "epoch": 2.8326989905675988, "grad_norm": 0.40568220615386963, "learning_rate": 9.393664872889619e-08, "loss": 0.3258, "step": 5706 }, { "epoch": 2.8331954327320865, "grad_norm": 0.4054194688796997, "learning_rate": 9.338007846834474e-08, "loss": 0.3553, "step": 5707 }, { "epoch": 2.8336918748965747, "grad_norm": 0.42730197310447693, "learning_rate": 9.282514639482986e-08, "loss": 0.3478, "step": 5708 }, { "epoch": 2.8341883170610624, "grad_norm": 0.4237118661403656, "learning_rate": 9.227185269362893e-08, "loss": 0.3808, "step": 5709 }, { "epoch": 2.83468475922555, "grad_norm": 0.3718591332435608, "learning_rate": 9.172019754947192e-08, "loss": 0.2829, "step": 5710 }, { "epoch": 2.835181201390038, "grad_norm": 0.4048415720462799, "learning_rate": 9.117018114654153e-08, "loss": 0.3148, "step": 5711 }, { "epoch": 2.8356776435545257, "grad_norm": 0.40610775351524353, "learning_rate": 9.062180366847306e-08, "loss": 0.3521, "step": 5712 }, { "epoch": 2.836174085719014, "grad_norm": 0.37505561113357544, "learning_rate": 9.007506529835452e-08, "loss": 0.3074, "step": 5713 }, { "epoch": 2.8366705278835016, "grad_norm": 0.4041326344013214, "learning_rate": 8.952996621872767e-08, "loss": 0.3941, "step": 5714 }, { "epoch": 2.8371669700479893, "grad_norm": 0.3897143006324768, "learning_rate": 8.898650661158582e-08, "loss": 0.3202, "step": 5715 }, { "epoch": 2.837663412212477, "grad_norm": 0.38799551129341125, "learning_rate": 8.844468665837546e-08, "loss": 0.3275, "step": 5716 }, { "epoch": 2.838159854376965, "grad_norm": 0.40541133284568787, "learning_rate": 8.790450653999527e-08, "loss": 0.3511, "step": 5717 }, { "epoch": 2.838656296541453, "grad_norm": 0.3959459066390991, "learning_rate": 8.736596643679762e-08, "loss": 0.3943, "step": 5718 }, { "epoch": 2.8391527387059408, "grad_norm": 0.3728752136230469, "learning_rate": 8.682906652858536e-08, "loss": 0.3069, "step": 5719 }, { "epoch": 2.8396491808704285, "grad_norm": 0.44820937514305115, "learning_rate": 8.629380699461453e-08, "loss": 0.355, "step": 5720 }, { "epoch": 2.8401456230349167, "grad_norm": 0.4480232000350952, "learning_rate": 8.576018801359553e-08, "loss": 0.3265, "step": 5721 }, { "epoch": 2.8406420651994044, "grad_norm": 0.44813811779022217, "learning_rate": 8.52282097636875e-08, "loss": 0.3163, "step": 5722 }, { "epoch": 2.841138507363892, "grad_norm": 0.3961264193058014, "learning_rate": 8.469787242250504e-08, "loss": 0.3358, "step": 5723 }, { "epoch": 2.84163494952838, "grad_norm": 0.3519952893257141, "learning_rate": 8.416917616711095e-08, "loss": 0.3206, "step": 5724 }, { "epoch": 2.8421313916928677, "grad_norm": 0.39172908663749695, "learning_rate": 8.364212117402515e-08, "loss": 0.3165, "step": 5725 }, { "epoch": 2.842627833857356, "grad_norm": 0.40848448872566223, "learning_rate": 8.311670761921576e-08, "loss": 0.3728, "step": 5726 }, { "epoch": 2.8431242760218436, "grad_norm": 0.35839521884918213, "learning_rate": 8.259293567810412e-08, "loss": 0.325, "step": 5727 }, { "epoch": 2.8436207181863313, "grad_norm": 0.3775790333747864, "learning_rate": 8.207080552556313e-08, "loss": 0.3153, "step": 5728 }, { "epoch": 2.844117160350819, "grad_norm": 0.43258991837501526, "learning_rate": 8.155031733591889e-08, "loss": 0.3359, "step": 5729 }, { "epoch": 2.844613602515307, "grad_norm": 0.4094749689102173, "learning_rate": 8.103147128294742e-08, "loss": 0.3443, "step": 5730 }, { "epoch": 2.845110044679795, "grad_norm": 0.3896683156490326, "learning_rate": 8.051426753987734e-08, "loss": 0.2968, "step": 5731 }, { "epoch": 2.8456064868442827, "grad_norm": 0.4444136619567871, "learning_rate": 7.999870627938944e-08, "loss": 0.3757, "step": 5732 }, { "epoch": 2.8461029290087705, "grad_norm": 0.4167706370353699, "learning_rate": 7.94847876736149e-08, "loss": 0.302, "step": 5733 }, { "epoch": 2.8465993711732582, "grad_norm": 0.39528876543045044, "learning_rate": 7.897251189413758e-08, "loss": 0.3246, "step": 5734 }, { "epoch": 2.847095813337746, "grad_norm": 0.3681027293205261, "learning_rate": 7.846187911199287e-08, "loss": 0.3662, "step": 5735 }, { "epoch": 2.847592255502234, "grad_norm": 0.40667811036109924, "learning_rate": 7.795288949766611e-08, "loss": 0.3533, "step": 5736 }, { "epoch": 2.848088697666722, "grad_norm": 0.41571998596191406, "learning_rate": 7.744554322109633e-08, "loss": 0.2979, "step": 5737 }, { "epoch": 2.8485851398312096, "grad_norm": 0.4148336350917816, "learning_rate": 7.693984045167192e-08, "loss": 0.3683, "step": 5738 }, { "epoch": 2.8490815819956974, "grad_norm": 0.4461182653903961, "learning_rate": 7.643578135823338e-08, "loss": 0.3144, "step": 5739 }, { "epoch": 2.849578024160185, "grad_norm": 0.39310595393180847, "learning_rate": 7.593336610907221e-08, "loss": 0.2899, "step": 5740 }, { "epoch": 2.8500744663246733, "grad_norm": 0.40152573585510254, "learning_rate": 7.543259487193144e-08, "loss": 0.3626, "step": 5741 }, { "epoch": 2.850570908489161, "grad_norm": 0.3983107805252075, "learning_rate": 7.493346781400457e-08, "loss": 0.287, "step": 5742 }, { "epoch": 2.851067350653649, "grad_norm": 0.41202110052108765, "learning_rate": 7.443598510193716e-08, "loss": 0.3391, "step": 5743 }, { "epoch": 2.8515637928181365, "grad_norm": 0.39261916279792786, "learning_rate": 7.394014690182583e-08, "loss": 0.3575, "step": 5744 }, { "epoch": 2.8520602349826243, "grad_norm": 0.4239805340766907, "learning_rate": 7.344595337921534e-08, "loss": 0.3222, "step": 5745 }, { "epoch": 2.8525566771471125, "grad_norm": 0.400960236787796, "learning_rate": 7.29534046991054e-08, "loss": 0.3416, "step": 5746 }, { "epoch": 2.8530531193116, "grad_norm": 0.3858281373977661, "learning_rate": 7.246250102594332e-08, "loss": 0.3483, "step": 5747 }, { "epoch": 2.853549561476088, "grad_norm": 0.38175177574157715, "learning_rate": 7.197324252362969e-08, "loss": 0.316, "step": 5748 }, { "epoch": 2.854046003640576, "grad_norm": 0.41201308369636536, "learning_rate": 7.148562935551384e-08, "loss": 0.3478, "step": 5749 }, { "epoch": 2.8545424458050634, "grad_norm": 0.4445143938064575, "learning_rate": 7.099966168439665e-08, "loss": 0.3174, "step": 5750 }, { "epoch": 2.8550388879695516, "grad_norm": 0.39158493280410767, "learning_rate": 7.051533967252999e-08, "loss": 0.3135, "step": 5751 }, { "epoch": 2.8555353301340394, "grad_norm": 0.4652399718761444, "learning_rate": 7.003266348161508e-08, "loss": 0.3404, "step": 5752 }, { "epoch": 2.856031772298527, "grad_norm": 0.4369666278362274, "learning_rate": 6.955163327280467e-08, "loss": 0.3682, "step": 5753 }, { "epoch": 2.8565282144630153, "grad_norm": 0.3941148817539215, "learning_rate": 6.907224920670141e-08, "loss": 0.3217, "step": 5754 }, { "epoch": 2.857024656627503, "grad_norm": 0.3931836187839508, "learning_rate": 6.859451144336005e-08, "loss": 0.3405, "step": 5755 }, { "epoch": 2.857521098791991, "grad_norm": 0.4035256505012512, "learning_rate": 6.811842014228243e-08, "loss": 0.3571, "step": 5756 }, { "epoch": 2.8580175409564785, "grad_norm": 0.3862469494342804, "learning_rate": 6.764397546242307e-08, "loss": 0.3185, "step": 5757 }, { "epoch": 2.8585139831209663, "grad_norm": 0.42715984582901, "learning_rate": 6.717117756218639e-08, "loss": 0.3234, "step": 5758 }, { "epoch": 2.8590104252854545, "grad_norm": 0.37161341309547424, "learning_rate": 6.670002659942664e-08, "loss": 0.3599, "step": 5759 }, { "epoch": 2.859506867449942, "grad_norm": 0.37804287672042847, "learning_rate": 6.623052273144914e-08, "loss": 0.3718, "step": 5760 }, { "epoch": 2.86000330961443, "grad_norm": 0.4040234386920929, "learning_rate": 6.576266611500681e-08, "loss": 0.3859, "step": 5761 }, { "epoch": 2.8604997517789177, "grad_norm": 0.45269304513931274, "learning_rate": 6.529645690630526e-08, "loss": 0.3567, "step": 5762 }, { "epoch": 2.8609961939434054, "grad_norm": 0.4460732638835907, "learning_rate": 6.483189526099887e-08, "loss": 0.2597, "step": 5763 }, { "epoch": 2.8614926361078936, "grad_norm": 0.4080105125904083, "learning_rate": 6.436898133419301e-08, "loss": 0.3342, "step": 5764 }, { "epoch": 2.8619890782723814, "grad_norm": 0.3918408155441284, "learning_rate": 6.390771528044016e-08, "loss": 0.322, "step": 5765 }, { "epoch": 2.862485520436869, "grad_norm": 0.43365371227264404, "learning_rate": 6.344809725374601e-08, "loss": 0.372, "step": 5766 }, { "epoch": 2.862981962601357, "grad_norm": 0.4400475025177002, "learning_rate": 6.29901274075645e-08, "loss": 0.3934, "step": 5767 }, { "epoch": 2.8634784047658446, "grad_norm": 0.4010108411312103, "learning_rate": 6.253380589479829e-08, "loss": 0.2855, "step": 5768 }, { "epoch": 2.8639748469303328, "grad_norm": 0.3814796209335327, "learning_rate": 6.207913286780221e-08, "loss": 0.2945, "step": 5769 }, { "epoch": 2.8644712890948205, "grad_norm": 0.4063533544540405, "learning_rate": 6.162610847837813e-08, "loss": 0.3135, "step": 5770 }, { "epoch": 2.8649677312593083, "grad_norm": 0.42633187770843506, "learning_rate": 6.117473287777897e-08, "loss": 0.3913, "step": 5771 }, { "epoch": 2.865464173423796, "grad_norm": 0.43415120244026184, "learning_rate": 6.072500621670585e-08, "loss": 0.3192, "step": 5772 }, { "epoch": 2.8659606155882837, "grad_norm": 0.3872830867767334, "learning_rate": 6.027692864531198e-08, "loss": 0.3983, "step": 5773 }, { "epoch": 2.866457057752772, "grad_norm": 0.40884390473365784, "learning_rate": 5.983050031319714e-08, "loss": 0.3446, "step": 5774 }, { "epoch": 2.8669534999172597, "grad_norm": 0.4053225815296173, "learning_rate": 5.938572136941156e-08, "loss": 0.3469, "step": 5775 }, { "epoch": 2.8674499420817474, "grad_norm": 0.4514065682888031, "learning_rate": 5.8942591962455334e-08, "loss": 0.3391, "step": 5776 }, { "epoch": 2.867946384246235, "grad_norm": 0.3633047342300415, "learning_rate": 5.8501112240277325e-08, "loss": 0.2924, "step": 5777 }, { "epoch": 2.868442826410723, "grad_norm": 0.4189305305480957, "learning_rate": 5.806128235027575e-08, "loss": 0.3273, "step": 5778 }, { "epoch": 2.868939268575211, "grad_norm": 0.3792153596878052, "learning_rate": 5.762310243929703e-08, "loss": 0.3215, "step": 5779 }, { "epoch": 2.869435710739699, "grad_norm": 0.33800220489501953, "learning_rate": 5.718657265363858e-08, "loss": 0.3427, "step": 5780 }, { "epoch": 2.8699321529041866, "grad_norm": 0.4286692440509796, "learning_rate": 5.6751693139044385e-08, "loss": 0.3966, "step": 5781 }, { "epoch": 2.8704285950686748, "grad_norm": 0.39143723249435425, "learning_rate": 5.6318464040710505e-08, "loss": 0.2575, "step": 5782 }, { "epoch": 2.8709250372331625, "grad_norm": 0.44471025466918945, "learning_rate": 5.5886885503279584e-08, "loss": 0.2993, "step": 5783 }, { "epoch": 2.8714214793976502, "grad_norm": 0.41497012972831726, "learning_rate": 5.5456957670843584e-08, "loss": 0.3363, "step": 5784 }, { "epoch": 2.871917921562138, "grad_norm": 0.38614141941070557, "learning_rate": 5.502868068694489e-08, "loss": 0.2759, "step": 5785 }, { "epoch": 2.8724143637266257, "grad_norm": 0.45891204476356506, "learning_rate": 5.460205469457247e-08, "loss": 0.3948, "step": 5786 }, { "epoch": 2.872910805891114, "grad_norm": 0.37147071957588196, "learning_rate": 5.417707983616571e-08, "loss": 0.3182, "step": 5787 }, { "epoch": 2.8734072480556017, "grad_norm": 0.4025980532169342, "learning_rate": 5.375375625361168e-08, "loss": 0.2802, "step": 5788 }, { "epoch": 2.8739036902200894, "grad_norm": 0.40240663290023804, "learning_rate": 5.3332084088247305e-08, "loss": 0.3693, "step": 5789 }, { "epoch": 2.874400132384577, "grad_norm": 0.41056540608406067, "learning_rate": 5.2912063480857204e-08, "loss": 0.381, "step": 5790 }, { "epoch": 2.874896574549065, "grad_norm": 0.3302866220474243, "learning_rate": 5.2493694571673635e-08, "loss": 0.307, "step": 5791 }, { "epoch": 2.875393016713553, "grad_norm": 0.36400750279426575, "learning_rate": 5.207697750038099e-08, "loss": 0.327, "step": 5792 }, { "epoch": 2.875889458878041, "grad_norm": 0.3649979829788208, "learning_rate": 5.166191240610741e-08, "loss": 0.3223, "step": 5793 }, { "epoch": 2.8763859010425286, "grad_norm": 0.40915513038635254, "learning_rate": 5.1248499427433704e-08, "loss": 0.3542, "step": 5794 }, { "epoch": 2.8768823432070163, "grad_norm": 0.4373628795146942, "learning_rate": 5.083673870238559e-08, "loss": 0.3303, "step": 5795 }, { "epoch": 2.877378785371504, "grad_norm": 0.36582931876182556, "learning_rate": 5.0426630368440314e-08, "loss": 0.3132, "step": 5796 }, { "epoch": 2.8778752275359922, "grad_norm": 0.4322242736816406, "learning_rate": 5.001817456252111e-08, "loss": 0.3583, "step": 5797 }, { "epoch": 2.87837166970048, "grad_norm": 0.3963957130908966, "learning_rate": 4.9611371421000034e-08, "loss": 0.326, "step": 5798 }, { "epoch": 2.8788681118649677, "grad_norm": 0.3832509517669678, "learning_rate": 4.9206221079698414e-08, "loss": 0.3303, "step": 5799 }, { "epoch": 2.8793645540294555, "grad_norm": 0.42877867817878723, "learning_rate": 4.8802723673884164e-08, "loss": 0.3108, "step": 5800 }, { "epoch": 2.879860996193943, "grad_norm": 0.43497028946876526, "learning_rate": 4.8400879338274534e-08, "loss": 0.3406, "step": 5801 }, { "epoch": 2.8803574383584314, "grad_norm": 0.4111415147781372, "learning_rate": 4.800068820703385e-08, "loss": 0.3383, "step": 5802 }, { "epoch": 2.880853880522919, "grad_norm": 0.37414711713790894, "learning_rate": 4.760215041377636e-08, "loss": 0.2528, "step": 5803 }, { "epoch": 2.881350322687407, "grad_norm": 0.40475592017173767, "learning_rate": 4.7205266091561175e-08, "loss": 0.3358, "step": 5804 }, { "epoch": 2.8818467648518946, "grad_norm": 0.3978184461593628, "learning_rate": 4.6810035372898964e-08, "loss": 0.3756, "step": 5805 }, { "epoch": 2.8823432070163824, "grad_norm": 0.3941318988800049, "learning_rate": 4.641645838974473e-08, "loss": 0.3184, "step": 5806 }, { "epoch": 2.8828396491808705, "grad_norm": 0.4028162658214569, "learning_rate": 4.602453527350503e-08, "loss": 0.3113, "step": 5807 }, { "epoch": 2.8833360913453583, "grad_norm": 0.43801549077033997, "learning_rate": 4.5634266155031304e-08, "loss": 0.371, "step": 5808 }, { "epoch": 2.883832533509846, "grad_norm": 0.41522839665412903, "learning_rate": 4.524565116462321e-08, "loss": 0.3769, "step": 5809 }, { "epoch": 2.884328975674334, "grad_norm": 0.34430480003356934, "learning_rate": 4.4858690432030285e-08, "loss": 0.2477, "step": 5810 }, { "epoch": 2.8848254178388215, "grad_norm": 0.3948115110397339, "learning_rate": 4.447338408644697e-08, "loss": 0.33, "step": 5811 }, { "epoch": 2.8853218600033097, "grad_norm": 0.40096315741539, "learning_rate": 4.4089732256517026e-08, "loss": 0.3026, "step": 5812 }, { "epoch": 2.8858183021677974, "grad_norm": 0.43000489473342896, "learning_rate": 4.370773507033077e-08, "loss": 0.3588, "step": 5813 }, { "epoch": 2.886314744332285, "grad_norm": 0.42548805475234985, "learning_rate": 4.332739265542785e-08, "loss": 0.346, "step": 5814 }, { "epoch": 2.8868111864967734, "grad_norm": 0.3812214434146881, "learning_rate": 4.294870513879335e-08, "loss": 0.371, "step": 5815 }, { "epoch": 2.887307628661261, "grad_norm": 0.38840770721435547, "learning_rate": 4.257167264686113e-08, "loss": 0.3472, "step": 5816 }, { "epoch": 2.887804070825749, "grad_norm": 0.3825359642505646, "learning_rate": 4.219629530551217e-08, "loss": 0.3148, "step": 5817 }, { "epoch": 2.8883005129902366, "grad_norm": 0.39728114008903503, "learning_rate": 4.1822573240073995e-08, "loss": 0.4246, "step": 5818 }, { "epoch": 2.8887969551547243, "grad_norm": 0.3555436134338379, "learning_rate": 4.145050657532346e-08, "loss": 0.2964, "step": 5819 }, { "epoch": 2.8892933973192125, "grad_norm": 0.4042604863643646, "learning_rate": 4.108009543548286e-08, "loss": 0.3621, "step": 5820 }, { "epoch": 2.8897898394837003, "grad_norm": 0.43916961550712585, "learning_rate": 4.071133994422216e-08, "loss": 0.2858, "step": 5821 }, { "epoch": 2.890286281648188, "grad_norm": 0.43632930517196655, "learning_rate": 4.034424022465899e-08, "loss": 0.3831, "step": 5822 }, { "epoch": 2.8907827238126758, "grad_norm": 0.41308557987213135, "learning_rate": 3.9978796399358086e-08, "loss": 0.2933, "step": 5823 }, { "epoch": 2.8912791659771635, "grad_norm": 0.4222511053085327, "learning_rate": 3.961500859033074e-08, "loss": 0.3947, "step": 5824 }, { "epoch": 2.8917756081416517, "grad_norm": 0.36017948389053345, "learning_rate": 3.925287691903701e-08, "loss": 0.324, "step": 5825 }, { "epoch": 2.8922720503061394, "grad_norm": 0.40511274337768555, "learning_rate": 3.8892401506381846e-08, "loss": 0.3114, "step": 5826 }, { "epoch": 2.892768492470627, "grad_norm": 0.433391809463501, "learning_rate": 3.8533582472717877e-08, "loss": 0.3639, "step": 5827 }, { "epoch": 2.893264934635115, "grad_norm": 0.36917147040367126, "learning_rate": 3.817641993784593e-08, "loss": 0.3043, "step": 5828 }, { "epoch": 2.8937613767996027, "grad_norm": 0.36369264125823975, "learning_rate": 3.782091402101229e-08, "loss": 0.318, "step": 5829 }, { "epoch": 2.894257818964091, "grad_norm": 0.39375248551368713, "learning_rate": 3.746706484091145e-08, "loss": 0.4327, "step": 5830 }, { "epoch": 2.8947542611285786, "grad_norm": 0.3349100351333618, "learning_rate": 3.711487251568335e-08, "loss": 0.277, "step": 5831 }, { "epoch": 2.8952507032930663, "grad_norm": 0.4353281557559967, "learning_rate": 3.67643371629145e-08, "loss": 0.3891, "step": 5832 }, { "epoch": 2.895747145457554, "grad_norm": 0.3791491985321045, "learning_rate": 3.641545889964126e-08, "loss": 0.2793, "step": 5833 }, { "epoch": 2.896243587622042, "grad_norm": 0.421994149684906, "learning_rate": 3.606823784234326e-08, "loss": 0.3582, "step": 5834 }, { "epoch": 2.89674002978653, "grad_norm": 0.46146440505981445, "learning_rate": 3.572267410694885e-08, "loss": 0.3436, "step": 5835 }, { "epoch": 2.8972364719510177, "grad_norm": 0.39460471272468567, "learning_rate": 3.5378767808831315e-08, "loss": 0.271, "step": 5836 }, { "epoch": 2.8977329141155055, "grad_norm": 0.4422290325164795, "learning_rate": 3.503651906281269e-08, "loss": 0.3256, "step": 5837 }, { "epoch": 2.8982293562799932, "grad_norm": 0.4112197756767273, "learning_rate": 3.469592798316046e-08, "loss": 0.3294, "step": 5838 }, { "epoch": 2.898725798444481, "grad_norm": 0.41615530848503113, "learning_rate": 3.435699468358755e-08, "loss": 0.3955, "step": 5839 }, { "epoch": 2.899222240608969, "grad_norm": 0.425538569688797, "learning_rate": 3.401971927725623e-08, "loss": 0.3566, "step": 5840 }, { "epoch": 2.899718682773457, "grad_norm": 0.3735370635986328, "learning_rate": 3.368410187677196e-08, "loss": 0.2813, "step": 5841 }, { "epoch": 2.9002151249379446, "grad_norm": 0.4067594110965729, "learning_rate": 3.3350142594190115e-08, "loss": 0.3572, "step": 5842 }, { "epoch": 2.900711567102433, "grad_norm": 0.41241151094436646, "learning_rate": 3.301784154100818e-08, "loss": 0.334, "step": 5843 }, { "epoch": 2.90120800926692, "grad_norm": 0.35503342747688293, "learning_rate": 3.268719882817517e-08, "loss": 0.2741, "step": 5844 }, { "epoch": 2.9017044514314083, "grad_norm": 0.4114900231361389, "learning_rate": 3.235821456608168e-08, "loss": 0.3255, "step": 5845 }, { "epoch": 2.902200893595896, "grad_norm": 0.4036605954170227, "learning_rate": 3.203088886456762e-08, "loss": 0.3463, "step": 5846 }, { "epoch": 2.902697335760384, "grad_norm": 0.37679773569107056, "learning_rate": 3.17052218329178e-08, "loss": 0.32, "step": 5847 }, { "epoch": 2.903193777924872, "grad_norm": 0.4022218585014343, "learning_rate": 3.138121357986357e-08, "loss": 0.3112, "step": 5848 }, { "epoch": 2.9036902200893597, "grad_norm": 0.404115229845047, "learning_rate": 3.105886421358284e-08, "loss": 0.3192, "step": 5849 }, { "epoch": 2.9041866622538475, "grad_norm": 0.40639063715934753, "learning_rate": 3.073817384169841e-08, "loss": 0.3616, "step": 5850 }, { "epoch": 2.904683104418335, "grad_norm": 0.402972012758255, "learning_rate": 3.041914257128131e-08, "loss": 0.3215, "step": 5851 }, { "epoch": 2.905179546582823, "grad_norm": 0.391849160194397, "learning_rate": 3.010177050884633e-08, "loss": 0.3207, "step": 5852 }, { "epoch": 2.905675988747311, "grad_norm": 0.40480294823646545, "learning_rate": 2.9786057760355925e-08, "loss": 0.3303, "step": 5853 }, { "epoch": 2.906172430911799, "grad_norm": 0.4177452027797699, "learning_rate": 2.9472004431218004e-08, "loss": 0.3033, "step": 5854 }, { "epoch": 2.9066688730762866, "grad_norm": 0.3902990221977234, "learning_rate": 2.9159610626286472e-08, "loss": 0.3276, "step": 5855 }, { "epoch": 2.9071653152407744, "grad_norm": 0.40494850277900696, "learning_rate": 2.8848876449860673e-08, "loss": 0.329, "step": 5856 }, { "epoch": 2.907661757405262, "grad_norm": 0.4051888883113861, "learning_rate": 2.8539802005687068e-08, "loss": 0.2954, "step": 5857 }, { "epoch": 2.9081581995697503, "grad_norm": 0.3387204706668854, "learning_rate": 2.823238739695644e-08, "loss": 0.3149, "step": 5858 }, { "epoch": 2.908654641734238, "grad_norm": 0.41355210542678833, "learning_rate": 2.792663272630669e-08, "loss": 0.3701, "step": 5859 }, { "epoch": 2.909151083898726, "grad_norm": 0.39439114928245544, "learning_rate": 2.7622538095820606e-08, "loss": 0.2954, "step": 5860 }, { "epoch": 2.9096475260632135, "grad_norm": 0.4017498791217804, "learning_rate": 2.7320103607027527e-08, "loss": 0.347, "step": 5861 }, { "epoch": 2.9101439682277013, "grad_norm": 0.3897707760334015, "learning_rate": 2.701932936090168e-08, "loss": 0.3005, "step": 5862 }, { "epoch": 2.9106404103921895, "grad_norm": 0.4245111048221588, "learning_rate": 2.672021545786385e-08, "loss": 0.3607, "step": 5863 }, { "epoch": 2.911136852556677, "grad_norm": 0.4133397340774536, "learning_rate": 2.642276199777971e-08, "loss": 0.3083, "step": 5864 }, { "epoch": 2.911633294721165, "grad_norm": 0.4096420109272003, "learning_rate": 2.612696907996093e-08, "loss": 0.3512, "step": 5865 }, { "epoch": 2.9121297368856527, "grad_norm": 0.4079381227493286, "learning_rate": 2.583283680316462e-08, "loss": 0.3265, "step": 5866 }, { "epoch": 2.9126261790501404, "grad_norm": 0.3718471825122833, "learning_rate": 2.5540365265594446e-08, "loss": 0.2935, "step": 5867 }, { "epoch": 2.9131226212146286, "grad_norm": 0.4039651155471802, "learning_rate": 2.5249554564897305e-08, "loss": 0.3973, "step": 5868 }, { "epoch": 2.9136190633791164, "grad_norm": 0.44374462962150574, "learning_rate": 2.496040479816775e-08, "loss": 0.3338, "step": 5869 }, { "epoch": 2.914115505543604, "grad_norm": 0.411701500415802, "learning_rate": 2.467291606194522e-08, "loss": 0.2805, "step": 5870 }, { "epoch": 2.914611947708092, "grad_norm": 0.40065711736679077, "learning_rate": 2.4387088452214046e-08, "loss": 0.3113, "step": 5871 }, { "epoch": 2.9151083898725796, "grad_norm": 0.4410300850868225, "learning_rate": 2.4102922064404566e-08, "loss": 0.3219, "step": 5872 }, { "epoch": 2.9156048320370678, "grad_norm": 0.38630005717277527, "learning_rate": 2.3820416993391437e-08, "loss": 0.3413, "step": 5873 }, { "epoch": 2.9161012742015555, "grad_norm": 0.37381550669670105, "learning_rate": 2.3539573333496436e-08, "loss": 0.3624, "step": 5874 }, { "epoch": 2.9165977163660433, "grad_norm": 0.42805519700050354, "learning_rate": 2.326039117848511e-08, "loss": 0.3056, "step": 5875 }, { "epoch": 2.9170941585305314, "grad_norm": 0.40787017345428467, "learning_rate": 2.298287062156901e-08, "loss": 0.3923, "step": 5876 }, { "epoch": 2.917590600695019, "grad_norm": 0.37354597449302673, "learning_rate": 2.270701175540402e-08, "loss": 0.2958, "step": 5877 }, { "epoch": 2.918087042859507, "grad_norm": 0.38818827271461487, "learning_rate": 2.243281467209313e-08, "loss": 0.3187, "step": 5878 }, { "epoch": 2.9185834850239947, "grad_norm": 0.3986647129058838, "learning_rate": 2.2160279463182554e-08, "loss": 0.3663, "step": 5879 }, { "epoch": 2.9190799271884824, "grad_norm": 0.4397013187408447, "learning_rate": 2.1889406219663955e-08, "loss": 0.3995, "step": 5880 }, { "epoch": 2.9195763693529706, "grad_norm": 0.37760892510414124, "learning_rate": 2.16201950319761e-08, "loss": 0.2758, "step": 5881 }, { "epoch": 2.9200728115174583, "grad_norm": 0.39512211084365845, "learning_rate": 2.135264598999931e-08, "loss": 0.3347, "step": 5882 }, { "epoch": 2.920569253681946, "grad_norm": 0.40310758352279663, "learning_rate": 2.1086759183062132e-08, "loss": 0.3382, "step": 5883 }, { "epoch": 2.921065695846434, "grad_norm": 0.4171357750892639, "learning_rate": 2.0822534699936892e-08, "loss": 0.303, "step": 5884 }, { "epoch": 2.9215621380109216, "grad_norm": 0.3941986858844757, "learning_rate": 2.0559972628840795e-08, "loss": 0.3672, "step": 5885 }, { "epoch": 2.9220585801754098, "grad_norm": 0.40636542439460754, "learning_rate": 2.0299073057435946e-08, "loss": 0.3353, "step": 5886 }, { "epoch": 2.9225550223398975, "grad_norm": 0.36976101994514465, "learning_rate": 2.0039836072829888e-08, "loss": 0.3406, "step": 5887 }, { "epoch": 2.9230514645043852, "grad_norm": 0.4198930859565735, "learning_rate": 1.978226176157505e-08, "loss": 0.3618, "step": 5888 }, { "epoch": 2.923547906668873, "grad_norm": 0.40854188799858093, "learning_rate": 1.9526350209667645e-08, "loss": 0.3101, "step": 5889 }, { "epoch": 2.9240443488333607, "grad_norm": 0.36653128266334534, "learning_rate": 1.9272101502550432e-08, "loss": 0.3121, "step": 5890 }, { "epoch": 2.924540790997849, "grad_norm": 0.3916032612323761, "learning_rate": 1.901951572510996e-08, "loss": 0.3946, "step": 5891 }, { "epoch": 2.9250372331623367, "grad_norm": 0.35889187455177307, "learning_rate": 1.8768592961677655e-08, "loss": 0.3079, "step": 5892 }, { "epoch": 2.9255336753268244, "grad_norm": 0.43059906363487244, "learning_rate": 1.8519333296029286e-08, "loss": 0.3838, "step": 5893 }, { "epoch": 2.926030117491312, "grad_norm": 0.41640961170196533, "learning_rate": 1.827173681138661e-08, "loss": 0.3405, "step": 5894 }, { "epoch": 2.9265265596558, "grad_norm": 0.37810561060905457, "learning_rate": 1.802580359041517e-08, "loss": 0.3111, "step": 5895 }, { "epoch": 2.927023001820288, "grad_norm": 0.35692206025123596, "learning_rate": 1.7781533715225952e-08, "loss": 0.3626, "step": 5896 }, { "epoch": 2.927519443984776, "grad_norm": 0.3907829225063324, "learning_rate": 1.7538927267372606e-08, "loss": 0.3024, "step": 5897 }, { "epoch": 2.9280158861492636, "grad_norm": 0.4815133810043335, "learning_rate": 1.7297984327856456e-08, "loss": 0.3668, "step": 5898 }, { "epoch": 2.9285123283137513, "grad_norm": 0.3621024489402771, "learning_rate": 1.7058704977120366e-08, "loss": 0.3235, "step": 5899 }, { "epoch": 2.929008770478239, "grad_norm": 0.3734637498855591, "learning_rate": 1.6821089295053773e-08, "loss": 0.3668, "step": 5900 }, { "epoch": 2.9295052126427272, "grad_norm": 0.3868551552295685, "learning_rate": 1.6585137360990434e-08, "loss": 0.3349, "step": 5901 }, { "epoch": 2.930001654807215, "grad_norm": 0.44644153118133545, "learning_rate": 1.6350849253708444e-08, "loss": 0.3703, "step": 5902 }, { "epoch": 2.9304980969717027, "grad_norm": 0.4223407804965973, "learning_rate": 1.6118225051429125e-08, "loss": 0.326, "step": 5903 }, { "epoch": 2.930994539136191, "grad_norm": 0.38821113109588623, "learning_rate": 1.5887264831820348e-08, "loss": 0.316, "step": 5904 }, { "epoch": 2.931490981300678, "grad_norm": 0.36502233147621155, "learning_rate": 1.5657968671993208e-08, "loss": 0.3402, "step": 5905 }, { "epoch": 2.9319874234651664, "grad_norm": 0.40602684020996094, "learning_rate": 1.543033664850313e-08, "loss": 0.323, "step": 5906 }, { "epoch": 2.932483865629654, "grad_norm": 0.38417020440101624, "learning_rate": 1.5204368837350437e-08, "loss": 0.3494, "step": 5907 }, { "epoch": 2.932980307794142, "grad_norm": 0.3801725208759308, "learning_rate": 1.498006531398033e-08, "loss": 0.3227, "step": 5908 }, { "epoch": 2.93347674995863, "grad_norm": 0.3998197019100189, "learning_rate": 1.4757426153280685e-08, "loss": 0.3628, "step": 5909 }, { "epoch": 2.933973192123118, "grad_norm": 0.44463130831718445, "learning_rate": 1.4536451429585374e-08, "loss": 0.3487, "step": 5910 }, { "epoch": 2.9344696342876055, "grad_norm": 0.3958512544631958, "learning_rate": 1.4317141216671493e-08, "loss": 0.2828, "step": 5911 }, { "epoch": 2.9349660764520933, "grad_norm": 0.3877900242805481, "learning_rate": 1.409949558776047e-08, "loss": 0.3163, "step": 5912 }, { "epoch": 2.935462518616581, "grad_norm": 0.39859646558761597, "learning_rate": 1.3883514615519178e-08, "loss": 0.3545, "step": 5913 }, { "epoch": 2.935958960781069, "grad_norm": 0.3664691746234894, "learning_rate": 1.3669198372056602e-08, "loss": 0.3171, "step": 5914 }, { "epoch": 2.936455402945557, "grad_norm": 0.40622106194496155, "learning_rate": 1.3456546928928282e-08, "loss": 0.3913, "step": 5915 }, { "epoch": 2.9369518451100447, "grad_norm": 0.3957062065601349, "learning_rate": 1.324556035713187e-08, "loss": 0.338, "step": 5916 }, { "epoch": 2.9374482872745324, "grad_norm": 0.4229326844215393, "learning_rate": 1.3036238727110462e-08, "loss": 0.3561, "step": 5917 }, { "epoch": 2.93794472943902, "grad_norm": 0.3582483232021332, "learning_rate": 1.2828582108750376e-08, "loss": 0.3058, "step": 5918 }, { "epoch": 2.9384411716035084, "grad_norm": 0.3800439238548279, "learning_rate": 1.2622590571383376e-08, "loss": 0.3446, "step": 5919 }, { "epoch": 2.938937613767996, "grad_norm": 0.4294756054878235, "learning_rate": 1.241826418378389e-08, "loss": 0.3597, "step": 5920 }, { "epoch": 2.939434055932484, "grad_norm": 0.42666664719581604, "learning_rate": 1.2215603014170685e-08, "loss": 0.3078, "step": 5921 }, { "epoch": 2.9399304980969716, "grad_norm": 0.41346028447151184, "learning_rate": 1.2014607130207967e-08, "loss": 0.3568, "step": 5922 }, { "epoch": 2.9404269402614593, "grad_norm": 0.36675578355789185, "learning_rate": 1.1815276599001501e-08, "loss": 0.3339, "step": 5923 }, { "epoch": 2.9409233824259475, "grad_norm": 0.4169031083583832, "learning_rate": 1.1617611487103054e-08, "loss": 0.3329, "step": 5924 }, { "epoch": 2.9414198245904353, "grad_norm": 0.41136229038238525, "learning_rate": 1.1421611860507054e-08, "loss": 0.3769, "step": 5925 }, { "epoch": 2.941916266754923, "grad_norm": 0.38694676756858826, "learning_rate": 1.1227277784652823e-08, "loss": 0.307, "step": 5926 }, { "epoch": 2.9424127089194108, "grad_norm": 0.401894748210907, "learning_rate": 1.1034609324423463e-08, "loss": 0.3452, "step": 5927 }, { "epoch": 2.9429091510838985, "grad_norm": 0.40450942516326904, "learning_rate": 1.084360654414529e-08, "loss": 0.3503, "step": 5928 }, { "epoch": 2.9434055932483867, "grad_norm": 0.390234112739563, "learning_rate": 1.0654269507589522e-08, "loss": 0.3512, "step": 5929 }, { "epoch": 2.9439020354128744, "grad_norm": 0.3913736939430237, "learning_rate": 1.0466598277970031e-08, "loss": 0.2905, "step": 5930 }, { "epoch": 2.944398477577362, "grad_norm": 0.3972131311893463, "learning_rate": 1.0280592917945032e-08, "loss": 0.3199, "step": 5931 }, { "epoch": 2.94489491974185, "grad_norm": 0.41364341974258423, "learning_rate": 1.009625348961707e-08, "loss": 0.3389, "step": 5932 }, { "epoch": 2.9453913619063377, "grad_norm": 0.4339613616466522, "learning_rate": 9.913580054532468e-09, "loss": 0.377, "step": 5933 }, { "epoch": 2.945887804070826, "grad_norm": 0.4408336877822876, "learning_rate": 9.732572673680218e-09, "loss": 0.2839, "step": 5934 }, { "epoch": 2.9463842462353136, "grad_norm": 0.45494088530540466, "learning_rate": 9.5532314074942e-09, "loss": 0.3116, "step": 5935 }, { "epoch": 2.9468806883998013, "grad_norm": 0.398556649684906, "learning_rate": 9.375556315850964e-09, "loss": 0.3015, "step": 5936 }, { "epoch": 2.9473771305642895, "grad_norm": 0.3832356333732605, "learning_rate": 9.199547458071945e-09, "loss": 0.3758, "step": 5937 }, { "epoch": 2.9478735727287773, "grad_norm": 0.4625343084335327, "learning_rate": 9.025204892921801e-09, "loss": 0.3309, "step": 5938 }, { "epoch": 2.948370014893265, "grad_norm": 0.377578467130661, "learning_rate": 8.852528678608418e-09, "loss": 0.3544, "step": 5939 }, { "epoch": 2.9488664570577527, "grad_norm": 0.42411988973617554, "learning_rate": 8.681518872784011e-09, "loss": 0.3543, "step": 5940 }, { "epoch": 2.9493628992222405, "grad_norm": 0.402251273393631, "learning_rate": 8.512175532543466e-09, "loss": 0.3551, "step": 5941 }, { "epoch": 2.9498593413867287, "grad_norm": 0.36432355642318726, "learning_rate": 8.344498714427107e-09, "loss": 0.2799, "step": 5942 }, { "epoch": 2.9503557835512164, "grad_norm": 0.42732107639312744, "learning_rate": 8.178488474416269e-09, "loss": 0.3306, "step": 5943 }, { "epoch": 2.950852225715704, "grad_norm": 0.4496327340602875, "learning_rate": 8.014144867938279e-09, "loss": 0.3649, "step": 5944 }, { "epoch": 2.951348667880192, "grad_norm": 0.4010052978992462, "learning_rate": 7.851467949862579e-09, "loss": 0.2899, "step": 5945 }, { "epoch": 2.9518451100446796, "grad_norm": 0.43203189969062805, "learning_rate": 7.690457774502947e-09, "loss": 0.3728, "step": 5946 }, { "epoch": 2.952341552209168, "grad_norm": 0.3798103332519531, "learning_rate": 7.531114395615823e-09, "loss": 0.3096, "step": 5947 }, { "epoch": 2.9528379943736556, "grad_norm": 0.410781592130661, "learning_rate": 7.373437866401434e-09, "loss": 0.3897, "step": 5948 }, { "epoch": 2.9533344365381433, "grad_norm": 0.41284239292144775, "learning_rate": 7.2174282395043314e-09, "loss": 0.3656, "step": 5949 }, { "epoch": 2.953830878702631, "grad_norm": 0.40179452300071716, "learning_rate": 7.06308556701174e-09, "loss": 0.2992, "step": 5950 }, { "epoch": 2.954327320867119, "grad_norm": 0.39929237961769104, "learning_rate": 6.910409900454107e-09, "loss": 0.3021, "step": 5951 }, { "epoch": 2.954823763031607, "grad_norm": 0.37522345781326294, "learning_rate": 6.759401290806211e-09, "loss": 0.3185, "step": 5952 }, { "epoch": 2.9553202051960947, "grad_norm": 0.39308589696884155, "learning_rate": 6.610059788485501e-09, "loss": 0.2883, "step": 5953 }, { "epoch": 2.9558166473605825, "grad_norm": 0.42211636900901794, "learning_rate": 6.462385443353203e-09, "loss": 0.4194, "step": 5954 }, { "epoch": 2.95631308952507, "grad_norm": 0.4073699116706848, "learning_rate": 6.316378304713211e-09, "loss": 0.3418, "step": 5955 }, { "epoch": 2.956809531689558, "grad_norm": 0.4387965500354767, "learning_rate": 6.172038421313753e-09, "loss": 0.3184, "step": 5956 }, { "epoch": 2.957305973854046, "grad_norm": 0.4155275225639343, "learning_rate": 6.029365841345724e-09, "loss": 0.3347, "step": 5957 }, { "epoch": 2.957802416018534, "grad_norm": 0.3686511814594269, "learning_rate": 5.888360612444355e-09, "loss": 0.3436, "step": 5958 }, { "epoch": 2.9582988581830216, "grad_norm": 0.4298279881477356, "learning_rate": 5.749022781686431e-09, "loss": 0.3729, "step": 5959 }, { "epoch": 2.9587953003475094, "grad_norm": 0.4082029163837433, "learning_rate": 5.6113523955941825e-09, "loss": 0.3092, "step": 5960 }, { "epoch": 2.959291742511997, "grad_norm": 0.40297847986221313, "learning_rate": 5.475349500130844e-09, "loss": 0.2866, "step": 5961 }, { "epoch": 2.9597881846764853, "grad_norm": 0.3974447250366211, "learning_rate": 5.341014140705092e-09, "loss": 0.3446, "step": 5962 }, { "epoch": 2.960284626840973, "grad_norm": 0.396836519241333, "learning_rate": 5.208346362167161e-09, "loss": 0.3607, "step": 5963 }, { "epoch": 2.960781069005461, "grad_norm": 0.3953862190246582, "learning_rate": 5.077346208811618e-09, "loss": 0.3298, "step": 5964 }, { "epoch": 2.961277511169949, "grad_norm": 0.39926475286483765, "learning_rate": 4.948013724375145e-09, "loss": 0.3508, "step": 5965 }, { "epoch": 2.9617739533344363, "grad_norm": 0.41339659690856934, "learning_rate": 4.820348952039311e-09, "loss": 0.2914, "step": 5966 }, { "epoch": 2.9622703954989245, "grad_norm": 0.451115220785141, "learning_rate": 4.694351934427799e-09, "loss": 0.3616, "step": 5967 }, { "epoch": 2.962766837663412, "grad_norm": 0.34510934352874756, "learning_rate": 4.5700227136069585e-09, "loss": 0.2782, "step": 5968 }, { "epoch": 2.9632632798279, "grad_norm": 0.3730282783508301, "learning_rate": 4.447361331087474e-09, "loss": 0.3924, "step": 5969 }, { "epoch": 2.963759721992388, "grad_norm": 0.3956826329231262, "learning_rate": 4.326367827822142e-09, "loss": 0.3215, "step": 5970 }, { "epoch": 2.964256164156876, "grad_norm": 0.3380967378616333, "learning_rate": 4.207042244208092e-09, "loss": 0.3385, "step": 5971 }, { "epoch": 2.9647526063213636, "grad_norm": 0.38357189297676086, "learning_rate": 4.0893846200840135e-09, "loss": 0.3703, "step": 5972 }, { "epoch": 2.9652490484858514, "grad_norm": 0.4218943417072296, "learning_rate": 3.973394994733481e-09, "loss": 0.3278, "step": 5973 }, { "epoch": 2.965745490650339, "grad_norm": 0.4508315622806549, "learning_rate": 3.85907340688163e-09, "loss": 0.3934, "step": 5974 }, { "epoch": 2.9662419328148273, "grad_norm": 0.3736129701137543, "learning_rate": 3.746419894697928e-09, "loss": 0.3083, "step": 5975 }, { "epoch": 2.966738374979315, "grad_norm": 0.38193729519844055, "learning_rate": 3.635434495793955e-09, "loss": 0.3538, "step": 5976 }, { "epoch": 2.9672348171438028, "grad_norm": 0.41116863489151, "learning_rate": 3.5261172472245143e-09, "loss": 0.3227, "step": 5977 }, { "epoch": 2.9677312593082905, "grad_norm": 0.4126330614089966, "learning_rate": 3.4184681854876335e-09, "loss": 0.3022, "step": 5978 }, { "epoch": 2.9682277014727783, "grad_norm": 0.3922833204269409, "learning_rate": 3.3124873465251172e-09, "loss": 0.299, "step": 5979 }, { "epoch": 2.9687241436372664, "grad_norm": 0.4300999641418457, "learning_rate": 3.208174765720329e-09, "loss": 0.3705, "step": 5980 }, { "epoch": 2.969220585801754, "grad_norm": 0.4145447015762329, "learning_rate": 3.1055304779009645e-09, "loss": 0.326, "step": 5981 }, { "epoch": 2.969717027966242, "grad_norm": 0.41671282052993774, "learning_rate": 3.004554517336833e-09, "loss": 0.3399, "step": 5982 }, { "epoch": 2.9702134701307297, "grad_norm": 0.40134647488594055, "learning_rate": 2.905246917740967e-09, "loss": 0.2987, "step": 5983 }, { "epoch": 2.9707099122952174, "grad_norm": 0.4094807803630829, "learning_rate": 2.8076077122696222e-09, "loss": 0.3883, "step": 5984 }, { "epoch": 2.9712063544597056, "grad_norm": 0.3986111283302307, "learning_rate": 2.711636933522277e-09, "loss": 0.38, "step": 5985 }, { "epoch": 2.9717027966241933, "grad_norm": 0.3667377531528473, "learning_rate": 2.617334613540523e-09, "loss": 0.2488, "step": 5986 }, { "epoch": 2.972199238788681, "grad_norm": 0.47176724672317505, "learning_rate": 2.5247007838091753e-09, "loss": 0.3231, "step": 5987 }, { "epoch": 2.972695680953169, "grad_norm": 0.37057533860206604, "learning_rate": 2.4337354752562714e-09, "loss": 0.378, "step": 5988 }, { "epoch": 2.9731921231176566, "grad_norm": 0.32265734672546387, "learning_rate": 2.3444387182530726e-09, "loss": 0.2921, "step": 5989 }, { "epoch": 2.9736885652821448, "grad_norm": 0.41314250230789185, "learning_rate": 2.256810542612953e-09, "loss": 0.3735, "step": 5990 }, { "epoch": 2.9741850074466325, "grad_norm": 0.4503249228000641, "learning_rate": 2.170850977592509e-09, "loss": 0.3618, "step": 5991 }, { "epoch": 2.9746814496111202, "grad_norm": 0.3712320327758789, "learning_rate": 2.0865600518915618e-09, "loss": 0.3568, "step": 5992 }, { "epoch": 2.975177891775608, "grad_norm": 0.3775106966495514, "learning_rate": 2.0039377936525995e-09, "loss": 0.2892, "step": 5993 }, { "epoch": 2.9756743339400957, "grad_norm": 0.47457629442214966, "learning_rate": 1.922984230460778e-09, "loss": 0.3683, "step": 5994 }, { "epoch": 2.976170776104584, "grad_norm": 0.4218675196170807, "learning_rate": 1.8436993893444777e-09, "loss": 0.3673, "step": 5995 }, { "epoch": 2.9766672182690717, "grad_norm": 0.4291117787361145, "learning_rate": 1.7660832967741904e-09, "loss": 0.3413, "step": 5996 }, { "epoch": 2.9771636604335594, "grad_norm": 0.4028535485267639, "learning_rate": 1.6901359786641869e-09, "loss": 0.3202, "step": 5997 }, { "epoch": 2.9776601025980476, "grad_norm": 0.4545499384403229, "learning_rate": 1.615857460371406e-09, "loss": 0.3952, "step": 5998 }, { "epoch": 2.9781565447625353, "grad_norm": 0.34265637397766113, "learning_rate": 1.5432477666954548e-09, "loss": 0.2909, "step": 5999 }, { "epoch": 2.978652986927023, "grad_norm": 0.4560388922691345, "learning_rate": 1.4723069218780528e-09, "loss": 0.3599, "step": 6000 }, { "epoch": 2.979149429091511, "grad_norm": 0.3601605296134949, "learning_rate": 1.403034949605253e-09, "loss": 0.3016, "step": 6001 }, { "epoch": 2.9796458712559986, "grad_norm": 0.4446221888065338, "learning_rate": 1.3354318730052219e-09, "loss": 0.3032, "step": 6002 }, { "epoch": 2.9801423134204867, "grad_norm": 0.44241365790367126, "learning_rate": 1.2694977146476828e-09, "loss": 0.3101, "step": 6003 }, { "epoch": 2.9806387555849745, "grad_norm": 0.4097326099872589, "learning_rate": 1.2052324965466934e-09, "loss": 0.3075, "step": 6004 }, { "epoch": 2.9811351977494622, "grad_norm": 0.4109030067920685, "learning_rate": 1.1426362401595337e-09, "loss": 0.3823, "step": 6005 }, { "epoch": 2.98163163991395, "grad_norm": 0.3601486086845398, "learning_rate": 1.0817089663844872e-09, "loss": 0.325, "step": 6006 }, { "epoch": 2.9821280820784377, "grad_norm": 0.4172886908054352, "learning_rate": 1.0224506955636148e-09, "loss": 0.3689, "step": 6007 }, { "epoch": 2.982624524242926, "grad_norm": 0.37111717462539673, "learning_rate": 9.648614474816465e-10, "loss": 0.3371, "step": 6008 }, { "epoch": 2.9831209664074136, "grad_norm": 0.42854925990104675, "learning_rate": 9.089412413665344e-10, "loss": 0.342, "step": 6009 }, { "epoch": 2.9836174085719014, "grad_norm": 0.41237252950668335, "learning_rate": 8.54690095887789e-10, "loss": 0.342, "step": 6010 }, { "epoch": 2.984113850736389, "grad_norm": 0.4051414132118225, "learning_rate": 8.021080291592542e-10, "loss": 0.2995, "step": 6011 }, { "epoch": 2.984610292900877, "grad_norm": 0.38592445850372314, "learning_rate": 7.511950587357764e-10, "loss": 0.3278, "step": 6012 }, { "epoch": 2.985106735065365, "grad_norm": 0.4302501678466797, "learning_rate": 7.019512016165353e-10, "loss": 0.3596, "step": 6013 }, { "epoch": 2.985603177229853, "grad_norm": 0.38670867681503296, "learning_rate": 6.543764742422687e-10, "loss": 0.3469, "step": 6014 }, { "epoch": 2.9860996193943405, "grad_norm": 0.43910959362983704, "learning_rate": 6.084708924969373e-10, "loss": 0.3164, "step": 6015 }, { "epoch": 2.9865960615588283, "grad_norm": 0.41321438550949097, "learning_rate": 5.642344717071702e-10, "loss": 0.3219, "step": 6016 }, { "epoch": 2.987092503723316, "grad_norm": 0.36079317331314087, "learning_rate": 5.21667226642264e-10, "loss": 0.3613, "step": 6017 }, { "epoch": 2.987588945887804, "grad_norm": 0.4007396101951599, "learning_rate": 4.807691715147389e-10, "loss": 0.3933, "step": 6018 }, { "epoch": 2.988085388052292, "grad_norm": 0.3821730613708496, "learning_rate": 4.4154031997867274e-10, "loss": 0.3415, "step": 6019 }, { "epoch": 2.9885818302167797, "grad_norm": 0.43206629157066345, "learning_rate": 4.039806851324768e-10, "loss": 0.3035, "step": 6020 }, { "epoch": 2.9890782723812674, "grad_norm": 0.42903342843055725, "learning_rate": 3.6809027951500987e-10, "loss": 0.3032, "step": 6021 }, { "epoch": 2.989574714545755, "grad_norm": 0.3958488404750824, "learning_rate": 3.338691151100193e-10, "loss": 0.3582, "step": 6022 }, { "epoch": 2.9900711567102434, "grad_norm": 0.3680386245250702, "learning_rate": 3.013172033422551e-10, "loss": 0.3443, "step": 6023 }, { "epoch": 2.990567598874731, "grad_norm": 0.3931167423725128, "learning_rate": 2.7043455508080075e-10, "loss": 0.3353, "step": 6024 }, { "epoch": 2.991064041039219, "grad_norm": 0.4450364410877228, "learning_rate": 2.412211806362974e-10, "loss": 0.3468, "step": 6025 }, { "epoch": 2.991560483203707, "grad_norm": 0.37305769324302673, "learning_rate": 2.1367708976205436e-10, "loss": 0.3186, "step": 6026 }, { "epoch": 2.9920569253681943, "grad_norm": 0.395853728055954, "learning_rate": 1.8780229165404894e-10, "loss": 0.3624, "step": 6027 }, { "epoch": 2.9925533675326825, "grad_norm": 0.42830660939216614, "learning_rate": 1.6359679495148162e-10, "loss": 0.3619, "step": 6028 }, { "epoch": 2.9930498096971703, "grad_norm": 0.42042437195777893, "learning_rate": 1.4106060773622088e-10, "loss": 0.3172, "step": 6029 }, { "epoch": 2.993546251861658, "grad_norm": 0.3937179744243622, "learning_rate": 1.2019373753224816e-10, "loss": 0.2913, "step": 6030 }, { "epoch": 2.994042694026146, "grad_norm": 0.45192936062812805, "learning_rate": 1.0099619130621296e-10, "loss": 0.3279, "step": 6031 }, { "epoch": 2.994539136190634, "grad_norm": 0.3935205042362213, "learning_rate": 8.346797546798791e-11, "loss": 0.3455, "step": 6032 }, { "epoch": 2.9950355783551217, "grad_norm": 0.3690321445465088, "learning_rate": 6.760909586900343e-11, "loss": 0.2601, "step": 6033 }, { "epoch": 2.9955320205196094, "grad_norm": 0.40416309237480164, "learning_rate": 5.3419557805578504e-11, "loss": 0.3745, "step": 6034 }, { "epoch": 2.996028462684097, "grad_norm": 0.3968682587146759, "learning_rate": 4.0899366013924524e-11, "loss": 0.3478, "step": 6035 }, { "epoch": 2.9965249048485854, "grad_norm": 0.41780444979667664, "learning_rate": 3.00485246745863e-11, "loss": 0.3405, "step": 6036 }, { "epoch": 2.997021347013073, "grad_norm": 0.4347599446773529, "learning_rate": 2.086703741022156e-11, "loss": 0.3113, "step": 6037 }, { "epoch": 2.997517789177561, "grad_norm": 0.41572025418281555, "learning_rate": 1.3354907286711184e-11, "loss": 0.3252, "step": 6038 }, { "epoch": 2.9980142313420486, "grad_norm": 0.46962642669677734, "learning_rate": 7.512136812048987e-12, "loss": 0.3442, "step": 6039 }, { "epoch": 2.9985106735065363, "grad_norm": 0.37187421321868896, "learning_rate": 3.3387279363417123e-12, "loss": 0.2766, "step": 6040 }, { "epoch": 2.9990071156710245, "grad_norm": 0.40865832567214966, "learning_rate": 8.346820540294787e-13, "loss": 0.3321, "step": 6041 }, { "epoch": 2.9995035578355123, "grad_norm": 0.42493414878845215, "learning_rate": 0.0, "loss": 0.329, "step": 6042 }, { "epoch": 2.9995035578355123, "step": 6042, "total_flos": 5085377890156544.0, "train_loss": 0.39752762397219116, "train_runtime": 188233.9041, "train_samples_per_second": 3.082, "train_steps_per_second": 0.032 } ], "logging_steps": 1.0, "max_steps": 6042, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5085377890156544.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }