{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9995035578355123, "eval_steps": 500, "global_step": 6042, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004964421644878371, "grad_norm": 4.836659908294678, "learning_rate": 1.652892561983471e-08, "loss": 0.7524, "step": 1 }, { "epoch": 0.0009928843289756743, "grad_norm": 5.3149333000183105, "learning_rate": 3.305785123966942e-08, "loss": 0.8429, "step": 2 }, { "epoch": 0.0014893264934635116, "grad_norm": 4.796141624450684, "learning_rate": 4.958677685950414e-08, "loss": 0.751, "step": 3 }, { "epoch": 0.0019857686579513485, "grad_norm": 5.192905426025391, "learning_rate": 6.611570247933884e-08, "loss": 0.8157, "step": 4 }, { "epoch": 0.0024822108224391857, "grad_norm": 5.575966835021973, "learning_rate": 8.264462809917357e-08, "loss": 0.8351, "step": 5 }, { "epoch": 0.002978652986927023, "grad_norm": 5.241466522216797, "learning_rate": 9.917355371900828e-08, "loss": 0.7967, "step": 6 }, { "epoch": 0.0034750951514148603, "grad_norm": 5.0240254402160645, "learning_rate": 1.1570247933884297e-07, "loss": 0.7923, "step": 7 }, { "epoch": 0.003971537315902697, "grad_norm": 5.488057613372803, "learning_rate": 1.3223140495867768e-07, "loss": 0.8166, "step": 8 }, { "epoch": 0.004467979480390534, "grad_norm": 4.93000602722168, "learning_rate": 1.487603305785124e-07, "loss": 0.7666, "step": 9 }, { "epoch": 0.004964421644878371, "grad_norm": 5.198759078979492, "learning_rate": 1.6528925619834713e-07, "loss": 0.793, "step": 10 }, { "epoch": 0.005460863809366208, "grad_norm": 5.355788230895996, "learning_rate": 1.8181818181818183e-07, "loss": 0.8131, "step": 11 }, { "epoch": 0.005957305973854046, "grad_norm": 5.4223952293396, "learning_rate": 1.9834710743801655e-07, "loss": 0.8186, "step": 12 }, { "epoch": 0.0064537481383418836, "grad_norm": 5.424191474914551, "learning_rate": 2.1487603305785125e-07, "loss": 0.8298, "step": 13 }, { "epoch": 0.006950190302829721, "grad_norm": 4.9827799797058105, "learning_rate": 2.3140495867768595e-07, "loss": 0.8089, "step": 14 }, { "epoch": 0.007446632467317558, "grad_norm": 5.092190742492676, "learning_rate": 2.4793388429752067e-07, "loss": 0.8057, "step": 15 }, { "epoch": 0.007943074631805394, "grad_norm": 5.324559688568115, "learning_rate": 2.6446280991735537e-07, "loss": 0.8056, "step": 16 }, { "epoch": 0.008439516796293232, "grad_norm": 5.1414618492126465, "learning_rate": 2.809917355371901e-07, "loss": 0.7843, "step": 17 }, { "epoch": 0.008935958960781068, "grad_norm": 4.609241485595703, "learning_rate": 2.975206611570248e-07, "loss": 0.7466, "step": 18 }, { "epoch": 0.009432401125268906, "grad_norm": 4.858317852020264, "learning_rate": 3.1404958677685957e-07, "loss": 0.7902, "step": 19 }, { "epoch": 0.009928843289756743, "grad_norm": 4.602066993713379, "learning_rate": 3.3057851239669426e-07, "loss": 0.7504, "step": 20 }, { "epoch": 0.01042528545424458, "grad_norm": 4.686375141143799, "learning_rate": 3.4710743801652896e-07, "loss": 0.7521, "step": 21 }, { "epoch": 0.010921727618732417, "grad_norm": 4.359705448150635, "learning_rate": 3.6363636363636366e-07, "loss": 0.7075, "step": 22 }, { "epoch": 0.011418169783220255, "grad_norm": 4.879459857940674, "learning_rate": 3.8016528925619836e-07, "loss": 0.8005, "step": 23 }, { "epoch": 0.011914611947708093, "grad_norm": 4.037625789642334, "learning_rate": 3.966942148760331e-07, "loss": 0.7572, "step": 24 }, { "epoch": 0.012411054112195929, "grad_norm": 4.0548176765441895, "learning_rate": 4.132231404958678e-07, "loss": 0.7733, "step": 25 }, { "epoch": 0.012907496276683767, "grad_norm": 4.1129045486450195, "learning_rate": 4.297520661157025e-07, "loss": 0.8074, "step": 26 }, { "epoch": 0.013403938441171603, "grad_norm": 3.949422836303711, "learning_rate": 4.462809917355372e-07, "loss": 0.7764, "step": 27 }, { "epoch": 0.013900380605659441, "grad_norm": 3.7277393341064453, "learning_rate": 4.628099173553719e-07, "loss": 0.7438, "step": 28 }, { "epoch": 0.014396822770147278, "grad_norm": 3.6715855598449707, "learning_rate": 4.793388429752067e-07, "loss": 0.747, "step": 29 }, { "epoch": 0.014893264934635116, "grad_norm": 3.680730104446411, "learning_rate": 4.958677685950413e-07, "loss": 0.7393, "step": 30 }, { "epoch": 0.015389707099122952, "grad_norm": 3.6033551692962646, "learning_rate": 5.123966942148761e-07, "loss": 0.7148, "step": 31 }, { "epoch": 0.015886149263610788, "grad_norm": 2.9870569705963135, "learning_rate": 5.289256198347107e-07, "loss": 0.738, "step": 32 }, { "epoch": 0.016382591428098628, "grad_norm": 2.2443594932556152, "learning_rate": 5.454545454545455e-07, "loss": 0.7005, "step": 33 }, { "epoch": 0.016879033592586464, "grad_norm": 2.295828342437744, "learning_rate": 5.619834710743802e-07, "loss": 0.7373, "step": 34 }, { "epoch": 0.0173754757570743, "grad_norm": 2.1921496391296387, "learning_rate": 5.78512396694215e-07, "loss": 0.7048, "step": 35 }, { "epoch": 0.017871917921562137, "grad_norm": 2.1132678985595703, "learning_rate": 5.950413223140496e-07, "loss": 0.6839, "step": 36 }, { "epoch": 0.018368360086049976, "grad_norm": 2.0354158878326416, "learning_rate": 6.115702479338844e-07, "loss": 0.7283, "step": 37 }, { "epoch": 0.018864802250537813, "grad_norm": 1.9216876029968262, "learning_rate": 6.280991735537191e-07, "loss": 0.6796, "step": 38 }, { "epoch": 0.01936124441502565, "grad_norm": 1.8712728023529053, "learning_rate": 6.446280991735538e-07, "loss": 0.7107, "step": 39 }, { "epoch": 0.019857686579513485, "grad_norm": 1.667904019355774, "learning_rate": 6.611570247933885e-07, "loss": 0.6655, "step": 40 }, { "epoch": 0.020354128744001325, "grad_norm": 1.7772026062011719, "learning_rate": 6.776859504132232e-07, "loss": 0.6989, "step": 41 }, { "epoch": 0.02085057090848916, "grad_norm": 1.6387361288070679, "learning_rate": 6.942148760330579e-07, "loss": 0.7029, "step": 42 }, { "epoch": 0.021347013072976997, "grad_norm": 1.4074972867965698, "learning_rate": 7.107438016528927e-07, "loss": 0.6835, "step": 43 }, { "epoch": 0.021843455237464834, "grad_norm": 1.2412465810775757, "learning_rate": 7.272727272727273e-07, "loss": 0.6493, "step": 44 }, { "epoch": 0.022339897401952673, "grad_norm": 1.4219928979873657, "learning_rate": 7.438016528925621e-07, "loss": 0.6762, "step": 45 }, { "epoch": 0.02283633956644051, "grad_norm": 1.6131993532180786, "learning_rate": 7.603305785123967e-07, "loss": 0.6754, "step": 46 }, { "epoch": 0.023332781730928346, "grad_norm": 1.5659677982330322, "learning_rate": 7.768595041322315e-07, "loss": 0.6037, "step": 47 }, { "epoch": 0.023829223895416186, "grad_norm": 1.6397186517715454, "learning_rate": 7.933884297520662e-07, "loss": 0.6424, "step": 48 }, { "epoch": 0.024325666059904022, "grad_norm": 1.8034029006958008, "learning_rate": 8.099173553719009e-07, "loss": 0.6975, "step": 49 }, { "epoch": 0.024822108224391858, "grad_norm": 1.4526926279067993, "learning_rate": 8.264462809917356e-07, "loss": 0.631, "step": 50 }, { "epoch": 0.025318550388879695, "grad_norm": 1.4311025142669678, "learning_rate": 8.429752066115703e-07, "loss": 0.6223, "step": 51 }, { "epoch": 0.025814992553367534, "grad_norm": 1.2271875143051147, "learning_rate": 8.59504132231405e-07, "loss": 0.6029, "step": 52 }, { "epoch": 0.02631143471785537, "grad_norm": 1.177642583847046, "learning_rate": 8.760330578512398e-07, "loss": 0.6487, "step": 53 }, { "epoch": 0.026807876882343207, "grad_norm": 1.0982283353805542, "learning_rate": 8.925619834710744e-07, "loss": 0.6257, "step": 54 }, { "epoch": 0.027304319046831043, "grad_norm": 1.0642224550247192, "learning_rate": 9.090909090909091e-07, "loss": 0.6247, "step": 55 }, { "epoch": 0.027800761211318883, "grad_norm": 0.9516341090202332, "learning_rate": 9.256198347107438e-07, "loss": 0.6647, "step": 56 }, { "epoch": 0.02829720337580672, "grad_norm": 0.7882149815559387, "learning_rate": 9.421487603305785e-07, "loss": 0.6296, "step": 57 }, { "epoch": 0.028793645540294555, "grad_norm": 0.910771906375885, "learning_rate": 9.586776859504134e-07, "loss": 0.6412, "step": 58 }, { "epoch": 0.02929008770478239, "grad_norm": 0.875616729259491, "learning_rate": 9.75206611570248e-07, "loss": 0.6397, "step": 59 }, { "epoch": 0.02978652986927023, "grad_norm": 0.903028666973114, "learning_rate": 9.917355371900827e-07, "loss": 0.6461, "step": 60 }, { "epoch": 0.030282972033758068, "grad_norm": 0.8827576637268066, "learning_rate": 1.0082644628099174e-06, "loss": 0.6053, "step": 61 }, { "epoch": 0.030779414198245904, "grad_norm": 0.9439021944999695, "learning_rate": 1.0247933884297522e-06, "loss": 0.5646, "step": 62 }, { "epoch": 0.031275856362733744, "grad_norm": 0.8183777928352356, "learning_rate": 1.041322314049587e-06, "loss": 0.6005, "step": 63 }, { "epoch": 0.031772298527221576, "grad_norm": 0.8222289681434631, "learning_rate": 1.0578512396694215e-06, "loss": 0.5855, "step": 64 }, { "epoch": 0.032268740691709416, "grad_norm": 0.7704228758811951, "learning_rate": 1.0743801652892562e-06, "loss": 0.6196, "step": 65 }, { "epoch": 0.032765182856197256, "grad_norm": 0.748343288898468, "learning_rate": 1.090909090909091e-06, "loss": 0.6149, "step": 66 }, { "epoch": 0.03326162502068509, "grad_norm": 0.6501196026802063, "learning_rate": 1.1074380165289257e-06, "loss": 0.5875, "step": 67 }, { "epoch": 0.03375806718517293, "grad_norm": 0.7058147192001343, "learning_rate": 1.1239669421487605e-06, "loss": 0.5852, "step": 68 }, { "epoch": 0.03425450934966076, "grad_norm": 0.7080379128456116, "learning_rate": 1.140495867768595e-06, "loss": 0.6212, "step": 69 }, { "epoch": 0.0347509515141486, "grad_norm": 0.6531385779380798, "learning_rate": 1.15702479338843e-06, "loss": 0.5804, "step": 70 }, { "epoch": 0.03524739367863644, "grad_norm": 0.6582463383674622, "learning_rate": 1.1735537190082645e-06, "loss": 0.559, "step": 71 }, { "epoch": 0.03574383584312427, "grad_norm": 0.5853129029273987, "learning_rate": 1.1900826446280993e-06, "loss": 0.5395, "step": 72 }, { "epoch": 0.03624027800761211, "grad_norm": 0.5918567776679993, "learning_rate": 1.206611570247934e-06, "loss": 0.5385, "step": 73 }, { "epoch": 0.03673672017209995, "grad_norm": 0.6878299713134766, "learning_rate": 1.2231404958677688e-06, "loss": 0.5902, "step": 74 }, { "epoch": 0.037233162336587786, "grad_norm": 0.5517659187316895, "learning_rate": 1.2396694214876035e-06, "loss": 0.566, "step": 75 }, { "epoch": 0.037729604501075625, "grad_norm": 0.6881161332130432, "learning_rate": 1.2561983471074383e-06, "loss": 0.5766, "step": 76 }, { "epoch": 0.038226046665563465, "grad_norm": 0.6725229024887085, "learning_rate": 1.2727272727272728e-06, "loss": 0.605, "step": 77 }, { "epoch": 0.0387224888300513, "grad_norm": 0.6387733817100525, "learning_rate": 1.2892561983471076e-06, "loss": 0.5672, "step": 78 }, { "epoch": 0.03921893099453914, "grad_norm": 0.5807753205299377, "learning_rate": 1.3057851239669423e-06, "loss": 0.5866, "step": 79 }, { "epoch": 0.03971537315902697, "grad_norm": 0.5125914216041565, "learning_rate": 1.322314049586777e-06, "loss": 0.5618, "step": 80 }, { "epoch": 0.04021181532351481, "grad_norm": 0.5212637782096863, "learning_rate": 1.3388429752066118e-06, "loss": 0.5432, "step": 81 }, { "epoch": 0.04070825748800265, "grad_norm": 0.4973706603050232, "learning_rate": 1.3553719008264463e-06, "loss": 0.5761, "step": 82 }, { "epoch": 0.04120469965249048, "grad_norm": 0.5210216045379639, "learning_rate": 1.371900826446281e-06, "loss": 0.564, "step": 83 }, { "epoch": 0.04170114181697832, "grad_norm": 0.5238592028617859, "learning_rate": 1.3884297520661158e-06, "loss": 0.5646, "step": 84 }, { "epoch": 0.04219758398146616, "grad_norm": 0.48789557814598083, "learning_rate": 1.4049586776859506e-06, "loss": 0.5344, "step": 85 }, { "epoch": 0.042694026145953995, "grad_norm": 0.5198727250099182, "learning_rate": 1.4214876033057853e-06, "loss": 0.5449, "step": 86 }, { "epoch": 0.043190468310441835, "grad_norm": 0.5006489157676697, "learning_rate": 1.4380165289256199e-06, "loss": 0.5686, "step": 87 }, { "epoch": 0.04368691047492967, "grad_norm": 0.5170336365699768, "learning_rate": 1.4545454545454546e-06, "loss": 0.5813, "step": 88 }, { "epoch": 0.04418335263941751, "grad_norm": 0.513331949710846, "learning_rate": 1.4710743801652894e-06, "loss": 0.5293, "step": 89 }, { "epoch": 0.04467979480390535, "grad_norm": 0.5772969722747803, "learning_rate": 1.4876033057851241e-06, "loss": 0.5895, "step": 90 }, { "epoch": 0.04517623696839318, "grad_norm": 0.4578842520713806, "learning_rate": 1.5041322314049589e-06, "loss": 0.5363, "step": 91 }, { "epoch": 0.04567267913288102, "grad_norm": 0.45573586225509644, "learning_rate": 1.5206611570247934e-06, "loss": 0.5204, "step": 92 }, { "epoch": 0.04616912129736886, "grad_norm": 0.504004716873169, "learning_rate": 1.5371900826446282e-06, "loss": 0.5337, "step": 93 }, { "epoch": 0.04666556346185669, "grad_norm": 0.5125177502632141, "learning_rate": 1.553719008264463e-06, "loss": 0.565, "step": 94 }, { "epoch": 0.04716200562634453, "grad_norm": 0.4796072244644165, "learning_rate": 1.5702479338842977e-06, "loss": 0.5406, "step": 95 }, { "epoch": 0.04765844779083237, "grad_norm": 0.5068469047546387, "learning_rate": 1.5867768595041324e-06, "loss": 0.5453, "step": 96 }, { "epoch": 0.048154889955320204, "grad_norm": 0.5826280117034912, "learning_rate": 1.603305785123967e-06, "loss": 0.5959, "step": 97 }, { "epoch": 0.048651332119808044, "grad_norm": 0.4251062870025635, "learning_rate": 1.6198347107438017e-06, "loss": 0.5215, "step": 98 }, { "epoch": 0.04914777428429588, "grad_norm": 0.5344546437263489, "learning_rate": 1.6363636363636365e-06, "loss": 0.5127, "step": 99 }, { "epoch": 0.049644216448783716, "grad_norm": 0.44020047783851624, "learning_rate": 1.6528925619834712e-06, "loss": 0.512, "step": 100 }, { "epoch": 0.050140658613271556, "grad_norm": 0.4951499402523041, "learning_rate": 1.669421487603306e-06, "loss": 0.5514, "step": 101 }, { "epoch": 0.05063710077775939, "grad_norm": 0.5466610193252563, "learning_rate": 1.6859504132231405e-06, "loss": 0.52, "step": 102 }, { "epoch": 0.05113354294224723, "grad_norm": 0.44786763191223145, "learning_rate": 1.7024793388429753e-06, "loss": 0.513, "step": 103 }, { "epoch": 0.05162998510673507, "grad_norm": 0.5605816841125488, "learning_rate": 1.71900826446281e-06, "loss": 0.5484, "step": 104 }, { "epoch": 0.0521264272712229, "grad_norm": 0.5020123720169067, "learning_rate": 1.7355371900826448e-06, "loss": 0.5224, "step": 105 }, { "epoch": 0.05262286943571074, "grad_norm": 0.5250580310821533, "learning_rate": 1.7520661157024795e-06, "loss": 0.5442, "step": 106 }, { "epoch": 0.053119311600198574, "grad_norm": 0.422644704580307, "learning_rate": 1.768595041322314e-06, "loss": 0.5147, "step": 107 }, { "epoch": 0.053615753764686414, "grad_norm": 0.48443830013275146, "learning_rate": 1.7851239669421488e-06, "loss": 0.5518, "step": 108 }, { "epoch": 0.05411219592917425, "grad_norm": 0.49258866906166077, "learning_rate": 1.8016528925619835e-06, "loss": 0.5371, "step": 109 }, { "epoch": 0.054608638093662086, "grad_norm": 0.4371580481529236, "learning_rate": 1.8181818181818183e-06, "loss": 0.5257, "step": 110 }, { "epoch": 0.055105080258149926, "grad_norm": 0.4786614179611206, "learning_rate": 1.8347107438016533e-06, "loss": 0.5289, "step": 111 }, { "epoch": 0.055601522422637766, "grad_norm": 0.47392064332962036, "learning_rate": 1.8512396694214876e-06, "loss": 0.5369, "step": 112 }, { "epoch": 0.0560979645871256, "grad_norm": 0.41380053758621216, "learning_rate": 1.8677685950413223e-06, "loss": 0.4992, "step": 113 }, { "epoch": 0.05659440675161344, "grad_norm": 0.4443593919277191, "learning_rate": 1.884297520661157e-06, "loss": 0.544, "step": 114 }, { "epoch": 0.05709084891610127, "grad_norm": 0.5296859741210938, "learning_rate": 1.900826446280992e-06, "loss": 0.5335, "step": 115 }, { "epoch": 0.05758729108058911, "grad_norm": 0.471079021692276, "learning_rate": 1.917355371900827e-06, "loss": 0.528, "step": 116 }, { "epoch": 0.05808373324507695, "grad_norm": 0.47272923588752747, "learning_rate": 1.9338842975206613e-06, "loss": 0.481, "step": 117 }, { "epoch": 0.05858017540956478, "grad_norm": 0.4893951416015625, "learning_rate": 1.950413223140496e-06, "loss": 0.5453, "step": 118 }, { "epoch": 0.05907661757405262, "grad_norm": 0.43387681245803833, "learning_rate": 1.966942148760331e-06, "loss": 0.534, "step": 119 }, { "epoch": 0.05957305973854046, "grad_norm": 0.4546177089214325, "learning_rate": 1.9834710743801654e-06, "loss": 0.5566, "step": 120 }, { "epoch": 0.060069501903028295, "grad_norm": 0.44194188714027405, "learning_rate": 2.0000000000000003e-06, "loss": 0.5637, "step": 121 }, { "epoch": 0.060565944067516135, "grad_norm": 0.47346624732017517, "learning_rate": 2.016528925619835e-06, "loss": 0.5359, "step": 122 }, { "epoch": 0.061062386232003975, "grad_norm": 0.43716663122177124, "learning_rate": 2.0330578512396694e-06, "loss": 0.5046, "step": 123 }, { "epoch": 0.06155882839649181, "grad_norm": 0.4675371050834656, "learning_rate": 2.0495867768595044e-06, "loss": 0.5265, "step": 124 }, { "epoch": 0.06205527056097965, "grad_norm": 0.4213584065437317, "learning_rate": 2.066115702479339e-06, "loss": 0.5094, "step": 125 }, { "epoch": 0.06255171272546749, "grad_norm": 0.4952092170715332, "learning_rate": 2.082644628099174e-06, "loss": 0.5555, "step": 126 }, { "epoch": 0.06304815488995533, "grad_norm": 0.48540616035461426, "learning_rate": 2.0991735537190084e-06, "loss": 0.4934, "step": 127 }, { "epoch": 0.06354459705444315, "grad_norm": 0.45101940631866455, "learning_rate": 2.115702479338843e-06, "loss": 0.5092, "step": 128 }, { "epoch": 0.06404103921893099, "grad_norm": 0.566597580909729, "learning_rate": 2.132231404958678e-06, "loss": 0.5129, "step": 129 }, { "epoch": 0.06453748138341883, "grad_norm": 0.5152295231819153, "learning_rate": 2.1487603305785124e-06, "loss": 0.5184, "step": 130 }, { "epoch": 0.06503392354790667, "grad_norm": 0.447128027677536, "learning_rate": 2.1652892561983474e-06, "loss": 0.5305, "step": 131 }, { "epoch": 0.06553036571239451, "grad_norm": 0.4262375235557556, "learning_rate": 2.181818181818182e-06, "loss": 0.5097, "step": 132 }, { "epoch": 0.06602680787688234, "grad_norm": 0.4780712425708771, "learning_rate": 2.1983471074380165e-06, "loss": 0.524, "step": 133 }, { "epoch": 0.06652325004137018, "grad_norm": 0.5405784249305725, "learning_rate": 2.2148760330578515e-06, "loss": 0.5342, "step": 134 }, { "epoch": 0.06701969220585802, "grad_norm": 0.4136822521686554, "learning_rate": 2.231404958677686e-06, "loss": 0.4854, "step": 135 }, { "epoch": 0.06751613437034586, "grad_norm": 0.47009584307670593, "learning_rate": 2.247933884297521e-06, "loss": 0.517, "step": 136 }, { "epoch": 0.0680125765348337, "grad_norm": 0.49956777691841125, "learning_rate": 2.2644628099173555e-06, "loss": 0.5489, "step": 137 }, { "epoch": 0.06850901869932152, "grad_norm": 0.468935489654541, "learning_rate": 2.28099173553719e-06, "loss": 0.4909, "step": 138 }, { "epoch": 0.06900546086380936, "grad_norm": 0.41704845428466797, "learning_rate": 2.297520661157025e-06, "loss": 0.5192, "step": 139 }, { "epoch": 0.0695019030282972, "grad_norm": 0.4615921378135681, "learning_rate": 2.31404958677686e-06, "loss": 0.5048, "step": 140 }, { "epoch": 0.06999834519278504, "grad_norm": 0.5095983147621155, "learning_rate": 2.3305785123966945e-06, "loss": 0.4952, "step": 141 }, { "epoch": 0.07049478735727288, "grad_norm": 0.4944060742855072, "learning_rate": 2.347107438016529e-06, "loss": 0.4915, "step": 142 }, { "epoch": 0.07099122952176072, "grad_norm": 0.4781531095504761, "learning_rate": 2.363636363636364e-06, "loss": 0.5454, "step": 143 }, { "epoch": 0.07148767168624855, "grad_norm": 0.46591994166374207, "learning_rate": 2.3801652892561985e-06, "loss": 0.5103, "step": 144 }, { "epoch": 0.07198411385073639, "grad_norm": 0.416408896446228, "learning_rate": 2.3966942148760335e-06, "loss": 0.5235, "step": 145 }, { "epoch": 0.07248055601522423, "grad_norm": 0.5863150358200073, "learning_rate": 2.413223140495868e-06, "loss": 0.5406, "step": 146 }, { "epoch": 0.07297699817971207, "grad_norm": 0.4707125723361969, "learning_rate": 2.4297520661157026e-06, "loss": 0.521, "step": 147 }, { "epoch": 0.0734734403441999, "grad_norm": 0.45167362689971924, "learning_rate": 2.4462809917355375e-06, "loss": 0.5112, "step": 148 }, { "epoch": 0.07396988250868773, "grad_norm": 0.5400341153144836, "learning_rate": 2.462809917355372e-06, "loss": 0.5265, "step": 149 }, { "epoch": 0.07446632467317557, "grad_norm": 0.5047646164894104, "learning_rate": 2.479338842975207e-06, "loss": 0.537, "step": 150 }, { "epoch": 0.07496276683766341, "grad_norm": 0.43332019448280334, "learning_rate": 2.4958677685950416e-06, "loss": 0.5413, "step": 151 }, { "epoch": 0.07545920900215125, "grad_norm": 0.5011436939239502, "learning_rate": 2.5123966942148765e-06, "loss": 0.5313, "step": 152 }, { "epoch": 0.07595565116663909, "grad_norm": 0.544281542301178, "learning_rate": 2.528925619834711e-06, "loss": 0.5229, "step": 153 }, { "epoch": 0.07645209333112693, "grad_norm": 0.4853697419166565, "learning_rate": 2.5454545454545456e-06, "loss": 0.5099, "step": 154 }, { "epoch": 0.07694853549561476, "grad_norm": 0.46537184715270996, "learning_rate": 2.56198347107438e-06, "loss": 0.5201, "step": 155 }, { "epoch": 0.0774449776601026, "grad_norm": 0.438852459192276, "learning_rate": 2.578512396694215e-06, "loss": 0.5187, "step": 156 }, { "epoch": 0.07794141982459044, "grad_norm": 0.5434144735336304, "learning_rate": 2.5950413223140496e-06, "loss": 0.5259, "step": 157 }, { "epoch": 0.07843786198907828, "grad_norm": 0.4954397976398468, "learning_rate": 2.6115702479338846e-06, "loss": 0.556, "step": 158 }, { "epoch": 0.07893430415356611, "grad_norm": 0.4687674343585968, "learning_rate": 2.628099173553719e-06, "loss": 0.5115, "step": 159 }, { "epoch": 0.07943074631805394, "grad_norm": 0.49049097299575806, "learning_rate": 2.644628099173554e-06, "loss": 0.5287, "step": 160 }, { "epoch": 0.07992718848254178, "grad_norm": 0.4293896555900574, "learning_rate": 2.6611570247933886e-06, "loss": 0.4948, "step": 161 }, { "epoch": 0.08042363064702962, "grad_norm": 0.48649606108665466, "learning_rate": 2.6776859504132236e-06, "loss": 0.4869, "step": 162 }, { "epoch": 0.08092007281151746, "grad_norm": 0.4996541440486908, "learning_rate": 2.694214876033058e-06, "loss": 0.5287, "step": 163 }, { "epoch": 0.0814165149760053, "grad_norm": 0.44091328978538513, "learning_rate": 2.7107438016528927e-06, "loss": 0.5483, "step": 164 }, { "epoch": 0.08191295714049314, "grad_norm": 0.45916053652763367, "learning_rate": 2.7272727272727272e-06, "loss": 0.5329, "step": 165 }, { "epoch": 0.08240939930498097, "grad_norm": 0.5104344487190247, "learning_rate": 2.743801652892562e-06, "loss": 0.4642, "step": 166 }, { "epoch": 0.0829058414694688, "grad_norm": 0.45562416315078735, "learning_rate": 2.7603305785123967e-06, "loss": 0.513, "step": 167 }, { "epoch": 0.08340228363395664, "grad_norm": 0.44878631830215454, "learning_rate": 2.7768595041322317e-06, "loss": 0.5028, "step": 168 }, { "epoch": 0.08389872579844448, "grad_norm": 0.5299433469772339, "learning_rate": 2.7933884297520662e-06, "loss": 0.4912, "step": 169 }, { "epoch": 0.08439516796293232, "grad_norm": 0.5315517783164978, "learning_rate": 2.809917355371901e-06, "loss": 0.5287, "step": 170 }, { "epoch": 0.08489161012742015, "grad_norm": 0.503352165222168, "learning_rate": 2.8264462809917357e-06, "loss": 0.4984, "step": 171 }, { "epoch": 0.08538805229190799, "grad_norm": 0.4920940101146698, "learning_rate": 2.8429752066115707e-06, "loss": 0.4808, "step": 172 }, { "epoch": 0.08588449445639583, "grad_norm": 0.5016526579856873, "learning_rate": 2.8595041322314052e-06, "loss": 0.5222, "step": 173 }, { "epoch": 0.08638093662088367, "grad_norm": 0.5049303770065308, "learning_rate": 2.8760330578512398e-06, "loss": 0.5059, "step": 174 }, { "epoch": 0.08687737878537151, "grad_norm": 0.593731701374054, "learning_rate": 2.8925619834710743e-06, "loss": 0.4946, "step": 175 }, { "epoch": 0.08737382094985933, "grad_norm": 0.4857478737831116, "learning_rate": 2.9090909090909093e-06, "loss": 0.5047, "step": 176 }, { "epoch": 0.08787026311434717, "grad_norm": 0.4972445070743561, "learning_rate": 2.925619834710744e-06, "loss": 0.5031, "step": 177 }, { "epoch": 0.08836670527883501, "grad_norm": 0.48068660497665405, "learning_rate": 2.9421487603305788e-06, "loss": 0.49, "step": 178 }, { "epoch": 0.08886314744332285, "grad_norm": 0.5001876950263977, "learning_rate": 2.9586776859504133e-06, "loss": 0.5324, "step": 179 }, { "epoch": 0.0893595896078107, "grad_norm": 0.4273318350315094, "learning_rate": 2.9752066115702483e-06, "loss": 0.4891, "step": 180 }, { "epoch": 0.08985603177229853, "grad_norm": 0.5405133962631226, "learning_rate": 2.9917355371900832e-06, "loss": 0.5005, "step": 181 }, { "epoch": 0.09035247393678636, "grad_norm": 0.4924674332141876, "learning_rate": 3.0082644628099178e-06, "loss": 0.4995, "step": 182 }, { "epoch": 0.0908489161012742, "grad_norm": 0.4584423005580902, "learning_rate": 3.0247933884297527e-06, "loss": 0.4992, "step": 183 }, { "epoch": 0.09134535826576204, "grad_norm": 0.5190091133117676, "learning_rate": 3.041322314049587e-06, "loss": 0.5292, "step": 184 }, { "epoch": 0.09184180043024988, "grad_norm": 0.42255643010139465, "learning_rate": 3.0578512396694214e-06, "loss": 0.4657, "step": 185 }, { "epoch": 0.09233824259473772, "grad_norm": 0.4375031590461731, "learning_rate": 3.0743801652892563e-06, "loss": 0.5014, "step": 186 }, { "epoch": 0.09283468475922554, "grad_norm": 0.4717262089252472, "learning_rate": 3.090909090909091e-06, "loss": 0.5011, "step": 187 }, { "epoch": 0.09333112692371338, "grad_norm": 0.4770236015319824, "learning_rate": 3.107438016528926e-06, "loss": 0.4974, "step": 188 }, { "epoch": 0.09382756908820122, "grad_norm": 0.46440058946609497, "learning_rate": 3.123966942148761e-06, "loss": 0.4641, "step": 189 }, { "epoch": 0.09432401125268906, "grad_norm": 0.46080029010772705, "learning_rate": 3.1404958677685953e-06, "loss": 0.5044, "step": 190 }, { "epoch": 0.0948204534171769, "grad_norm": 0.46080946922302246, "learning_rate": 3.1570247933884303e-06, "loss": 0.4846, "step": 191 }, { "epoch": 0.09531689558166474, "grad_norm": 0.5287591218948364, "learning_rate": 3.173553719008265e-06, "loss": 0.4908, "step": 192 }, { "epoch": 0.09581333774615257, "grad_norm": 0.4524436593055725, "learning_rate": 3.1900826446281e-06, "loss": 0.5209, "step": 193 }, { "epoch": 0.09630977991064041, "grad_norm": 0.44511228799819946, "learning_rate": 3.206611570247934e-06, "loss": 0.4984, "step": 194 }, { "epoch": 0.09680622207512825, "grad_norm": 0.41785427927970886, "learning_rate": 3.2231404958677685e-06, "loss": 0.4833, "step": 195 }, { "epoch": 0.09730266423961609, "grad_norm": 0.5000106692314148, "learning_rate": 3.2396694214876034e-06, "loss": 0.4883, "step": 196 }, { "epoch": 0.09779910640410393, "grad_norm": 0.5033714175224304, "learning_rate": 3.2561983471074384e-06, "loss": 0.5219, "step": 197 }, { "epoch": 0.09829554856859175, "grad_norm": 0.45910710096359253, "learning_rate": 3.272727272727273e-06, "loss": 0.4908, "step": 198 }, { "epoch": 0.0987919907330796, "grad_norm": 0.4290245473384857, "learning_rate": 3.289256198347108e-06, "loss": 0.4774, "step": 199 }, { "epoch": 0.09928843289756743, "grad_norm": 0.46596047282218933, "learning_rate": 3.3057851239669424e-06, "loss": 0.5044, "step": 200 }, { "epoch": 0.09978487506205527, "grad_norm": 0.43261897563934326, "learning_rate": 3.3223140495867774e-06, "loss": 0.4874, "step": 201 }, { "epoch": 0.10028131722654311, "grad_norm": 0.48163965344429016, "learning_rate": 3.338842975206612e-06, "loss": 0.4655, "step": 202 }, { "epoch": 0.10077775939103094, "grad_norm": 0.5434346199035645, "learning_rate": 3.355371900826447e-06, "loss": 0.5173, "step": 203 }, { "epoch": 0.10127420155551878, "grad_norm": 0.4714270532131195, "learning_rate": 3.371900826446281e-06, "loss": 0.4988, "step": 204 }, { "epoch": 0.10177064372000662, "grad_norm": 0.46320074796676636, "learning_rate": 3.388429752066116e-06, "loss": 0.5065, "step": 205 }, { "epoch": 0.10226708588449446, "grad_norm": 0.47154536843299866, "learning_rate": 3.4049586776859505e-06, "loss": 0.4719, "step": 206 }, { "epoch": 0.1027635280489823, "grad_norm": 0.4461115002632141, "learning_rate": 3.4214876033057855e-06, "loss": 0.4856, "step": 207 }, { "epoch": 0.10325997021347014, "grad_norm": 0.4610353708267212, "learning_rate": 3.43801652892562e-06, "loss": 0.4837, "step": 208 }, { "epoch": 0.10375641237795796, "grad_norm": 0.522070586681366, "learning_rate": 3.454545454545455e-06, "loss": 0.528, "step": 209 }, { "epoch": 0.1042528545424458, "grad_norm": 0.4722146987915039, "learning_rate": 3.4710743801652895e-06, "loss": 0.4873, "step": 210 }, { "epoch": 0.10474929670693364, "grad_norm": 0.5237631797790527, "learning_rate": 3.4876033057851245e-06, "loss": 0.5035, "step": 211 }, { "epoch": 0.10524573887142148, "grad_norm": 0.4807029068470001, "learning_rate": 3.504132231404959e-06, "loss": 0.4694, "step": 212 }, { "epoch": 0.10574218103590932, "grad_norm": 0.4918181002140045, "learning_rate": 3.520661157024794e-06, "loss": 0.4942, "step": 213 }, { "epoch": 0.10623862320039715, "grad_norm": 0.5076276063919067, "learning_rate": 3.537190082644628e-06, "loss": 0.5218, "step": 214 }, { "epoch": 0.10673506536488499, "grad_norm": 0.4402696192264557, "learning_rate": 3.553719008264463e-06, "loss": 0.4819, "step": 215 }, { "epoch": 0.10723150752937283, "grad_norm": 0.4877113103866577, "learning_rate": 3.5702479338842976e-06, "loss": 0.5, "step": 216 }, { "epoch": 0.10772794969386067, "grad_norm": 0.5089818239212036, "learning_rate": 3.5867768595041325e-06, "loss": 0.5192, "step": 217 }, { "epoch": 0.1082243918583485, "grad_norm": 0.475002646446228, "learning_rate": 3.603305785123967e-06, "loss": 0.4903, "step": 218 }, { "epoch": 0.10872083402283635, "grad_norm": 0.481044739484787, "learning_rate": 3.619834710743802e-06, "loss": 0.4658, "step": 219 }, { "epoch": 0.10921727618732417, "grad_norm": 0.44593513011932373, "learning_rate": 3.6363636363636366e-06, "loss": 0.4769, "step": 220 }, { "epoch": 0.10971371835181201, "grad_norm": 0.5056861639022827, "learning_rate": 3.6528925619834715e-06, "loss": 0.5433, "step": 221 }, { "epoch": 0.11021016051629985, "grad_norm": 0.5379074811935425, "learning_rate": 3.6694214876033065e-06, "loss": 0.4918, "step": 222 }, { "epoch": 0.11070660268078769, "grad_norm": 0.4794224798679352, "learning_rate": 3.685950413223141e-06, "loss": 0.4672, "step": 223 }, { "epoch": 0.11120304484527553, "grad_norm": 0.4862600862979889, "learning_rate": 3.702479338842975e-06, "loss": 0.5218, "step": 224 }, { "epoch": 0.11169948700976336, "grad_norm": 0.46349775791168213, "learning_rate": 3.71900826446281e-06, "loss": 0.4716, "step": 225 }, { "epoch": 0.1121959291742512, "grad_norm": 0.49628591537475586, "learning_rate": 3.7355371900826447e-06, "loss": 0.4983, "step": 226 }, { "epoch": 0.11269237133873904, "grad_norm": 0.48531922698020935, "learning_rate": 3.7520661157024796e-06, "loss": 0.4977, "step": 227 }, { "epoch": 0.11318881350322688, "grad_norm": 0.5405210256576538, "learning_rate": 3.768595041322314e-06, "loss": 0.5208, "step": 228 }, { "epoch": 0.11368525566771472, "grad_norm": 0.4777161478996277, "learning_rate": 3.785123966942149e-06, "loss": 0.5104, "step": 229 }, { "epoch": 0.11418169783220254, "grad_norm": 0.45581114292144775, "learning_rate": 3.801652892561984e-06, "loss": 0.4421, "step": 230 }, { "epoch": 0.11467813999669038, "grad_norm": 0.5788438320159912, "learning_rate": 3.818181818181819e-06, "loss": 0.5165, "step": 231 }, { "epoch": 0.11517458216117822, "grad_norm": 0.46671468019485474, "learning_rate": 3.834710743801654e-06, "loss": 0.473, "step": 232 }, { "epoch": 0.11567102432566606, "grad_norm": 0.48879775404930115, "learning_rate": 3.851239669421488e-06, "loss": 0.5066, "step": 233 }, { "epoch": 0.1161674664901539, "grad_norm": 0.4615035355091095, "learning_rate": 3.867768595041323e-06, "loss": 0.5037, "step": 234 }, { "epoch": 0.11666390865464174, "grad_norm": 0.5123949646949768, "learning_rate": 3.884297520661157e-06, "loss": 0.4932, "step": 235 }, { "epoch": 0.11716035081912957, "grad_norm": 0.5143529176712036, "learning_rate": 3.900826446280992e-06, "loss": 0.4991, "step": 236 }, { "epoch": 0.1176567929836174, "grad_norm": 0.5800143480300903, "learning_rate": 3.917355371900827e-06, "loss": 0.4711, "step": 237 }, { "epoch": 0.11815323514810525, "grad_norm": 0.5588797926902771, "learning_rate": 3.933884297520662e-06, "loss": 0.4971, "step": 238 }, { "epoch": 0.11864967731259309, "grad_norm": 0.5042149424552917, "learning_rate": 3.950413223140496e-06, "loss": 0.4546, "step": 239 }, { "epoch": 0.11914611947708093, "grad_norm": 0.5424992442131042, "learning_rate": 3.966942148760331e-06, "loss": 0.455, "step": 240 }, { "epoch": 0.11964256164156875, "grad_norm": 0.5117504000663757, "learning_rate": 3.983471074380166e-06, "loss": 0.4842, "step": 241 }, { "epoch": 0.12013900380605659, "grad_norm": 0.5285716652870178, "learning_rate": 4.000000000000001e-06, "loss": 0.4853, "step": 242 }, { "epoch": 0.12063544597054443, "grad_norm": 0.5046334266662598, "learning_rate": 4.016528925619834e-06, "loss": 0.4967, "step": 243 }, { "epoch": 0.12113188813503227, "grad_norm": 0.46027588844299316, "learning_rate": 4.03305785123967e-06, "loss": 0.4771, "step": 244 }, { "epoch": 0.12162833029952011, "grad_norm": 0.4687836170196533, "learning_rate": 4.049586776859504e-06, "loss": 0.4643, "step": 245 }, { "epoch": 0.12212477246400795, "grad_norm": 0.5623156428337097, "learning_rate": 4.066115702479339e-06, "loss": 0.4742, "step": 246 }, { "epoch": 0.12262121462849578, "grad_norm": 0.5897552371025085, "learning_rate": 4.082644628099174e-06, "loss": 0.4913, "step": 247 }, { "epoch": 0.12311765679298362, "grad_norm": 0.5428889393806458, "learning_rate": 4.099173553719009e-06, "loss": 0.4785, "step": 248 }, { "epoch": 0.12361409895747145, "grad_norm": 0.5449258685112, "learning_rate": 4.115702479338843e-06, "loss": 0.5055, "step": 249 }, { "epoch": 0.1241105411219593, "grad_norm": 0.5663251876831055, "learning_rate": 4.132231404958678e-06, "loss": 0.5286, "step": 250 }, { "epoch": 0.12460698328644713, "grad_norm": 0.47049033641815186, "learning_rate": 4.148760330578513e-06, "loss": 0.4696, "step": 251 }, { "epoch": 0.12510342545093497, "grad_norm": 0.47088587284088135, "learning_rate": 4.165289256198348e-06, "loss": 0.4784, "step": 252 }, { "epoch": 0.1255998676154228, "grad_norm": 0.48449811339378357, "learning_rate": 4.181818181818182e-06, "loss": 0.4519, "step": 253 }, { "epoch": 0.12609630977991065, "grad_norm": 0.49795690178871155, "learning_rate": 4.198347107438017e-06, "loss": 0.4887, "step": 254 }, { "epoch": 0.12659275194439848, "grad_norm": 0.44713225960731506, "learning_rate": 4.214876033057851e-06, "loss": 0.5047, "step": 255 }, { "epoch": 0.1270891941088863, "grad_norm": 0.4805045425891876, "learning_rate": 4.231404958677686e-06, "loss": 0.493, "step": 256 }, { "epoch": 0.12758563627337416, "grad_norm": 0.4586676359176636, "learning_rate": 4.247933884297521e-06, "loss": 0.4946, "step": 257 }, { "epoch": 0.12808207843786198, "grad_norm": 0.43448084592819214, "learning_rate": 4.264462809917356e-06, "loss": 0.4991, "step": 258 }, { "epoch": 0.12857852060234984, "grad_norm": 0.4580869674682617, "learning_rate": 4.28099173553719e-06, "loss": 0.4682, "step": 259 }, { "epoch": 0.12907496276683766, "grad_norm": 0.455931156873703, "learning_rate": 4.297520661157025e-06, "loss": 0.4285, "step": 260 }, { "epoch": 0.1295714049313255, "grad_norm": 0.5193980932235718, "learning_rate": 4.31404958677686e-06, "loss": 0.4869, "step": 261 }, { "epoch": 0.13006784709581334, "grad_norm": 0.5041037797927856, "learning_rate": 4.330578512396695e-06, "loss": 0.4936, "step": 262 }, { "epoch": 0.13056428926030117, "grad_norm": 0.4899240732192993, "learning_rate": 4.347107438016529e-06, "loss": 0.4429, "step": 263 }, { "epoch": 0.13106073142478902, "grad_norm": 0.5020573139190674, "learning_rate": 4.363636363636364e-06, "loss": 0.4761, "step": 264 }, { "epoch": 0.13155717358927685, "grad_norm": 0.4513068199157715, "learning_rate": 4.3801652892561984e-06, "loss": 0.4765, "step": 265 }, { "epoch": 0.13205361575376467, "grad_norm": 0.5553326606750488, "learning_rate": 4.396694214876033e-06, "loss": 0.4837, "step": 266 }, { "epoch": 0.13255005791825253, "grad_norm": 0.4545823037624359, "learning_rate": 4.413223140495868e-06, "loss": 0.4947, "step": 267 }, { "epoch": 0.13304650008274035, "grad_norm": 0.48257866501808167, "learning_rate": 4.429752066115703e-06, "loss": 0.4672, "step": 268 }, { "epoch": 0.1335429422472282, "grad_norm": 0.4781899154186249, "learning_rate": 4.4462809917355374e-06, "loss": 0.482, "step": 269 }, { "epoch": 0.13403938441171603, "grad_norm": 0.5053334832191467, "learning_rate": 4.462809917355372e-06, "loss": 0.5123, "step": 270 }, { "epoch": 0.13453582657620386, "grad_norm": 0.44820624589920044, "learning_rate": 4.479338842975207e-06, "loss": 0.4532, "step": 271 }, { "epoch": 0.1350322687406917, "grad_norm": 0.46712854504585266, "learning_rate": 4.495867768595042e-06, "loss": 0.4709, "step": 272 }, { "epoch": 0.13552871090517954, "grad_norm": 0.507508397102356, "learning_rate": 4.5123966942148764e-06, "loss": 0.4449, "step": 273 }, { "epoch": 0.1360251530696674, "grad_norm": 0.4891042411327362, "learning_rate": 4.528925619834711e-06, "loss": 0.4751, "step": 274 }, { "epoch": 0.13652159523415522, "grad_norm": 0.44514766335487366, "learning_rate": 4.5454545454545455e-06, "loss": 0.5007, "step": 275 }, { "epoch": 0.13701803739864304, "grad_norm": 0.4793578088283539, "learning_rate": 4.56198347107438e-06, "loss": 0.4627, "step": 276 }, { "epoch": 0.1375144795631309, "grad_norm": 0.5303834676742554, "learning_rate": 4.5785123966942154e-06, "loss": 0.5055, "step": 277 }, { "epoch": 0.13801092172761872, "grad_norm": 0.4806853234767914, "learning_rate": 4.59504132231405e-06, "loss": 0.493, "step": 278 }, { "epoch": 0.13850736389210658, "grad_norm": 0.5118285417556763, "learning_rate": 4.6115702479338845e-06, "loss": 0.4799, "step": 279 }, { "epoch": 0.1390038060565944, "grad_norm": 0.5062780380249023, "learning_rate": 4.62809917355372e-06, "loss": 0.4731, "step": 280 }, { "epoch": 0.13950024822108226, "grad_norm": 0.45382899045944214, "learning_rate": 4.6446280991735544e-06, "loss": 0.471, "step": 281 }, { "epoch": 0.13999669038557008, "grad_norm": 0.5055738091468811, "learning_rate": 4.661157024793389e-06, "loss": 0.4732, "step": 282 }, { "epoch": 0.1404931325500579, "grad_norm": 0.48965659737586975, "learning_rate": 4.6776859504132235e-06, "loss": 0.5131, "step": 283 }, { "epoch": 0.14098957471454576, "grad_norm": 0.44945916533470154, "learning_rate": 4.694214876033058e-06, "loss": 0.4862, "step": 284 }, { "epoch": 0.1414860168790336, "grad_norm": 0.46862703561782837, "learning_rate": 4.710743801652893e-06, "loss": 0.4929, "step": 285 }, { "epoch": 0.14198245904352144, "grad_norm": 0.5184916853904724, "learning_rate": 4.727272727272728e-06, "loss": 0.4486, "step": 286 }, { "epoch": 0.14247890120800927, "grad_norm": 0.4448219835758209, "learning_rate": 4.7438016528925625e-06, "loss": 0.4961, "step": 287 }, { "epoch": 0.1429753433724971, "grad_norm": 0.45541977882385254, "learning_rate": 4.760330578512397e-06, "loss": 0.4797, "step": 288 }, { "epoch": 0.14347178553698495, "grad_norm": 0.5043569207191467, "learning_rate": 4.776859504132232e-06, "loss": 0.4897, "step": 289 }, { "epoch": 0.14396822770147277, "grad_norm": 0.499625563621521, "learning_rate": 4.793388429752067e-06, "loss": 0.4936, "step": 290 }, { "epoch": 0.14446466986596063, "grad_norm": 0.48366841673851013, "learning_rate": 4.8099173553719015e-06, "loss": 0.4908, "step": 291 }, { "epoch": 0.14496111203044845, "grad_norm": 0.4458218514919281, "learning_rate": 4.826446280991736e-06, "loss": 0.4714, "step": 292 }, { "epoch": 0.14545755419493628, "grad_norm": 0.4775134325027466, "learning_rate": 4.842975206611571e-06, "loss": 0.4592, "step": 293 }, { "epoch": 0.14595399635942413, "grad_norm": 0.4653535485267639, "learning_rate": 4.859504132231405e-06, "loss": 0.4775, "step": 294 }, { "epoch": 0.14645043852391196, "grad_norm": 0.45629966259002686, "learning_rate": 4.87603305785124e-06, "loss": 0.4369, "step": 295 }, { "epoch": 0.1469468806883998, "grad_norm": 0.47791731357574463, "learning_rate": 4.892561983471075e-06, "loss": 0.4946, "step": 296 }, { "epoch": 0.14744332285288764, "grad_norm": 0.5227943658828735, "learning_rate": 4.90909090909091e-06, "loss": 0.4716, "step": 297 }, { "epoch": 0.14793976501737546, "grad_norm": 0.5027649402618408, "learning_rate": 4.925619834710744e-06, "loss": 0.4802, "step": 298 }, { "epoch": 0.14843620718186332, "grad_norm": 0.4739227294921875, "learning_rate": 4.942148760330579e-06, "loss": 0.4981, "step": 299 }, { "epoch": 0.14893264934635114, "grad_norm": 0.4456350803375244, "learning_rate": 4.958677685950414e-06, "loss": 0.4524, "step": 300 }, { "epoch": 0.149429091510839, "grad_norm": 0.5501533150672913, "learning_rate": 4.975206611570249e-06, "loss": 0.5035, "step": 301 }, { "epoch": 0.14992553367532682, "grad_norm": 0.4739851653575897, "learning_rate": 4.991735537190083e-06, "loss": 0.4829, "step": 302 }, { "epoch": 0.15042197583981465, "grad_norm": 0.4384133815765381, "learning_rate": 5.008264462809918e-06, "loss": 0.4972, "step": 303 }, { "epoch": 0.1509184180043025, "grad_norm": 0.47192367911338806, "learning_rate": 5.024793388429753e-06, "loss": 0.4973, "step": 304 }, { "epoch": 0.15141486016879033, "grad_norm": 0.5129241347312927, "learning_rate": 5.041322314049587e-06, "loss": 0.4818, "step": 305 }, { "epoch": 0.15191130233327818, "grad_norm": 0.47309520840644836, "learning_rate": 5.057851239669422e-06, "loss": 0.4605, "step": 306 }, { "epoch": 0.152407744497766, "grad_norm": 0.4565262794494629, "learning_rate": 5.074380165289257e-06, "loss": 0.5012, "step": 307 }, { "epoch": 0.15290418666225386, "grad_norm": 0.5649682283401489, "learning_rate": 5.090909090909091e-06, "loss": 0.4851, "step": 308 }, { "epoch": 0.1534006288267417, "grad_norm": 0.5007046461105347, "learning_rate": 5.107438016528926e-06, "loss": 0.4737, "step": 309 }, { "epoch": 0.1538970709912295, "grad_norm": 0.4587409198284149, "learning_rate": 5.12396694214876e-06, "loss": 0.4698, "step": 310 }, { "epoch": 0.15439351315571737, "grad_norm": 0.554371178150177, "learning_rate": 5.140495867768596e-06, "loss": 0.5093, "step": 311 }, { "epoch": 0.1548899553202052, "grad_norm": 0.6391235589981079, "learning_rate": 5.15702479338843e-06, "loss": 0.5113, "step": 312 }, { "epoch": 0.15538639748469305, "grad_norm": 0.47578248381614685, "learning_rate": 5.173553719008266e-06, "loss": 0.4764, "step": 313 }, { "epoch": 0.15588283964918087, "grad_norm": 0.4372629225254059, "learning_rate": 5.190082644628099e-06, "loss": 0.4525, "step": 314 }, { "epoch": 0.1563792818136687, "grad_norm": 0.48702529072761536, "learning_rate": 5.206611570247935e-06, "loss": 0.4571, "step": 315 }, { "epoch": 0.15687572397815655, "grad_norm": 0.5586503148078918, "learning_rate": 5.223140495867769e-06, "loss": 0.4619, "step": 316 }, { "epoch": 0.15737216614264438, "grad_norm": 0.4967249929904938, "learning_rate": 5.239669421487605e-06, "loss": 0.4507, "step": 317 }, { "epoch": 0.15786860830713223, "grad_norm": 0.46293962001800537, "learning_rate": 5.256198347107438e-06, "loss": 0.4681, "step": 318 }, { "epoch": 0.15836505047162006, "grad_norm": 0.48820367455482483, "learning_rate": 5.272727272727273e-06, "loss": 0.4258, "step": 319 }, { "epoch": 0.15886149263610788, "grad_norm": 0.5513306856155396, "learning_rate": 5.289256198347108e-06, "loss": 0.4547, "step": 320 }, { "epoch": 0.15935793480059574, "grad_norm": 0.5193266272544861, "learning_rate": 5.305785123966942e-06, "loss": 0.4738, "step": 321 }, { "epoch": 0.15985437696508356, "grad_norm": 0.5217421650886536, "learning_rate": 5.322314049586777e-06, "loss": 0.477, "step": 322 }, { "epoch": 0.16035081912957141, "grad_norm": 0.5688762664794922, "learning_rate": 5.338842975206612e-06, "loss": 0.4891, "step": 323 }, { "epoch": 0.16084726129405924, "grad_norm": 0.5410491228103638, "learning_rate": 5.355371900826447e-06, "loss": 0.4561, "step": 324 }, { "epoch": 0.16134370345854707, "grad_norm": 0.4965986907482147, "learning_rate": 5.371900826446281e-06, "loss": 0.4593, "step": 325 }, { "epoch": 0.16184014562303492, "grad_norm": 0.4827443063259125, "learning_rate": 5.388429752066116e-06, "loss": 0.4846, "step": 326 }, { "epoch": 0.16233658778752275, "grad_norm": 0.5257661938667297, "learning_rate": 5.404958677685951e-06, "loss": 0.4627, "step": 327 }, { "epoch": 0.1628330299520106, "grad_norm": 0.5212469100952148, "learning_rate": 5.421487603305785e-06, "loss": 0.4994, "step": 328 }, { "epoch": 0.16332947211649843, "grad_norm": 0.4847158193588257, "learning_rate": 5.438016528925621e-06, "loss": 0.4702, "step": 329 }, { "epoch": 0.16382591428098628, "grad_norm": 0.5451292991638184, "learning_rate": 5.4545454545454545e-06, "loss": 0.4888, "step": 330 }, { "epoch": 0.1643223564454741, "grad_norm": 0.5609362721443176, "learning_rate": 5.47107438016529e-06, "loss": 0.466, "step": 331 }, { "epoch": 0.16481879860996193, "grad_norm": 0.5777375102043152, "learning_rate": 5.487603305785124e-06, "loss": 0.484, "step": 332 }, { "epoch": 0.16531524077444978, "grad_norm": 0.5333461165428162, "learning_rate": 5.50413223140496e-06, "loss": 0.4848, "step": 333 }, { "epoch": 0.1658116829389376, "grad_norm": 0.5682827234268188, "learning_rate": 5.5206611570247935e-06, "loss": 0.4811, "step": 334 }, { "epoch": 0.16630812510342546, "grad_norm": 0.5143479108810425, "learning_rate": 5.537190082644629e-06, "loss": 0.4859, "step": 335 }, { "epoch": 0.1668045672679133, "grad_norm": 0.518049418926239, "learning_rate": 5.553719008264463e-06, "loss": 0.4517, "step": 336 }, { "epoch": 0.16730100943240112, "grad_norm": 0.4674343764781952, "learning_rate": 5.570247933884299e-06, "loss": 0.461, "step": 337 }, { "epoch": 0.16779745159688897, "grad_norm": 0.4906681180000305, "learning_rate": 5.5867768595041325e-06, "loss": 0.4899, "step": 338 }, { "epoch": 0.1682938937613768, "grad_norm": 0.5533522367477417, "learning_rate": 5.603305785123967e-06, "loss": 0.4492, "step": 339 }, { "epoch": 0.16879033592586465, "grad_norm": 0.4547993242740631, "learning_rate": 5.619834710743802e-06, "loss": 0.476, "step": 340 }, { "epoch": 0.16928677809035247, "grad_norm": 0.4794315695762634, "learning_rate": 5.636363636363636e-06, "loss": 0.4446, "step": 341 }, { "epoch": 0.1697832202548403, "grad_norm": 0.479896605014801, "learning_rate": 5.6528925619834715e-06, "loss": 0.4877, "step": 342 }, { "epoch": 0.17027966241932815, "grad_norm": 0.463527113199234, "learning_rate": 5.669421487603306e-06, "loss": 0.4728, "step": 343 }, { "epoch": 0.17077610458381598, "grad_norm": 0.4583745002746582, "learning_rate": 5.685950413223141e-06, "loss": 0.4636, "step": 344 }, { "epoch": 0.17127254674830383, "grad_norm": 0.5159062147140503, "learning_rate": 5.702479338842976e-06, "loss": 0.5084, "step": 345 }, { "epoch": 0.17176898891279166, "grad_norm": 0.5537452101707458, "learning_rate": 5.7190082644628105e-06, "loss": 0.4623, "step": 346 }, { "epoch": 0.17226543107727949, "grad_norm": 0.5073652863502502, "learning_rate": 5.735537190082645e-06, "loss": 0.4823, "step": 347 }, { "epoch": 0.17276187324176734, "grad_norm": 0.470602422952652, "learning_rate": 5.7520661157024795e-06, "loss": 0.4485, "step": 348 }, { "epoch": 0.17325831540625516, "grad_norm": 0.5273879170417786, "learning_rate": 5.768595041322315e-06, "loss": 0.4452, "step": 349 }, { "epoch": 0.17375475757074302, "grad_norm": 0.5119819045066833, "learning_rate": 5.785123966942149e-06, "loss": 0.4533, "step": 350 }, { "epoch": 0.17425119973523084, "grad_norm": 0.5326038002967834, "learning_rate": 5.801652892561984e-06, "loss": 0.4585, "step": 351 }, { "epoch": 0.17474764189971867, "grad_norm": 0.4701957106590271, "learning_rate": 5.8181818181818185e-06, "loss": 0.4942, "step": 352 }, { "epoch": 0.17524408406420652, "grad_norm": 0.5402072072029114, "learning_rate": 5.834710743801654e-06, "loss": 0.4904, "step": 353 }, { "epoch": 0.17574052622869435, "grad_norm": 0.5501916408538818, "learning_rate": 5.851239669421488e-06, "loss": 0.4475, "step": 354 }, { "epoch": 0.1762369683931822, "grad_norm": 0.5606446862220764, "learning_rate": 5.867768595041323e-06, "loss": 0.5119, "step": 355 }, { "epoch": 0.17673341055767003, "grad_norm": 0.6060822606086731, "learning_rate": 5.8842975206611575e-06, "loss": 0.5109, "step": 356 }, { "epoch": 0.17722985272215788, "grad_norm": 0.44840753078460693, "learning_rate": 5.900826446280993e-06, "loss": 0.4641, "step": 357 }, { "epoch": 0.1777262948866457, "grad_norm": 0.5451337099075317, "learning_rate": 5.917355371900827e-06, "loss": 0.4764, "step": 358 }, { "epoch": 0.17822273705113353, "grad_norm": 0.5529732704162598, "learning_rate": 5.933884297520661e-06, "loss": 0.4662, "step": 359 }, { "epoch": 0.1787191792156214, "grad_norm": 0.49527376890182495, "learning_rate": 5.9504132231404965e-06, "loss": 0.4919, "step": 360 }, { "epoch": 0.1792156213801092, "grad_norm": 0.5089897513389587, "learning_rate": 5.966942148760331e-06, "loss": 0.4531, "step": 361 }, { "epoch": 0.17971206354459707, "grad_norm": 0.470611035823822, "learning_rate": 5.9834710743801665e-06, "loss": 0.4695, "step": 362 }, { "epoch": 0.1802085057090849, "grad_norm": 0.4354614317417145, "learning_rate": 6e-06, "loss": 0.4657, "step": 363 }, { "epoch": 0.18070494787357272, "grad_norm": 0.5020565986633301, "learning_rate": 6.0165289256198355e-06, "loss": 0.4562, "step": 364 }, { "epoch": 0.18120139003806057, "grad_norm": 0.511317253112793, "learning_rate": 6.03305785123967e-06, "loss": 0.4931, "step": 365 }, { "epoch": 0.1816978322025484, "grad_norm": 0.5127801895141602, "learning_rate": 6.0495867768595055e-06, "loss": 0.4729, "step": 366 }, { "epoch": 0.18219427436703625, "grad_norm": 0.47494497895240784, "learning_rate": 6.066115702479339e-06, "loss": 0.4412, "step": 367 }, { "epoch": 0.18269071653152408, "grad_norm": 0.42556092143058777, "learning_rate": 6.082644628099174e-06, "loss": 0.4708, "step": 368 }, { "epoch": 0.1831871586960119, "grad_norm": 0.49851569533348083, "learning_rate": 6.099173553719009e-06, "loss": 0.4788, "step": 369 }, { "epoch": 0.18368360086049976, "grad_norm": 0.5370215773582458, "learning_rate": 6.115702479338843e-06, "loss": 0.4786, "step": 370 }, { "epoch": 0.18418004302498758, "grad_norm": 0.5141631364822388, "learning_rate": 6.132231404958678e-06, "loss": 0.4616, "step": 371 }, { "epoch": 0.18467648518947544, "grad_norm": 0.5496755838394165, "learning_rate": 6.148760330578513e-06, "loss": 0.4687, "step": 372 }, { "epoch": 0.18517292735396326, "grad_norm": 0.423684298992157, "learning_rate": 6.165289256198348e-06, "loss": 0.457, "step": 373 }, { "epoch": 0.1856693695184511, "grad_norm": 0.5061066746711731, "learning_rate": 6.181818181818182e-06, "loss": 0.4337, "step": 374 }, { "epoch": 0.18616581168293894, "grad_norm": 0.4746078550815582, "learning_rate": 6.198347107438017e-06, "loss": 0.4773, "step": 375 }, { "epoch": 0.18666225384742677, "grad_norm": 0.5324608087539673, "learning_rate": 6.214876033057852e-06, "loss": 0.4964, "step": 376 }, { "epoch": 0.18715869601191462, "grad_norm": 0.507277250289917, "learning_rate": 6.231404958677686e-06, "loss": 0.482, "step": 377 }, { "epoch": 0.18765513817640245, "grad_norm": 0.459421843290329, "learning_rate": 6.247933884297522e-06, "loss": 0.4621, "step": 378 }, { "epoch": 0.18815158034089027, "grad_norm": 0.5192954540252686, "learning_rate": 6.264462809917355e-06, "loss": 0.4992, "step": 379 }, { "epoch": 0.18864802250537813, "grad_norm": 0.5399184226989746, "learning_rate": 6.280991735537191e-06, "loss": 0.4746, "step": 380 }, { "epoch": 0.18914446466986595, "grad_norm": 0.5837967395782471, "learning_rate": 6.297520661157025e-06, "loss": 0.4285, "step": 381 }, { "epoch": 0.1896409068343538, "grad_norm": 0.5200411677360535, "learning_rate": 6.314049586776861e-06, "loss": 0.493, "step": 382 }, { "epoch": 0.19013734899884163, "grad_norm": 0.5000556707382202, "learning_rate": 6.330578512396694e-06, "loss": 0.465, "step": 383 }, { "epoch": 0.19063379116332949, "grad_norm": 0.4674074947834015, "learning_rate": 6.34710743801653e-06, "loss": 0.4366, "step": 384 }, { "epoch": 0.1911302333278173, "grad_norm": 0.5919782519340515, "learning_rate": 6.363636363636364e-06, "loss": 0.4751, "step": 385 }, { "epoch": 0.19162667549230514, "grad_norm": 0.4572955369949341, "learning_rate": 6.3801652892562e-06, "loss": 0.4673, "step": 386 }, { "epoch": 0.192123117656793, "grad_norm": 0.5009949207305908, "learning_rate": 6.396694214876033e-06, "loss": 0.4764, "step": 387 }, { "epoch": 0.19261955982128082, "grad_norm": 0.4954918920993805, "learning_rate": 6.413223140495868e-06, "loss": 0.4868, "step": 388 }, { "epoch": 0.19311600198576867, "grad_norm": 0.47168946266174316, "learning_rate": 6.429752066115703e-06, "loss": 0.4379, "step": 389 }, { "epoch": 0.1936124441502565, "grad_norm": 0.47952762246131897, "learning_rate": 6.446280991735537e-06, "loss": 0.4347, "step": 390 }, { "epoch": 0.19410888631474432, "grad_norm": 0.49423375725746155, "learning_rate": 6.462809917355372e-06, "loss": 0.4544, "step": 391 }, { "epoch": 0.19460532847923218, "grad_norm": 0.48102736473083496, "learning_rate": 6.479338842975207e-06, "loss": 0.4766, "step": 392 }, { "epoch": 0.19510177064372, "grad_norm": 0.4913170337677002, "learning_rate": 6.495867768595042e-06, "loss": 0.4703, "step": 393 }, { "epoch": 0.19559821280820786, "grad_norm": 0.503303587436676, "learning_rate": 6.512396694214877e-06, "loss": 0.4755, "step": 394 }, { "epoch": 0.19609465497269568, "grad_norm": 0.57039874792099, "learning_rate": 6.528925619834712e-06, "loss": 0.4629, "step": 395 }, { "epoch": 0.1965910971371835, "grad_norm": 0.4933932423591614, "learning_rate": 6.545454545454546e-06, "loss": 0.4875, "step": 396 }, { "epoch": 0.19708753930167136, "grad_norm": 0.45007044076919556, "learning_rate": 6.56198347107438e-06, "loss": 0.4903, "step": 397 }, { "epoch": 0.1975839814661592, "grad_norm": 0.5145974159240723, "learning_rate": 6.578512396694216e-06, "loss": 0.4922, "step": 398 }, { "epoch": 0.19808042363064704, "grad_norm": 0.5051202178001404, "learning_rate": 6.5950413223140495e-06, "loss": 0.4795, "step": 399 }, { "epoch": 0.19857686579513487, "grad_norm": 0.4447315037250519, "learning_rate": 6.611570247933885e-06, "loss": 0.467, "step": 400 }, { "epoch": 0.1990733079596227, "grad_norm": 0.4814540147781372, "learning_rate": 6.628099173553719e-06, "loss": 0.4485, "step": 401 }, { "epoch": 0.19956975012411055, "grad_norm": 0.4990827143192291, "learning_rate": 6.644628099173555e-06, "loss": 0.463, "step": 402 }, { "epoch": 0.20006619228859837, "grad_norm": 0.5192259550094604, "learning_rate": 6.6611570247933885e-06, "loss": 0.4723, "step": 403 }, { "epoch": 0.20056263445308622, "grad_norm": 0.5059645771980286, "learning_rate": 6.677685950413224e-06, "loss": 0.4794, "step": 404 }, { "epoch": 0.20105907661757405, "grad_norm": 0.4817017614841461, "learning_rate": 6.694214876033058e-06, "loss": 0.4719, "step": 405 }, { "epoch": 0.20155551878206188, "grad_norm": 0.497331827878952, "learning_rate": 6.710743801652894e-06, "loss": 0.4517, "step": 406 }, { "epoch": 0.20205196094654973, "grad_norm": 0.5285255908966064, "learning_rate": 6.7272727272727275e-06, "loss": 0.4711, "step": 407 }, { "epoch": 0.20254840311103756, "grad_norm": 0.47163647413253784, "learning_rate": 6.743801652892562e-06, "loss": 0.4903, "step": 408 }, { "epoch": 0.2030448452755254, "grad_norm": 0.5295432806015015, "learning_rate": 6.760330578512397e-06, "loss": 0.4946, "step": 409 }, { "epoch": 0.20354128744001324, "grad_norm": 0.4628829061985016, "learning_rate": 6.776859504132232e-06, "loss": 0.4598, "step": 410 }, { "epoch": 0.2040377296045011, "grad_norm": 0.50532466173172, "learning_rate": 6.793388429752067e-06, "loss": 0.5219, "step": 411 }, { "epoch": 0.20453417176898891, "grad_norm": 0.4808811843395233, "learning_rate": 6.809917355371901e-06, "loss": 0.4636, "step": 412 }, { "epoch": 0.20503061393347674, "grad_norm": 0.45972102880477905, "learning_rate": 6.826446280991736e-06, "loss": 0.4336, "step": 413 }, { "epoch": 0.2055270560979646, "grad_norm": 0.5161400437355042, "learning_rate": 6.842975206611571e-06, "loss": 0.4772, "step": 414 }, { "epoch": 0.20602349826245242, "grad_norm": 0.5969698429107666, "learning_rate": 6.859504132231406e-06, "loss": 0.4711, "step": 415 }, { "epoch": 0.20651994042694027, "grad_norm": 0.5112705826759338, "learning_rate": 6.87603305785124e-06, "loss": 0.421, "step": 416 }, { "epoch": 0.2070163825914281, "grad_norm": 0.44240763783454895, "learning_rate": 6.8925619834710745e-06, "loss": 0.4723, "step": 417 }, { "epoch": 0.20751282475591593, "grad_norm": 0.490811288356781, "learning_rate": 6.90909090909091e-06, "loss": 0.435, "step": 418 }, { "epoch": 0.20800926692040378, "grad_norm": 0.48971572518348694, "learning_rate": 6.925619834710744e-06, "loss": 0.4315, "step": 419 }, { "epoch": 0.2085057090848916, "grad_norm": 0.49103692173957825, "learning_rate": 6.942148760330579e-06, "loss": 0.4399, "step": 420 }, { "epoch": 0.20900215124937946, "grad_norm": 0.4959203004837036, "learning_rate": 6.9586776859504135e-06, "loss": 0.4762, "step": 421 }, { "epoch": 0.20949859341386728, "grad_norm": 0.4981769025325775, "learning_rate": 6.975206611570249e-06, "loss": 0.4785, "step": 422 }, { "epoch": 0.2099950355783551, "grad_norm": 0.47105762362480164, "learning_rate": 6.991735537190083e-06, "loss": 0.4583, "step": 423 }, { "epoch": 0.21049147774284296, "grad_norm": 0.5248613357543945, "learning_rate": 7.008264462809918e-06, "loss": 0.4967, "step": 424 }, { "epoch": 0.2109879199073308, "grad_norm": 0.5097689032554626, "learning_rate": 7.0247933884297525e-06, "loss": 0.4631, "step": 425 }, { "epoch": 0.21148436207181864, "grad_norm": 0.5935734510421753, "learning_rate": 7.041322314049588e-06, "loss": 0.4548, "step": 426 }, { "epoch": 0.21198080423630647, "grad_norm": 0.4890136122703552, "learning_rate": 7.0578512396694225e-06, "loss": 0.4855, "step": 427 }, { "epoch": 0.2124772464007943, "grad_norm": 0.4939761161804199, "learning_rate": 7.074380165289256e-06, "loss": 0.4697, "step": 428 }, { "epoch": 0.21297368856528215, "grad_norm": 0.5336572527885437, "learning_rate": 7.0909090909090916e-06, "loss": 0.4843, "step": 429 }, { "epoch": 0.21347013072976997, "grad_norm": 0.48133477568626404, "learning_rate": 7.107438016528926e-06, "loss": 0.4563, "step": 430 }, { "epoch": 0.21396657289425783, "grad_norm": 0.5093000531196594, "learning_rate": 7.1239669421487615e-06, "loss": 0.4706, "step": 431 }, { "epoch": 0.21446301505874565, "grad_norm": 0.5128490924835205, "learning_rate": 7.140495867768595e-06, "loss": 0.4673, "step": 432 }, { "epoch": 0.21495945722323348, "grad_norm": 0.6049978137016296, "learning_rate": 7.1570247933884306e-06, "loss": 0.4826, "step": 433 }, { "epoch": 0.21545589938772133, "grad_norm": 0.5036123394966125, "learning_rate": 7.173553719008265e-06, "loss": 0.4732, "step": 434 }, { "epoch": 0.21595234155220916, "grad_norm": 0.5147184133529663, "learning_rate": 7.1900826446281005e-06, "loss": 0.4198, "step": 435 }, { "epoch": 0.216448783716697, "grad_norm": 0.6592348217964172, "learning_rate": 7.206611570247934e-06, "loss": 0.4698, "step": 436 }, { "epoch": 0.21694522588118484, "grad_norm": 0.4838290810585022, "learning_rate": 7.223140495867769e-06, "loss": 0.4826, "step": 437 }, { "epoch": 0.2174416680456727, "grad_norm": 0.5472857356071472, "learning_rate": 7.239669421487604e-06, "loss": 0.4814, "step": 438 }, { "epoch": 0.21793811021016052, "grad_norm": 0.5904685854911804, "learning_rate": 7.256198347107438e-06, "loss": 0.4746, "step": 439 }, { "epoch": 0.21843455237464834, "grad_norm": 0.6299452185630798, "learning_rate": 7.272727272727273e-06, "loss": 0.4691, "step": 440 }, { "epoch": 0.2189309945391362, "grad_norm": 0.4873991310596466, "learning_rate": 7.289256198347108e-06, "loss": 0.4437, "step": 441 }, { "epoch": 0.21942743670362402, "grad_norm": 0.5659573078155518, "learning_rate": 7.305785123966943e-06, "loss": 0.4527, "step": 442 }, { "epoch": 0.21992387886811188, "grad_norm": 0.5475215315818787, "learning_rate": 7.322314049586778e-06, "loss": 0.4617, "step": 443 }, { "epoch": 0.2204203210325997, "grad_norm": 0.47721394896507263, "learning_rate": 7.338842975206613e-06, "loss": 0.4482, "step": 444 }, { "epoch": 0.22091676319708753, "grad_norm": 0.4668905436992645, "learning_rate": 7.355371900826447e-06, "loss": 0.4289, "step": 445 }, { "epoch": 0.22141320536157538, "grad_norm": 0.505089282989502, "learning_rate": 7.371900826446282e-06, "loss": 0.4635, "step": 446 }, { "epoch": 0.2219096475260632, "grad_norm": 0.49508675932884216, "learning_rate": 7.388429752066117e-06, "loss": 0.4553, "step": 447 }, { "epoch": 0.22240608969055106, "grad_norm": 0.5488690137863159, "learning_rate": 7.40495867768595e-06, "loss": 0.4945, "step": 448 }, { "epoch": 0.2229025318550389, "grad_norm": 0.501284658908844, "learning_rate": 7.421487603305786e-06, "loss": 0.4494, "step": 449 }, { "epoch": 0.2233989740195267, "grad_norm": 0.5234982371330261, "learning_rate": 7.43801652892562e-06, "loss": 0.4841, "step": 450 }, { "epoch": 0.22389541618401457, "grad_norm": 0.530637264251709, "learning_rate": 7.454545454545456e-06, "loss": 0.456, "step": 451 }, { "epoch": 0.2243918583485024, "grad_norm": 0.4851827621459961, "learning_rate": 7.471074380165289e-06, "loss": 0.4366, "step": 452 }, { "epoch": 0.22488830051299025, "grad_norm": 0.4922843873500824, "learning_rate": 7.487603305785125e-06, "loss": 0.4445, "step": 453 }, { "epoch": 0.22538474267747807, "grad_norm": 0.5431027412414551, "learning_rate": 7.504132231404959e-06, "loss": 0.4688, "step": 454 }, { "epoch": 0.2258811848419659, "grad_norm": 0.5389288663864136, "learning_rate": 7.520661157024795e-06, "loss": 0.4741, "step": 455 }, { "epoch": 0.22637762700645375, "grad_norm": 0.537929892539978, "learning_rate": 7.537190082644628e-06, "loss": 0.4748, "step": 456 }, { "epoch": 0.22687406917094158, "grad_norm": 0.5672274231910706, "learning_rate": 7.553719008264463e-06, "loss": 0.4628, "step": 457 }, { "epoch": 0.22737051133542943, "grad_norm": 0.5099994540214539, "learning_rate": 7.570247933884298e-06, "loss": 0.4385, "step": 458 }, { "epoch": 0.22786695349991726, "grad_norm": 0.5815716981887817, "learning_rate": 7.586776859504133e-06, "loss": 0.4641, "step": 459 }, { "epoch": 0.22836339566440508, "grad_norm": 0.54779052734375, "learning_rate": 7.603305785123968e-06, "loss": 0.4818, "step": 460 }, { "epoch": 0.22885983782889294, "grad_norm": 0.6388765573501587, "learning_rate": 7.619834710743802e-06, "loss": 0.4766, "step": 461 }, { "epoch": 0.22935627999338076, "grad_norm": 0.5192151665687561, "learning_rate": 7.636363636363638e-06, "loss": 0.4743, "step": 462 }, { "epoch": 0.22985272215786862, "grad_norm": 0.5802621245384216, "learning_rate": 7.652892561983471e-06, "loss": 0.4808, "step": 463 }, { "epoch": 0.23034916432235644, "grad_norm": 0.5306212902069092, "learning_rate": 7.669421487603307e-06, "loss": 0.4801, "step": 464 }, { "epoch": 0.2308456064868443, "grad_norm": 0.4715561270713806, "learning_rate": 7.685950413223142e-06, "loss": 0.4554, "step": 465 }, { "epoch": 0.23134204865133212, "grad_norm": 0.5635004639625549, "learning_rate": 7.702479338842976e-06, "loss": 0.4558, "step": 466 }, { "epoch": 0.23183849081581995, "grad_norm": 0.5234822630882263, "learning_rate": 7.71900826446281e-06, "loss": 0.4758, "step": 467 }, { "epoch": 0.2323349329803078, "grad_norm": 0.5668840408325195, "learning_rate": 7.735537190082645e-06, "loss": 0.4792, "step": 468 }, { "epoch": 0.23283137514479563, "grad_norm": 0.5866802334785461, "learning_rate": 7.75206611570248e-06, "loss": 0.437, "step": 469 }, { "epoch": 0.23332781730928348, "grad_norm": 0.5463567972183228, "learning_rate": 7.768595041322314e-06, "loss": 0.4023, "step": 470 }, { "epoch": 0.2338242594737713, "grad_norm": 0.6540109515190125, "learning_rate": 7.785123966942149e-06, "loss": 0.465, "step": 471 }, { "epoch": 0.23432070163825913, "grad_norm": 0.4782220125198364, "learning_rate": 7.801652892561983e-06, "loss": 0.4691, "step": 472 }, { "epoch": 0.23481714380274699, "grad_norm": 0.6350997090339661, "learning_rate": 7.81818181818182e-06, "loss": 0.478, "step": 473 }, { "epoch": 0.2353135859672348, "grad_norm": 0.5266542434692383, "learning_rate": 7.834710743801654e-06, "loss": 0.4911, "step": 474 }, { "epoch": 0.23581002813172267, "grad_norm": 0.5874689221382141, "learning_rate": 7.851239669421489e-06, "loss": 0.457, "step": 475 }, { "epoch": 0.2363064702962105, "grad_norm": 0.4690110683441162, "learning_rate": 7.867768595041323e-06, "loss": 0.4287, "step": 476 }, { "epoch": 0.23680291246069832, "grad_norm": 0.5045602321624756, "learning_rate": 7.884297520661158e-06, "loss": 0.4695, "step": 477 }, { "epoch": 0.23729935462518617, "grad_norm": 0.46898049116134644, "learning_rate": 7.900826446280992e-06, "loss": 0.4449, "step": 478 }, { "epoch": 0.237795796789674, "grad_norm": 0.4730655252933502, "learning_rate": 7.917355371900827e-06, "loss": 0.4362, "step": 479 }, { "epoch": 0.23829223895416185, "grad_norm": 0.537470281124115, "learning_rate": 7.933884297520661e-06, "loss": 0.4685, "step": 480 }, { "epoch": 0.23878868111864968, "grad_norm": 0.5066771507263184, "learning_rate": 7.950413223140496e-06, "loss": 0.429, "step": 481 }, { "epoch": 0.2392851232831375, "grad_norm": 0.5229769349098206, "learning_rate": 7.966942148760332e-06, "loss": 0.4447, "step": 482 }, { "epoch": 0.23978156544762536, "grad_norm": 0.571217954158783, "learning_rate": 7.983471074380165e-06, "loss": 0.4738, "step": 483 }, { "epoch": 0.24027800761211318, "grad_norm": 0.4907481074333191, "learning_rate": 8.000000000000001e-06, "loss": 0.4883, "step": 484 }, { "epoch": 0.24077444977660103, "grad_norm": 0.44097188115119934, "learning_rate": 8.016528925619836e-06, "loss": 0.4432, "step": 485 }, { "epoch": 0.24127089194108886, "grad_norm": 0.49568381905555725, "learning_rate": 8.033057851239669e-06, "loss": 0.4622, "step": 486 }, { "epoch": 0.2417673341055767, "grad_norm": 0.502668559551239, "learning_rate": 8.049586776859505e-06, "loss": 0.4853, "step": 487 }, { "epoch": 0.24226377627006454, "grad_norm": 0.4797780215740204, "learning_rate": 8.06611570247934e-06, "loss": 0.4391, "step": 488 }, { "epoch": 0.24276021843455237, "grad_norm": 0.5599988102912903, "learning_rate": 8.082644628099174e-06, "loss": 0.4416, "step": 489 }, { "epoch": 0.24325666059904022, "grad_norm": 0.4995494484901428, "learning_rate": 8.099173553719009e-06, "loss": 0.4516, "step": 490 }, { "epoch": 0.24375310276352805, "grad_norm": 0.4847780764102936, "learning_rate": 8.115702479338843e-06, "loss": 0.4632, "step": 491 }, { "epoch": 0.2442495449280159, "grad_norm": 0.5168769359588623, "learning_rate": 8.132231404958678e-06, "loss": 0.4495, "step": 492 }, { "epoch": 0.24474598709250373, "grad_norm": 0.4912966191768646, "learning_rate": 8.148760330578514e-06, "loss": 0.4319, "step": 493 }, { "epoch": 0.24524242925699155, "grad_norm": 0.5270532965660095, "learning_rate": 8.165289256198348e-06, "loss": 0.4437, "step": 494 }, { "epoch": 0.2457388714214794, "grad_norm": 0.4817824959754944, "learning_rate": 8.181818181818183e-06, "loss": 0.4185, "step": 495 }, { "epoch": 0.24623531358596723, "grad_norm": 0.47625070810317993, "learning_rate": 8.198347107438017e-06, "loss": 0.4724, "step": 496 }, { "epoch": 0.24673175575045508, "grad_norm": 0.5020045042037964, "learning_rate": 8.214876033057852e-06, "loss": 0.4811, "step": 497 }, { "epoch": 0.2472281979149429, "grad_norm": 0.5770827531814575, "learning_rate": 8.231404958677687e-06, "loss": 0.4655, "step": 498 }, { "epoch": 0.24772464007943074, "grad_norm": 0.472914457321167, "learning_rate": 8.247933884297521e-06, "loss": 0.4208, "step": 499 }, { "epoch": 0.2482210822439186, "grad_norm": 0.5558708310127258, "learning_rate": 8.264462809917356e-06, "loss": 0.4266, "step": 500 }, { "epoch": 0.24871752440840642, "grad_norm": 0.5754216313362122, "learning_rate": 8.28099173553719e-06, "loss": 0.4498, "step": 501 }, { "epoch": 0.24921396657289427, "grad_norm": 0.565979540348053, "learning_rate": 8.297520661157026e-06, "loss": 0.4716, "step": 502 }, { "epoch": 0.2497104087373821, "grad_norm": 0.5891662240028381, "learning_rate": 8.31404958677686e-06, "loss": 0.4539, "step": 503 }, { "epoch": 0.25020685090186995, "grad_norm": 0.5138143301010132, "learning_rate": 8.330578512396695e-06, "loss": 0.4322, "step": 504 }, { "epoch": 0.25070329306635775, "grad_norm": 0.5762490630149841, "learning_rate": 8.34710743801653e-06, "loss": 0.4581, "step": 505 }, { "epoch": 0.2511997352308456, "grad_norm": 0.46826639771461487, "learning_rate": 8.363636363636365e-06, "loss": 0.4614, "step": 506 }, { "epoch": 0.25169617739533345, "grad_norm": 0.6525359153747559, "learning_rate": 8.380165289256199e-06, "loss": 0.482, "step": 507 }, { "epoch": 0.2521926195598213, "grad_norm": 0.5046901702880859, "learning_rate": 8.396694214876034e-06, "loss": 0.4675, "step": 508 }, { "epoch": 0.2526890617243091, "grad_norm": 0.47852233052253723, "learning_rate": 8.413223140495868e-06, "loss": 0.4719, "step": 509 }, { "epoch": 0.25318550388879696, "grad_norm": 0.5505646467208862, "learning_rate": 8.429752066115703e-06, "loss": 0.4565, "step": 510 }, { "epoch": 0.2536819460532848, "grad_norm": 0.46384549140930176, "learning_rate": 8.446280991735539e-06, "loss": 0.4449, "step": 511 }, { "epoch": 0.2541783882177726, "grad_norm": 0.5765954256057739, "learning_rate": 8.462809917355372e-06, "loss": 0.4621, "step": 512 }, { "epoch": 0.25467483038226046, "grad_norm": 0.4475886821746826, "learning_rate": 8.479338842975208e-06, "loss": 0.4073, "step": 513 }, { "epoch": 0.2551712725467483, "grad_norm": 0.5927165746688843, "learning_rate": 8.495867768595043e-06, "loss": 0.4626, "step": 514 }, { "epoch": 0.2556677147112361, "grad_norm": 0.5065038800239563, "learning_rate": 8.512396694214877e-06, "loss": 0.4393, "step": 515 }, { "epoch": 0.25616415687572397, "grad_norm": 0.5141983032226562, "learning_rate": 8.528925619834712e-06, "loss": 0.4704, "step": 516 }, { "epoch": 0.2566605990402118, "grad_norm": 0.5347033143043518, "learning_rate": 8.545454545454546e-06, "loss": 0.4326, "step": 517 }, { "epoch": 0.2571570412046997, "grad_norm": 0.46820417046546936, "learning_rate": 8.56198347107438e-06, "loss": 0.4494, "step": 518 }, { "epoch": 0.2576534833691875, "grad_norm": 0.5322158336639404, "learning_rate": 8.578512396694215e-06, "loss": 0.4922, "step": 519 }, { "epoch": 0.25814992553367533, "grad_norm": 0.5241012573242188, "learning_rate": 8.59504132231405e-06, "loss": 0.478, "step": 520 }, { "epoch": 0.2586463676981632, "grad_norm": 0.46995940804481506, "learning_rate": 8.611570247933884e-06, "loss": 0.4484, "step": 521 }, { "epoch": 0.259142809862651, "grad_norm": 0.6058238744735718, "learning_rate": 8.62809917355372e-06, "loss": 0.4826, "step": 522 }, { "epoch": 0.25963925202713883, "grad_norm": 0.5370778441429138, "learning_rate": 8.644628099173555e-06, "loss": 0.467, "step": 523 }, { "epoch": 0.2601356941916267, "grad_norm": 0.5864719152450562, "learning_rate": 8.66115702479339e-06, "loss": 0.4459, "step": 524 }, { "epoch": 0.2606321363561145, "grad_norm": 0.5862976312637329, "learning_rate": 8.677685950413224e-06, "loss": 0.4645, "step": 525 }, { "epoch": 0.26112857852060234, "grad_norm": 0.5378475189208984, "learning_rate": 8.694214876033059e-06, "loss": 0.4577, "step": 526 }, { "epoch": 0.2616250206850902, "grad_norm": 0.6304500699043274, "learning_rate": 8.710743801652893e-06, "loss": 0.5004, "step": 527 }, { "epoch": 0.26212146284957805, "grad_norm": 0.5542117953300476, "learning_rate": 8.727272727272728e-06, "loss": 0.4014, "step": 528 }, { "epoch": 0.26261790501406584, "grad_norm": 0.5334244966506958, "learning_rate": 8.743801652892562e-06, "loss": 0.4266, "step": 529 }, { "epoch": 0.2631143471785537, "grad_norm": 0.5097237229347229, "learning_rate": 8.760330578512397e-06, "loss": 0.4517, "step": 530 }, { "epoch": 0.26361078934304155, "grad_norm": 0.5597156286239624, "learning_rate": 8.776859504132233e-06, "loss": 0.4661, "step": 531 }, { "epoch": 0.26410723150752935, "grad_norm": 0.5276119112968445, "learning_rate": 8.793388429752066e-06, "loss": 0.4416, "step": 532 }, { "epoch": 0.2646036736720172, "grad_norm": 0.5205169320106506, "learning_rate": 8.809917355371902e-06, "loss": 0.4393, "step": 533 }, { "epoch": 0.26510011583650506, "grad_norm": 0.4926174581050873, "learning_rate": 8.826446280991737e-06, "loss": 0.4368, "step": 534 }, { "epoch": 0.2655965580009929, "grad_norm": 0.5341338515281677, "learning_rate": 8.842975206611571e-06, "loss": 0.4738, "step": 535 }, { "epoch": 0.2660930001654807, "grad_norm": 0.5294544100761414, "learning_rate": 8.859504132231406e-06, "loss": 0.4804, "step": 536 }, { "epoch": 0.26658944232996856, "grad_norm": 0.5970295667648315, "learning_rate": 8.87603305785124e-06, "loss": 0.4808, "step": 537 }, { "epoch": 0.2670858844944564, "grad_norm": 0.44714972376823425, "learning_rate": 8.892561983471075e-06, "loss": 0.4184, "step": 538 }, { "epoch": 0.2675823266589442, "grad_norm": 0.48664653301239014, "learning_rate": 8.90909090909091e-06, "loss": 0.4596, "step": 539 }, { "epoch": 0.26807876882343207, "grad_norm": 0.49594932794570923, "learning_rate": 8.925619834710744e-06, "loss": 0.4748, "step": 540 }, { "epoch": 0.2685752109879199, "grad_norm": 0.47706347703933716, "learning_rate": 8.942148760330578e-06, "loss": 0.4552, "step": 541 }, { "epoch": 0.2690716531524077, "grad_norm": 0.44448453187942505, "learning_rate": 8.958677685950415e-06, "loss": 0.457, "step": 542 }, { "epoch": 0.2695680953168956, "grad_norm": 0.49442651867866516, "learning_rate": 8.97520661157025e-06, "loss": 0.4493, "step": 543 }, { "epoch": 0.2700645374813834, "grad_norm": 0.5113320350646973, "learning_rate": 8.991735537190084e-06, "loss": 0.5025, "step": 544 }, { "epoch": 0.2705609796458713, "grad_norm": 0.43562591075897217, "learning_rate": 9.008264462809918e-06, "loss": 0.4596, "step": 545 }, { "epoch": 0.2710574218103591, "grad_norm": 0.6078981161117554, "learning_rate": 9.024793388429753e-06, "loss": 0.4659, "step": 546 }, { "epoch": 0.27155386397484693, "grad_norm": 0.5276830792427063, "learning_rate": 9.041322314049587e-06, "loss": 0.4963, "step": 547 }, { "epoch": 0.2720503061393348, "grad_norm": 0.45266959071159363, "learning_rate": 9.057851239669422e-06, "loss": 0.4433, "step": 548 }, { "epoch": 0.2725467483038226, "grad_norm": 0.5606865882873535, "learning_rate": 9.074380165289256e-06, "loss": 0.4502, "step": 549 }, { "epoch": 0.27304319046831044, "grad_norm": 0.6084829568862915, "learning_rate": 9.090909090909091e-06, "loss": 0.4507, "step": 550 }, { "epoch": 0.2735396326327983, "grad_norm": 0.519774317741394, "learning_rate": 9.107438016528927e-06, "loss": 0.464, "step": 551 }, { "epoch": 0.2740360747972861, "grad_norm": 0.544318437576294, "learning_rate": 9.12396694214876e-06, "loss": 0.471, "step": 552 }, { "epoch": 0.27453251696177394, "grad_norm": 0.541947603225708, "learning_rate": 9.140495867768596e-06, "loss": 0.4244, "step": 553 }, { "epoch": 0.2750289591262618, "grad_norm": 0.5687885284423828, "learning_rate": 9.157024793388431e-06, "loss": 0.4101, "step": 554 }, { "epoch": 0.27552540129074965, "grad_norm": 0.48723042011260986, "learning_rate": 9.173553719008265e-06, "loss": 0.4615, "step": 555 }, { "epoch": 0.27602184345523745, "grad_norm": 0.5754094123840332, "learning_rate": 9.1900826446281e-06, "loss": 0.4386, "step": 556 }, { "epoch": 0.2765182856197253, "grad_norm": 0.49355193972587585, "learning_rate": 9.206611570247935e-06, "loss": 0.4564, "step": 557 }, { "epoch": 0.27701472778421316, "grad_norm": 0.47735437750816345, "learning_rate": 9.223140495867769e-06, "loss": 0.443, "step": 558 }, { "epoch": 0.27751116994870095, "grad_norm": 0.5180425047874451, "learning_rate": 9.239669421487604e-06, "loss": 0.4316, "step": 559 }, { "epoch": 0.2780076121131888, "grad_norm": 0.5102524161338806, "learning_rate": 9.25619834710744e-06, "loss": 0.4594, "step": 560 }, { "epoch": 0.27850405427767666, "grad_norm": 0.515872597694397, "learning_rate": 9.272727272727273e-06, "loss": 0.4587, "step": 561 }, { "epoch": 0.2790004964421645, "grad_norm": 0.5197128057479858, "learning_rate": 9.289256198347109e-06, "loss": 0.4323, "step": 562 }, { "epoch": 0.2794969386066523, "grad_norm": 0.6178656220436096, "learning_rate": 9.305785123966943e-06, "loss": 0.4594, "step": 563 }, { "epoch": 0.27999338077114017, "grad_norm": 0.5761871933937073, "learning_rate": 9.322314049586778e-06, "loss": 0.4583, "step": 564 }, { "epoch": 0.280489822935628, "grad_norm": 0.5108822584152222, "learning_rate": 9.338842975206613e-06, "loss": 0.4487, "step": 565 }, { "epoch": 0.2809862651001158, "grad_norm": 0.5830370187759399, "learning_rate": 9.355371900826447e-06, "loss": 0.4347, "step": 566 }, { "epoch": 0.28148270726460367, "grad_norm": 0.524936318397522, "learning_rate": 9.371900826446282e-06, "loss": 0.4527, "step": 567 }, { "epoch": 0.2819791494290915, "grad_norm": 0.6342265009880066, "learning_rate": 9.388429752066116e-06, "loss": 0.4723, "step": 568 }, { "epoch": 0.2824755915935793, "grad_norm": 0.5875529050827026, "learning_rate": 9.40495867768595e-06, "loss": 0.437, "step": 569 }, { "epoch": 0.2829720337580672, "grad_norm": 0.5238960385322571, "learning_rate": 9.421487603305785e-06, "loss": 0.4553, "step": 570 }, { "epoch": 0.28346847592255503, "grad_norm": 0.4910949170589447, "learning_rate": 9.438016528925621e-06, "loss": 0.4282, "step": 571 }, { "epoch": 0.2839649180870429, "grad_norm": 0.5127577185630798, "learning_rate": 9.454545454545456e-06, "loss": 0.4323, "step": 572 }, { "epoch": 0.2844613602515307, "grad_norm": 0.5175203084945679, "learning_rate": 9.47107438016529e-06, "loss": 0.4506, "step": 573 }, { "epoch": 0.28495780241601854, "grad_norm": 0.5126334428787231, "learning_rate": 9.487603305785125e-06, "loss": 0.4607, "step": 574 }, { "epoch": 0.2854542445805064, "grad_norm": 0.5104261636734009, "learning_rate": 9.50413223140496e-06, "loss": 0.408, "step": 575 }, { "epoch": 0.2859506867449942, "grad_norm": 0.5459928512573242, "learning_rate": 9.520661157024794e-06, "loss": 0.4317, "step": 576 }, { "epoch": 0.28644712890948204, "grad_norm": 0.5241634249687195, "learning_rate": 9.537190082644629e-06, "loss": 0.4416, "step": 577 }, { "epoch": 0.2869435710739699, "grad_norm": 0.5555662512779236, "learning_rate": 9.553719008264463e-06, "loss": 0.4573, "step": 578 }, { "epoch": 0.2874400132384577, "grad_norm": 0.45178812742233276, "learning_rate": 9.570247933884298e-06, "loss": 0.4124, "step": 579 }, { "epoch": 0.28793645540294555, "grad_norm": 0.4829377233982086, "learning_rate": 9.586776859504134e-06, "loss": 0.4369, "step": 580 }, { "epoch": 0.2884328975674334, "grad_norm": 0.5526188015937805, "learning_rate": 9.603305785123967e-06, "loss": 0.4404, "step": 581 }, { "epoch": 0.28892933973192125, "grad_norm": 0.4729190766811371, "learning_rate": 9.619834710743803e-06, "loss": 0.4662, "step": 582 }, { "epoch": 0.28942578189640905, "grad_norm": 0.5166657567024231, "learning_rate": 9.636363636363638e-06, "loss": 0.4521, "step": 583 }, { "epoch": 0.2899222240608969, "grad_norm": 0.4866880476474762, "learning_rate": 9.652892561983472e-06, "loss": 0.4498, "step": 584 }, { "epoch": 0.29041866622538476, "grad_norm": 0.4980413615703583, "learning_rate": 9.669421487603307e-06, "loss": 0.4429, "step": 585 }, { "epoch": 0.29091510838987256, "grad_norm": 0.4950350522994995, "learning_rate": 9.685950413223141e-06, "loss": 0.4842, "step": 586 }, { "epoch": 0.2914115505543604, "grad_norm": 0.5774328708648682, "learning_rate": 9.702479338842976e-06, "loss": 0.4757, "step": 587 }, { "epoch": 0.29190799271884826, "grad_norm": 0.5499045252799988, "learning_rate": 9.71900826446281e-06, "loss": 0.4518, "step": 588 }, { "epoch": 0.2924044348833361, "grad_norm": 0.5308087468147278, "learning_rate": 9.735537190082645e-06, "loss": 0.43, "step": 589 }, { "epoch": 0.2929008770478239, "grad_norm": 0.5171785354614258, "learning_rate": 9.75206611570248e-06, "loss": 0.4481, "step": 590 }, { "epoch": 0.29339731921231177, "grad_norm": 0.5228750109672546, "learning_rate": 9.768595041322316e-06, "loss": 0.4444, "step": 591 }, { "epoch": 0.2938937613767996, "grad_norm": 0.5937492847442627, "learning_rate": 9.78512396694215e-06, "loss": 0.4359, "step": 592 }, { "epoch": 0.2943902035412874, "grad_norm": 0.5940197706222534, "learning_rate": 9.801652892561985e-06, "loss": 0.4373, "step": 593 }, { "epoch": 0.2948866457057753, "grad_norm": 0.5152766108512878, "learning_rate": 9.81818181818182e-06, "loss": 0.4247, "step": 594 }, { "epoch": 0.29538308787026313, "grad_norm": 0.6305420994758606, "learning_rate": 9.834710743801654e-06, "loss": 0.4465, "step": 595 }, { "epoch": 0.2958795300347509, "grad_norm": 0.5076446533203125, "learning_rate": 9.851239669421488e-06, "loss": 0.4255, "step": 596 }, { "epoch": 0.2963759721992388, "grad_norm": 0.583196222782135, "learning_rate": 9.867768595041323e-06, "loss": 0.4401, "step": 597 }, { "epoch": 0.29687241436372663, "grad_norm": 0.6289090514183044, "learning_rate": 9.884297520661157e-06, "loss": 0.4534, "step": 598 }, { "epoch": 0.2973688565282145, "grad_norm": 0.5307527184486389, "learning_rate": 9.900826446280992e-06, "loss": 0.4636, "step": 599 }, { "epoch": 0.2978652986927023, "grad_norm": 0.5354812741279602, "learning_rate": 9.917355371900828e-06, "loss": 0.4513, "step": 600 }, { "epoch": 0.29836174085719014, "grad_norm": 0.5508080124855042, "learning_rate": 9.933884297520661e-06, "loss": 0.4577, "step": 601 }, { "epoch": 0.298858183021678, "grad_norm": 0.5863556861877441, "learning_rate": 9.950413223140497e-06, "loss": 0.4613, "step": 602 }, { "epoch": 0.2993546251861658, "grad_norm": 0.46984976530075073, "learning_rate": 9.966942148760332e-06, "loss": 0.4389, "step": 603 }, { "epoch": 0.29985106735065364, "grad_norm": 0.6412164568901062, "learning_rate": 9.983471074380166e-06, "loss": 0.4875, "step": 604 }, { "epoch": 0.3003475095151415, "grad_norm": 0.5633898973464966, "learning_rate": 1e-05, "loss": 0.471, "step": 605 }, { "epoch": 0.3008439516796293, "grad_norm": 0.612064778804779, "learning_rate": 9.999999165317946e-06, "loss": 0.4776, "step": 606 }, { "epoch": 0.30134039384411715, "grad_norm": 0.6273703575134277, "learning_rate": 9.999996661272064e-06, "loss": 0.4563, "step": 607 }, { "epoch": 0.301836836008605, "grad_norm": 0.6296327710151672, "learning_rate": 9.999992487863189e-06, "loss": 0.4536, "step": 608 }, { "epoch": 0.30233327817309286, "grad_norm": 0.6092546582221985, "learning_rate": 9.999986645092714e-06, "loss": 0.4682, "step": 609 }, { "epoch": 0.30282972033758065, "grad_norm": 0.5337762832641602, "learning_rate": 9.99997913296259e-06, "loss": 0.4378, "step": 610 }, { "epoch": 0.3033261625020685, "grad_norm": 0.6045701503753662, "learning_rate": 9.999969951475326e-06, "loss": 0.4565, "step": 611 }, { "epoch": 0.30382260466655636, "grad_norm": 0.5537183284759521, "learning_rate": 9.999959100633987e-06, "loss": 0.4409, "step": 612 }, { "epoch": 0.30431904683104416, "grad_norm": 0.5408738255500793, "learning_rate": 9.999946580442195e-06, "loss": 0.4691, "step": 613 }, { "epoch": 0.304815488995532, "grad_norm": 0.5856359004974365, "learning_rate": 9.999932390904133e-06, "loss": 0.4478, "step": 614 }, { "epoch": 0.30531193116001987, "grad_norm": 0.5772810578346252, "learning_rate": 9.999916532024533e-06, "loss": 0.4501, "step": 615 }, { "epoch": 0.3058083733245077, "grad_norm": 0.5181899666786194, "learning_rate": 9.999899003808695e-06, "loss": 0.4446, "step": 616 }, { "epoch": 0.3063048154889955, "grad_norm": 0.6796911358833313, "learning_rate": 9.99987980626247e-06, "loss": 0.4853, "step": 617 }, { "epoch": 0.3068012576534834, "grad_norm": 0.542973518371582, "learning_rate": 9.999858939392263e-06, "loss": 0.4666, "step": 618 }, { "epoch": 0.3072976998179712, "grad_norm": 0.5564113259315491, "learning_rate": 9.99983640320505e-06, "loss": 0.4239, "step": 619 }, { "epoch": 0.307794141982459, "grad_norm": 0.5920099020004272, "learning_rate": 9.999812197708347e-06, "loss": 0.4088, "step": 620 }, { "epoch": 0.3082905841469469, "grad_norm": 0.5365559458732605, "learning_rate": 9.999786322910239e-06, "loss": 0.449, "step": 621 }, { "epoch": 0.30878702631143473, "grad_norm": 0.5724971294403076, "learning_rate": 9.999758778819363e-06, "loss": 0.4391, "step": 622 }, { "epoch": 0.30928346847592253, "grad_norm": 0.5643184781074524, "learning_rate": 9.99972956544492e-06, "loss": 0.4442, "step": 623 }, { "epoch": 0.3097799106404104, "grad_norm": 0.5334243774414062, "learning_rate": 9.999698682796658e-06, "loss": 0.4306, "step": 624 }, { "epoch": 0.31027635280489824, "grad_norm": 0.5101922750473022, "learning_rate": 9.99966613088489e-06, "loss": 0.4672, "step": 625 }, { "epoch": 0.3107727949693861, "grad_norm": 0.6167260408401489, "learning_rate": 9.999631909720487e-06, "loss": 0.4716, "step": 626 }, { "epoch": 0.3112692371338739, "grad_norm": 0.5099331140518188, "learning_rate": 9.999596019314868e-06, "loss": 0.4824, "step": 627 }, { "epoch": 0.31176567929836174, "grad_norm": 0.5308018922805786, "learning_rate": 9.999558459680022e-06, "loss": 0.4642, "step": 628 }, { "epoch": 0.3122621214628496, "grad_norm": 0.6032562851905823, "learning_rate": 9.999519230828486e-06, "loss": 0.4571, "step": 629 }, { "epoch": 0.3127585636273374, "grad_norm": 0.6114035844802856, "learning_rate": 9.999478332773357e-06, "loss": 0.4697, "step": 630 }, { "epoch": 0.31325500579182525, "grad_norm": 0.5585907101631165, "learning_rate": 9.999435765528293e-06, "loss": 0.409, "step": 631 }, { "epoch": 0.3137514479563131, "grad_norm": 0.5326111316680908, "learning_rate": 9.999391529107504e-06, "loss": 0.4578, "step": 632 }, { "epoch": 0.3142478901208009, "grad_norm": 0.5728356242179871, "learning_rate": 9.999345623525758e-06, "loss": 0.4486, "step": 633 }, { "epoch": 0.31474433228528875, "grad_norm": 0.6834273934364319, "learning_rate": 9.999298048798385e-06, "loss": 0.4305, "step": 634 }, { "epoch": 0.3152407744497766, "grad_norm": 0.5054042935371399, "learning_rate": 9.999248804941265e-06, "loss": 0.4355, "step": 635 }, { "epoch": 0.31573721661426446, "grad_norm": 0.6183457374572754, "learning_rate": 9.999197891970843e-06, "loss": 0.4554, "step": 636 }, { "epoch": 0.31623365877875226, "grad_norm": 0.6198065876960754, "learning_rate": 9.999145309904112e-06, "loss": 0.4492, "step": 637 }, { "epoch": 0.3167301009432401, "grad_norm": 0.5451098084449768, "learning_rate": 9.999091058758634e-06, "loss": 0.4541, "step": 638 }, { "epoch": 0.31722654310772797, "grad_norm": 0.5792382955551147, "learning_rate": 9.99903513855252e-06, "loss": 0.4555, "step": 639 }, { "epoch": 0.31772298527221576, "grad_norm": 0.4658042788505554, "learning_rate": 9.998977549304436e-06, "loss": 0.4593, "step": 640 }, { "epoch": 0.3182194274367036, "grad_norm": 0.5605179071426392, "learning_rate": 9.998918291033617e-06, "loss": 0.475, "step": 641 }, { "epoch": 0.31871586960119147, "grad_norm": 0.49415746331214905, "learning_rate": 9.998857363759842e-06, "loss": 0.4436, "step": 642 }, { "epoch": 0.3192123117656793, "grad_norm": 0.5020939707756042, "learning_rate": 9.998794767503455e-06, "loss": 0.4866, "step": 643 }, { "epoch": 0.3197087539301671, "grad_norm": 0.5191720128059387, "learning_rate": 9.998730502285354e-06, "loss": 0.4646, "step": 644 }, { "epoch": 0.320205196094655, "grad_norm": 0.6193519830703735, "learning_rate": 9.998664568126996e-06, "loss": 0.4669, "step": 645 }, { "epoch": 0.32070163825914283, "grad_norm": 0.5038687586784363, "learning_rate": 9.998596965050395e-06, "loss": 0.4372, "step": 646 }, { "epoch": 0.3211980804236306, "grad_norm": 0.5900541543960571, "learning_rate": 9.998527693078122e-06, "loss": 0.4328, "step": 647 }, { "epoch": 0.3216945225881185, "grad_norm": 0.5080050230026245, "learning_rate": 9.998456752233305e-06, "loss": 0.4576, "step": 648 }, { "epoch": 0.32219096475260633, "grad_norm": 0.6440244317054749, "learning_rate": 9.99838414253963e-06, "loss": 0.4582, "step": 649 }, { "epoch": 0.32268740691709413, "grad_norm": 0.5516005754470825, "learning_rate": 9.998309864021337e-06, "loss": 0.4361, "step": 650 }, { "epoch": 0.323183849081582, "grad_norm": 0.6402910351753235, "learning_rate": 9.998233916703225e-06, "loss": 0.4792, "step": 651 }, { "epoch": 0.32368029124606984, "grad_norm": 0.49359244108200073, "learning_rate": 9.998156300610658e-06, "loss": 0.4177, "step": 652 }, { "epoch": 0.3241767334105577, "grad_norm": 0.5922918319702148, "learning_rate": 9.99807701576954e-06, "loss": 0.4294, "step": 653 }, { "epoch": 0.3246731755750455, "grad_norm": 0.5579651594161987, "learning_rate": 9.997996062206348e-06, "loss": 0.4544, "step": 654 }, { "epoch": 0.32516961773953335, "grad_norm": 0.5604251027107239, "learning_rate": 9.99791343994811e-06, "loss": 0.4979, "step": 655 }, { "epoch": 0.3256660599040212, "grad_norm": 0.6041139960289001, "learning_rate": 9.997829149022408e-06, "loss": 0.4302, "step": 656 }, { "epoch": 0.326162502068509, "grad_norm": 0.6084315776824951, "learning_rate": 9.997743189457387e-06, "loss": 0.4695, "step": 657 }, { "epoch": 0.32665894423299685, "grad_norm": 0.49438631534576416, "learning_rate": 9.997655561281747e-06, "loss": 0.4702, "step": 658 }, { "epoch": 0.3271553863974847, "grad_norm": 0.5533457398414612, "learning_rate": 9.997566264524745e-06, "loss": 0.4497, "step": 659 }, { "epoch": 0.32765182856197256, "grad_norm": 0.7197371125221252, "learning_rate": 9.997475299216191e-06, "loss": 0.4481, "step": 660 }, { "epoch": 0.32814827072646036, "grad_norm": 0.46882712841033936, "learning_rate": 9.99738266538646e-06, "loss": 0.447, "step": 661 }, { "epoch": 0.3286447128909482, "grad_norm": 0.5058624744415283, "learning_rate": 9.997288363066479e-06, "loss": 0.4251, "step": 662 }, { "epoch": 0.32914115505543606, "grad_norm": 0.5551317930221558, "learning_rate": 9.99719239228773e-06, "loss": 0.4239, "step": 663 }, { "epoch": 0.32963759721992386, "grad_norm": 0.4609374403953552, "learning_rate": 9.99709475308226e-06, "loss": 0.4744, "step": 664 }, { "epoch": 0.3301340393844117, "grad_norm": 0.5528961420059204, "learning_rate": 9.996995445482664e-06, "loss": 0.436, "step": 665 }, { "epoch": 0.33063048154889957, "grad_norm": 0.5365409851074219, "learning_rate": 9.9968944695221e-06, "loss": 0.4566, "step": 666 }, { "epoch": 0.33112692371338737, "grad_norm": 0.5410192608833313, "learning_rate": 9.99679182523428e-06, "loss": 0.4359, "step": 667 }, { "epoch": 0.3316233658778752, "grad_norm": 0.4685255289077759, "learning_rate": 9.996687512653476e-06, "loss": 0.4453, "step": 668 }, { "epoch": 0.3321198080423631, "grad_norm": 0.5009786486625671, "learning_rate": 9.996581531814513e-06, "loss": 0.4734, "step": 669 }, { "epoch": 0.3326162502068509, "grad_norm": 0.49233925342559814, "learning_rate": 9.996473882752777e-06, "loss": 0.4319, "step": 670 }, { "epoch": 0.3331126923713387, "grad_norm": 0.4994749128818512, "learning_rate": 9.996364565504208e-06, "loss": 0.4557, "step": 671 }, { "epoch": 0.3336091345358266, "grad_norm": 0.4633113145828247, "learning_rate": 9.996253580105302e-06, "loss": 0.4452, "step": 672 }, { "epoch": 0.33410557670031443, "grad_norm": 0.49823713302612305, "learning_rate": 9.996140926593119e-06, "loss": 0.4266, "step": 673 }, { "epoch": 0.33460201886480223, "grad_norm": 0.5592987537384033, "learning_rate": 9.996026605005266e-06, "loss": 0.4629, "step": 674 }, { "epoch": 0.3350984610292901, "grad_norm": 0.5397360920906067, "learning_rate": 9.995910615379917e-06, "loss": 0.4259, "step": 675 }, { "epoch": 0.33559490319377794, "grad_norm": 0.5772051811218262, "learning_rate": 9.995792957755793e-06, "loss": 0.4638, "step": 676 }, { "epoch": 0.33609134535826574, "grad_norm": 0.475380003452301, "learning_rate": 9.995673632172179e-06, "loss": 0.4084, "step": 677 }, { "epoch": 0.3365877875227536, "grad_norm": 0.6015079021453857, "learning_rate": 9.995552638668912e-06, "loss": 0.4564, "step": 678 }, { "epoch": 0.33708422968724144, "grad_norm": 0.5377491116523743, "learning_rate": 9.995429977286394e-06, "loss": 0.4657, "step": 679 }, { "epoch": 0.3375806718517293, "grad_norm": 0.49280181527137756, "learning_rate": 9.995305648065573e-06, "loss": 0.4523, "step": 680 }, { "epoch": 0.3380771140162171, "grad_norm": 0.52469801902771, "learning_rate": 9.995179651047961e-06, "loss": 0.4202, "step": 681 }, { "epoch": 0.33857355618070495, "grad_norm": 0.5446463823318481, "learning_rate": 9.995051986275626e-06, "loss": 0.4618, "step": 682 }, { "epoch": 0.3390699983451928, "grad_norm": 0.5112648010253906, "learning_rate": 9.99492265379119e-06, "loss": 0.4494, "step": 683 }, { "epoch": 0.3395664405096806, "grad_norm": 0.4991936981678009, "learning_rate": 9.994791653637834e-06, "loss": 0.4258, "step": 684 }, { "epoch": 0.34006288267416845, "grad_norm": 0.5463187098503113, "learning_rate": 9.994658985859295e-06, "loss": 0.4478, "step": 685 }, { "epoch": 0.3405593248386563, "grad_norm": 0.539374828338623, "learning_rate": 9.99452465049987e-06, "loss": 0.4393, "step": 686 }, { "epoch": 0.34105576700314416, "grad_norm": 0.4759056568145752, "learning_rate": 9.994388647604408e-06, "loss": 0.4361, "step": 687 }, { "epoch": 0.34155220916763196, "grad_norm": 0.5828811526298523, "learning_rate": 9.994250977218313e-06, "loss": 0.4439, "step": 688 }, { "epoch": 0.3420486513321198, "grad_norm": 0.5403841137886047, "learning_rate": 9.994111639387557e-06, "loss": 0.4402, "step": 689 }, { "epoch": 0.34254509349660767, "grad_norm": 0.5208553671836853, "learning_rate": 9.993970634158656e-06, "loss": 0.4693, "step": 690 }, { "epoch": 0.34304153566109546, "grad_norm": 0.4911104440689087, "learning_rate": 9.993827961578688e-06, "loss": 0.4322, "step": 691 }, { "epoch": 0.3435379778255833, "grad_norm": 0.5144209861755371, "learning_rate": 9.993683621695287e-06, "loss": 0.4518, "step": 692 }, { "epoch": 0.34403441999007117, "grad_norm": 0.518179178237915, "learning_rate": 9.993537614556648e-06, "loss": 0.4472, "step": 693 }, { "epoch": 0.34453086215455897, "grad_norm": 0.49194419384002686, "learning_rate": 9.993389940211515e-06, "loss": 0.4307, "step": 694 }, { "epoch": 0.3450273043190468, "grad_norm": 0.4744194447994232, "learning_rate": 9.993240598709195e-06, "loss": 0.4162, "step": 695 }, { "epoch": 0.3455237464835347, "grad_norm": 0.5924322605133057, "learning_rate": 9.993089590099547e-06, "loss": 0.4138, "step": 696 }, { "epoch": 0.34602018864802253, "grad_norm": 0.5416196584701538, "learning_rate": 9.99293691443299e-06, "loss": 0.4461, "step": 697 }, { "epoch": 0.34651663081251033, "grad_norm": 0.5320457816123962, "learning_rate": 9.992782571760497e-06, "loss": 0.4335, "step": 698 }, { "epoch": 0.3470130729769982, "grad_norm": 0.5776810646057129, "learning_rate": 9.9926265621336e-06, "loss": 0.412, "step": 699 }, { "epoch": 0.34750951514148604, "grad_norm": 0.5206899046897888, "learning_rate": 9.992468885604385e-06, "loss": 0.4565, "step": 700 }, { "epoch": 0.34800595730597383, "grad_norm": 0.5362099409103394, "learning_rate": 9.992309542225497e-06, "loss": 0.3981, "step": 701 }, { "epoch": 0.3485023994704617, "grad_norm": 0.5672571063041687, "learning_rate": 9.992148532050139e-06, "loss": 0.4209, "step": 702 }, { "epoch": 0.34899884163494954, "grad_norm": 0.504905641078949, "learning_rate": 9.991985855132062e-06, "loss": 0.4198, "step": 703 }, { "epoch": 0.34949528379943734, "grad_norm": 0.5426161885261536, "learning_rate": 9.991821511525584e-06, "loss": 0.4957, "step": 704 }, { "epoch": 0.3499917259639252, "grad_norm": 0.5535491108894348, "learning_rate": 9.991655501285574e-06, "loss": 0.4593, "step": 705 }, { "epoch": 0.35048816812841305, "grad_norm": 0.577043890953064, "learning_rate": 9.991487824467458e-06, "loss": 0.5053, "step": 706 }, { "epoch": 0.3509846102929009, "grad_norm": 0.5157840251922607, "learning_rate": 9.991318481127218e-06, "loss": 0.4469, "step": 707 }, { "epoch": 0.3514810524573887, "grad_norm": 0.5320767760276794, "learning_rate": 9.991147471321392e-06, "loss": 0.4661, "step": 708 }, { "epoch": 0.35197749462187655, "grad_norm": 0.5244318246841431, "learning_rate": 9.990974795107078e-06, "loss": 0.4609, "step": 709 }, { "epoch": 0.3524739367863644, "grad_norm": 0.5609150528907776, "learning_rate": 9.990800452541929e-06, "loss": 0.4985, "step": 710 }, { "epoch": 0.3529703789508522, "grad_norm": 0.5098783373832703, "learning_rate": 9.99062444368415e-06, "loss": 0.4568, "step": 711 }, { "epoch": 0.35346682111534006, "grad_norm": 0.5824208855628967, "learning_rate": 9.990446768592507e-06, "loss": 0.4627, "step": 712 }, { "epoch": 0.3539632632798279, "grad_norm": 0.49832215905189514, "learning_rate": 9.99026742732632e-06, "loss": 0.4131, "step": 713 }, { "epoch": 0.35445970544431576, "grad_norm": 0.5054773092269897, "learning_rate": 9.990086419945469e-06, "loss": 0.4366, "step": 714 }, { "epoch": 0.35495614760880356, "grad_norm": 0.5407485961914062, "learning_rate": 9.989903746510383e-06, "loss": 0.4836, "step": 715 }, { "epoch": 0.3554525897732914, "grad_norm": 0.5134665369987488, "learning_rate": 9.989719407082056e-06, "loss": 0.4311, "step": 716 }, { "epoch": 0.35594903193777927, "grad_norm": 0.4711879789829254, "learning_rate": 9.989533401722031e-06, "loss": 0.4346, "step": 717 }, { "epoch": 0.35644547410226707, "grad_norm": 0.5221781730651855, "learning_rate": 9.98934573049241e-06, "loss": 0.4327, "step": 718 }, { "epoch": 0.3569419162667549, "grad_norm": 0.5473209619522095, "learning_rate": 9.989156393455856e-06, "loss": 0.4388, "step": 719 }, { "epoch": 0.3574383584312428, "grad_norm": 0.5433003902435303, "learning_rate": 9.988965390675578e-06, "loss": 0.4529, "step": 720 }, { "epoch": 0.3579348005957306, "grad_norm": 0.48526471853256226, "learning_rate": 9.988772722215348e-06, "loss": 0.4532, "step": 721 }, { "epoch": 0.3584312427602184, "grad_norm": 0.6175501942634583, "learning_rate": 9.988578388139493e-06, "loss": 0.4697, "step": 722 }, { "epoch": 0.3589276849247063, "grad_norm": 0.5441719889640808, "learning_rate": 9.988382388512898e-06, "loss": 0.4258, "step": 723 }, { "epoch": 0.35942412708919413, "grad_norm": 0.5307440161705017, "learning_rate": 9.988184723400999e-06, "loss": 0.4698, "step": 724 }, { "epoch": 0.35992056925368193, "grad_norm": 0.5242679715156555, "learning_rate": 9.987985392869792e-06, "loss": 0.4264, "step": 725 }, { "epoch": 0.3604170114181698, "grad_norm": 0.6541310548782349, "learning_rate": 9.987784396985829e-06, "loss": 0.4506, "step": 726 }, { "epoch": 0.36091345358265764, "grad_norm": 0.4976063370704651, "learning_rate": 9.987581735816216e-06, "loss": 0.4594, "step": 727 }, { "epoch": 0.36140989574714544, "grad_norm": 0.4939424395561218, "learning_rate": 9.987377409428617e-06, "loss": 0.4362, "step": 728 }, { "epoch": 0.3619063379116333, "grad_norm": 0.46865764260292053, "learning_rate": 9.98717141789125e-06, "loss": 0.4562, "step": 729 }, { "epoch": 0.36240278007612114, "grad_norm": 0.5117902755737305, "learning_rate": 9.98696376127289e-06, "loss": 0.4278, "step": 730 }, { "epoch": 0.36289922224060894, "grad_norm": 0.5054832696914673, "learning_rate": 9.98675443964287e-06, "loss": 0.4531, "step": 731 }, { "epoch": 0.3633956644050968, "grad_norm": 0.5057498216629028, "learning_rate": 9.986543453071074e-06, "loss": 0.45, "step": 732 }, { "epoch": 0.36389210656958465, "grad_norm": 0.5273807048797607, "learning_rate": 9.986330801627944e-06, "loss": 0.4339, "step": 733 }, { "epoch": 0.3643885487340725, "grad_norm": 0.49234089255332947, "learning_rate": 9.986116485384481e-06, "loss": 0.4685, "step": 734 }, { "epoch": 0.3648849908985603, "grad_norm": 0.5099316835403442, "learning_rate": 9.98590050441224e-06, "loss": 0.4107, "step": 735 }, { "epoch": 0.36538143306304816, "grad_norm": 0.5327556729316711, "learning_rate": 9.98568285878333e-06, "loss": 0.4326, "step": 736 }, { "epoch": 0.365877875227536, "grad_norm": 0.531071126461029, "learning_rate": 9.985463548570416e-06, "loss": 0.451, "step": 737 }, { "epoch": 0.3663743173920238, "grad_norm": 0.5359527468681335, "learning_rate": 9.985242573846721e-06, "loss": 0.4617, "step": 738 }, { "epoch": 0.36687075955651166, "grad_norm": 0.5030301213264465, "learning_rate": 9.98501993468602e-06, "loss": 0.4618, "step": 739 }, { "epoch": 0.3673672017209995, "grad_norm": 0.5213652849197388, "learning_rate": 9.984795631162651e-06, "loss": 0.428, "step": 740 }, { "epoch": 0.36786364388548737, "grad_norm": 0.556115984916687, "learning_rate": 9.984569663351497e-06, "loss": 0.4637, "step": 741 }, { "epoch": 0.36836008604997517, "grad_norm": 0.5582041144371033, "learning_rate": 9.984342031328007e-06, "loss": 0.4588, "step": 742 }, { "epoch": 0.368856528214463, "grad_norm": 0.5386667847633362, "learning_rate": 9.984112735168182e-06, "loss": 0.4008, "step": 743 }, { "epoch": 0.3693529703789509, "grad_norm": 0.560698926448822, "learning_rate": 9.983881774948572e-06, "loss": 0.456, "step": 744 }, { "epoch": 0.36984941254343867, "grad_norm": 0.5329315066337585, "learning_rate": 9.983649150746292e-06, "loss": 0.4411, "step": 745 }, { "epoch": 0.3703458547079265, "grad_norm": 0.4887692928314209, "learning_rate": 9.983414862639011e-06, "loss": 0.4498, "step": 746 }, { "epoch": 0.3708422968724144, "grad_norm": 0.5324393510818481, "learning_rate": 9.983178910704947e-06, "loss": 0.4684, "step": 747 }, { "epoch": 0.3713387390369022, "grad_norm": 0.547875702381134, "learning_rate": 9.982941295022881e-06, "loss": 0.4416, "step": 748 }, { "epoch": 0.37183518120139003, "grad_norm": 0.49825596809387207, "learning_rate": 9.982702015672145e-06, "loss": 0.4503, "step": 749 }, { "epoch": 0.3723316233658779, "grad_norm": 0.4924744963645935, "learning_rate": 9.982461072732628e-06, "loss": 0.4559, "step": 750 }, { "epoch": 0.37282806553036574, "grad_norm": 0.5351324677467346, "learning_rate": 9.982218466284775e-06, "loss": 0.4405, "step": 751 }, { "epoch": 0.37332450769485354, "grad_norm": 0.48400557041168213, "learning_rate": 9.981974196409586e-06, "loss": 0.446, "step": 752 }, { "epoch": 0.3738209498593414, "grad_norm": 0.4660663306713104, "learning_rate": 9.981728263188615e-06, "loss": 0.4705, "step": 753 }, { "epoch": 0.37431739202382924, "grad_norm": 0.5283627510070801, "learning_rate": 9.98148066670397e-06, "loss": 0.4535, "step": 754 }, { "epoch": 0.37481383418831704, "grad_norm": 0.5026755332946777, "learning_rate": 9.981231407038324e-06, "loss": 0.4578, "step": 755 }, { "epoch": 0.3753102763528049, "grad_norm": 0.4959315359592438, "learning_rate": 9.98098048427489e-06, "loss": 0.414, "step": 756 }, { "epoch": 0.37580671851729275, "grad_norm": 0.6642215847969055, "learning_rate": 9.98072789849745e-06, "loss": 0.476, "step": 757 }, { "epoch": 0.37630316068178055, "grad_norm": 0.47252151370048523, "learning_rate": 9.980473649790333e-06, "loss": 0.4654, "step": 758 }, { "epoch": 0.3767996028462684, "grad_norm": 0.5588207244873047, "learning_rate": 9.980217738238427e-06, "loss": 0.4492, "step": 759 }, { "epoch": 0.37729604501075625, "grad_norm": 0.5425735712051392, "learning_rate": 9.979960163927172e-06, "loss": 0.4519, "step": 760 }, { "epoch": 0.3777924871752441, "grad_norm": 0.4565294086933136, "learning_rate": 9.979700926942564e-06, "loss": 0.4203, "step": 761 }, { "epoch": 0.3782889293397319, "grad_norm": 0.56362384557724, "learning_rate": 9.97944002737116e-06, "loss": 0.4837, "step": 762 }, { "epoch": 0.37878537150421976, "grad_norm": 0.5658215284347534, "learning_rate": 9.979177465300063e-06, "loss": 0.441, "step": 763 }, { "epoch": 0.3792818136687076, "grad_norm": 0.4841872751712799, "learning_rate": 9.978913240816938e-06, "loss": 0.4736, "step": 764 }, { "epoch": 0.3797782558331954, "grad_norm": 0.5557580590248108, "learning_rate": 9.978647354010002e-06, "loss": 0.4571, "step": 765 }, { "epoch": 0.38027469799768326, "grad_norm": 0.544417679309845, "learning_rate": 9.978379804968026e-06, "loss": 0.415, "step": 766 }, { "epoch": 0.3807711401621711, "grad_norm": 0.5738711953163147, "learning_rate": 9.978110593780338e-06, "loss": 0.4658, "step": 767 }, { "epoch": 0.38126758232665897, "grad_norm": 0.552761435508728, "learning_rate": 9.977839720536818e-06, "loss": 0.4601, "step": 768 }, { "epoch": 0.38176402449114677, "grad_norm": 0.5964069366455078, "learning_rate": 9.977567185327907e-06, "loss": 0.4546, "step": 769 }, { "epoch": 0.3822604666556346, "grad_norm": 0.5214033126831055, "learning_rate": 9.977292988244597e-06, "loss": 0.4297, "step": 770 }, { "epoch": 0.3827569088201225, "grad_norm": 0.5043959021568298, "learning_rate": 9.977017129378432e-06, "loss": 0.4355, "step": 771 }, { "epoch": 0.3832533509846103, "grad_norm": 0.5363325476646423, "learning_rate": 9.976739608821515e-06, "loss": 0.4921, "step": 772 }, { "epoch": 0.38374979314909813, "grad_norm": 0.5615510940551758, "learning_rate": 9.976460426666505e-06, "loss": 0.4947, "step": 773 }, { "epoch": 0.384246235313586, "grad_norm": 0.47227251529693604, "learning_rate": 9.976179583006608e-06, "loss": 0.4437, "step": 774 }, { "epoch": 0.3847426774780738, "grad_norm": 0.5257678627967834, "learning_rate": 9.975897077935597e-06, "loss": 0.4299, "step": 775 }, { "epoch": 0.38523911964256163, "grad_norm": 0.5146631598472595, "learning_rate": 9.975612911547787e-06, "loss": 0.4262, "step": 776 }, { "epoch": 0.3857355618070495, "grad_norm": 0.5142720341682434, "learning_rate": 9.975327083938056e-06, "loss": 0.4283, "step": 777 }, { "epoch": 0.38623200397153734, "grad_norm": 0.4767795503139496, "learning_rate": 9.975039595201833e-06, "loss": 0.4426, "step": 778 }, { "epoch": 0.38672844613602514, "grad_norm": 0.5010292530059814, "learning_rate": 9.974750445435104e-06, "loss": 0.4496, "step": 779 }, { "epoch": 0.387224888300513, "grad_norm": 0.5124982595443726, "learning_rate": 9.974459634734407e-06, "loss": 0.4425, "step": 780 }, { "epoch": 0.38772133046500085, "grad_norm": 0.5307253003120422, "learning_rate": 9.974167163196837e-06, "loss": 0.4204, "step": 781 }, { "epoch": 0.38821777262948864, "grad_norm": 0.5629131197929382, "learning_rate": 9.97387303092004e-06, "loss": 0.4482, "step": 782 }, { "epoch": 0.3887142147939765, "grad_norm": 0.519805908203125, "learning_rate": 9.97357723800222e-06, "loss": 0.4574, "step": 783 }, { "epoch": 0.38921065695846435, "grad_norm": 0.5706680417060852, "learning_rate": 9.973279784542137e-06, "loss": 0.4579, "step": 784 }, { "epoch": 0.38970709912295215, "grad_norm": 0.5527223944664001, "learning_rate": 9.972980670639098e-06, "loss": 0.4188, "step": 785 }, { "epoch": 0.39020354128744, "grad_norm": 0.565447211265564, "learning_rate": 9.972679896392973e-06, "loss": 0.444, "step": 786 }, { "epoch": 0.39069998345192786, "grad_norm": 0.5002108812332153, "learning_rate": 9.97237746190418e-06, "loss": 0.4584, "step": 787 }, { "epoch": 0.3911964256164157, "grad_norm": 0.5092539191246033, "learning_rate": 9.972073367273694e-06, "loss": 0.4098, "step": 788 }, { "epoch": 0.3916928677809035, "grad_norm": 0.6041103005409241, "learning_rate": 9.971767612603045e-06, "loss": 0.4088, "step": 789 }, { "epoch": 0.39218930994539136, "grad_norm": 0.5207790732383728, "learning_rate": 9.971460197994314e-06, "loss": 0.4424, "step": 790 }, { "epoch": 0.3926857521098792, "grad_norm": 0.5629052519798279, "learning_rate": 9.97115112355014e-06, "loss": 0.4355, "step": 791 }, { "epoch": 0.393182194274367, "grad_norm": 0.5310881733894348, "learning_rate": 9.970840389373715e-06, "loss": 0.4052, "step": 792 }, { "epoch": 0.39367863643885487, "grad_norm": 0.5580400824546814, "learning_rate": 9.970527995568783e-06, "loss": 0.4596, "step": 793 }, { "epoch": 0.3941750786033427, "grad_norm": 0.512403130531311, "learning_rate": 9.970213942239644e-06, "loss": 0.4425, "step": 794 }, { "epoch": 0.3946715207678306, "grad_norm": 0.6799303889274597, "learning_rate": 9.969898229491155e-06, "loss": 0.458, "step": 795 }, { "epoch": 0.3951679629323184, "grad_norm": 0.5703893303871155, "learning_rate": 9.96958085742872e-06, "loss": 0.4633, "step": 796 }, { "epoch": 0.3956644050968062, "grad_norm": 0.5917856097221375, "learning_rate": 9.969261826158303e-06, "loss": 0.4249, "step": 797 }, { "epoch": 0.3961608472612941, "grad_norm": 0.5542701482772827, "learning_rate": 9.968941135786418e-06, "loss": 0.4121, "step": 798 }, { "epoch": 0.3966572894257819, "grad_norm": 0.5094189643859863, "learning_rate": 9.968618786420136e-06, "loss": 0.4482, "step": 799 }, { "epoch": 0.39715373159026973, "grad_norm": 0.5825555920600891, "learning_rate": 9.968294778167083e-06, "loss": 0.4407, "step": 800 }, { "epoch": 0.3976501737547576, "grad_norm": 0.5337191224098206, "learning_rate": 9.967969111135434e-06, "loss": 0.4499, "step": 801 }, { "epoch": 0.3981466159192454, "grad_norm": 0.6042580604553223, "learning_rate": 9.96764178543392e-06, "loss": 0.4404, "step": 802 }, { "epoch": 0.39864305808373324, "grad_norm": 0.5734981894493103, "learning_rate": 9.967312801171825e-06, "loss": 0.4346, "step": 803 }, { "epoch": 0.3991395002482211, "grad_norm": 0.5953512191772461, "learning_rate": 9.966982158458992e-06, "loss": 0.441, "step": 804 }, { "epoch": 0.39963594241270894, "grad_norm": 0.5604651570320129, "learning_rate": 9.96664985740581e-06, "loss": 0.4036, "step": 805 }, { "epoch": 0.40013238457719674, "grad_norm": 0.6163150072097778, "learning_rate": 9.96631589812323e-06, "loss": 0.4864, "step": 806 }, { "epoch": 0.4006288267416846, "grad_norm": 0.5293951034545898, "learning_rate": 9.965980280722744e-06, "loss": 0.4493, "step": 807 }, { "epoch": 0.40112526890617245, "grad_norm": 0.5155889391899109, "learning_rate": 9.965643005316413e-06, "loss": 0.4829, "step": 808 }, { "epoch": 0.40162171107066025, "grad_norm": 0.6413195133209229, "learning_rate": 9.965304072016842e-06, "loss": 0.4438, "step": 809 }, { "epoch": 0.4021181532351481, "grad_norm": 0.5489668846130371, "learning_rate": 9.964963480937189e-06, "loss": 0.4329, "step": 810 }, { "epoch": 0.40261459539963595, "grad_norm": 0.5321534276008606, "learning_rate": 9.964621232191169e-06, "loss": 0.4396, "step": 811 }, { "epoch": 0.40311103756412375, "grad_norm": 0.46472686529159546, "learning_rate": 9.964277325893053e-06, "loss": 0.4515, "step": 812 }, { "epoch": 0.4036074797286116, "grad_norm": 0.5002892017364502, "learning_rate": 9.963931762157657e-06, "loss": 0.4215, "step": 813 }, { "epoch": 0.40410392189309946, "grad_norm": 0.5166855454444885, "learning_rate": 9.96358454110036e-06, "loss": 0.4413, "step": 814 }, { "epoch": 0.4046003640575873, "grad_norm": 0.5024755597114563, "learning_rate": 9.963235662837085e-06, "loss": 0.4465, "step": 815 }, { "epoch": 0.4050968062220751, "grad_norm": 0.4843638241291046, "learning_rate": 9.962885127484318e-06, "loss": 0.4336, "step": 816 }, { "epoch": 0.40559324838656297, "grad_norm": 0.4917435050010681, "learning_rate": 9.96253293515909e-06, "loss": 0.3976, "step": 817 }, { "epoch": 0.4060896905510508, "grad_norm": 0.5303657650947571, "learning_rate": 9.96217908597899e-06, "loss": 0.4652, "step": 818 }, { "epoch": 0.4065861327155386, "grad_norm": 0.49316272139549255, "learning_rate": 9.961823580062155e-06, "loss": 0.4776, "step": 819 }, { "epoch": 0.40708257488002647, "grad_norm": 0.5292693972587585, "learning_rate": 9.961466417527283e-06, "loss": 0.441, "step": 820 }, { "epoch": 0.4075790170445143, "grad_norm": 0.508226215839386, "learning_rate": 9.96110759849362e-06, "loss": 0.4167, "step": 821 }, { "epoch": 0.4080754592090022, "grad_norm": 0.48645642399787903, "learning_rate": 9.960747123080965e-06, "loss": 0.4369, "step": 822 }, { "epoch": 0.40857190137349, "grad_norm": 0.6356767416000366, "learning_rate": 9.96038499140967e-06, "loss": 0.43, "step": 823 }, { "epoch": 0.40906834353797783, "grad_norm": 0.49043551087379456, "learning_rate": 9.960021203600642e-06, "loss": 0.4245, "step": 824 }, { "epoch": 0.4095647857024657, "grad_norm": 0.532439649105072, "learning_rate": 9.959655759775342e-06, "loss": 0.4199, "step": 825 }, { "epoch": 0.4100612278669535, "grad_norm": 0.5128755569458008, "learning_rate": 9.95928866005578e-06, "loss": 0.4331, "step": 826 }, { "epoch": 0.41055767003144134, "grad_norm": 0.5172882080078125, "learning_rate": 9.958919904564519e-06, "loss": 0.4543, "step": 827 }, { "epoch": 0.4110541121959292, "grad_norm": 0.4810371398925781, "learning_rate": 9.958549493424678e-06, "loss": 0.4338, "step": 828 }, { "epoch": 0.411550554360417, "grad_norm": 0.48897677659988403, "learning_rate": 9.958177426759928e-06, "loss": 0.4217, "step": 829 }, { "epoch": 0.41204699652490484, "grad_norm": 0.4585888087749481, "learning_rate": 9.957803704694488e-06, "loss": 0.4342, "step": 830 }, { "epoch": 0.4125434386893927, "grad_norm": 0.5872567296028137, "learning_rate": 9.95742832735314e-06, "loss": 0.4827, "step": 831 }, { "epoch": 0.41303988085388055, "grad_norm": 0.4967654049396515, "learning_rate": 9.957051294861208e-06, "loss": 0.4436, "step": 832 }, { "epoch": 0.41353632301836835, "grad_norm": 0.49069327116012573, "learning_rate": 9.956672607344572e-06, "loss": 0.4147, "step": 833 }, { "epoch": 0.4140327651828562, "grad_norm": 0.48549267649650574, "learning_rate": 9.95629226492967e-06, "loss": 0.3919, "step": 834 }, { "epoch": 0.41452920734734405, "grad_norm": 0.5578224658966064, "learning_rate": 9.955910267743486e-06, "loss": 0.4238, "step": 835 }, { "epoch": 0.41502564951183185, "grad_norm": 0.5848782062530518, "learning_rate": 9.955526615913554e-06, "loss": 0.4606, "step": 836 }, { "epoch": 0.4155220916763197, "grad_norm": 0.529934823513031, "learning_rate": 9.95514130956797e-06, "loss": 0.4549, "step": 837 }, { "epoch": 0.41601853384080756, "grad_norm": 0.5621156096458435, "learning_rate": 9.954754348835379e-06, "loss": 0.4536, "step": 838 }, { "epoch": 0.41651497600529536, "grad_norm": 0.5322206616401672, "learning_rate": 9.954365733844971e-06, "loss": 0.4593, "step": 839 }, { "epoch": 0.4170114181697832, "grad_norm": 0.6335204839706421, "learning_rate": 9.953975464726495e-06, "loss": 0.4451, "step": 840 }, { "epoch": 0.41750786033427106, "grad_norm": 0.4750513732433319, "learning_rate": 9.953583541610257e-06, "loss": 0.426, "step": 841 }, { "epoch": 0.4180043024987589, "grad_norm": 0.49504324793815613, "learning_rate": 9.953189964627102e-06, "loss": 0.4205, "step": 842 }, { "epoch": 0.4185007446632467, "grad_norm": 0.4917500615119934, "learning_rate": 9.95279473390844e-06, "loss": 0.438, "step": 843 }, { "epoch": 0.41899718682773457, "grad_norm": 0.48977091908454895, "learning_rate": 9.952397849586225e-06, "loss": 0.4155, "step": 844 }, { "epoch": 0.4194936289922224, "grad_norm": 0.5736581683158875, "learning_rate": 9.951999311792966e-06, "loss": 0.4611, "step": 845 }, { "epoch": 0.4199900711567102, "grad_norm": 0.44955137372016907, "learning_rate": 9.951599120661726e-06, "loss": 0.4241, "step": 846 }, { "epoch": 0.4204865133211981, "grad_norm": 0.5708332657814026, "learning_rate": 9.951197276326117e-06, "loss": 0.4158, "step": 847 }, { "epoch": 0.42098295548568593, "grad_norm": 0.5284034013748169, "learning_rate": 9.950793778920303e-06, "loss": 0.4473, "step": 848 }, { "epoch": 0.4214793976501738, "grad_norm": 0.5032584071159363, "learning_rate": 9.950388628579e-06, "loss": 0.4698, "step": 849 }, { "epoch": 0.4219758398146616, "grad_norm": 0.5813698768615723, "learning_rate": 9.94998182543748e-06, "loss": 0.4082, "step": 850 }, { "epoch": 0.42247228197914943, "grad_norm": 0.5888659358024597, "learning_rate": 9.94957336963156e-06, "loss": 0.4654, "step": 851 }, { "epoch": 0.4229687241436373, "grad_norm": 0.6404712796211243, "learning_rate": 9.949163261297616e-06, "loss": 0.4357, "step": 852 }, { "epoch": 0.4234651663081251, "grad_norm": 0.4834735095500946, "learning_rate": 9.948751500572568e-06, "loss": 0.4152, "step": 853 }, { "epoch": 0.42396160847261294, "grad_norm": 0.48269301652908325, "learning_rate": 9.948338087593894e-06, "loss": 0.3998, "step": 854 }, { "epoch": 0.4244580506371008, "grad_norm": 0.5617848634719849, "learning_rate": 9.94792302249962e-06, "loss": 0.4177, "step": 855 }, { "epoch": 0.4249544928015886, "grad_norm": 0.5150983929634094, "learning_rate": 9.947506305428328e-06, "loss": 0.4403, "step": 856 }, { "epoch": 0.42545093496607644, "grad_norm": 0.5144426226615906, "learning_rate": 9.947087936519143e-06, "loss": 0.4183, "step": 857 }, { "epoch": 0.4259473771305643, "grad_norm": 0.5009498000144958, "learning_rate": 9.946667915911754e-06, "loss": 0.4529, "step": 858 }, { "epoch": 0.42644381929505215, "grad_norm": 0.6057968139648438, "learning_rate": 9.94624624374639e-06, "loss": 0.443, "step": 859 }, { "epoch": 0.42694026145953995, "grad_norm": 0.5413539409637451, "learning_rate": 9.945822920163835e-06, "loss": 0.4392, "step": 860 }, { "epoch": 0.4274367036240278, "grad_norm": 0.4592570662498474, "learning_rate": 9.945397945305428e-06, "loss": 0.4097, "step": 861 }, { "epoch": 0.42793314578851566, "grad_norm": 0.5754675269126892, "learning_rate": 9.944971319313055e-06, "loss": 0.4253, "step": 862 }, { "epoch": 0.42842958795300345, "grad_norm": 0.4833267033100128, "learning_rate": 9.944543042329157e-06, "loss": 0.4189, "step": 863 }, { "epoch": 0.4289260301174913, "grad_norm": 0.5484238862991333, "learning_rate": 9.94411311449672e-06, "loss": 0.4573, "step": 864 }, { "epoch": 0.42942247228197916, "grad_norm": 0.5477850437164307, "learning_rate": 9.94368153595929e-06, "loss": 0.4154, "step": 865 }, { "epoch": 0.42991891444646696, "grad_norm": 0.4856090545654297, "learning_rate": 9.943248306860956e-06, "loss": 0.4478, "step": 866 }, { "epoch": 0.4304153566109548, "grad_norm": 0.6124984622001648, "learning_rate": 9.942813427346363e-06, "loss": 0.4105, "step": 867 }, { "epoch": 0.43091179877544267, "grad_norm": 0.5826840400695801, "learning_rate": 9.942376897560703e-06, "loss": 0.4591, "step": 868 }, { "epoch": 0.4314082409399305, "grad_norm": 0.5795828700065613, "learning_rate": 9.941938717649724e-06, "loss": 0.4102, "step": 869 }, { "epoch": 0.4319046831044183, "grad_norm": 0.6058121919631958, "learning_rate": 9.941498887759724e-06, "loss": 0.4296, "step": 870 }, { "epoch": 0.4324011252689062, "grad_norm": 0.4895571172237396, "learning_rate": 9.941057408037546e-06, "loss": 0.4355, "step": 871 }, { "epoch": 0.432897567433394, "grad_norm": 0.5605397820472717, "learning_rate": 9.94061427863059e-06, "loss": 0.4526, "step": 872 }, { "epoch": 0.4333940095978818, "grad_norm": 0.4755294919013977, "learning_rate": 9.940169499686803e-06, "loss": 0.4233, "step": 873 }, { "epoch": 0.4338904517623697, "grad_norm": 0.5013444423675537, "learning_rate": 9.93972307135469e-06, "loss": 0.4759, "step": 874 }, { "epoch": 0.43438689392685753, "grad_norm": 0.5026915669441223, "learning_rate": 9.939274993783295e-06, "loss": 0.4174, "step": 875 }, { "epoch": 0.4348833360913454, "grad_norm": 0.5316457748413086, "learning_rate": 9.938825267122223e-06, "loss": 0.4311, "step": 876 }, { "epoch": 0.4353797782558332, "grad_norm": 0.5509811639785767, "learning_rate": 9.938373891521622e-06, "loss": 0.4489, "step": 877 }, { "epoch": 0.43587622042032104, "grad_norm": 0.49691954255104065, "learning_rate": 9.937920867132199e-06, "loss": 0.4373, "step": 878 }, { "epoch": 0.4363726625848089, "grad_norm": 0.5427002906799316, "learning_rate": 9.937466194105202e-06, "loss": 0.4399, "step": 879 }, { "epoch": 0.4368691047492967, "grad_norm": 0.4931049644947052, "learning_rate": 9.937009872592435e-06, "loss": 0.4633, "step": 880 }, { "epoch": 0.43736554691378454, "grad_norm": 0.4841610789299011, "learning_rate": 9.936551902746255e-06, "loss": 0.4525, "step": 881 }, { "epoch": 0.4378619890782724, "grad_norm": 0.5018763542175293, "learning_rate": 9.93609228471956e-06, "loss": 0.4496, "step": 882 }, { "epoch": 0.4383584312427602, "grad_norm": 0.4684421718120575, "learning_rate": 9.935631018665808e-06, "loss": 0.4394, "step": 883 }, { "epoch": 0.43885487340724805, "grad_norm": 0.43876171112060547, "learning_rate": 9.935168104739002e-06, "loss": 0.434, "step": 884 }, { "epoch": 0.4393513155717359, "grad_norm": 0.482491135597229, "learning_rate": 9.934703543093695e-06, "loss": 0.4159, "step": 885 }, { "epoch": 0.43984775773622375, "grad_norm": 0.5079235434532166, "learning_rate": 9.934237333884994e-06, "loss": 0.4264, "step": 886 }, { "epoch": 0.44034419990071155, "grad_norm": 0.5346531271934509, "learning_rate": 9.933769477268552e-06, "loss": 0.4376, "step": 887 }, { "epoch": 0.4408406420651994, "grad_norm": 0.5797383189201355, "learning_rate": 9.933299973400574e-06, "loss": 0.4523, "step": 888 }, { "epoch": 0.44133708422968726, "grad_norm": 0.5135098695755005, "learning_rate": 9.932828822437815e-06, "loss": 0.4248, "step": 889 }, { "epoch": 0.44183352639417506, "grad_norm": 0.49172747135162354, "learning_rate": 9.932356024537577e-06, "loss": 0.4005, "step": 890 }, { "epoch": 0.4423299685586629, "grad_norm": 0.5609550476074219, "learning_rate": 9.931881579857719e-06, "loss": 0.4164, "step": 891 }, { "epoch": 0.44282641072315077, "grad_norm": 0.5491898655891418, "learning_rate": 9.931405488556642e-06, "loss": 0.4473, "step": 892 }, { "epoch": 0.44332285288763856, "grad_norm": 0.48863548040390015, "learning_rate": 9.930927750793298e-06, "loss": 0.4293, "step": 893 }, { "epoch": 0.4438192950521264, "grad_norm": 0.4826047420501709, "learning_rate": 9.930448366727197e-06, "loss": 0.4396, "step": 894 }, { "epoch": 0.44431573721661427, "grad_norm": 0.46728387475013733, "learning_rate": 9.929967336518387e-06, "loss": 0.4492, "step": 895 }, { "epoch": 0.4448121793811021, "grad_norm": 0.4815772473812103, "learning_rate": 9.929484660327472e-06, "loss": 0.4456, "step": 896 }, { "epoch": 0.4453086215455899, "grad_norm": 0.5186320543289185, "learning_rate": 9.929000338315604e-06, "loss": 0.4476, "step": 897 }, { "epoch": 0.4458050637100778, "grad_norm": 0.5162845849990845, "learning_rate": 9.928514370644487e-06, "loss": 0.3989, "step": 898 }, { "epoch": 0.44630150587456563, "grad_norm": 0.5513384938240051, "learning_rate": 9.92802675747637e-06, "loss": 0.4368, "step": 899 }, { "epoch": 0.4467979480390534, "grad_norm": 0.4881056845188141, "learning_rate": 9.927537498974059e-06, "loss": 0.4299, "step": 900 }, { "epoch": 0.4472943902035413, "grad_norm": 0.5284100770950317, "learning_rate": 9.927046595300895e-06, "loss": 0.4047, "step": 901 }, { "epoch": 0.44779083236802913, "grad_norm": 0.5033361911773682, "learning_rate": 9.926554046620785e-06, "loss": 0.4545, "step": 902 }, { "epoch": 0.448287274532517, "grad_norm": 0.4878405034542084, "learning_rate": 9.926059853098175e-06, "loss": 0.439, "step": 903 }, { "epoch": 0.4487837166970048, "grad_norm": 0.5183354020118713, "learning_rate": 9.925564014898063e-06, "loss": 0.4184, "step": 904 }, { "epoch": 0.44928015886149264, "grad_norm": 0.6062737703323364, "learning_rate": 9.925066532185996e-06, "loss": 0.4301, "step": 905 }, { "epoch": 0.4497766010259805, "grad_norm": 0.5468554496765137, "learning_rate": 9.924567405128069e-06, "loss": 0.4491, "step": 906 }, { "epoch": 0.4502730431904683, "grad_norm": 0.4926951825618744, "learning_rate": 9.924066633890929e-06, "loss": 0.4576, "step": 907 }, { "epoch": 0.45076948535495615, "grad_norm": 0.6153212189674377, "learning_rate": 9.923564218641768e-06, "loss": 0.3909, "step": 908 }, { "epoch": 0.451265927519444, "grad_norm": 0.5922148823738098, "learning_rate": 9.92306015954833e-06, "loss": 0.4489, "step": 909 }, { "epoch": 0.4517623696839318, "grad_norm": 0.6443377733230591, "learning_rate": 9.922554456778905e-06, "loss": 0.4596, "step": 910 }, { "epoch": 0.45225881184841965, "grad_norm": 0.5298601388931274, "learning_rate": 9.922047110502335e-06, "loss": 0.4185, "step": 911 }, { "epoch": 0.4527552540129075, "grad_norm": 0.5330771207809448, "learning_rate": 9.921538120888007e-06, "loss": 0.4203, "step": 912 }, { "epoch": 0.45325169617739536, "grad_norm": 0.5295668840408325, "learning_rate": 9.921027488105864e-06, "loss": 0.4234, "step": 913 }, { "epoch": 0.45374813834188316, "grad_norm": 0.5251925587654114, "learning_rate": 9.920515212326386e-06, "loss": 0.4393, "step": 914 }, { "epoch": 0.454244580506371, "grad_norm": 0.5841966867446899, "learning_rate": 9.920001293720612e-06, "loss": 0.4318, "step": 915 }, { "epoch": 0.45474102267085886, "grad_norm": 0.5280889272689819, "learning_rate": 9.919485732460123e-06, "loss": 0.4525, "step": 916 }, { "epoch": 0.45523746483534666, "grad_norm": 0.5599513053894043, "learning_rate": 9.918968528717053e-06, "loss": 0.4434, "step": 917 }, { "epoch": 0.4557339069998345, "grad_norm": 0.6050372123718262, "learning_rate": 9.918449682664082e-06, "loss": 0.4568, "step": 918 }, { "epoch": 0.45623034916432237, "grad_norm": 0.5087093114852905, "learning_rate": 9.917929194474438e-06, "loss": 0.4342, "step": 919 }, { "epoch": 0.45672679132881017, "grad_norm": 0.5840878486633301, "learning_rate": 9.917407064321897e-06, "loss": 0.4292, "step": 920 }, { "epoch": 0.457223233493298, "grad_norm": 0.5344041585922241, "learning_rate": 9.916883292380786e-06, "loss": 0.4639, "step": 921 }, { "epoch": 0.4577196756577859, "grad_norm": 0.5396878123283386, "learning_rate": 9.916357878825974e-06, "loss": 0.4273, "step": 922 }, { "epoch": 0.4582161178222737, "grad_norm": 0.5065993666648865, "learning_rate": 9.91583082383289e-06, "loss": 0.4097, "step": 923 }, { "epoch": 0.4587125599867615, "grad_norm": 0.5173776745796204, "learning_rate": 9.915302127577496e-06, "loss": 0.4269, "step": 924 }, { "epoch": 0.4592090021512494, "grad_norm": 0.6255975365638733, "learning_rate": 9.914771790236313e-06, "loss": 0.4578, "step": 925 }, { "epoch": 0.45970544431573723, "grad_norm": 0.513547420501709, "learning_rate": 9.914239811986406e-06, "loss": 0.4469, "step": 926 }, { "epoch": 0.46020188648022503, "grad_norm": 0.6120179891586304, "learning_rate": 9.913706193005386e-06, "loss": 0.458, "step": 927 }, { "epoch": 0.4606983286447129, "grad_norm": 0.5229704976081848, "learning_rate": 9.913170933471416e-06, "loss": 0.4181, "step": 928 }, { "epoch": 0.46119477080920074, "grad_norm": 0.4790898859500885, "learning_rate": 9.912634033563205e-06, "loss": 0.4171, "step": 929 }, { "epoch": 0.4616912129736886, "grad_norm": 0.5582307577133179, "learning_rate": 9.912095493460005e-06, "loss": 0.4218, "step": 930 }, { "epoch": 0.4621876551381764, "grad_norm": 0.5433757901191711, "learning_rate": 9.911555313341625e-06, "loss": 0.402, "step": 931 }, { "epoch": 0.46268409730266424, "grad_norm": 0.5585370063781738, "learning_rate": 9.911013493388416e-06, "loss": 0.4117, "step": 932 }, { "epoch": 0.4631805394671521, "grad_norm": 0.5540894865989685, "learning_rate": 9.910470033781274e-06, "loss": 0.4251, "step": 933 }, { "epoch": 0.4636769816316399, "grad_norm": 0.55588698387146, "learning_rate": 9.909924934701647e-06, "loss": 0.4215, "step": 934 }, { "epoch": 0.46417342379612775, "grad_norm": 0.5152177214622498, "learning_rate": 9.909378196331527e-06, "loss": 0.4202, "step": 935 }, { "epoch": 0.4646698659606156, "grad_norm": 0.5063419938087463, "learning_rate": 9.908829818853459e-06, "loss": 0.4219, "step": 936 }, { "epoch": 0.4651663081251034, "grad_norm": 0.5585060715675354, "learning_rate": 9.908279802450529e-06, "loss": 0.4497, "step": 937 }, { "epoch": 0.46566275028959125, "grad_norm": 0.5122774243354797, "learning_rate": 9.907728147306373e-06, "loss": 0.4234, "step": 938 }, { "epoch": 0.4661591924540791, "grad_norm": 0.4644809067249298, "learning_rate": 9.90717485360517e-06, "loss": 0.4319, "step": 939 }, { "epoch": 0.46665563461856696, "grad_norm": 0.5070093870162964, "learning_rate": 9.906619921531658e-06, "loss": 0.4382, "step": 940 }, { "epoch": 0.46715207678305476, "grad_norm": 0.5647521615028381, "learning_rate": 9.906063351271104e-06, "loss": 0.4373, "step": 941 }, { "epoch": 0.4676485189475426, "grad_norm": 0.5071494579315186, "learning_rate": 9.90550514300934e-06, "loss": 0.4166, "step": 942 }, { "epoch": 0.46814496111203047, "grad_norm": 0.5311141610145569, "learning_rate": 9.904945296932731e-06, "loss": 0.4684, "step": 943 }, { "epoch": 0.46864140327651826, "grad_norm": 0.6462129950523376, "learning_rate": 9.904383813228197e-06, "loss": 0.4283, "step": 944 }, { "epoch": 0.4691378454410061, "grad_norm": 0.5549330711364746, "learning_rate": 9.9038206920832e-06, "loss": 0.4333, "step": 945 }, { "epoch": 0.46963428760549397, "grad_norm": 0.4556307792663574, "learning_rate": 9.903255933685755e-06, "loss": 0.4032, "step": 946 }, { "epoch": 0.47013072976998177, "grad_norm": 0.6594709157943726, "learning_rate": 9.902689538224415e-06, "loss": 0.4562, "step": 947 }, { "epoch": 0.4706271719344696, "grad_norm": 0.6514871716499329, "learning_rate": 9.902121505888287e-06, "loss": 0.4396, "step": 948 }, { "epoch": 0.4711236140989575, "grad_norm": 0.5512629747390747, "learning_rate": 9.90155183686702e-06, "loss": 0.4458, "step": 949 }, { "epoch": 0.47162005626344533, "grad_norm": 0.5365515351295471, "learning_rate": 9.900980531350813e-06, "loss": 0.4069, "step": 950 }, { "epoch": 0.47211649842793313, "grad_norm": 0.6277828216552734, "learning_rate": 9.900407589530405e-06, "loss": 0.4148, "step": 951 }, { "epoch": 0.472612940592421, "grad_norm": 0.5080868601799011, "learning_rate": 9.89983301159709e-06, "loss": 0.4536, "step": 952 }, { "epoch": 0.47310938275690884, "grad_norm": 0.5718448758125305, "learning_rate": 9.899256797742702e-06, "loss": 0.4086, "step": 953 }, { "epoch": 0.47360582492139663, "grad_norm": 0.5919328331947327, "learning_rate": 9.898678948159625e-06, "loss": 0.4152, "step": 954 }, { "epoch": 0.4741022670858845, "grad_norm": 0.5181053280830383, "learning_rate": 9.898099463040784e-06, "loss": 0.4078, "step": 955 }, { "epoch": 0.47459870925037234, "grad_norm": 0.47623708844184875, "learning_rate": 9.897518342579657e-06, "loss": 0.4238, "step": 956 }, { "epoch": 0.4750951514148602, "grad_norm": 0.7070454359054565, "learning_rate": 9.896935586970262e-06, "loss": 0.4371, "step": 957 }, { "epoch": 0.475591593579348, "grad_norm": 0.4920821785926819, "learning_rate": 9.896351196407166e-06, "loss": 0.4197, "step": 958 }, { "epoch": 0.47608803574383585, "grad_norm": 0.5249311327934265, "learning_rate": 9.89576517108548e-06, "loss": 0.4672, "step": 959 }, { "epoch": 0.4765844779083237, "grad_norm": 0.5878430008888245, "learning_rate": 9.895177511200864e-06, "loss": 0.4267, "step": 960 }, { "epoch": 0.4770809200728115, "grad_norm": 0.5091522336006165, "learning_rate": 9.89458821694952e-06, "loss": 0.4109, "step": 961 }, { "epoch": 0.47757736223729935, "grad_norm": 0.5116826295852661, "learning_rate": 9.893997288528198e-06, "loss": 0.431, "step": 962 }, { "epoch": 0.4780738044017872, "grad_norm": 0.5908859372138977, "learning_rate": 9.893404726134193e-06, "loss": 0.4354, "step": 963 }, { "epoch": 0.478570246566275, "grad_norm": 0.4436945915222168, "learning_rate": 9.892810529965344e-06, "loss": 0.4183, "step": 964 }, { "epoch": 0.47906668873076286, "grad_norm": 0.5276147127151489, "learning_rate": 9.89221470022004e-06, "loss": 0.4231, "step": 965 }, { "epoch": 0.4795631308952507, "grad_norm": 0.49840396642684937, "learning_rate": 9.891617237097209e-06, "loss": 0.4292, "step": 966 }, { "epoch": 0.48005957305973856, "grad_norm": 0.517612874507904, "learning_rate": 9.891018140796332e-06, "loss": 0.4529, "step": 967 }, { "epoch": 0.48055601522422636, "grad_norm": 0.5972943305969238, "learning_rate": 9.890417411517426e-06, "loss": 0.4856, "step": 968 }, { "epoch": 0.4810524573887142, "grad_norm": 0.5636406540870667, "learning_rate": 9.889815049461062e-06, "loss": 0.4283, "step": 969 }, { "epoch": 0.48154889955320207, "grad_norm": 0.5095784068107605, "learning_rate": 9.88921105482835e-06, "loss": 0.4539, "step": 970 }, { "epoch": 0.48204534171768987, "grad_norm": 0.5541165471076965, "learning_rate": 9.888605427820947e-06, "loss": 0.4396, "step": 971 }, { "epoch": 0.4825417838821777, "grad_norm": 0.5522834658622742, "learning_rate": 9.887998168641057e-06, "loss": 0.4257, "step": 972 }, { "epoch": 0.4830382260466656, "grad_norm": 0.5257325172424316, "learning_rate": 9.887389277491429e-06, "loss": 0.4677, "step": 973 }, { "epoch": 0.4835346682111534, "grad_norm": 0.5484980344772339, "learning_rate": 9.88677875457535e-06, "loss": 0.4503, "step": 974 }, { "epoch": 0.4840311103756412, "grad_norm": 0.5331055521965027, "learning_rate": 9.886166600096662e-06, "loss": 0.4422, "step": 975 }, { "epoch": 0.4845275525401291, "grad_norm": 0.464057058095932, "learning_rate": 9.885552814259746e-06, "loss": 0.4194, "step": 976 }, { "epoch": 0.48502399470461693, "grad_norm": 0.4838947057723999, "learning_rate": 9.884937397269525e-06, "loss": 0.4026, "step": 977 }, { "epoch": 0.48552043686910473, "grad_norm": 0.4410441815853119, "learning_rate": 9.884320349331474e-06, "loss": 0.4127, "step": 978 }, { "epoch": 0.4860168790335926, "grad_norm": 0.45617637038230896, "learning_rate": 9.883701670651607e-06, "loss": 0.4151, "step": 979 }, { "epoch": 0.48651332119808044, "grad_norm": 0.45998504757881165, "learning_rate": 9.883081361436482e-06, "loss": 0.4421, "step": 980 }, { "epoch": 0.48700976336256824, "grad_norm": 0.4553280174732208, "learning_rate": 9.882459421893206e-06, "loss": 0.446, "step": 981 }, { "epoch": 0.4875062055270561, "grad_norm": 0.4982219934463501, "learning_rate": 9.881835852229427e-06, "loss": 0.4323, "step": 982 }, { "epoch": 0.48800264769154394, "grad_norm": 0.4532395303249359, "learning_rate": 9.881210652653338e-06, "loss": 0.4478, "step": 983 }, { "epoch": 0.4884990898560318, "grad_norm": 0.5249693989753723, "learning_rate": 9.880583823373676e-06, "loss": 0.4597, "step": 984 }, { "epoch": 0.4889955320205196, "grad_norm": 0.5217178463935852, "learning_rate": 9.879955364599722e-06, "loss": 0.4216, "step": 985 }, { "epoch": 0.48949197418500745, "grad_norm": 0.5868688821792603, "learning_rate": 9.879325276541303e-06, "loss": 0.4214, "step": 986 }, { "epoch": 0.4899884163494953, "grad_norm": 0.5302393436431885, "learning_rate": 9.878693559408785e-06, "loss": 0.4232, "step": 987 }, { "epoch": 0.4904848585139831, "grad_norm": 0.573111891746521, "learning_rate": 9.878060213413083e-06, "loss": 0.4428, "step": 988 }, { "epoch": 0.49098130067847096, "grad_norm": 0.5754631161689758, "learning_rate": 9.877425238765657e-06, "loss": 0.4174, "step": 989 }, { "epoch": 0.4914777428429588, "grad_norm": 0.5671702027320862, "learning_rate": 9.876788635678502e-06, "loss": 0.4156, "step": 990 }, { "epoch": 0.4919741850074466, "grad_norm": 0.595040500164032, "learning_rate": 9.876150404364166e-06, "loss": 0.4441, "step": 991 }, { "epoch": 0.49247062717193446, "grad_norm": 0.5539161562919617, "learning_rate": 9.875510545035736e-06, "loss": 0.4246, "step": 992 }, { "epoch": 0.4929670693364223, "grad_norm": 0.4997997283935547, "learning_rate": 9.874869057906844e-06, "loss": 0.438, "step": 993 }, { "epoch": 0.49346351150091017, "grad_norm": 0.5182589888572693, "learning_rate": 9.874225943191666e-06, "loss": 0.3948, "step": 994 }, { "epoch": 0.49395995366539797, "grad_norm": 0.6709293127059937, "learning_rate": 9.87358120110492e-06, "loss": 0.4281, "step": 995 }, { "epoch": 0.4944563958298858, "grad_norm": 0.569585919380188, "learning_rate": 9.872934831861867e-06, "loss": 0.4546, "step": 996 }, { "epoch": 0.4949528379943737, "grad_norm": 0.49241816997528076, "learning_rate": 9.872286835678313e-06, "loss": 0.4111, "step": 997 }, { "epoch": 0.49544928015886147, "grad_norm": 0.6001294255256653, "learning_rate": 9.871637212770606e-06, "loss": 0.4495, "step": 998 }, { "epoch": 0.4959457223233493, "grad_norm": 0.6091530919075012, "learning_rate": 9.870985963355636e-06, "loss": 0.4281, "step": 999 }, { "epoch": 0.4964421644878372, "grad_norm": 0.5085064768791199, "learning_rate": 9.87033308765084e-06, "loss": 0.4244, "step": 1000 }, { "epoch": 0.496938606652325, "grad_norm": 0.552900493144989, "learning_rate": 9.869678585874193e-06, "loss": 0.4353, "step": 1001 }, { "epoch": 0.49743504881681283, "grad_norm": 0.5535452365875244, "learning_rate": 9.86902245824422e-06, "loss": 0.4104, "step": 1002 }, { "epoch": 0.4979314909813007, "grad_norm": 0.560467004776001, "learning_rate": 9.868364704979977e-06, "loss": 0.4348, "step": 1003 }, { "epoch": 0.49842793314578854, "grad_norm": 0.5375909209251404, "learning_rate": 9.867705326301077e-06, "loss": 0.4464, "step": 1004 }, { "epoch": 0.49892437531027634, "grad_norm": 0.5028685927391052, "learning_rate": 9.867044322427663e-06, "loss": 0.4077, "step": 1005 }, { "epoch": 0.4994208174747642, "grad_norm": 0.46395447850227356, "learning_rate": 9.86638169358043e-06, "loss": 0.4405, "step": 1006 }, { "epoch": 0.49991725963925204, "grad_norm": 0.5610598921775818, "learning_rate": 9.865717439980611e-06, "loss": 0.4243, "step": 1007 }, { "epoch": 0.5004137018037399, "grad_norm": 0.5174155831336975, "learning_rate": 9.86505156184998e-06, "loss": 0.413, "step": 1008 }, { "epoch": 0.5009101439682278, "grad_norm": 0.4862215220928192, "learning_rate": 9.864384059410858e-06, "loss": 0.429, "step": 1009 }, { "epoch": 0.5014065861327155, "grad_norm": 0.5398895144462585, "learning_rate": 9.863714932886106e-06, "loss": 0.4132, "step": 1010 }, { "epoch": 0.5019030282972033, "grad_norm": 0.6123786568641663, "learning_rate": 9.863044182499126e-06, "loss": 0.4407, "step": 1011 }, { "epoch": 0.5023994704616912, "grad_norm": 0.42514514923095703, "learning_rate": 9.862371808473862e-06, "loss": 0.4065, "step": 1012 }, { "epoch": 0.502895912626179, "grad_norm": 0.6048532128334045, "learning_rate": 9.861697811034805e-06, "loss": 0.4067, "step": 1013 }, { "epoch": 0.5033923547906669, "grad_norm": 0.574467122554779, "learning_rate": 9.861022190406982e-06, "loss": 0.4214, "step": 1014 }, { "epoch": 0.5038887969551548, "grad_norm": 0.6229338049888611, "learning_rate": 9.860344946815966e-06, "loss": 0.4466, "step": 1015 }, { "epoch": 0.5043852391196426, "grad_norm": 0.6648034453392029, "learning_rate": 9.859666080487868e-06, "loss": 0.4294, "step": 1016 }, { "epoch": 0.5048816812841304, "grad_norm": 0.4747330844402313, "learning_rate": 9.858985591649343e-06, "loss": 0.4, "step": 1017 }, { "epoch": 0.5053781234486182, "grad_norm": 0.7622718214988708, "learning_rate": 9.85830348052759e-06, "loss": 0.4242, "step": 1018 }, { "epoch": 0.5058745656131061, "grad_norm": 0.5256722569465637, "learning_rate": 9.857619747350346e-06, "loss": 0.4286, "step": 1019 }, { "epoch": 0.5063710077775939, "grad_norm": 0.6567668318748474, "learning_rate": 9.856934392345892e-06, "loss": 0.4185, "step": 1020 }, { "epoch": 0.5068674499420818, "grad_norm": 0.6085847020149231, "learning_rate": 9.856247415743048e-06, "loss": 0.3908, "step": 1021 }, { "epoch": 0.5073638921065696, "grad_norm": 0.4850182831287384, "learning_rate": 9.855558817771177e-06, "loss": 0.431, "step": 1022 }, { "epoch": 0.5078603342710574, "grad_norm": 0.6314878463745117, "learning_rate": 9.854868598660184e-06, "loss": 0.4291, "step": 1023 }, { "epoch": 0.5083567764355452, "grad_norm": 0.4774710237979889, "learning_rate": 9.854176758640513e-06, "loss": 0.4131, "step": 1024 }, { "epoch": 0.5088532186000331, "grad_norm": 0.6782926917076111, "learning_rate": 9.853483297943153e-06, "loss": 0.4459, "step": 1025 }, { "epoch": 0.5093496607645209, "grad_norm": 0.48217692971229553, "learning_rate": 9.85278821679963e-06, "loss": 0.4266, "step": 1026 }, { "epoch": 0.5098461029290088, "grad_norm": 0.5392743945121765, "learning_rate": 9.852091515442012e-06, "loss": 0.4337, "step": 1027 }, { "epoch": 0.5103425450934966, "grad_norm": 0.5566638708114624, "learning_rate": 9.85139319410291e-06, "loss": 0.439, "step": 1028 }, { "epoch": 0.5108389872579845, "grad_norm": 0.5827664732933044, "learning_rate": 9.850693253015473e-06, "loss": 0.4295, "step": 1029 }, { "epoch": 0.5113354294224722, "grad_norm": 0.5606441497802734, "learning_rate": 9.849991692413394e-06, "loss": 0.4286, "step": 1030 }, { "epoch": 0.5118318715869601, "grad_norm": 0.5372473001480103, "learning_rate": 9.849288512530906e-06, "loss": 0.4684, "step": 1031 }, { "epoch": 0.5123283137514479, "grad_norm": 0.4593173563480377, "learning_rate": 9.848583713602777e-06, "loss": 0.4325, "step": 1032 }, { "epoch": 0.5128247559159358, "grad_norm": 0.5057374835014343, "learning_rate": 9.847877295864326e-06, "loss": 0.4088, "step": 1033 }, { "epoch": 0.5133211980804236, "grad_norm": 0.491314560174942, "learning_rate": 9.847169259551403e-06, "loss": 0.4088, "step": 1034 }, { "epoch": 0.5138176402449115, "grad_norm": 0.5540562272071838, "learning_rate": 9.846459604900403e-06, "loss": 0.4491, "step": 1035 }, { "epoch": 0.5143140824093994, "grad_norm": 0.5143297910690308, "learning_rate": 9.845748332148259e-06, "loss": 0.4801, "step": 1036 }, { "epoch": 0.5148105245738871, "grad_norm": 0.5325254797935486, "learning_rate": 9.845035441532448e-06, "loss": 0.4417, "step": 1037 }, { "epoch": 0.515306966738375, "grad_norm": 0.5466920733451843, "learning_rate": 9.844320933290986e-06, "loss": 0.4091, "step": 1038 }, { "epoch": 0.5158034089028628, "grad_norm": 0.448233038187027, "learning_rate": 9.843604807662422e-06, "loss": 0.4328, "step": 1039 }, { "epoch": 0.5162998510673507, "grad_norm": 0.6015673875808716, "learning_rate": 9.842887064885856e-06, "loss": 0.4455, "step": 1040 }, { "epoch": 0.5167962932318385, "grad_norm": 0.6502758264541626, "learning_rate": 9.842167705200923e-06, "loss": 0.4396, "step": 1041 }, { "epoch": 0.5172927353963264, "grad_norm": 0.4777083396911621, "learning_rate": 9.841446728847795e-06, "loss": 0.4336, "step": 1042 }, { "epoch": 0.5177891775608142, "grad_norm": 0.5076798796653748, "learning_rate": 9.840724136067186e-06, "loss": 0.4444, "step": 1043 }, { "epoch": 0.518285619725302, "grad_norm": 0.5334762334823608, "learning_rate": 9.839999927100354e-06, "loss": 0.4178, "step": 1044 }, { "epoch": 0.5187820618897898, "grad_norm": 0.5610637664794922, "learning_rate": 9.839274102189089e-06, "loss": 0.445, "step": 1045 }, { "epoch": 0.5192785040542777, "grad_norm": 0.5031196475028992, "learning_rate": 9.838546661575725e-06, "loss": 0.4372, "step": 1046 }, { "epoch": 0.5197749462187655, "grad_norm": 0.5379114151000977, "learning_rate": 9.837817605503134e-06, "loss": 0.439, "step": 1047 }, { "epoch": 0.5202713883832534, "grad_norm": 0.5807168483734131, "learning_rate": 9.837086934214733e-06, "loss": 0.447, "step": 1048 }, { "epoch": 0.5207678305477412, "grad_norm": 0.5428000688552856, "learning_rate": 9.836354647954467e-06, "loss": 0.4614, "step": 1049 }, { "epoch": 0.521264272712229, "grad_norm": 0.533850908279419, "learning_rate": 9.835620746966829e-06, "loss": 0.4519, "step": 1050 }, { "epoch": 0.5217607148767168, "grad_norm": 0.5152621269226074, "learning_rate": 9.834885231496847e-06, "loss": 0.4196, "step": 1051 }, { "epoch": 0.5222571570412047, "grad_norm": 0.5040428042411804, "learning_rate": 9.834148101790093e-06, "loss": 0.421, "step": 1052 }, { "epoch": 0.5227535992056925, "grad_norm": 0.5338100790977478, "learning_rate": 9.833409358092673e-06, "loss": 0.4178, "step": 1053 }, { "epoch": 0.5232500413701804, "grad_norm": 0.5457937717437744, "learning_rate": 9.832669000651231e-06, "loss": 0.4321, "step": 1054 }, { "epoch": 0.5237464835346682, "grad_norm": 0.5479785203933716, "learning_rate": 9.831927029712957e-06, "loss": 0.446, "step": 1055 }, { "epoch": 0.5242429256991561, "grad_norm": 0.5846545696258545, "learning_rate": 9.831183445525571e-06, "loss": 0.4311, "step": 1056 }, { "epoch": 0.5247393678636438, "grad_norm": 0.5550375580787659, "learning_rate": 9.830438248337337e-06, "loss": 0.4677, "step": 1057 }, { "epoch": 0.5252358100281317, "grad_norm": 0.6024202108383179, "learning_rate": 9.829691438397056e-06, "loss": 0.4263, "step": 1058 }, { "epoch": 0.5257322521926195, "grad_norm": 0.5553526878356934, "learning_rate": 9.828943015954066e-06, "loss": 0.4224, "step": 1059 }, { "epoch": 0.5262286943571074, "grad_norm": 0.46248888969421387, "learning_rate": 9.828192981258249e-06, "loss": 0.4188, "step": 1060 }, { "epoch": 0.5267251365215952, "grad_norm": 0.4792938828468323, "learning_rate": 9.827441334560017e-06, "loss": 0.4563, "step": 1061 }, { "epoch": 0.5272215786860831, "grad_norm": 0.49958959221839905, "learning_rate": 9.826688076110328e-06, "loss": 0.4403, "step": 1062 }, { "epoch": 0.527718020850571, "grad_norm": 0.5540366768836975, "learning_rate": 9.82593320616067e-06, "loss": 0.4341, "step": 1063 }, { "epoch": 0.5282144630150587, "grad_norm": 0.5369521975517273, "learning_rate": 9.825176724963075e-06, "loss": 0.4591, "step": 1064 }, { "epoch": 0.5287109051795466, "grad_norm": 0.47331371903419495, "learning_rate": 9.824418632770115e-06, "loss": 0.4371, "step": 1065 }, { "epoch": 0.5292073473440344, "grad_norm": 0.5868648886680603, "learning_rate": 9.823658929834892e-06, "loss": 0.4381, "step": 1066 }, { "epoch": 0.5297037895085223, "grad_norm": 0.47633877396583557, "learning_rate": 9.822897616411055e-06, "loss": 0.4746, "step": 1067 }, { "epoch": 0.5302002316730101, "grad_norm": 0.5334937572479248, "learning_rate": 9.82213469275278e-06, "loss": 0.4664, "step": 1068 }, { "epoch": 0.530696673837498, "grad_norm": 0.4563238322734833, "learning_rate": 9.821370159114792e-06, "loss": 0.4243, "step": 1069 }, { "epoch": 0.5311931160019858, "grad_norm": 0.49171096086502075, "learning_rate": 9.820604015752344e-06, "loss": 0.4338, "step": 1070 }, { "epoch": 0.5316895581664736, "grad_norm": 0.508989155292511, "learning_rate": 9.819836262921231e-06, "loss": 0.4458, "step": 1071 }, { "epoch": 0.5321860003309614, "grad_norm": 0.4905683100223541, "learning_rate": 9.819066900877787e-06, "loss": 0.4416, "step": 1072 }, { "epoch": 0.5326824424954493, "grad_norm": 0.4803203344345093, "learning_rate": 9.81829592987888e-06, "loss": 0.404, "step": 1073 }, { "epoch": 0.5331788846599371, "grad_norm": 0.4807369112968445, "learning_rate": 9.817523350181916e-06, "loss": 0.4324, "step": 1074 }, { "epoch": 0.533675326824425, "grad_norm": 0.5262630581855774, "learning_rate": 9.81674916204484e-06, "loss": 0.4529, "step": 1075 }, { "epoch": 0.5341717689889128, "grad_norm": 0.558804988861084, "learning_rate": 9.815973365726126e-06, "loss": 0.4215, "step": 1076 }, { "epoch": 0.5346682111534006, "grad_norm": 0.5320982933044434, "learning_rate": 9.8151959614848e-06, "loss": 0.4223, "step": 1077 }, { "epoch": 0.5351646533178884, "grad_norm": 0.574862003326416, "learning_rate": 9.814416949580412e-06, "loss": 0.4519, "step": 1078 }, { "epoch": 0.5356610954823763, "grad_norm": 0.5383363366127014, "learning_rate": 9.813636330273051e-06, "loss": 0.4, "step": 1079 }, { "epoch": 0.5361575376468641, "grad_norm": 0.6238560080528259, "learning_rate": 9.812854103823349e-06, "loss": 0.4644, "step": 1080 }, { "epoch": 0.536653979811352, "grad_norm": 0.5985142588615417, "learning_rate": 9.812070270492467e-06, "loss": 0.3971, "step": 1081 }, { "epoch": 0.5371504219758398, "grad_norm": 0.5606648921966553, "learning_rate": 9.811284830542105e-06, "loss": 0.46, "step": 1082 }, { "epoch": 0.5376468641403277, "grad_norm": 0.5831212401390076, "learning_rate": 9.810497784234503e-06, "loss": 0.4527, "step": 1083 }, { "epoch": 0.5381433063048154, "grad_norm": 0.4741464853286743, "learning_rate": 9.809709131832431e-06, "loss": 0.4476, "step": 1084 }, { "epoch": 0.5386397484693033, "grad_norm": 0.5714442133903503, "learning_rate": 9.808918873599205e-06, "loss": 0.43, "step": 1085 }, { "epoch": 0.5391361906337911, "grad_norm": 0.5853979587554932, "learning_rate": 9.808127009798662e-06, "loss": 0.4542, "step": 1086 }, { "epoch": 0.539632632798279, "grad_norm": 0.5734511017799377, "learning_rate": 9.807333540695192e-06, "loss": 0.4288, "step": 1087 }, { "epoch": 0.5401290749627669, "grad_norm": 0.5419822335243225, "learning_rate": 9.806538466553705e-06, "loss": 0.3819, "step": 1088 }, { "epoch": 0.5406255171272547, "grad_norm": 0.5461882948875427, "learning_rate": 9.80574178763966e-06, "loss": 0.443, "step": 1089 }, { "epoch": 0.5411219592917426, "grad_norm": 0.515774130821228, "learning_rate": 9.804943504219046e-06, "loss": 0.4324, "step": 1090 }, { "epoch": 0.5416184014562303, "grad_norm": 0.48168379068374634, "learning_rate": 9.804143616558387e-06, "loss": 0.4305, "step": 1091 }, { "epoch": 0.5421148436207182, "grad_norm": 0.5623446702957153, "learning_rate": 9.803342124924742e-06, "loss": 0.4101, "step": 1092 }, { "epoch": 0.542611285785206, "grad_norm": 0.4616771936416626, "learning_rate": 9.802539029585709e-06, "loss": 0.4225, "step": 1093 }, { "epoch": 0.5431077279496939, "grad_norm": 0.5579067468643188, "learning_rate": 9.80173433080942e-06, "loss": 0.4295, "step": 1094 }, { "epoch": 0.5436041701141817, "grad_norm": 0.5686947703361511, "learning_rate": 9.800928028864543e-06, "loss": 0.443, "step": 1095 }, { "epoch": 0.5441006122786696, "grad_norm": 0.52989661693573, "learning_rate": 9.80012012402028e-06, "loss": 0.4177, "step": 1096 }, { "epoch": 0.5445970544431574, "grad_norm": 0.5262389183044434, "learning_rate": 9.799310616546367e-06, "loss": 0.4397, "step": 1097 }, { "epoch": 0.5450934966076452, "grad_norm": 0.5149286985397339, "learning_rate": 9.798499506713075e-06, "loss": 0.3708, "step": 1098 }, { "epoch": 0.545589938772133, "grad_norm": 0.5136967897415161, "learning_rate": 9.797686794791216e-06, "loss": 0.4272, "step": 1099 }, { "epoch": 0.5460863809366209, "grad_norm": 0.5531652569770813, "learning_rate": 9.79687248105213e-06, "loss": 0.4497, "step": 1100 }, { "epoch": 0.5465828231011087, "grad_norm": 0.5133037567138672, "learning_rate": 9.796056565767694e-06, "loss": 0.4303, "step": 1101 }, { "epoch": 0.5470792652655966, "grad_norm": 0.5108101963996887, "learning_rate": 9.79523904921032e-06, "loss": 0.4446, "step": 1102 }, { "epoch": 0.5475757074300844, "grad_norm": 0.620180070400238, "learning_rate": 9.794419931652954e-06, "loss": 0.4511, "step": 1103 }, { "epoch": 0.5480721495945722, "grad_norm": 0.4355308711528778, "learning_rate": 9.793599213369078e-06, "loss": 0.4082, "step": 1104 }, { "epoch": 0.54856859175906, "grad_norm": 0.532392144203186, "learning_rate": 9.792776894632709e-06, "loss": 0.4468, "step": 1105 }, { "epoch": 0.5490650339235479, "grad_norm": 0.5199252963066101, "learning_rate": 9.791952975718395e-06, "loss": 0.4544, "step": 1106 }, { "epoch": 0.5495614760880357, "grad_norm": 0.5468968152999878, "learning_rate": 9.791127456901219e-06, "loss": 0.4179, "step": 1107 }, { "epoch": 0.5500579182525236, "grad_norm": 0.4781573712825775, "learning_rate": 9.790300338456802e-06, "loss": 0.4627, "step": 1108 }, { "epoch": 0.5505543604170114, "grad_norm": 0.5217368006706238, "learning_rate": 9.789471620661296e-06, "loss": 0.4876, "step": 1109 }, { "epoch": 0.5510508025814993, "grad_norm": 0.4611489474773407, "learning_rate": 9.788641303791384e-06, "loss": 0.4116, "step": 1110 }, { "epoch": 0.551547244745987, "grad_norm": 0.4777194857597351, "learning_rate": 9.78780938812429e-06, "loss": 0.4535, "step": 1111 }, { "epoch": 0.5520436869104749, "grad_norm": 0.556638240814209, "learning_rate": 9.786975873937768e-06, "loss": 0.4489, "step": 1112 }, { "epoch": 0.5525401290749627, "grad_norm": 0.5220369100570679, "learning_rate": 9.786140761510103e-06, "loss": 0.4304, "step": 1113 }, { "epoch": 0.5530365712394506, "grad_norm": 0.5472318530082703, "learning_rate": 9.785304051120117e-06, "loss": 0.4694, "step": 1114 }, { "epoch": 0.5535330134039385, "grad_norm": 0.5350620746612549, "learning_rate": 9.784465743047168e-06, "loss": 0.4341, "step": 1115 }, { "epoch": 0.5540294555684263, "grad_norm": 0.5804701447486877, "learning_rate": 9.78362583757114e-06, "loss": 0.4129, "step": 1116 }, { "epoch": 0.5545258977329142, "grad_norm": 0.591534435749054, "learning_rate": 9.782784334972459e-06, "loss": 0.4532, "step": 1117 }, { "epoch": 0.5550223398974019, "grad_norm": 0.5435558557510376, "learning_rate": 9.781941235532076e-06, "loss": 0.3959, "step": 1118 }, { "epoch": 0.5555187820618898, "grad_norm": 0.6130022406578064, "learning_rate": 9.781096539531479e-06, "loss": 0.4245, "step": 1119 }, { "epoch": 0.5560152242263776, "grad_norm": 0.5402059555053711, "learning_rate": 9.780250247252692e-06, "loss": 0.4158, "step": 1120 }, { "epoch": 0.5565116663908655, "grad_norm": 0.6965900659561157, "learning_rate": 9.779402358978267e-06, "loss": 0.4524, "step": 1121 }, { "epoch": 0.5570081085553533, "grad_norm": 0.5650897026062012, "learning_rate": 9.778552874991291e-06, "loss": 0.4173, "step": 1122 }, { "epoch": 0.5575045507198412, "grad_norm": 0.5196453332901001, "learning_rate": 9.777701795575385e-06, "loss": 0.3983, "step": 1123 }, { "epoch": 0.558000992884329, "grad_norm": 0.6387377381324768, "learning_rate": 9.7768491210147e-06, "loss": 0.4473, "step": 1124 }, { "epoch": 0.5584974350488168, "grad_norm": 0.5650745630264282, "learning_rate": 9.775994851593921e-06, "loss": 0.4317, "step": 1125 }, { "epoch": 0.5589938772133046, "grad_norm": 0.4777168333530426, "learning_rate": 9.775138987598264e-06, "loss": 0.4099, "step": 1126 }, { "epoch": 0.5594903193777925, "grad_norm": 0.5372154116630554, "learning_rate": 9.774281529313483e-06, "loss": 0.4265, "step": 1127 }, { "epoch": 0.5599867615422803, "grad_norm": 0.5358855128288269, "learning_rate": 9.773422477025854e-06, "loss": 0.4573, "step": 1128 }, { "epoch": 0.5604832037067682, "grad_norm": 0.47222018241882324, "learning_rate": 9.772561831022195e-06, "loss": 0.4212, "step": 1129 }, { "epoch": 0.560979645871256, "grad_norm": 0.5666679739952087, "learning_rate": 9.771699591589854e-06, "loss": 0.4213, "step": 1130 }, { "epoch": 0.5614760880357438, "grad_norm": 0.532594621181488, "learning_rate": 9.770835759016704e-06, "loss": 0.4611, "step": 1131 }, { "epoch": 0.5619725302002316, "grad_norm": 0.5289406180381775, "learning_rate": 9.76997033359116e-06, "loss": 0.4174, "step": 1132 }, { "epoch": 0.5624689723647195, "grad_norm": 0.5740029215812683, "learning_rate": 9.769103315602161e-06, "loss": 0.4757, "step": 1133 }, { "epoch": 0.5629654145292073, "grad_norm": 0.5167406797409058, "learning_rate": 9.768234705339184e-06, "loss": 0.3952, "step": 1134 }, { "epoch": 0.5634618566936952, "grad_norm": 0.5470203161239624, "learning_rate": 9.76736450309223e-06, "loss": 0.3889, "step": 1135 }, { "epoch": 0.563958298858183, "grad_norm": 0.5356740951538086, "learning_rate": 9.76649270915184e-06, "loss": 0.4306, "step": 1136 }, { "epoch": 0.5644547410226709, "grad_norm": 0.5565227270126343, "learning_rate": 9.765619323809078e-06, "loss": 0.4311, "step": 1137 }, { "epoch": 0.5649511831871586, "grad_norm": 0.5442019104957581, "learning_rate": 9.76474434735555e-06, "loss": 0.4411, "step": 1138 }, { "epoch": 0.5654476253516465, "grad_norm": 0.4968837797641754, "learning_rate": 9.76386778008338e-06, "loss": 0.4251, "step": 1139 }, { "epoch": 0.5659440675161344, "grad_norm": 0.5676209926605225, "learning_rate": 9.762989622285234e-06, "loss": 0.4231, "step": 1140 }, { "epoch": 0.5664405096806222, "grad_norm": 0.5116806626319885, "learning_rate": 9.762109874254305e-06, "loss": 0.4036, "step": 1141 }, { "epoch": 0.5669369518451101, "grad_norm": 0.5689163208007812, "learning_rate": 9.761228536284313e-06, "loss": 0.399, "step": 1142 }, { "epoch": 0.5674333940095979, "grad_norm": 0.5479409694671631, "learning_rate": 9.76034560866952e-06, "loss": 0.4211, "step": 1143 }, { "epoch": 0.5679298361740858, "grad_norm": 0.47617587447166443, "learning_rate": 9.759461091704703e-06, "loss": 0.4231, "step": 1144 }, { "epoch": 0.5684262783385735, "grad_norm": 0.5325528979301453, "learning_rate": 9.758574985685186e-06, "loss": 0.4005, "step": 1145 }, { "epoch": 0.5689227205030614, "grad_norm": 0.5765860080718994, "learning_rate": 9.75768729090681e-06, "loss": 0.4682, "step": 1146 }, { "epoch": 0.5694191626675492, "grad_norm": 0.49486884474754333, "learning_rate": 9.756798007665954e-06, "loss": 0.4188, "step": 1147 }, { "epoch": 0.5699156048320371, "grad_norm": 0.5626127123832703, "learning_rate": 9.755907136259525e-06, "loss": 0.4299, "step": 1148 }, { "epoch": 0.5704120469965249, "grad_norm": 0.5591809749603271, "learning_rate": 9.755014676984965e-06, "loss": 0.4606, "step": 1149 }, { "epoch": 0.5709084891610128, "grad_norm": 0.5387974381446838, "learning_rate": 9.754120630140237e-06, "loss": 0.4224, "step": 1150 }, { "epoch": 0.5714049313255006, "grad_norm": 0.5483975410461426, "learning_rate": 9.75322499602384e-06, "loss": 0.4172, "step": 1151 }, { "epoch": 0.5719013734899884, "grad_norm": 0.5780379772186279, "learning_rate": 9.752327774934802e-06, "loss": 0.4727, "step": 1152 }, { "epoch": 0.5723978156544762, "grad_norm": 0.46893295645713806, "learning_rate": 9.751428967172683e-06, "loss": 0.4066, "step": 1153 }, { "epoch": 0.5728942578189641, "grad_norm": 0.4754081070423126, "learning_rate": 9.750528573037566e-06, "loss": 0.4294, "step": 1154 }, { "epoch": 0.5733906999834519, "grad_norm": 0.5327164530754089, "learning_rate": 9.749626592830073e-06, "loss": 0.4462, "step": 1155 }, { "epoch": 0.5738871421479398, "grad_norm": 0.5004047751426697, "learning_rate": 9.748723026851346e-06, "loss": 0.4268, "step": 1156 }, { "epoch": 0.5743835843124276, "grad_norm": 0.501762866973877, "learning_rate": 9.747817875403066e-06, "loss": 0.478, "step": 1157 }, { "epoch": 0.5748800264769154, "grad_norm": 0.46259817481040955, "learning_rate": 9.746911138787434e-06, "loss": 0.4126, "step": 1158 }, { "epoch": 0.5753764686414032, "grad_norm": 0.5084194540977478, "learning_rate": 9.746002817307187e-06, "loss": 0.4683, "step": 1159 }, { "epoch": 0.5758729108058911, "grad_norm": 0.4157443046569824, "learning_rate": 9.745092911265587e-06, "loss": 0.3997, "step": 1160 }, { "epoch": 0.576369352970379, "grad_norm": 0.4279753565788269, "learning_rate": 9.744181420966432e-06, "loss": 0.4252, "step": 1161 }, { "epoch": 0.5768657951348668, "grad_norm": 0.42653948068618774, "learning_rate": 9.743268346714037e-06, "loss": 0.4027, "step": 1162 }, { "epoch": 0.5773622372993547, "grad_norm": 0.43132612109184265, "learning_rate": 9.742353688813257e-06, "loss": 0.419, "step": 1163 }, { "epoch": 0.5778586794638425, "grad_norm": 0.46800023317337036, "learning_rate": 9.741437447569473e-06, "loss": 0.4298, "step": 1164 }, { "epoch": 0.5783551216283302, "grad_norm": 0.49526914954185486, "learning_rate": 9.740519623288587e-06, "loss": 0.4272, "step": 1165 }, { "epoch": 0.5788515637928181, "grad_norm": 0.5277729630470276, "learning_rate": 9.73960021627704e-06, "loss": 0.4384, "step": 1166 }, { "epoch": 0.579348005957306, "grad_norm": 0.4937385320663452, "learning_rate": 9.738679226841796e-06, "loss": 0.4217, "step": 1167 }, { "epoch": 0.5798444481217938, "grad_norm": 0.4705744981765747, "learning_rate": 9.737756655290348e-06, "loss": 0.4344, "step": 1168 }, { "epoch": 0.5803408902862817, "grad_norm": 0.5207006931304932, "learning_rate": 9.736832501930717e-06, "loss": 0.4339, "step": 1169 }, { "epoch": 0.5808373324507695, "grad_norm": 0.5289670825004578, "learning_rate": 9.735906767071456e-06, "loss": 0.3992, "step": 1170 }, { "epoch": 0.5813337746152574, "grad_norm": 0.4760219156742096, "learning_rate": 9.73497945102164e-06, "loss": 0.444, "step": 1171 }, { "epoch": 0.5818302167797451, "grad_norm": 0.48737385869026184, "learning_rate": 9.734050554090872e-06, "loss": 0.3922, "step": 1172 }, { "epoch": 0.582326658944233, "grad_norm": 0.4717404842376709, "learning_rate": 9.733120076589291e-06, "loss": 0.4064, "step": 1173 }, { "epoch": 0.5828231011087208, "grad_norm": 0.46915537118911743, "learning_rate": 9.732188018827556e-06, "loss": 0.4451, "step": 1174 }, { "epoch": 0.5833195432732087, "grad_norm": 0.5035198330879211, "learning_rate": 9.731254381116852e-06, "loss": 0.4394, "step": 1175 }, { "epoch": 0.5838159854376965, "grad_norm": 0.5143969655036926, "learning_rate": 9.730319163768902e-06, "loss": 0.4366, "step": 1176 }, { "epoch": 0.5843124276021844, "grad_norm": 0.5205576419830322, "learning_rate": 9.729382367095944e-06, "loss": 0.3978, "step": 1177 }, { "epoch": 0.5848088697666722, "grad_norm": 0.5427659153938293, "learning_rate": 9.728443991410752e-06, "loss": 0.4724, "step": 1178 }, { "epoch": 0.58530531193116, "grad_norm": 0.5591843724250793, "learning_rate": 9.727504037026623e-06, "loss": 0.4362, "step": 1179 }, { "epoch": 0.5858017540956478, "grad_norm": 0.4947216510772705, "learning_rate": 9.726562504257383e-06, "loss": 0.4079, "step": 1180 }, { "epoch": 0.5862981962601357, "grad_norm": 0.4834507703781128, "learning_rate": 9.725619393417382e-06, "loss": 0.4119, "step": 1181 }, { "epoch": 0.5867946384246235, "grad_norm": 0.4930168390274048, "learning_rate": 9.724674704821503e-06, "loss": 0.4289, "step": 1182 }, { "epoch": 0.5872910805891114, "grad_norm": 0.45657438039779663, "learning_rate": 9.72372843878515e-06, "loss": 0.4066, "step": 1183 }, { "epoch": 0.5877875227535992, "grad_norm": 0.5892201066017151, "learning_rate": 9.722780595624253e-06, "loss": 0.4121, "step": 1184 }, { "epoch": 0.588283964918087, "grad_norm": 0.5301269292831421, "learning_rate": 9.721831175655274e-06, "loss": 0.4468, "step": 1185 }, { "epoch": 0.5887804070825748, "grad_norm": 0.5476454496383667, "learning_rate": 9.720880179195196e-06, "loss": 0.4227, "step": 1186 }, { "epoch": 0.5892768492470627, "grad_norm": 0.5568174123764038, "learning_rate": 9.719927606561534e-06, "loss": 0.416, "step": 1187 }, { "epoch": 0.5897732914115505, "grad_norm": 0.4975823163986206, "learning_rate": 9.718973458072325e-06, "loss": 0.4583, "step": 1188 }, { "epoch": 0.5902697335760384, "grad_norm": 0.5037890076637268, "learning_rate": 9.718017734046134e-06, "loss": 0.4283, "step": 1189 }, { "epoch": 0.5907661757405263, "grad_norm": 0.5127497315406799, "learning_rate": 9.717060434802049e-06, "loss": 0.4386, "step": 1190 }, { "epoch": 0.5912626179050141, "grad_norm": 0.48390138149261475, "learning_rate": 9.716101560659688e-06, "loss": 0.4386, "step": 1191 }, { "epoch": 0.5917590600695019, "grad_norm": 0.46671566367149353, "learning_rate": 9.715141111939192e-06, "loss": 0.4158, "step": 1192 }, { "epoch": 0.5922555022339897, "grad_norm": 0.5034670829772949, "learning_rate": 9.714179088961228e-06, "loss": 0.4195, "step": 1193 }, { "epoch": 0.5927519443984776, "grad_norm": 0.5509238243103027, "learning_rate": 9.713215492046992e-06, "loss": 0.4031, "step": 1194 }, { "epoch": 0.5932483865629654, "grad_norm": 0.5625327229499817, "learning_rate": 9.712250321518201e-06, "loss": 0.4266, "step": 1195 }, { "epoch": 0.5937448287274533, "grad_norm": 0.5141175985336304, "learning_rate": 9.711283577697099e-06, "loss": 0.4117, "step": 1196 }, { "epoch": 0.5942412708919411, "grad_norm": 0.5091103315353394, "learning_rate": 9.710315260906456e-06, "loss": 0.4029, "step": 1197 }, { "epoch": 0.594737713056429, "grad_norm": 0.64869624376297, "learning_rate": 9.709345371469567e-06, "loss": 0.4357, "step": 1198 }, { "epoch": 0.5952341552209167, "grad_norm": 0.521274983882904, "learning_rate": 9.708373909710251e-06, "loss": 0.4468, "step": 1199 }, { "epoch": 0.5957305973854046, "grad_norm": 0.48420047760009766, "learning_rate": 9.707400875952856e-06, "loss": 0.4286, "step": 1200 }, { "epoch": 0.5962270395498924, "grad_norm": 0.5356220602989197, "learning_rate": 9.706426270522244e-06, "loss": 0.4463, "step": 1201 }, { "epoch": 0.5967234817143803, "grad_norm": 0.49604955315589905, "learning_rate": 9.705450093743815e-06, "loss": 0.4095, "step": 1202 }, { "epoch": 0.5972199238788681, "grad_norm": 0.47788006067276, "learning_rate": 9.704472345943489e-06, "loss": 0.4394, "step": 1203 }, { "epoch": 0.597716366043356, "grad_norm": 0.4630562663078308, "learning_rate": 9.703493027447705e-06, "loss": 0.4198, "step": 1204 }, { "epoch": 0.5982128082078438, "grad_norm": 0.5444955229759216, "learning_rate": 9.702512138583435e-06, "loss": 0.4591, "step": 1205 }, { "epoch": 0.5987092503723316, "grad_norm": 0.450776606798172, "learning_rate": 9.701529679678168e-06, "loss": 0.4216, "step": 1206 }, { "epoch": 0.5992056925368194, "grad_norm": 0.4049948453903198, "learning_rate": 9.700545651059921e-06, "loss": 0.3862, "step": 1207 }, { "epoch": 0.5997021347013073, "grad_norm": 0.4928717315196991, "learning_rate": 9.699560053057236e-06, "loss": 0.4331, "step": 1208 }, { "epoch": 0.6001985768657951, "grad_norm": 0.5407342314720154, "learning_rate": 9.698572885999174e-06, "loss": 0.4486, "step": 1209 }, { "epoch": 0.600695019030283, "grad_norm": 0.5695802569389343, "learning_rate": 9.697584150215326e-06, "loss": 0.4337, "step": 1210 }, { "epoch": 0.6011914611947708, "grad_norm": 0.5236667394638062, "learning_rate": 9.696593846035807e-06, "loss": 0.4336, "step": 1211 }, { "epoch": 0.6016879033592586, "grad_norm": 0.618220865726471, "learning_rate": 9.695601973791245e-06, "loss": 0.445, "step": 1212 }, { "epoch": 0.6021843455237464, "grad_norm": 0.6402891278266907, "learning_rate": 9.694608533812807e-06, "loss": 0.444, "step": 1213 }, { "epoch": 0.6026807876882343, "grad_norm": 0.5230380296707153, "learning_rate": 9.693613526432168e-06, "loss": 0.421, "step": 1214 }, { "epoch": 0.6031772298527222, "grad_norm": 0.5487551093101501, "learning_rate": 9.692616951981539e-06, "loss": 0.4205, "step": 1215 }, { "epoch": 0.60367367201721, "grad_norm": 0.556509792804718, "learning_rate": 9.69161881079365e-06, "loss": 0.4329, "step": 1216 }, { "epoch": 0.6041701141816979, "grad_norm": 0.5386896133422852, "learning_rate": 9.690619103201751e-06, "loss": 0.4265, "step": 1217 }, { "epoch": 0.6046665563461857, "grad_norm": 0.5098614692687988, "learning_rate": 9.689617829539616e-06, "loss": 0.424, "step": 1218 }, { "epoch": 0.6051629985106735, "grad_norm": 0.5308360457420349, "learning_rate": 9.688614990141545e-06, "loss": 0.4195, "step": 1219 }, { "epoch": 0.6056594406751613, "grad_norm": 0.5589315295219421, "learning_rate": 9.687610585342358e-06, "loss": 0.4478, "step": 1220 }, { "epoch": 0.6061558828396492, "grad_norm": 0.46537381410598755, "learning_rate": 9.686604615477398e-06, "loss": 0.4028, "step": 1221 }, { "epoch": 0.606652325004137, "grad_norm": 0.49599573016166687, "learning_rate": 9.685597080882533e-06, "loss": 0.41, "step": 1222 }, { "epoch": 0.6071487671686249, "grad_norm": 0.49529409408569336, "learning_rate": 9.684587981894148e-06, "loss": 0.4376, "step": 1223 }, { "epoch": 0.6076452093331127, "grad_norm": 0.4942144751548767, "learning_rate": 9.68357731884916e-06, "loss": 0.4288, "step": 1224 }, { "epoch": 0.6081416514976006, "grad_norm": 0.5832082629203796, "learning_rate": 9.682565092084994e-06, "loss": 0.4407, "step": 1225 }, { "epoch": 0.6086380936620883, "grad_norm": 0.5425435304641724, "learning_rate": 9.681551301939612e-06, "loss": 0.4352, "step": 1226 }, { "epoch": 0.6091345358265762, "grad_norm": 0.5251684784889221, "learning_rate": 9.680535948751485e-06, "loss": 0.4291, "step": 1227 }, { "epoch": 0.609630977991064, "grad_norm": 0.5943320989608765, "learning_rate": 9.679519032859616e-06, "loss": 0.448, "step": 1228 }, { "epoch": 0.6101274201555519, "grad_norm": 0.48713070154190063, "learning_rate": 9.678500554603524e-06, "loss": 0.4258, "step": 1229 }, { "epoch": 0.6106238623200397, "grad_norm": 0.5225383043289185, "learning_rate": 9.677480514323253e-06, "loss": 0.4117, "step": 1230 }, { "epoch": 0.6111203044845276, "grad_norm": 0.5342687368392944, "learning_rate": 9.676458912359362e-06, "loss": 0.4239, "step": 1231 }, { "epoch": 0.6116167466490154, "grad_norm": 0.49197959899902344, "learning_rate": 9.675435749052941e-06, "loss": 0.4052, "step": 1232 }, { "epoch": 0.6121131888135032, "grad_norm": 0.4730934500694275, "learning_rate": 9.674411024745593e-06, "loss": 0.425, "step": 1233 }, { "epoch": 0.612609630977991, "grad_norm": 0.5112777352333069, "learning_rate": 9.67338473977945e-06, "loss": 0.4013, "step": 1234 }, { "epoch": 0.6131060731424789, "grad_norm": 0.5142985582351685, "learning_rate": 9.672356894497157e-06, "loss": 0.3813, "step": 1235 }, { "epoch": 0.6136025153069667, "grad_norm": 0.5871564745903015, "learning_rate": 9.671327489241884e-06, "loss": 0.4513, "step": 1236 }, { "epoch": 0.6140989574714546, "grad_norm": 0.5272417664527893, "learning_rate": 9.670296524357322e-06, "loss": 0.4172, "step": 1237 }, { "epoch": 0.6145953996359425, "grad_norm": 0.48807352781295776, "learning_rate": 9.669264000187681e-06, "loss": 0.4088, "step": 1238 }, { "epoch": 0.6150918418004302, "grad_norm": 0.5822413563728333, "learning_rate": 9.668229917077696e-06, "loss": 0.4355, "step": 1239 }, { "epoch": 0.615588283964918, "grad_norm": 0.5084622502326965, "learning_rate": 9.667194275372618e-06, "loss": 0.4251, "step": 1240 }, { "epoch": 0.6160847261294059, "grad_norm": 0.5088801980018616, "learning_rate": 9.666157075418216e-06, "loss": 0.446, "step": 1241 }, { "epoch": 0.6165811682938938, "grad_norm": 0.5859757661819458, "learning_rate": 9.665118317560786e-06, "loss": 0.43, "step": 1242 }, { "epoch": 0.6170776104583816, "grad_norm": 0.46454647183418274, "learning_rate": 9.664078002147143e-06, "loss": 0.4136, "step": 1243 }, { "epoch": 0.6175740526228695, "grad_norm": 0.4904041886329651, "learning_rate": 9.663036129524616e-06, "loss": 0.4222, "step": 1244 }, { "epoch": 0.6180704947873573, "grad_norm": 0.5326743125915527, "learning_rate": 9.66199270004106e-06, "loss": 0.3901, "step": 1245 }, { "epoch": 0.6185669369518451, "grad_norm": 0.5233071446418762, "learning_rate": 9.660947714044846e-06, "loss": 0.4482, "step": 1246 }, { "epoch": 0.6190633791163329, "grad_norm": 0.48826712369918823, "learning_rate": 9.659901171884869e-06, "loss": 0.421, "step": 1247 }, { "epoch": 0.6195598212808208, "grad_norm": 0.5457708835601807, "learning_rate": 9.658853073910541e-06, "loss": 0.4162, "step": 1248 }, { "epoch": 0.6200562634453086, "grad_norm": 0.5104749798774719, "learning_rate": 9.65780342047179e-06, "loss": 0.437, "step": 1249 }, { "epoch": 0.6205527056097965, "grad_norm": 0.4966195225715637, "learning_rate": 9.65675221191907e-06, "loss": 0.3962, "step": 1250 }, { "epoch": 0.6210491477742843, "grad_norm": 0.5349470973014832, "learning_rate": 9.65569944860335e-06, "loss": 0.423, "step": 1251 }, { "epoch": 0.6215455899387722, "grad_norm": 0.5285604000091553, "learning_rate": 9.65464513087612e-06, "loss": 0.4044, "step": 1252 }, { "epoch": 0.6220420321032599, "grad_norm": 0.5484540462493896, "learning_rate": 9.653589259089386e-06, "loss": 0.4329, "step": 1253 }, { "epoch": 0.6225384742677478, "grad_norm": 0.5081093907356262, "learning_rate": 9.652531833595675e-06, "loss": 0.4282, "step": 1254 }, { "epoch": 0.6230349164322356, "grad_norm": 0.4672727882862091, "learning_rate": 9.651472854748036e-06, "loss": 0.4399, "step": 1255 }, { "epoch": 0.6235313585967235, "grad_norm": 0.6321982741355896, "learning_rate": 9.65041232290003e-06, "loss": 0.4391, "step": 1256 }, { "epoch": 0.6240278007612113, "grad_norm": 0.5001663565635681, "learning_rate": 9.649350238405739e-06, "loss": 0.445, "step": 1257 }, { "epoch": 0.6245242429256992, "grad_norm": 0.48429036140441895, "learning_rate": 9.648286601619766e-06, "loss": 0.43, "step": 1258 }, { "epoch": 0.625020685090187, "grad_norm": 0.5205984711647034, "learning_rate": 9.647221412897232e-06, "loss": 0.403, "step": 1259 }, { "epoch": 0.6255171272546748, "grad_norm": 0.547050416469574, "learning_rate": 9.646154672593771e-06, "loss": 0.4182, "step": 1260 }, { "epoch": 0.6260135694191626, "grad_norm": 0.5567912459373474, "learning_rate": 9.64508638106554e-06, "loss": 0.4222, "step": 1261 }, { "epoch": 0.6265100115836505, "grad_norm": 0.5758523344993591, "learning_rate": 9.644016538669214e-06, "loss": 0.408, "step": 1262 }, { "epoch": 0.6270064537481383, "grad_norm": 0.5056365728378296, "learning_rate": 9.642945145761983e-06, "loss": 0.4508, "step": 1263 }, { "epoch": 0.6275028959126262, "grad_norm": 0.4823448359966278, "learning_rate": 9.641872202701557e-06, "loss": 0.3939, "step": 1264 }, { "epoch": 0.627999338077114, "grad_norm": 0.5596896409988403, "learning_rate": 9.640797709846159e-06, "loss": 0.4132, "step": 1265 }, { "epoch": 0.6284957802416018, "grad_norm": 0.5160283446311951, "learning_rate": 9.639721667554537e-06, "loss": 0.4283, "step": 1266 }, { "epoch": 0.6289922224060897, "grad_norm": 0.5751320123672485, "learning_rate": 9.638644076185953e-06, "loss": 0.3944, "step": 1267 }, { "epoch": 0.6294886645705775, "grad_norm": 0.48275378346443176, "learning_rate": 9.63756493610018e-06, "loss": 0.4321, "step": 1268 }, { "epoch": 0.6299851067350654, "grad_norm": 0.4356823265552521, "learning_rate": 9.636484247657519e-06, "loss": 0.4158, "step": 1269 }, { "epoch": 0.6304815488995532, "grad_norm": 0.43823835253715515, "learning_rate": 9.635402011218778e-06, "loss": 0.405, "step": 1270 }, { "epoch": 0.6309779910640411, "grad_norm": 0.4618641436100006, "learning_rate": 9.634318227145291e-06, "loss": 0.4235, "step": 1271 }, { "epoch": 0.6314744332285289, "grad_norm": 0.494246244430542, "learning_rate": 9.633232895798901e-06, "loss": 0.4349, "step": 1272 }, { "epoch": 0.6319708753930167, "grad_norm": 0.4567461907863617, "learning_rate": 9.63214601754197e-06, "loss": 0.4118, "step": 1273 }, { "epoch": 0.6324673175575045, "grad_norm": 0.431259423494339, "learning_rate": 9.63105759273738e-06, "loss": 0.4261, "step": 1274 }, { "epoch": 0.6329637597219924, "grad_norm": 0.4961898624897003, "learning_rate": 9.629967621748527e-06, "loss": 0.4068, "step": 1275 }, { "epoch": 0.6334602018864802, "grad_norm": 0.5077929496765137, "learning_rate": 9.628876104939318e-06, "loss": 0.4609, "step": 1276 }, { "epoch": 0.6339566440509681, "grad_norm": 0.42936035990715027, "learning_rate": 9.627783042674182e-06, "loss": 0.4094, "step": 1277 }, { "epoch": 0.6344530862154559, "grad_norm": 0.4513116180896759, "learning_rate": 9.626688435318066e-06, "loss": 0.4228, "step": 1278 }, { "epoch": 0.6349495283799438, "grad_norm": 0.4492329955101013, "learning_rate": 9.62559228323643e-06, "loss": 0.4641, "step": 1279 }, { "epoch": 0.6354459705444315, "grad_norm": 0.4949449598789215, "learning_rate": 9.624494586795243e-06, "loss": 0.4151, "step": 1280 }, { "epoch": 0.6359424127089194, "grad_norm": 0.4164928197860718, "learning_rate": 9.623395346361004e-06, "loss": 0.425, "step": 1281 }, { "epoch": 0.6364388548734072, "grad_norm": 0.4798770546913147, "learning_rate": 9.622294562300714e-06, "loss": 0.413, "step": 1282 }, { "epoch": 0.6369352970378951, "grad_norm": 0.49592557549476624, "learning_rate": 9.621192234981897e-06, "loss": 0.4394, "step": 1283 }, { "epoch": 0.6374317392023829, "grad_norm": 0.5068711638450623, "learning_rate": 9.620088364772589e-06, "loss": 0.4513, "step": 1284 }, { "epoch": 0.6379281813668708, "grad_norm": 0.4861076772212982, "learning_rate": 9.618982952041344e-06, "loss": 0.4359, "step": 1285 }, { "epoch": 0.6384246235313586, "grad_norm": 0.4205637574195862, "learning_rate": 9.61787599715723e-06, "loss": 0.3807, "step": 1286 }, { "epoch": 0.6389210656958464, "grad_norm": 0.4314909279346466, "learning_rate": 9.616767500489822e-06, "loss": 0.4112, "step": 1287 }, { "epoch": 0.6394175078603342, "grad_norm": 0.5234023928642273, "learning_rate": 9.615657462409227e-06, "loss": 0.4148, "step": 1288 }, { "epoch": 0.6399139500248221, "grad_norm": 0.4744281768798828, "learning_rate": 9.614545883286051e-06, "loss": 0.4237, "step": 1289 }, { "epoch": 0.64041039218931, "grad_norm": 0.5227346420288086, "learning_rate": 9.613432763491422e-06, "loss": 0.3961, "step": 1290 }, { "epoch": 0.6409068343537978, "grad_norm": 0.4964808225631714, "learning_rate": 9.612318103396977e-06, "loss": 0.4153, "step": 1291 }, { "epoch": 0.6414032765182857, "grad_norm": 0.4765512943267822, "learning_rate": 9.611201903374873e-06, "loss": 0.3919, "step": 1292 }, { "epoch": 0.6418997186827734, "grad_norm": 0.5205007791519165, "learning_rate": 9.610084163797782e-06, "loss": 0.4471, "step": 1293 }, { "epoch": 0.6423961608472613, "grad_norm": 0.4656890332698822, "learning_rate": 9.608964885038882e-06, "loss": 0.3951, "step": 1294 }, { "epoch": 0.6428926030117491, "grad_norm": 0.5273801684379578, "learning_rate": 9.607844067471871e-06, "loss": 0.4143, "step": 1295 }, { "epoch": 0.643389045176237, "grad_norm": 0.5907631516456604, "learning_rate": 9.606721711470962e-06, "loss": 0.4736, "step": 1296 }, { "epoch": 0.6438854873407248, "grad_norm": 0.5249814987182617, "learning_rate": 9.605597817410875e-06, "loss": 0.4176, "step": 1297 }, { "epoch": 0.6443819295052127, "grad_norm": 0.5770432353019714, "learning_rate": 9.604472385666851e-06, "loss": 0.4384, "step": 1298 }, { "epoch": 0.6448783716697005, "grad_norm": 0.48104920983314514, "learning_rate": 9.60334541661464e-06, "loss": 0.449, "step": 1299 }, { "epoch": 0.6453748138341883, "grad_norm": 0.5191637277603149, "learning_rate": 9.602216910630507e-06, "loss": 0.3997, "step": 1300 }, { "epoch": 0.6458712559986761, "grad_norm": 0.4997371733188629, "learning_rate": 9.60108686809123e-06, "loss": 0.4227, "step": 1301 }, { "epoch": 0.646367698163164, "grad_norm": 0.5253583788871765, "learning_rate": 9.599955289374097e-06, "loss": 0.4236, "step": 1302 }, { "epoch": 0.6468641403276518, "grad_norm": 0.5866472125053406, "learning_rate": 9.598822174856912e-06, "loss": 0.4221, "step": 1303 }, { "epoch": 0.6473605824921397, "grad_norm": 0.4820755422115326, "learning_rate": 9.597687524917992e-06, "loss": 0.4217, "step": 1304 }, { "epoch": 0.6478570246566275, "grad_norm": 0.6475728750228882, "learning_rate": 9.596551339936167e-06, "loss": 0.4153, "step": 1305 }, { "epoch": 0.6483534668211154, "grad_norm": 0.4641280770301819, "learning_rate": 9.595413620290774e-06, "loss": 0.4064, "step": 1306 }, { "epoch": 0.6488499089856031, "grad_norm": 0.5901975631713867, "learning_rate": 9.594274366361673e-06, "loss": 0.3915, "step": 1307 }, { "epoch": 0.649346351150091, "grad_norm": 0.5298956036567688, "learning_rate": 9.593133578529224e-06, "loss": 0.3994, "step": 1308 }, { "epoch": 0.6498427933145788, "grad_norm": 0.4894851744174957, "learning_rate": 9.59199125717431e-06, "loss": 0.3911, "step": 1309 }, { "epoch": 0.6503392354790667, "grad_norm": 0.574354350566864, "learning_rate": 9.590847402678316e-06, "loss": 0.4152, "step": 1310 }, { "epoch": 0.6508356776435545, "grad_norm": 0.5474209189414978, "learning_rate": 9.589702015423148e-06, "loss": 0.4372, "step": 1311 }, { "epoch": 0.6513321198080424, "grad_norm": 0.5834385752677917, "learning_rate": 9.588555095791219e-06, "loss": 0.4425, "step": 1312 }, { "epoch": 0.6518285619725303, "grad_norm": 0.5595725774765015, "learning_rate": 9.587406644165453e-06, "loss": 0.427, "step": 1313 }, { "epoch": 0.652325004137018, "grad_norm": 0.4620155990123749, "learning_rate": 9.586256660929287e-06, "loss": 0.4033, "step": 1314 }, { "epoch": 0.6528214463015058, "grad_norm": 0.6443761587142944, "learning_rate": 9.585105146466668e-06, "loss": 0.4065, "step": 1315 }, { "epoch": 0.6533178884659937, "grad_norm": 0.5355512499809265, "learning_rate": 9.58395210116206e-06, "loss": 0.4166, "step": 1316 }, { "epoch": 0.6538143306304816, "grad_norm": 0.5671602487564087, "learning_rate": 9.582797525400428e-06, "loss": 0.4056, "step": 1317 }, { "epoch": 0.6543107727949694, "grad_norm": 0.5023546814918518, "learning_rate": 9.581641419567256e-06, "loss": 0.4376, "step": 1318 }, { "epoch": 0.6548072149594573, "grad_norm": 0.6526280641555786, "learning_rate": 9.580483784048537e-06, "loss": 0.459, "step": 1319 }, { "epoch": 0.6553036571239451, "grad_norm": 0.5473082661628723, "learning_rate": 9.579324619230772e-06, "loss": 0.4324, "step": 1320 }, { "epoch": 0.6558000992884329, "grad_norm": 0.5337646007537842, "learning_rate": 9.578163925500978e-06, "loss": 0.4234, "step": 1321 }, { "epoch": 0.6562965414529207, "grad_norm": 0.6018436551094055, "learning_rate": 9.577001703246676e-06, "loss": 0.4285, "step": 1322 }, { "epoch": 0.6567929836174086, "grad_norm": 0.4874557554721832, "learning_rate": 9.5758379528559e-06, "loss": 0.4129, "step": 1323 }, { "epoch": 0.6572894257818964, "grad_norm": 0.6079245209693909, "learning_rate": 9.574672674717196e-06, "loss": 0.4306, "step": 1324 }, { "epoch": 0.6577858679463843, "grad_norm": 0.5197559595108032, "learning_rate": 9.57350586921962e-06, "loss": 0.4095, "step": 1325 }, { "epoch": 0.6582823101108721, "grad_norm": 0.5543325543403625, "learning_rate": 9.572337536752733e-06, "loss": 0.4547, "step": 1326 }, { "epoch": 0.6587787522753599, "grad_norm": 0.4961937367916107, "learning_rate": 9.571167677706615e-06, "loss": 0.4371, "step": 1327 }, { "epoch": 0.6592751944398477, "grad_norm": 0.4802525043487549, "learning_rate": 9.569996292471844e-06, "loss": 0.4091, "step": 1328 }, { "epoch": 0.6597716366043356, "grad_norm": 0.5564889311790466, "learning_rate": 9.568823381439518e-06, "loss": 0.444, "step": 1329 }, { "epoch": 0.6602680787688234, "grad_norm": 0.5338681936264038, "learning_rate": 9.567648945001238e-06, "loss": 0.4512, "step": 1330 }, { "epoch": 0.6607645209333113, "grad_norm": 0.5202851295471191, "learning_rate": 9.566472983549118e-06, "loss": 0.4417, "step": 1331 }, { "epoch": 0.6612609630977991, "grad_norm": 0.5006901025772095, "learning_rate": 9.565295497475777e-06, "loss": 0.4039, "step": 1332 }, { "epoch": 0.661757405262287, "grad_norm": 0.4832923710346222, "learning_rate": 9.564116487174348e-06, "loss": 0.4136, "step": 1333 }, { "epoch": 0.6622538474267747, "grad_norm": 0.5419968366622925, "learning_rate": 9.56293595303847e-06, "loss": 0.3827, "step": 1334 }, { "epoch": 0.6627502895912626, "grad_norm": 0.5236407518386841, "learning_rate": 9.561753895462292e-06, "loss": 0.4099, "step": 1335 }, { "epoch": 0.6632467317557504, "grad_norm": 0.5100114941596985, "learning_rate": 9.560570314840469e-06, "loss": 0.4174, "step": 1336 }, { "epoch": 0.6637431739202383, "grad_norm": 0.5641699433326721, "learning_rate": 9.559385211568167e-06, "loss": 0.437, "step": 1337 }, { "epoch": 0.6642396160847261, "grad_norm": 0.5240993499755859, "learning_rate": 9.558198586041062e-06, "loss": 0.4035, "step": 1338 }, { "epoch": 0.664736058249214, "grad_norm": 0.49507176876068115, "learning_rate": 9.557010438655332e-06, "loss": 0.4351, "step": 1339 }, { "epoch": 0.6652325004137019, "grad_norm": 0.5113682746887207, "learning_rate": 9.555820769807668e-06, "loss": 0.4415, "step": 1340 }, { "epoch": 0.6657289425781896, "grad_norm": 0.5623453259468079, "learning_rate": 9.554629579895272e-06, "loss": 0.4119, "step": 1341 }, { "epoch": 0.6662253847426775, "grad_norm": 0.4687720835208893, "learning_rate": 9.553436869315846e-06, "loss": 0.4437, "step": 1342 }, { "epoch": 0.6667218269071653, "grad_norm": 0.5178980231285095, "learning_rate": 9.552242638467604e-06, "loss": 0.4104, "step": 1343 }, { "epoch": 0.6672182690716532, "grad_norm": 0.527738630771637, "learning_rate": 9.55104688774927e-06, "loss": 0.405, "step": 1344 }, { "epoch": 0.667714711236141, "grad_norm": 0.48123544454574585, "learning_rate": 9.54984961756007e-06, "loss": 0.4097, "step": 1345 }, { "epoch": 0.6682111534006289, "grad_norm": 0.4117726683616638, "learning_rate": 9.548650828299742e-06, "loss": 0.4326, "step": 1346 }, { "epoch": 0.6687075955651167, "grad_norm": 0.497626930475235, "learning_rate": 9.547450520368526e-06, "loss": 0.4345, "step": 1347 }, { "epoch": 0.6692040377296045, "grad_norm": 0.5139958262443542, "learning_rate": 9.546248694167175e-06, "loss": 0.4389, "step": 1348 }, { "epoch": 0.6697004798940923, "grad_norm": 0.6015542149543762, "learning_rate": 9.545045350096944e-06, "loss": 0.428, "step": 1349 }, { "epoch": 0.6701969220585802, "grad_norm": 0.48315858840942383, "learning_rate": 9.5438404885596e-06, "loss": 0.4061, "step": 1350 }, { "epoch": 0.670693364223068, "grad_norm": 0.44489818811416626, "learning_rate": 9.54263410995741e-06, "loss": 0.4181, "step": 1351 }, { "epoch": 0.6711898063875559, "grad_norm": 0.5019353032112122, "learning_rate": 9.541426214693153e-06, "loss": 0.402, "step": 1352 }, { "epoch": 0.6716862485520437, "grad_norm": 0.48301929235458374, "learning_rate": 9.540216803170113e-06, "loss": 0.4486, "step": 1353 }, { "epoch": 0.6721826907165315, "grad_norm": 0.47551822662353516, "learning_rate": 9.539005875792077e-06, "loss": 0.4426, "step": 1354 }, { "epoch": 0.6726791328810193, "grad_norm": 0.4601283073425293, "learning_rate": 9.537793432963345e-06, "loss": 0.4288, "step": 1355 }, { "epoch": 0.6731755750455072, "grad_norm": 0.5346404314041138, "learning_rate": 9.536579475088714e-06, "loss": 0.4232, "step": 1356 }, { "epoch": 0.673672017209995, "grad_norm": 0.46925219893455505, "learning_rate": 9.535364002573495e-06, "loss": 0.4289, "step": 1357 }, { "epoch": 0.6741684593744829, "grad_norm": 0.521126925945282, "learning_rate": 9.534147015823498e-06, "loss": 0.4011, "step": 1358 }, { "epoch": 0.6746649015389707, "grad_norm": 0.5056098103523254, "learning_rate": 9.532928515245046e-06, "loss": 0.4204, "step": 1359 }, { "epoch": 0.6751613437034586, "grad_norm": 0.5012559294700623, "learning_rate": 9.531708501244958e-06, "loss": 0.4342, "step": 1360 }, { "epoch": 0.6756577858679463, "grad_norm": 0.5820482969284058, "learning_rate": 9.530486974230568e-06, "loss": 0.4319, "step": 1361 }, { "epoch": 0.6761542280324342, "grad_norm": 0.5230372548103333, "learning_rate": 9.52926393460971e-06, "loss": 0.4286, "step": 1362 }, { "epoch": 0.676650670196922, "grad_norm": 0.4263373911380768, "learning_rate": 9.528039382790722e-06, "loss": 0.4129, "step": 1363 }, { "epoch": 0.6771471123614099, "grad_norm": 0.5038771629333496, "learning_rate": 9.526813319182449e-06, "loss": 0.4297, "step": 1364 }, { "epoch": 0.6776435545258978, "grad_norm": 0.5229907035827637, "learning_rate": 9.525585744194243e-06, "loss": 0.4202, "step": 1365 }, { "epoch": 0.6781399966903856, "grad_norm": 0.4708233177661896, "learning_rate": 9.524356658235954e-06, "loss": 0.441, "step": 1366 }, { "epoch": 0.6786364388548735, "grad_norm": 0.4686647355556488, "learning_rate": 9.52312606171794e-06, "loss": 0.3887, "step": 1367 }, { "epoch": 0.6791328810193612, "grad_norm": 0.5626792907714844, "learning_rate": 9.52189395505107e-06, "loss": 0.4196, "step": 1368 }, { "epoch": 0.679629323183849, "grad_norm": 0.44956615567207336, "learning_rate": 9.520660338646702e-06, "loss": 0.418, "step": 1369 }, { "epoch": 0.6801257653483369, "grad_norm": 0.4918896555900574, "learning_rate": 9.519425212916714e-06, "loss": 0.4677, "step": 1370 }, { "epoch": 0.6806222075128248, "grad_norm": 0.5348824262619019, "learning_rate": 9.51818857827348e-06, "loss": 0.4463, "step": 1371 }, { "epoch": 0.6811186496773126, "grad_norm": 0.5237188339233398, "learning_rate": 9.516950435129875e-06, "loss": 0.4425, "step": 1372 }, { "epoch": 0.6816150918418005, "grad_norm": 0.4530795216560364, "learning_rate": 9.515710783899284e-06, "loss": 0.4061, "step": 1373 }, { "epoch": 0.6821115340062883, "grad_norm": 0.4937952160835266, "learning_rate": 9.514469624995593e-06, "loss": 0.4126, "step": 1374 }, { "epoch": 0.6826079761707761, "grad_norm": 0.4812004268169403, "learning_rate": 9.51322695883319e-06, "loss": 0.3952, "step": 1375 }, { "epoch": 0.6831044183352639, "grad_norm": 0.4351550340652466, "learning_rate": 9.51198278582697e-06, "loss": 0.3968, "step": 1376 }, { "epoch": 0.6836008604997518, "grad_norm": 0.46186211705207825, "learning_rate": 9.510737106392325e-06, "loss": 0.4042, "step": 1377 }, { "epoch": 0.6840973026642396, "grad_norm": 0.5073333382606506, "learning_rate": 9.509489920945155e-06, "loss": 0.4177, "step": 1378 }, { "epoch": 0.6845937448287275, "grad_norm": 0.500526487827301, "learning_rate": 9.508241229901862e-06, "loss": 0.4249, "step": 1379 }, { "epoch": 0.6850901869932153, "grad_norm": 0.5930198431015015, "learning_rate": 9.50699103367935e-06, "loss": 0.4254, "step": 1380 }, { "epoch": 0.6855866291577031, "grad_norm": 0.5523226261138916, "learning_rate": 9.505739332695026e-06, "loss": 0.4316, "step": 1381 }, { "epoch": 0.6860830713221909, "grad_norm": 0.5049574971199036, "learning_rate": 9.504486127366796e-06, "loss": 0.4325, "step": 1382 }, { "epoch": 0.6865795134866788, "grad_norm": 0.4855443239212036, "learning_rate": 9.503231418113073e-06, "loss": 0.4123, "step": 1383 }, { "epoch": 0.6870759556511666, "grad_norm": 0.6016560196876526, "learning_rate": 9.501975205352772e-06, "loss": 0.4123, "step": 1384 }, { "epoch": 0.6875723978156545, "grad_norm": 0.6077421307563782, "learning_rate": 9.500717489505307e-06, "loss": 0.4148, "step": 1385 }, { "epoch": 0.6880688399801423, "grad_norm": 0.45738333463668823, "learning_rate": 9.499458270990593e-06, "loss": 0.4411, "step": 1386 }, { "epoch": 0.6885652821446302, "grad_norm": 0.6852195858955383, "learning_rate": 9.498197550229054e-06, "loss": 0.4326, "step": 1387 }, { "epoch": 0.6890617243091179, "grad_norm": 0.5387908816337585, "learning_rate": 9.496935327641605e-06, "loss": 0.4193, "step": 1388 }, { "epoch": 0.6895581664736058, "grad_norm": 0.5440100431442261, "learning_rate": 9.49567160364967e-06, "loss": 0.423, "step": 1389 }, { "epoch": 0.6900546086380936, "grad_norm": 0.5788814425468445, "learning_rate": 9.494406378675173e-06, "loss": 0.4511, "step": 1390 }, { "epoch": 0.6905510508025815, "grad_norm": 0.5605000853538513, "learning_rate": 9.493139653140537e-06, "loss": 0.4233, "step": 1391 }, { "epoch": 0.6910474929670694, "grad_norm": 0.5968397855758667, "learning_rate": 9.491871427468687e-06, "loss": 0.4359, "step": 1392 }, { "epoch": 0.6915439351315572, "grad_norm": 0.6033188104629517, "learning_rate": 9.490601702083051e-06, "loss": 0.4092, "step": 1393 }, { "epoch": 0.6920403772960451, "grad_norm": 0.5288234949111938, "learning_rate": 9.489330477407554e-06, "loss": 0.4812, "step": 1394 }, { "epoch": 0.6925368194605328, "grad_norm": 0.4929066300392151, "learning_rate": 9.488057753866623e-06, "loss": 0.4146, "step": 1395 }, { "epoch": 0.6930332616250207, "grad_norm": 0.640006422996521, "learning_rate": 9.486783531885187e-06, "loss": 0.4102, "step": 1396 }, { "epoch": 0.6935297037895085, "grad_norm": 0.47678375244140625, "learning_rate": 9.485507811888673e-06, "loss": 0.3953, "step": 1397 }, { "epoch": 0.6940261459539964, "grad_norm": 0.5638694167137146, "learning_rate": 9.48423059430301e-06, "loss": 0.409, "step": 1398 }, { "epoch": 0.6945225881184842, "grad_norm": 0.6409719586372375, "learning_rate": 9.482951879554628e-06, "loss": 0.4479, "step": 1399 }, { "epoch": 0.6950190302829721, "grad_norm": 0.6293082237243652, "learning_rate": 9.481671668070452e-06, "loss": 0.4381, "step": 1400 }, { "epoch": 0.6955154724474599, "grad_norm": 0.5290820002555847, "learning_rate": 9.480389960277911e-06, "loss": 0.4244, "step": 1401 }, { "epoch": 0.6960119146119477, "grad_norm": 0.5936523675918579, "learning_rate": 9.479106756604935e-06, "loss": 0.4414, "step": 1402 }, { "epoch": 0.6965083567764355, "grad_norm": 0.5753828287124634, "learning_rate": 9.477822057479945e-06, "loss": 0.4501, "step": 1403 }, { "epoch": 0.6970047989409234, "grad_norm": 0.5422154664993286, "learning_rate": 9.476535863331873e-06, "loss": 0.4352, "step": 1404 }, { "epoch": 0.6975012411054112, "grad_norm": 0.5984291434288025, "learning_rate": 9.47524817459014e-06, "loss": 0.4051, "step": 1405 }, { "epoch": 0.6979976832698991, "grad_norm": 0.5314387083053589, "learning_rate": 9.473958991684671e-06, "loss": 0.3859, "step": 1406 }, { "epoch": 0.6984941254343869, "grad_norm": 0.5778379440307617, "learning_rate": 9.472668315045893e-06, "loss": 0.3896, "step": 1407 }, { "epoch": 0.6989905675988747, "grad_norm": 0.5710842609405518, "learning_rate": 9.471376145104723e-06, "loss": 0.4508, "step": 1408 }, { "epoch": 0.6994870097633625, "grad_norm": 0.6249328255653381, "learning_rate": 9.470082482292585e-06, "loss": 0.4299, "step": 1409 }, { "epoch": 0.6999834519278504, "grad_norm": 0.5066711902618408, "learning_rate": 9.468787327041394e-06, "loss": 0.4018, "step": 1410 }, { "epoch": 0.7004798940923382, "grad_norm": 0.5056977272033691, "learning_rate": 9.467490679783571e-06, "loss": 0.3913, "step": 1411 }, { "epoch": 0.7009763362568261, "grad_norm": 0.5577489733695984, "learning_rate": 9.46619254095203e-06, "loss": 0.3969, "step": 1412 }, { "epoch": 0.701472778421314, "grad_norm": 0.5396606922149658, "learning_rate": 9.464892910980184e-06, "loss": 0.4101, "step": 1413 }, { "epoch": 0.7019692205858018, "grad_norm": 0.5154517292976379, "learning_rate": 9.463591790301942e-06, "loss": 0.4123, "step": 1414 }, { "epoch": 0.7024656627502895, "grad_norm": 0.5582296848297119, "learning_rate": 9.462289179351716e-06, "loss": 0.4178, "step": 1415 }, { "epoch": 0.7029621049147774, "grad_norm": 0.5315210223197937, "learning_rate": 9.460985078564414e-06, "loss": 0.391, "step": 1416 }, { "epoch": 0.7034585470792653, "grad_norm": 0.5136325359344482, "learning_rate": 9.459679488375432e-06, "loss": 0.3927, "step": 1417 }, { "epoch": 0.7039549892437531, "grad_norm": 0.5777138471603394, "learning_rate": 9.45837240922068e-06, "loss": 0.4538, "step": 1418 }, { "epoch": 0.704451431408241, "grad_norm": 0.5437465906143188, "learning_rate": 9.45706384153655e-06, "loss": 0.4167, "step": 1419 }, { "epoch": 0.7049478735727288, "grad_norm": 0.5108785033226013, "learning_rate": 9.455753785759942e-06, "loss": 0.4094, "step": 1420 }, { "epoch": 0.7054443157372167, "grad_norm": 0.48284947872161865, "learning_rate": 9.454442242328246e-06, "loss": 0.4292, "step": 1421 }, { "epoch": 0.7059407579017044, "grad_norm": 0.5653537511825562, "learning_rate": 9.453129211679348e-06, "loss": 0.4278, "step": 1422 }, { "epoch": 0.7064372000661923, "grad_norm": 0.5063378214836121, "learning_rate": 9.451814694251636e-06, "loss": 0.4242, "step": 1423 }, { "epoch": 0.7069336422306801, "grad_norm": 0.5237030386924744, "learning_rate": 9.450498690483993e-06, "loss": 0.4231, "step": 1424 }, { "epoch": 0.707430084395168, "grad_norm": 0.5041419267654419, "learning_rate": 9.449181200815793e-06, "loss": 0.4433, "step": 1425 }, { "epoch": 0.7079265265596558, "grad_norm": 0.5050877332687378, "learning_rate": 9.447862225686912e-06, "loss": 0.423, "step": 1426 }, { "epoch": 0.7084229687241437, "grad_norm": 0.5025044679641724, "learning_rate": 9.446541765537723e-06, "loss": 0.4056, "step": 1427 }, { "epoch": 0.7089194108886315, "grad_norm": 0.5889648199081421, "learning_rate": 9.445219820809086e-06, "loss": 0.4108, "step": 1428 }, { "epoch": 0.7094158530531193, "grad_norm": 0.4778992235660553, "learning_rate": 9.443896391942365e-06, "loss": 0.4395, "step": 1429 }, { "epoch": 0.7099122952176071, "grad_norm": 0.5025021433830261, "learning_rate": 9.442571479379419e-06, "loss": 0.3893, "step": 1430 }, { "epoch": 0.710408737382095, "grad_norm": 0.603552520275116, "learning_rate": 9.441245083562597e-06, "loss": 0.4058, "step": 1431 }, { "epoch": 0.7109051795465828, "grad_norm": 0.5209101438522339, "learning_rate": 9.439917204934748e-06, "loss": 0.4398, "step": 1432 }, { "epoch": 0.7114016217110707, "grad_norm": 0.5636401772499084, "learning_rate": 9.438587843939216e-06, "loss": 0.4237, "step": 1433 }, { "epoch": 0.7118980638755585, "grad_norm": 0.5075924396514893, "learning_rate": 9.437257001019835e-06, "loss": 0.4099, "step": 1434 }, { "epoch": 0.7123945060400463, "grad_norm": 0.48307451605796814, "learning_rate": 9.435924676620941e-06, "loss": 0.4327, "step": 1435 }, { "epoch": 0.7128909482045341, "grad_norm": 0.4608868658542633, "learning_rate": 9.43459087118736e-06, "loss": 0.4, "step": 1436 }, { "epoch": 0.713387390369022, "grad_norm": 0.497466117143631, "learning_rate": 9.43325558516441e-06, "loss": 0.44, "step": 1437 }, { "epoch": 0.7138838325335098, "grad_norm": 0.4656241536140442, "learning_rate": 9.43191881899791e-06, "loss": 0.4229, "step": 1438 }, { "epoch": 0.7143802746979977, "grad_norm": 0.5686501264572144, "learning_rate": 9.430580573134169e-06, "loss": 0.3985, "step": 1439 }, { "epoch": 0.7148767168624856, "grad_norm": 0.5609332323074341, "learning_rate": 9.429240848019992e-06, "loss": 0.4041, "step": 1440 }, { "epoch": 0.7153731590269734, "grad_norm": 0.4730241000652313, "learning_rate": 9.427899644102676e-06, "loss": 0.4396, "step": 1441 }, { "epoch": 0.7158696011914611, "grad_norm": 0.4752410054206848, "learning_rate": 9.426556961830013e-06, "loss": 0.4261, "step": 1442 }, { "epoch": 0.716366043355949, "grad_norm": 0.5577260851860046, "learning_rate": 9.425212801650286e-06, "loss": 0.41, "step": 1443 }, { "epoch": 0.7168624855204369, "grad_norm": 0.499697744846344, "learning_rate": 9.423867164012276e-06, "loss": 0.4016, "step": 1444 }, { "epoch": 0.7173589276849247, "grad_norm": 0.4635988175868988, "learning_rate": 9.422520049365254e-06, "loss": 0.4144, "step": 1445 }, { "epoch": 0.7178553698494126, "grad_norm": 0.5293720364570618, "learning_rate": 9.421171458158986e-06, "loss": 0.3842, "step": 1446 }, { "epoch": 0.7183518120139004, "grad_norm": 0.6009488701820374, "learning_rate": 9.419821390843728e-06, "loss": 0.4134, "step": 1447 }, { "epoch": 0.7188482541783883, "grad_norm": 0.5835692882537842, "learning_rate": 9.41846984787023e-06, "loss": 0.364, "step": 1448 }, { "epoch": 0.719344696342876, "grad_norm": 0.6067633628845215, "learning_rate": 9.41711682968974e-06, "loss": 0.4204, "step": 1449 }, { "epoch": 0.7198411385073639, "grad_norm": 0.705014705657959, "learning_rate": 9.41576233675399e-06, "loss": 0.434, "step": 1450 }, { "epoch": 0.7203375806718517, "grad_norm": 0.514858603477478, "learning_rate": 9.414406369515208e-06, "loss": 0.391, "step": 1451 }, { "epoch": 0.7208340228363396, "grad_norm": 0.5548005104064941, "learning_rate": 9.413048928426118e-06, "loss": 0.4113, "step": 1452 }, { "epoch": 0.7213304650008274, "grad_norm": 0.612982988357544, "learning_rate": 9.411690013939932e-06, "loss": 0.4024, "step": 1453 }, { "epoch": 0.7218269071653153, "grad_norm": 0.5554224848747253, "learning_rate": 9.41032962651035e-06, "loss": 0.451, "step": 1454 }, { "epoch": 0.7223233493298031, "grad_norm": 0.5587801933288574, "learning_rate": 9.408967766591574e-06, "loss": 0.4213, "step": 1455 }, { "epoch": 0.7228197914942909, "grad_norm": 0.6164798736572266, "learning_rate": 9.40760443463829e-06, "loss": 0.4311, "step": 1456 }, { "epoch": 0.7233162336587787, "grad_norm": 0.4971064329147339, "learning_rate": 9.406239631105675e-06, "loss": 0.4413, "step": 1457 }, { "epoch": 0.7238126758232666, "grad_norm": 0.49876508116722107, "learning_rate": 9.404873356449406e-06, "loss": 0.4153, "step": 1458 }, { "epoch": 0.7243091179877544, "grad_norm": 0.5509892106056213, "learning_rate": 9.403505611125638e-06, "loss": 0.3855, "step": 1459 }, { "epoch": 0.7248055601522423, "grad_norm": 0.4648885428905487, "learning_rate": 9.402136395591028e-06, "loss": 0.4074, "step": 1460 }, { "epoch": 0.7253020023167301, "grad_norm": 0.5607953667640686, "learning_rate": 9.40076571030272e-06, "loss": 0.3769, "step": 1461 }, { "epoch": 0.7257984444812179, "grad_norm": 0.539428174495697, "learning_rate": 9.399393555718346e-06, "loss": 0.4565, "step": 1462 }, { "epoch": 0.7262948866457057, "grad_norm": 0.48918208479881287, "learning_rate": 9.398019932296033e-06, "loss": 0.4197, "step": 1463 }, { "epoch": 0.7267913288101936, "grad_norm": 0.5396202802658081, "learning_rate": 9.396644840494396e-06, "loss": 0.4046, "step": 1464 }, { "epoch": 0.7272877709746814, "grad_norm": 0.5213853716850281, "learning_rate": 9.395268280772542e-06, "loss": 0.4202, "step": 1465 }, { "epoch": 0.7277842131391693, "grad_norm": 0.5411091446876526, "learning_rate": 9.393890253590064e-06, "loss": 0.3981, "step": 1466 }, { "epoch": 0.7282806553036572, "grad_norm": 0.5064947009086609, "learning_rate": 9.392510759407053e-06, "loss": 0.4161, "step": 1467 }, { "epoch": 0.728777097468145, "grad_norm": 0.5004937052726746, "learning_rate": 9.391129798684078e-06, "loss": 0.4136, "step": 1468 }, { "epoch": 0.7292735396326328, "grad_norm": 0.520924985408783, "learning_rate": 9.389747371882207e-06, "loss": 0.414, "step": 1469 }, { "epoch": 0.7297699817971206, "grad_norm": 0.43898993730545044, "learning_rate": 9.388363479462997e-06, "loss": 0.3836, "step": 1470 }, { "epoch": 0.7302664239616085, "grad_norm": 0.5177914500236511, "learning_rate": 9.38697812188849e-06, "loss": 0.4119, "step": 1471 }, { "epoch": 0.7307628661260963, "grad_norm": 0.5588692426681519, "learning_rate": 9.38559129962122e-06, "loss": 0.4209, "step": 1472 }, { "epoch": 0.7312593082905842, "grad_norm": 0.483027845621109, "learning_rate": 9.384203013124209e-06, "loss": 0.4245, "step": 1473 }, { "epoch": 0.731755750455072, "grad_norm": 0.5845569968223572, "learning_rate": 9.382813262860968e-06, "loss": 0.438, "step": 1474 }, { "epoch": 0.7322521926195599, "grad_norm": 0.6295353174209595, "learning_rate": 9.381422049295496e-06, "loss": 0.4176, "step": 1475 }, { "epoch": 0.7327486347840476, "grad_norm": 0.4728439450263977, "learning_rate": 9.380029372892282e-06, "loss": 0.4183, "step": 1476 }, { "epoch": 0.7332450769485355, "grad_norm": 0.4644218683242798, "learning_rate": 9.378635234116303e-06, "loss": 0.4365, "step": 1477 }, { "epoch": 0.7337415191130233, "grad_norm": 0.5565840005874634, "learning_rate": 9.377239633433026e-06, "loss": 0.4166, "step": 1478 }, { "epoch": 0.7342379612775112, "grad_norm": 0.4670703113079071, "learning_rate": 9.3758425713084e-06, "loss": 0.4327, "step": 1479 }, { "epoch": 0.734734403441999, "grad_norm": 0.5267097353935242, "learning_rate": 9.374444048208868e-06, "loss": 0.3765, "step": 1480 }, { "epoch": 0.7352308456064869, "grad_norm": 0.4699386656284332, "learning_rate": 9.37304406460136e-06, "loss": 0.4239, "step": 1481 }, { "epoch": 0.7357272877709747, "grad_norm": 0.5009515881538391, "learning_rate": 9.371642620953293e-06, "loss": 0.4052, "step": 1482 }, { "epoch": 0.7362237299354625, "grad_norm": 0.49456244707107544, "learning_rate": 9.370239717732567e-06, "loss": 0.4534, "step": 1483 }, { "epoch": 0.7367201720999503, "grad_norm": 0.5477268099784851, "learning_rate": 9.368835355407577e-06, "loss": 0.4447, "step": 1484 }, { "epoch": 0.7372166142644382, "grad_norm": 0.4193306267261505, "learning_rate": 9.367429534447199e-06, "loss": 0.411, "step": 1485 }, { "epoch": 0.737713056428926, "grad_norm": 0.5160143971443176, "learning_rate": 9.3660222553208e-06, "loss": 0.4561, "step": 1486 }, { "epoch": 0.7382094985934139, "grad_norm": 0.5491133332252502, "learning_rate": 9.364613518498233e-06, "loss": 0.3991, "step": 1487 }, { "epoch": 0.7387059407579017, "grad_norm": 0.45887160301208496, "learning_rate": 9.363203324449837e-06, "loss": 0.3931, "step": 1488 }, { "epoch": 0.7392023829223895, "grad_norm": 0.5202521681785583, "learning_rate": 9.361791673646434e-06, "loss": 0.4196, "step": 1489 }, { "epoch": 0.7396988250868773, "grad_norm": 0.4915379583835602, "learning_rate": 9.360378566559338e-06, "loss": 0.418, "step": 1490 }, { "epoch": 0.7401952672513652, "grad_norm": 0.474763959646225, "learning_rate": 9.358964003660347e-06, "loss": 0.4496, "step": 1491 }, { "epoch": 0.740691709415853, "grad_norm": 0.5465794205665588, "learning_rate": 9.357547985421746e-06, "loss": 0.3932, "step": 1492 }, { "epoch": 0.7411881515803409, "grad_norm": 0.4460706412792206, "learning_rate": 9.356130512316306e-06, "loss": 0.4076, "step": 1493 }, { "epoch": 0.7416845937448288, "grad_norm": 0.458501935005188, "learning_rate": 9.354711584817278e-06, "loss": 0.4122, "step": 1494 }, { "epoch": 0.7421810359093166, "grad_norm": 0.42300957441329956, "learning_rate": 9.353291203398409e-06, "loss": 0.3836, "step": 1495 }, { "epoch": 0.7426774780738044, "grad_norm": 0.5613730549812317, "learning_rate": 9.351869368533921e-06, "loss": 0.416, "step": 1496 }, { "epoch": 0.7431739202382922, "grad_norm": 0.49259671568870544, "learning_rate": 9.350446080698528e-06, "loss": 0.4326, "step": 1497 }, { "epoch": 0.7436703624027801, "grad_norm": 0.47412481904029846, "learning_rate": 9.349021340367429e-06, "loss": 0.4098, "step": 1498 }, { "epoch": 0.7441668045672679, "grad_norm": 0.45613744854927063, "learning_rate": 9.347595148016304e-06, "loss": 0.4309, "step": 1499 }, { "epoch": 0.7446632467317558, "grad_norm": 0.5208142995834351, "learning_rate": 9.34616750412132e-06, "loss": 0.4183, "step": 1500 }, { "epoch": 0.7451596888962436, "grad_norm": 0.48829638957977295, "learning_rate": 9.344738409159126e-06, "loss": 0.4251, "step": 1501 }, { "epoch": 0.7456561310607315, "grad_norm": 0.47687965631484985, "learning_rate": 9.343307863606865e-06, "loss": 0.4169, "step": 1502 }, { "epoch": 0.7461525732252192, "grad_norm": 0.5321457982063293, "learning_rate": 9.34187586794215e-06, "loss": 0.4462, "step": 1503 }, { "epoch": 0.7466490153897071, "grad_norm": 0.47465062141418457, "learning_rate": 9.340442422643087e-06, "loss": 0.4415, "step": 1504 }, { "epoch": 0.7471454575541949, "grad_norm": 0.47667887806892395, "learning_rate": 9.33900752818827e-06, "loss": 0.4263, "step": 1505 }, { "epoch": 0.7476418997186828, "grad_norm": 0.5445138216018677, "learning_rate": 9.337571185056764e-06, "loss": 0.419, "step": 1506 }, { "epoch": 0.7481383418831706, "grad_norm": 0.49852269887924194, "learning_rate": 9.336133393728128e-06, "loss": 0.3991, "step": 1507 }, { "epoch": 0.7486347840476585, "grad_norm": 0.5332211852073669, "learning_rate": 9.334694154682403e-06, "loss": 0.4263, "step": 1508 }, { "epoch": 0.7491312262121463, "grad_norm": 0.5101137161254883, "learning_rate": 9.33325346840011e-06, "loss": 0.4375, "step": 1509 }, { "epoch": 0.7496276683766341, "grad_norm": 0.4968157708644867, "learning_rate": 9.331811335362256e-06, "loss": 0.437, "step": 1510 }, { "epoch": 0.7501241105411219, "grad_norm": 0.4878857135772705, "learning_rate": 9.330367756050326e-06, "loss": 0.4259, "step": 1511 }, { "epoch": 0.7506205527056098, "grad_norm": 0.5118778347969055, "learning_rate": 9.328922730946297e-06, "loss": 0.4329, "step": 1512 }, { "epoch": 0.7511169948700976, "grad_norm": 0.48797377943992615, "learning_rate": 9.327476260532623e-06, "loss": 0.4199, "step": 1513 }, { "epoch": 0.7516134370345855, "grad_norm": 0.4367382228374481, "learning_rate": 9.326028345292237e-06, "loss": 0.402, "step": 1514 }, { "epoch": 0.7521098791990734, "grad_norm": 0.5002572536468506, "learning_rate": 9.324578985708563e-06, "loss": 0.4068, "step": 1515 }, { "epoch": 0.7526063213635611, "grad_norm": 0.48751524090766907, "learning_rate": 9.323128182265502e-06, "loss": 0.4359, "step": 1516 }, { "epoch": 0.753102763528049, "grad_norm": 0.4829558730125427, "learning_rate": 9.321675935447436e-06, "loss": 0.4116, "step": 1517 }, { "epoch": 0.7535992056925368, "grad_norm": 0.4841277301311493, "learning_rate": 9.320222245739233e-06, "loss": 0.4694, "step": 1518 }, { "epoch": 0.7540956478570247, "grad_norm": 0.5027683973312378, "learning_rate": 9.318767113626237e-06, "loss": 0.4089, "step": 1519 }, { "epoch": 0.7545920900215125, "grad_norm": 0.40499162673950195, "learning_rate": 9.317310539594282e-06, "loss": 0.4241, "step": 1520 }, { "epoch": 0.7550885321860004, "grad_norm": 0.46626242995262146, "learning_rate": 9.315852524129673e-06, "loss": 0.4395, "step": 1521 }, { "epoch": 0.7555849743504882, "grad_norm": 0.4088857173919678, "learning_rate": 9.314393067719208e-06, "loss": 0.4339, "step": 1522 }, { "epoch": 0.756081416514976, "grad_norm": 0.5028079152107239, "learning_rate": 9.312932170850153e-06, "loss": 0.4292, "step": 1523 }, { "epoch": 0.7565778586794638, "grad_norm": 0.5122424960136414, "learning_rate": 9.311469834010267e-06, "loss": 0.4328, "step": 1524 }, { "epoch": 0.7570743008439517, "grad_norm": 0.45571795105934143, "learning_rate": 9.310006057687782e-06, "loss": 0.3998, "step": 1525 }, { "epoch": 0.7575707430084395, "grad_norm": 0.5284724235534668, "learning_rate": 9.308540842371415e-06, "loss": 0.4045, "step": 1526 }, { "epoch": 0.7580671851729274, "grad_norm": 0.45405980944633484, "learning_rate": 9.30707418855036e-06, "loss": 0.4016, "step": 1527 }, { "epoch": 0.7585636273374152, "grad_norm": 0.4845850169658661, "learning_rate": 9.305606096714292e-06, "loss": 0.3935, "step": 1528 }, { "epoch": 0.7590600695019031, "grad_norm": 0.5157355070114136, "learning_rate": 9.304136567353371e-06, "loss": 0.4334, "step": 1529 }, { "epoch": 0.7595565116663908, "grad_norm": 0.5088858604431152, "learning_rate": 9.302665600958227e-06, "loss": 0.4429, "step": 1530 }, { "epoch": 0.7600529538308787, "grad_norm": 0.490999698638916, "learning_rate": 9.30119319801998e-06, "loss": 0.411, "step": 1531 }, { "epoch": 0.7605493959953665, "grad_norm": 0.4949207007884979, "learning_rate": 9.299719359030224e-06, "loss": 0.4356, "step": 1532 }, { "epoch": 0.7610458381598544, "grad_norm": 0.5446283221244812, "learning_rate": 9.298244084481034e-06, "loss": 0.4329, "step": 1533 }, { "epoch": 0.7615422803243422, "grad_norm": 0.4463607966899872, "learning_rate": 9.296767374864963e-06, "loss": 0.4436, "step": 1534 }, { "epoch": 0.7620387224888301, "grad_norm": 0.43548673391342163, "learning_rate": 9.295289230675046e-06, "loss": 0.4155, "step": 1535 }, { "epoch": 0.7625351646533179, "grad_norm": 0.5531527996063232, "learning_rate": 9.293809652404795e-06, "loss": 0.4175, "step": 1536 }, { "epoch": 0.7630316068178057, "grad_norm": 0.4397556781768799, "learning_rate": 9.292328640548201e-06, "loss": 0.4073, "step": 1537 }, { "epoch": 0.7635280489822935, "grad_norm": 0.5213899612426758, "learning_rate": 9.290846195599732e-06, "loss": 0.4262, "step": 1538 }, { "epoch": 0.7640244911467814, "grad_norm": 0.5237262845039368, "learning_rate": 9.289362318054337e-06, "loss": 0.4382, "step": 1539 }, { "epoch": 0.7645209333112692, "grad_norm": 0.49963489174842834, "learning_rate": 9.28787700840744e-06, "loss": 0.4151, "step": 1540 }, { "epoch": 0.7650173754757571, "grad_norm": 0.48897674679756165, "learning_rate": 9.286390267154951e-06, "loss": 0.4147, "step": 1541 }, { "epoch": 0.765513817640245, "grad_norm": 0.5007455945014954, "learning_rate": 9.284902094793248e-06, "loss": 0.4508, "step": 1542 }, { "epoch": 0.7660102598047327, "grad_norm": 0.4666837155818939, "learning_rate": 9.283412491819194e-06, "loss": 0.4228, "step": 1543 }, { "epoch": 0.7665067019692205, "grad_norm": 0.5989648103713989, "learning_rate": 9.281921458730126e-06, "loss": 0.392, "step": 1544 }, { "epoch": 0.7670031441337084, "grad_norm": 0.508074939250946, "learning_rate": 9.280428996023857e-06, "loss": 0.4307, "step": 1545 }, { "epoch": 0.7674995862981963, "grad_norm": 0.45864689350128174, "learning_rate": 9.278935104198682e-06, "loss": 0.4165, "step": 1546 }, { "epoch": 0.7679960284626841, "grad_norm": 0.5159915089607239, "learning_rate": 9.277439783753373e-06, "loss": 0.3845, "step": 1547 }, { "epoch": 0.768492470627172, "grad_norm": 0.5260454416275024, "learning_rate": 9.275943035187173e-06, "loss": 0.4418, "step": 1548 }, { "epoch": 0.7689889127916598, "grad_norm": 0.5964154601097107, "learning_rate": 9.274444858999808e-06, "loss": 0.4012, "step": 1549 }, { "epoch": 0.7694853549561476, "grad_norm": 0.6422842741012573, "learning_rate": 9.272945255691476e-06, "loss": 0.4164, "step": 1550 }, { "epoch": 0.7699817971206354, "grad_norm": 0.546082079410553, "learning_rate": 9.271444225762857e-06, "loss": 0.4339, "step": 1551 }, { "epoch": 0.7704782392851233, "grad_norm": 0.5623235702514648, "learning_rate": 9.269941769715102e-06, "loss": 0.4344, "step": 1552 }, { "epoch": 0.7709746814496111, "grad_norm": 0.7164422273635864, "learning_rate": 9.268437888049839e-06, "loss": 0.4065, "step": 1553 }, { "epoch": 0.771471123614099, "grad_norm": 0.49463602900505066, "learning_rate": 9.266932581269177e-06, "loss": 0.3723, "step": 1554 }, { "epoch": 0.7719675657785868, "grad_norm": 0.5398067235946655, "learning_rate": 9.265425849875696e-06, "loss": 0.4049, "step": 1555 }, { "epoch": 0.7724640079430747, "grad_norm": 0.5658251643180847, "learning_rate": 9.26391769437245e-06, "loss": 0.4193, "step": 1556 }, { "epoch": 0.7729604501075624, "grad_norm": 0.543198823928833, "learning_rate": 9.262408115262971e-06, "loss": 0.3975, "step": 1557 }, { "epoch": 0.7734568922720503, "grad_norm": 0.43356168270111084, "learning_rate": 9.26089711305127e-06, "loss": 0.3932, "step": 1558 }, { "epoch": 0.7739533344365381, "grad_norm": 0.5520955920219421, "learning_rate": 9.259384688241828e-06, "loss": 0.4042, "step": 1559 }, { "epoch": 0.774449776601026, "grad_norm": 0.5629108548164368, "learning_rate": 9.257870841339601e-06, "loss": 0.4024, "step": 1560 }, { "epoch": 0.7749462187655138, "grad_norm": 0.4845888316631317, "learning_rate": 9.256355572850024e-06, "loss": 0.4, "step": 1561 }, { "epoch": 0.7754426609300017, "grad_norm": 0.4695316255092621, "learning_rate": 9.254838883279002e-06, "loss": 0.3901, "step": 1562 }, { "epoch": 0.7759391030944895, "grad_norm": 0.5498716235160828, "learning_rate": 9.253320773132917e-06, "loss": 0.3938, "step": 1563 }, { "epoch": 0.7764355452589773, "grad_norm": 0.5578382015228271, "learning_rate": 9.251801242918623e-06, "loss": 0.4312, "step": 1564 }, { "epoch": 0.7769319874234651, "grad_norm": 0.48458924889564514, "learning_rate": 9.250280293143455e-06, "loss": 0.4214, "step": 1565 }, { "epoch": 0.777428429587953, "grad_norm": 0.544766366481781, "learning_rate": 9.248757924315211e-06, "loss": 0.4039, "step": 1566 }, { "epoch": 0.7779248717524408, "grad_norm": 0.4356980323791504, "learning_rate": 9.24723413694217e-06, "loss": 0.3665, "step": 1567 }, { "epoch": 0.7784213139169287, "grad_norm": 0.5239348411560059, "learning_rate": 9.245708931533087e-06, "loss": 0.4103, "step": 1568 }, { "epoch": 0.7789177560814166, "grad_norm": 0.5110220313072205, "learning_rate": 9.24418230859718e-06, "loss": 0.4248, "step": 1569 }, { "epoch": 0.7794141982459043, "grad_norm": 0.4991197884082794, "learning_rate": 9.242654268644153e-06, "loss": 0.4096, "step": 1570 }, { "epoch": 0.7799106404103922, "grad_norm": 0.5226978659629822, "learning_rate": 9.241124812184176e-06, "loss": 0.4175, "step": 1571 }, { "epoch": 0.78040708257488, "grad_norm": 0.5207801461219788, "learning_rate": 9.239593939727889e-06, "loss": 0.4164, "step": 1572 }, { "epoch": 0.7809035247393679, "grad_norm": 0.5068551301956177, "learning_rate": 9.238061651786414e-06, "loss": 0.4037, "step": 1573 }, { "epoch": 0.7813999669038557, "grad_norm": 0.5469658970832825, "learning_rate": 9.236527948871335e-06, "loss": 0.4457, "step": 1574 }, { "epoch": 0.7818964090683436, "grad_norm": 0.46739548444747925, "learning_rate": 9.234992831494718e-06, "loss": 0.4208, "step": 1575 }, { "epoch": 0.7823928512328314, "grad_norm": 0.5291287302970886, "learning_rate": 9.233456300169093e-06, "loss": 0.4313, "step": 1576 }, { "epoch": 0.7828892933973192, "grad_norm": 0.47122180461883545, "learning_rate": 9.23191835540747e-06, "loss": 0.4077, "step": 1577 }, { "epoch": 0.783385735561807, "grad_norm": 0.5210561156272888, "learning_rate": 9.230378997723326e-06, "loss": 0.4404, "step": 1578 }, { "epoch": 0.7838821777262949, "grad_norm": 0.6034771800041199, "learning_rate": 9.228838227630609e-06, "loss": 0.4447, "step": 1579 }, { "epoch": 0.7843786198907827, "grad_norm": 0.46679237484931946, "learning_rate": 9.22729604564374e-06, "loss": 0.4054, "step": 1580 }, { "epoch": 0.7848750620552706, "grad_norm": 0.6006196141242981, "learning_rate": 9.225752452277617e-06, "loss": 0.4007, "step": 1581 }, { "epoch": 0.7853715042197584, "grad_norm": 0.6089896559715271, "learning_rate": 9.224207448047594e-06, "loss": 0.4088, "step": 1582 }, { "epoch": 0.7858679463842463, "grad_norm": 0.5658348202705383, "learning_rate": 9.222661033469517e-06, "loss": 0.4223, "step": 1583 }, { "epoch": 0.786364388548734, "grad_norm": 0.4446297287940979, "learning_rate": 9.221113209059684e-06, "loss": 0.3875, "step": 1584 }, { "epoch": 0.7868608307132219, "grad_norm": 0.557367742061615, "learning_rate": 9.219563975334875e-06, "loss": 0.4155, "step": 1585 }, { "epoch": 0.7873572728777097, "grad_norm": 0.555645227432251, "learning_rate": 9.218013332812334e-06, "loss": 0.4294, "step": 1586 }, { "epoch": 0.7878537150421976, "grad_norm": 0.5195057392120361, "learning_rate": 9.216461282009783e-06, "loss": 0.4093, "step": 1587 }, { "epoch": 0.7883501572066854, "grad_norm": 0.5364248752593994, "learning_rate": 9.214907823445405e-06, "loss": 0.3799, "step": 1588 }, { "epoch": 0.7888465993711733, "grad_norm": 0.631740152835846, "learning_rate": 9.213352957637862e-06, "loss": 0.4164, "step": 1589 }, { "epoch": 0.7893430415356611, "grad_norm": 0.5231354832649231, "learning_rate": 9.211796685106275e-06, "loss": 0.4026, "step": 1590 }, { "epoch": 0.7898394837001489, "grad_norm": 0.4969004690647125, "learning_rate": 9.210239006370249e-06, "loss": 0.4083, "step": 1591 }, { "epoch": 0.7903359258646367, "grad_norm": 0.564734935760498, "learning_rate": 9.208679921949845e-06, "loss": 0.4225, "step": 1592 }, { "epoch": 0.7908323680291246, "grad_norm": 0.4945008456707001, "learning_rate": 9.2071194323656e-06, "loss": 0.4517, "step": 1593 }, { "epoch": 0.7913288101936125, "grad_norm": 0.48380568623542786, "learning_rate": 9.205557538138522e-06, "loss": 0.3966, "step": 1594 }, { "epoch": 0.7918252523581003, "grad_norm": 0.5383281111717224, "learning_rate": 9.203994239790081e-06, "loss": 0.4197, "step": 1595 }, { "epoch": 0.7923216945225882, "grad_norm": 0.5684495568275452, "learning_rate": 9.202429537842221e-06, "loss": 0.4109, "step": 1596 }, { "epoch": 0.7928181366870759, "grad_norm": 0.4798033833503723, "learning_rate": 9.200863432817355e-06, "loss": 0.4084, "step": 1597 }, { "epoch": 0.7933145788515638, "grad_norm": 0.47056132555007935, "learning_rate": 9.199295925238362e-06, "loss": 0.4198, "step": 1598 }, { "epoch": 0.7938110210160516, "grad_norm": 0.49572545289993286, "learning_rate": 9.19772701562859e-06, "loss": 0.3897, "step": 1599 }, { "epoch": 0.7943074631805395, "grad_norm": 0.4764513373374939, "learning_rate": 9.196156704511856e-06, "loss": 0.3857, "step": 1600 }, { "epoch": 0.7948039053450273, "grad_norm": 0.5549061298370361, "learning_rate": 9.194584992412442e-06, "loss": 0.4143, "step": 1601 }, { "epoch": 0.7953003475095152, "grad_norm": 0.5622666478157043, "learning_rate": 9.193011879855103e-06, "loss": 0.4321, "step": 1602 }, { "epoch": 0.795796789674003, "grad_norm": 0.5432247519493103, "learning_rate": 9.191437367365056e-06, "loss": 0.4026, "step": 1603 }, { "epoch": 0.7962932318384908, "grad_norm": 0.5699751377105713, "learning_rate": 9.18986145546799e-06, "loss": 0.4075, "step": 1604 }, { "epoch": 0.7967896740029786, "grad_norm": 0.49811556935310364, "learning_rate": 9.188284144690057e-06, "loss": 0.4037, "step": 1605 }, { "epoch": 0.7972861161674665, "grad_norm": 0.558072566986084, "learning_rate": 9.18670543555788e-06, "loss": 0.3975, "step": 1606 }, { "epoch": 0.7977825583319543, "grad_norm": 0.5388950705528259, "learning_rate": 9.185125328598547e-06, "loss": 0.4321, "step": 1607 }, { "epoch": 0.7982790004964422, "grad_norm": 0.4709283411502838, "learning_rate": 9.183543824339612e-06, "loss": 0.4229, "step": 1608 }, { "epoch": 0.79877544266093, "grad_norm": 0.545525074005127, "learning_rate": 9.181960923309094e-06, "loss": 0.4141, "step": 1609 }, { "epoch": 0.7992718848254179, "grad_norm": 0.5342137217521667, "learning_rate": 9.180376626035486e-06, "loss": 0.4556, "step": 1610 }, { "epoch": 0.7997683269899056, "grad_norm": 0.5854952335357666, "learning_rate": 9.178790933047739e-06, "loss": 0.4393, "step": 1611 }, { "epoch": 0.8002647691543935, "grad_norm": 0.47349703311920166, "learning_rate": 9.17720384487527e-06, "loss": 0.4348, "step": 1612 }, { "epoch": 0.8007612113188813, "grad_norm": 0.5156567096710205, "learning_rate": 9.175615362047969e-06, "loss": 0.4128, "step": 1613 }, { "epoch": 0.8012576534833692, "grad_norm": 0.5904585123062134, "learning_rate": 9.174025485096188e-06, "loss": 0.4111, "step": 1614 }, { "epoch": 0.801754095647857, "grad_norm": 0.4797278046607971, "learning_rate": 9.172434214550739e-06, "loss": 0.3936, "step": 1615 }, { "epoch": 0.8022505378123449, "grad_norm": 0.5590804219245911, "learning_rate": 9.170841550942905e-06, "loss": 0.3931, "step": 1616 }, { "epoch": 0.8027469799768328, "grad_norm": 0.525582492351532, "learning_rate": 9.169247494804436e-06, "loss": 0.3868, "step": 1617 }, { "epoch": 0.8032434221413205, "grad_norm": 0.6458089351654053, "learning_rate": 9.167652046667542e-06, "loss": 0.4052, "step": 1618 }, { "epoch": 0.8037398643058083, "grad_norm": 0.5126916170120239, "learning_rate": 9.166055207064899e-06, "loss": 0.4384, "step": 1619 }, { "epoch": 0.8042363064702962, "grad_norm": 0.5903427004814148, "learning_rate": 9.16445697652965e-06, "loss": 0.4238, "step": 1620 }, { "epoch": 0.8047327486347841, "grad_norm": 0.5480103492736816, "learning_rate": 9.162857355595401e-06, "loss": 0.3924, "step": 1621 }, { "epoch": 0.8052291907992719, "grad_norm": 0.4722171723842621, "learning_rate": 9.161256344796221e-06, "loss": 0.4307, "step": 1622 }, { "epoch": 0.8057256329637598, "grad_norm": 0.5784984827041626, "learning_rate": 9.159653944666643e-06, "loss": 0.4204, "step": 1623 }, { "epoch": 0.8062220751282475, "grad_norm": 0.44169947504997253, "learning_rate": 9.158050155741667e-06, "loss": 0.418, "step": 1624 }, { "epoch": 0.8067185172927354, "grad_norm": 0.4635929763317108, "learning_rate": 9.156444978556753e-06, "loss": 0.4132, "step": 1625 }, { "epoch": 0.8072149594572232, "grad_norm": 0.45897865295410156, "learning_rate": 9.154838413647828e-06, "loss": 0.3977, "step": 1626 }, { "epoch": 0.8077114016217111, "grad_norm": 0.48335617780685425, "learning_rate": 9.153230461551276e-06, "loss": 0.4302, "step": 1627 }, { "epoch": 0.8082078437861989, "grad_norm": 0.46106842160224915, "learning_rate": 9.151621122803954e-06, "loss": 0.4101, "step": 1628 }, { "epoch": 0.8087042859506868, "grad_norm": 0.4519864320755005, "learning_rate": 9.150010397943175e-06, "loss": 0.3836, "step": 1629 }, { "epoch": 0.8092007281151746, "grad_norm": 0.48613545298576355, "learning_rate": 9.148398287506713e-06, "loss": 0.3552, "step": 1630 }, { "epoch": 0.8096971702796624, "grad_norm": 0.4913788437843323, "learning_rate": 9.14678479203281e-06, "loss": 0.4381, "step": 1631 }, { "epoch": 0.8101936124441502, "grad_norm": 0.5207414031028748, "learning_rate": 9.145169912060168e-06, "loss": 0.4305, "step": 1632 }, { "epoch": 0.8106900546086381, "grad_norm": 0.4831680357456207, "learning_rate": 9.143553648127954e-06, "loss": 0.4284, "step": 1633 }, { "epoch": 0.8111864967731259, "grad_norm": 0.5241771340370178, "learning_rate": 9.14193600077579e-06, "loss": 0.4262, "step": 1634 }, { "epoch": 0.8116829389376138, "grad_norm": 0.4733656048774719, "learning_rate": 9.140316970543768e-06, "loss": 0.4148, "step": 1635 }, { "epoch": 0.8121793811021016, "grad_norm": 0.4878460764884949, "learning_rate": 9.138696557972437e-06, "loss": 0.4174, "step": 1636 }, { "epoch": 0.8126758232665895, "grad_norm": 0.5113739371299744, "learning_rate": 9.137074763602809e-06, "loss": 0.4629, "step": 1637 }, { "epoch": 0.8131722654310772, "grad_norm": 0.45193958282470703, "learning_rate": 9.135451587976357e-06, "loss": 0.4058, "step": 1638 }, { "epoch": 0.8136687075955651, "grad_norm": 0.5110943913459778, "learning_rate": 9.133827031635015e-06, "loss": 0.3994, "step": 1639 }, { "epoch": 0.8141651497600529, "grad_norm": 0.5947158932685852, "learning_rate": 9.132201095121178e-06, "loss": 0.4126, "step": 1640 }, { "epoch": 0.8146615919245408, "grad_norm": 0.4667949378490448, "learning_rate": 9.130573778977702e-06, "loss": 0.4197, "step": 1641 }, { "epoch": 0.8151580340890286, "grad_norm": 0.6008071899414062, "learning_rate": 9.128945083747906e-06, "loss": 0.4073, "step": 1642 }, { "epoch": 0.8156544762535165, "grad_norm": 0.5142635107040405, "learning_rate": 9.127315009975564e-06, "loss": 0.4072, "step": 1643 }, { "epoch": 0.8161509184180044, "grad_norm": 0.4505488872528076, "learning_rate": 9.125683558204914e-06, "loss": 0.4226, "step": 1644 }, { "epoch": 0.8166473605824921, "grad_norm": 0.49643075466156006, "learning_rate": 9.124050728980652e-06, "loss": 0.4335, "step": 1645 }, { "epoch": 0.81714380274698, "grad_norm": 0.47576823830604553, "learning_rate": 9.122416522847939e-06, "loss": 0.4092, "step": 1646 }, { "epoch": 0.8176402449114678, "grad_norm": 0.434551477432251, "learning_rate": 9.12078094035239e-06, "loss": 0.4066, "step": 1647 }, { "epoch": 0.8181366870759557, "grad_norm": 0.5035120248794556, "learning_rate": 9.119143982040082e-06, "loss": 0.4142, "step": 1648 }, { "epoch": 0.8186331292404435, "grad_norm": 0.4838060140609741, "learning_rate": 9.117505648457549e-06, "loss": 0.4337, "step": 1649 }, { "epoch": 0.8191295714049314, "grad_norm": 0.5131255388259888, "learning_rate": 9.115865940151788e-06, "loss": 0.4236, "step": 1650 }, { "epoch": 0.8196260135694191, "grad_norm": 0.5540606379508972, "learning_rate": 9.114224857670255e-06, "loss": 0.3927, "step": 1651 }, { "epoch": 0.820122455733907, "grad_norm": 0.4727320671081543, "learning_rate": 9.112582401560858e-06, "loss": 0.4237, "step": 1652 }, { "epoch": 0.8206188978983948, "grad_norm": 0.4459339678287506, "learning_rate": 9.110938572371972e-06, "loss": 0.4137, "step": 1653 }, { "epoch": 0.8211153400628827, "grad_norm": 0.5326864719390869, "learning_rate": 9.109293370652426e-06, "loss": 0.4221, "step": 1654 }, { "epoch": 0.8216117822273705, "grad_norm": 0.4853644371032715, "learning_rate": 9.107646796951507e-06, "loss": 0.4259, "step": 1655 }, { "epoch": 0.8221082243918584, "grad_norm": 0.4275055229663849, "learning_rate": 9.105998851818963e-06, "loss": 0.4208, "step": 1656 }, { "epoch": 0.8226046665563462, "grad_norm": 0.49317795038223267, "learning_rate": 9.104349535804996e-06, "loss": 0.4083, "step": 1657 }, { "epoch": 0.823101108720834, "grad_norm": 0.47928351163864136, "learning_rate": 9.102698849460269e-06, "loss": 0.4364, "step": 1658 }, { "epoch": 0.8235975508853218, "grad_norm": 0.4892079532146454, "learning_rate": 9.101046793335904e-06, "loss": 0.4245, "step": 1659 }, { "epoch": 0.8240939930498097, "grad_norm": 0.4303673505783081, "learning_rate": 9.099393367983473e-06, "loss": 0.4144, "step": 1660 }, { "epoch": 0.8245904352142975, "grad_norm": 0.48881176114082336, "learning_rate": 9.09773857395501e-06, "loss": 0.3948, "step": 1661 }, { "epoch": 0.8250868773787854, "grad_norm": 0.5366873741149902, "learning_rate": 9.09608241180301e-06, "loss": 0.4391, "step": 1662 }, { "epoch": 0.8255833195432732, "grad_norm": 0.45795080065727234, "learning_rate": 9.094424882080419e-06, "loss": 0.4016, "step": 1663 }, { "epoch": 0.8260797617077611, "grad_norm": 0.4389495551586151, "learning_rate": 9.092765985340639e-06, "loss": 0.4239, "step": 1664 }, { "epoch": 0.8265762038722488, "grad_norm": 0.5160349011421204, "learning_rate": 9.09110572213753e-06, "loss": 0.4031, "step": 1665 }, { "epoch": 0.8270726460367367, "grad_norm": 0.4198783040046692, "learning_rate": 9.089444093025412e-06, "loss": 0.3988, "step": 1666 }, { "epoch": 0.8275690882012245, "grad_norm": 0.4725618362426758, "learning_rate": 9.087781098559056e-06, "loss": 0.422, "step": 1667 }, { "epoch": 0.8280655303657124, "grad_norm": 0.46272632479667664, "learning_rate": 9.086116739293692e-06, "loss": 0.4168, "step": 1668 }, { "epoch": 0.8285619725302003, "grad_norm": 0.5230858325958252, "learning_rate": 9.084451015785001e-06, "loss": 0.4272, "step": 1669 }, { "epoch": 0.8290584146946881, "grad_norm": 0.4354790449142456, "learning_rate": 9.082783928589127e-06, "loss": 0.3786, "step": 1670 }, { "epoch": 0.829554856859176, "grad_norm": 0.43896690011024475, "learning_rate": 9.081115478262664e-06, "loss": 0.4063, "step": 1671 }, { "epoch": 0.8300512990236637, "grad_norm": 0.45089399814605713, "learning_rate": 9.079445665362659e-06, "loss": 0.4329, "step": 1672 }, { "epoch": 0.8305477411881516, "grad_norm": 0.43840762972831726, "learning_rate": 9.077774490446619e-06, "loss": 0.4021, "step": 1673 }, { "epoch": 0.8310441833526394, "grad_norm": 0.4377192258834839, "learning_rate": 9.076101954072506e-06, "loss": 0.3889, "step": 1674 }, { "epoch": 0.8315406255171273, "grad_norm": 0.5005868673324585, "learning_rate": 9.074428056798733e-06, "loss": 0.4112, "step": 1675 }, { "epoch": 0.8320370676816151, "grad_norm": 0.47533589601516724, "learning_rate": 9.072752799184167e-06, "loss": 0.4047, "step": 1676 }, { "epoch": 0.832533509846103, "grad_norm": 0.45274484157562256, "learning_rate": 9.071076181788134e-06, "loss": 0.4185, "step": 1677 }, { "epoch": 0.8330299520105907, "grad_norm": 0.4618212878704071, "learning_rate": 9.06939820517041e-06, "loss": 0.3691, "step": 1678 }, { "epoch": 0.8335263941750786, "grad_norm": 0.572978675365448, "learning_rate": 9.067718869891226e-06, "loss": 0.4255, "step": 1679 }, { "epoch": 0.8340228363395664, "grad_norm": 0.5442951321601868, "learning_rate": 9.066038176511265e-06, "loss": 0.446, "step": 1680 }, { "epoch": 0.8345192785040543, "grad_norm": 0.4606418013572693, "learning_rate": 9.064356125591664e-06, "loss": 0.4016, "step": 1681 }, { "epoch": 0.8350157206685421, "grad_norm": 0.5377158522605896, "learning_rate": 9.062672717694019e-06, "loss": 0.4365, "step": 1682 }, { "epoch": 0.83551216283303, "grad_norm": 0.4571124017238617, "learning_rate": 9.06098795338037e-06, "loss": 0.3988, "step": 1683 }, { "epoch": 0.8360086049975178, "grad_norm": 0.45027613639831543, "learning_rate": 9.059301833213213e-06, "loss": 0.4211, "step": 1684 }, { "epoch": 0.8365050471620056, "grad_norm": 0.4797086715698242, "learning_rate": 9.0576143577555e-06, "loss": 0.4155, "step": 1685 }, { "epoch": 0.8370014893264934, "grad_norm": 0.40191295742988586, "learning_rate": 9.055925527570633e-06, "loss": 0.4012, "step": 1686 }, { "epoch": 0.8374979314909813, "grad_norm": 0.5375571846961975, "learning_rate": 9.054235343222466e-06, "loss": 0.4228, "step": 1687 }, { "epoch": 0.8379943736554691, "grad_norm": 0.5067715048789978, "learning_rate": 9.052543805275307e-06, "loss": 0.4116, "step": 1688 }, { "epoch": 0.838490815819957, "grad_norm": 0.4946935772895813, "learning_rate": 9.050850914293914e-06, "loss": 0.3902, "step": 1689 }, { "epoch": 0.8389872579844448, "grad_norm": 0.5162731409072876, "learning_rate": 9.049156670843495e-06, "loss": 0.4102, "step": 1690 }, { "epoch": 0.8394837001489327, "grad_norm": 0.5997983813285828, "learning_rate": 9.047461075489714e-06, "loss": 0.4317, "step": 1691 }, { "epoch": 0.8399801423134204, "grad_norm": 0.5309721827507019, "learning_rate": 9.045764128798684e-06, "loss": 0.4041, "step": 1692 }, { "epoch": 0.8404765844779083, "grad_norm": 0.5042637586593628, "learning_rate": 9.04406583133697e-06, "loss": 0.4298, "step": 1693 }, { "epoch": 0.8409730266423961, "grad_norm": 0.5311905741691589, "learning_rate": 9.042366183671585e-06, "loss": 0.4089, "step": 1694 }, { "epoch": 0.841469468806884, "grad_norm": 0.4703192412853241, "learning_rate": 9.040665186369999e-06, "loss": 0.403, "step": 1695 }, { "epoch": 0.8419659109713719, "grad_norm": 0.526499330997467, "learning_rate": 9.038962840000125e-06, "loss": 0.4049, "step": 1696 }, { "epoch": 0.8424623531358597, "grad_norm": 0.6273605823516846, "learning_rate": 9.03725914513033e-06, "loss": 0.4439, "step": 1697 }, { "epoch": 0.8429587953003476, "grad_norm": 0.5087134838104248, "learning_rate": 9.035554102329435e-06, "loss": 0.4307, "step": 1698 }, { "epoch": 0.8434552374648353, "grad_norm": 0.44477999210357666, "learning_rate": 9.033847712166706e-06, "loss": 0.3932, "step": 1699 }, { "epoch": 0.8439516796293232, "grad_norm": 0.43531534075737, "learning_rate": 9.03213997521186e-06, "loss": 0.4177, "step": 1700 }, { "epoch": 0.844448121793811, "grad_norm": 0.5974271297454834, "learning_rate": 9.030430892035062e-06, "loss": 0.4004, "step": 1701 }, { "epoch": 0.8449445639582989, "grad_norm": 0.5129312872886658, "learning_rate": 9.02872046320693e-06, "loss": 0.4009, "step": 1702 }, { "epoch": 0.8454410061227867, "grad_norm": 0.4954774081707001, "learning_rate": 9.027008689298531e-06, "loss": 0.4241, "step": 1703 }, { "epoch": 0.8459374482872746, "grad_norm": 0.44425395131111145, "learning_rate": 9.025295570881378e-06, "loss": 0.3962, "step": 1704 }, { "epoch": 0.8464338904517623, "grad_norm": 0.48330360651016235, "learning_rate": 9.023581108527437e-06, "loss": 0.4257, "step": 1705 }, { "epoch": 0.8469303326162502, "grad_norm": 0.415500670671463, "learning_rate": 9.021865302809117e-06, "loss": 0.3712, "step": 1706 }, { "epoch": 0.847426774780738, "grad_norm": 0.46250301599502563, "learning_rate": 9.020148154299282e-06, "loss": 0.4194, "step": 1707 }, { "epoch": 0.8479232169452259, "grad_norm": 0.5219912528991699, "learning_rate": 9.01842966357124e-06, "loss": 0.4278, "step": 1708 }, { "epoch": 0.8484196591097137, "grad_norm": 0.5093996524810791, "learning_rate": 9.016709831198746e-06, "loss": 0.3886, "step": 1709 }, { "epoch": 0.8489161012742016, "grad_norm": 0.42477884888648987, "learning_rate": 9.01498865775601e-06, "loss": 0.4119, "step": 1710 }, { "epoch": 0.8494125434386894, "grad_norm": 0.5261044502258301, "learning_rate": 9.013266143817681e-06, "loss": 0.4259, "step": 1711 }, { "epoch": 0.8499089856031772, "grad_norm": 0.509713351726532, "learning_rate": 9.011542289958861e-06, "loss": 0.403, "step": 1712 }, { "epoch": 0.850405427767665, "grad_norm": 0.5277685523033142, "learning_rate": 9.009817096755098e-06, "loss": 0.4068, "step": 1713 }, { "epoch": 0.8509018699321529, "grad_norm": 0.4909937381744385, "learning_rate": 9.008090564782388e-06, "loss": 0.4042, "step": 1714 }, { "epoch": 0.8513983120966407, "grad_norm": 0.42805755138397217, "learning_rate": 9.006362694617173e-06, "loss": 0.3993, "step": 1715 }, { "epoch": 0.8518947542611286, "grad_norm": 0.46156519651412964, "learning_rate": 9.004633486836339e-06, "loss": 0.3935, "step": 1716 }, { "epoch": 0.8523911964256164, "grad_norm": 0.453603595495224, "learning_rate": 9.002902942017225e-06, "loss": 0.3705, "step": 1717 }, { "epoch": 0.8528876385901043, "grad_norm": 0.5216612815856934, "learning_rate": 9.00117106073761e-06, "loss": 0.4379, "step": 1718 }, { "epoch": 0.853384080754592, "grad_norm": 0.4975984990596771, "learning_rate": 8.999437843575727e-06, "loss": 0.4438, "step": 1719 }, { "epoch": 0.8538805229190799, "grad_norm": 0.4418516755104065, "learning_rate": 8.997703291110243e-06, "loss": 0.4323, "step": 1720 }, { "epoch": 0.8543769650835678, "grad_norm": 0.4330536425113678, "learning_rate": 8.995967403920283e-06, "loss": 0.385, "step": 1721 }, { "epoch": 0.8548734072480556, "grad_norm": 0.5032020211219788, "learning_rate": 8.994230182585412e-06, "loss": 0.4147, "step": 1722 }, { "epoch": 0.8553698494125435, "grad_norm": 0.46045470237731934, "learning_rate": 8.99249162768564e-06, "loss": 0.3782, "step": 1723 }, { "epoch": 0.8558662915770313, "grad_norm": 0.4558328688144684, "learning_rate": 8.990751739801424e-06, "loss": 0.404, "step": 1724 }, { "epoch": 0.8563627337415192, "grad_norm": 0.4279572367668152, "learning_rate": 8.989010519513664e-06, "loss": 0.4237, "step": 1725 }, { "epoch": 0.8568591759060069, "grad_norm": 0.4930325150489807, "learning_rate": 8.987267967403706e-06, "loss": 0.4108, "step": 1726 }, { "epoch": 0.8573556180704948, "grad_norm": 0.5261193513870239, "learning_rate": 8.985524084053342e-06, "loss": 0.4606, "step": 1727 }, { "epoch": 0.8578520602349826, "grad_norm": 0.4688494801521301, "learning_rate": 8.983778870044806e-06, "loss": 0.4023, "step": 1728 }, { "epoch": 0.8583485023994705, "grad_norm": 0.48422887921333313, "learning_rate": 8.982032325960781e-06, "loss": 0.4266, "step": 1729 }, { "epoch": 0.8588449445639583, "grad_norm": 0.5033143758773804, "learning_rate": 8.980284452384387e-06, "loss": 0.4041, "step": 1730 }, { "epoch": 0.8593413867284462, "grad_norm": 0.4707951247692108, "learning_rate": 8.978535249899191e-06, "loss": 0.4242, "step": 1731 }, { "epoch": 0.8598378288929339, "grad_norm": 0.5105404853820801, "learning_rate": 8.976784719089206e-06, "loss": 0.4415, "step": 1732 }, { "epoch": 0.8603342710574218, "grad_norm": 0.5236144065856934, "learning_rate": 8.975032860538888e-06, "loss": 0.4495, "step": 1733 }, { "epoch": 0.8608307132219096, "grad_norm": 0.4568546414375305, "learning_rate": 8.973279674833133e-06, "loss": 0.4169, "step": 1734 }, { "epoch": 0.8613271553863975, "grad_norm": 0.5171574354171753, "learning_rate": 8.971525162557282e-06, "loss": 0.4301, "step": 1735 }, { "epoch": 0.8618235975508853, "grad_norm": 0.5616282224655151, "learning_rate": 8.969769324297118e-06, "loss": 0.4117, "step": 1736 }, { "epoch": 0.8623200397153732, "grad_norm": 0.46003085374832153, "learning_rate": 8.96801216063887e-06, "loss": 0.4085, "step": 1737 }, { "epoch": 0.862816481879861, "grad_norm": 0.536130428314209, "learning_rate": 8.966253672169206e-06, "loss": 0.4062, "step": 1738 }, { "epoch": 0.8633129240443488, "grad_norm": 0.5552747845649719, "learning_rate": 8.964493859475239e-06, "loss": 0.4088, "step": 1739 }, { "epoch": 0.8638093662088366, "grad_norm": 0.5133150219917297, "learning_rate": 8.962732723144518e-06, "loss": 0.4269, "step": 1740 }, { "epoch": 0.8643058083733245, "grad_norm": 0.486724317073822, "learning_rate": 8.960970263765044e-06, "loss": 0.4443, "step": 1741 }, { "epoch": 0.8648022505378123, "grad_norm": 0.47939014434814453, "learning_rate": 8.959206481925252e-06, "loss": 0.3712, "step": 1742 }, { "epoch": 0.8652986927023002, "grad_norm": 0.488091379404068, "learning_rate": 8.957441378214021e-06, "loss": 0.415, "step": 1743 }, { "epoch": 0.865795134866788, "grad_norm": 0.4941757321357727, "learning_rate": 8.95567495322067e-06, "loss": 0.3927, "step": 1744 }, { "epoch": 0.8662915770312759, "grad_norm": 0.5425012707710266, "learning_rate": 8.953907207534964e-06, "loss": 0.4412, "step": 1745 }, { "epoch": 0.8667880191957636, "grad_norm": 0.515757143497467, "learning_rate": 8.9521381417471e-06, "loss": 0.4165, "step": 1746 }, { "epoch": 0.8672844613602515, "grad_norm": 0.4793817400932312, "learning_rate": 8.950367756447727e-06, "loss": 0.3867, "step": 1747 }, { "epoch": 0.8677809035247394, "grad_norm": 0.5943219661712646, "learning_rate": 8.948596052227921e-06, "loss": 0.4018, "step": 1748 }, { "epoch": 0.8682773456892272, "grad_norm": 0.5173690319061279, "learning_rate": 8.946823029679213e-06, "loss": 0.4085, "step": 1749 }, { "epoch": 0.8687737878537151, "grad_norm": 0.5194466710090637, "learning_rate": 8.945048689393563e-06, "loss": 0.4354, "step": 1750 }, { "epoch": 0.8692702300182029, "grad_norm": 0.5386267304420471, "learning_rate": 8.943273031963375e-06, "loss": 0.4681, "step": 1751 }, { "epoch": 0.8697666721826908, "grad_norm": 0.5303268432617188, "learning_rate": 8.941496057981495e-06, "loss": 0.4403, "step": 1752 }, { "epoch": 0.8702631143471785, "grad_norm": 0.4539945721626282, "learning_rate": 8.939717768041206e-06, "loss": 0.369, "step": 1753 }, { "epoch": 0.8707595565116664, "grad_norm": 0.5188801288604736, "learning_rate": 8.937938162736229e-06, "loss": 0.4155, "step": 1754 }, { "epoch": 0.8712559986761542, "grad_norm": 0.4512590169906616, "learning_rate": 8.936157242660726e-06, "loss": 0.4196, "step": 1755 }, { "epoch": 0.8717524408406421, "grad_norm": 0.431770384311676, "learning_rate": 8.9343750084093e-06, "loss": 0.4209, "step": 1756 }, { "epoch": 0.8722488830051299, "grad_norm": 0.489397257566452, "learning_rate": 8.932591460576988e-06, "loss": 0.4344, "step": 1757 }, { "epoch": 0.8727453251696178, "grad_norm": 0.4939683675765991, "learning_rate": 8.93080659975927e-06, "loss": 0.3916, "step": 1758 }, { "epoch": 0.8732417673341055, "grad_norm": 0.4380597770214081, "learning_rate": 8.92902042655206e-06, "loss": 0.4122, "step": 1759 }, { "epoch": 0.8737382094985934, "grad_norm": 0.49480611085891724, "learning_rate": 8.927232941551716e-06, "loss": 0.4069, "step": 1760 }, { "epoch": 0.8742346516630812, "grad_norm": 0.5667482018470764, "learning_rate": 8.92544414535503e-06, "loss": 0.439, "step": 1761 }, { "epoch": 0.8747310938275691, "grad_norm": 0.5013667941093445, "learning_rate": 8.92365403855923e-06, "loss": 0.403, "step": 1762 }, { "epoch": 0.8752275359920569, "grad_norm": 0.44013726711273193, "learning_rate": 8.921862621761985e-06, "loss": 0.4135, "step": 1763 }, { "epoch": 0.8757239781565448, "grad_norm": 0.5454927086830139, "learning_rate": 8.920069895561403e-06, "loss": 0.4402, "step": 1764 }, { "epoch": 0.8762204203210326, "grad_norm": 0.5675665736198425, "learning_rate": 8.918275860556022e-06, "loss": 0.4198, "step": 1765 }, { "epoch": 0.8767168624855204, "grad_norm": 0.5318986773490906, "learning_rate": 8.916480517344826e-06, "loss": 0.3934, "step": 1766 }, { "epoch": 0.8772133046500082, "grad_norm": 0.4533163011074066, "learning_rate": 8.914683866527227e-06, "loss": 0.4062, "step": 1767 }, { "epoch": 0.8777097468144961, "grad_norm": 0.6017132997512817, "learning_rate": 8.912885908703083e-06, "loss": 0.4355, "step": 1768 }, { "epoch": 0.878206188978984, "grad_norm": 0.5408520698547363, "learning_rate": 8.911086644472679e-06, "loss": 0.4241, "step": 1769 }, { "epoch": 0.8787026311434718, "grad_norm": 0.5606937408447266, "learning_rate": 8.909286074436742e-06, "loss": 0.4408, "step": 1770 }, { "epoch": 0.8791990733079597, "grad_norm": 0.5245396494865417, "learning_rate": 8.907484199196432e-06, "loss": 0.4179, "step": 1771 }, { "epoch": 0.8796955154724475, "grad_norm": 0.4745728671550751, "learning_rate": 8.905681019353349e-06, "loss": 0.381, "step": 1772 }, { "epoch": 0.8801919576369353, "grad_norm": 0.5039427876472473, "learning_rate": 8.903876535509524e-06, "loss": 0.3966, "step": 1773 }, { "epoch": 0.8806883998014231, "grad_norm": 0.4786587953567505, "learning_rate": 8.902070748267425e-06, "loss": 0.409, "step": 1774 }, { "epoch": 0.881184841965911, "grad_norm": 0.4910842478275299, "learning_rate": 8.900263658229954e-06, "loss": 0.3947, "step": 1775 }, { "epoch": 0.8816812841303988, "grad_norm": 0.5244905948638916, "learning_rate": 8.898455266000455e-06, "loss": 0.4196, "step": 1776 }, { "epoch": 0.8821777262948867, "grad_norm": 0.5540930032730103, "learning_rate": 8.896645572182694e-06, "loss": 0.444, "step": 1777 }, { "epoch": 0.8826741684593745, "grad_norm": 0.5025313496589661, "learning_rate": 8.894834577380882e-06, "loss": 0.4305, "step": 1778 }, { "epoch": 0.8831706106238624, "grad_norm": 0.5331885814666748, "learning_rate": 8.89302228219966e-06, "loss": 0.4074, "step": 1779 }, { "epoch": 0.8836670527883501, "grad_norm": 0.5388621091842651, "learning_rate": 8.891208687244104e-06, "loss": 0.4276, "step": 1780 }, { "epoch": 0.884163494952838, "grad_norm": 0.4646928608417511, "learning_rate": 8.889393793119725e-06, "loss": 0.4078, "step": 1781 }, { "epoch": 0.8846599371173258, "grad_norm": 0.5389218926429749, "learning_rate": 8.887577600432466e-06, "loss": 0.413, "step": 1782 }, { "epoch": 0.8851563792818137, "grad_norm": 0.47819042205810547, "learning_rate": 8.885760109788705e-06, "loss": 0.4081, "step": 1783 }, { "epoch": 0.8856528214463015, "grad_norm": 0.5623059272766113, "learning_rate": 8.883941321795254e-06, "loss": 0.4154, "step": 1784 }, { "epoch": 0.8861492636107894, "grad_norm": 0.5238704085350037, "learning_rate": 8.882121237059353e-06, "loss": 0.3649, "step": 1785 }, { "epoch": 0.8866457057752771, "grad_norm": 0.4699023962020874, "learning_rate": 8.880299856188681e-06, "loss": 0.4222, "step": 1786 }, { "epoch": 0.887142147939765, "grad_norm": 0.47235649824142456, "learning_rate": 8.878477179791349e-06, "loss": 0.3967, "step": 1787 }, { "epoch": 0.8876385901042528, "grad_norm": 0.5592638254165649, "learning_rate": 8.876653208475898e-06, "loss": 0.4189, "step": 1788 }, { "epoch": 0.8881350322687407, "grad_norm": 0.6072983741760254, "learning_rate": 8.874827942851302e-06, "loss": 0.4412, "step": 1789 }, { "epoch": 0.8886314744332285, "grad_norm": 0.5135446786880493, "learning_rate": 8.873001383526966e-06, "loss": 0.4121, "step": 1790 }, { "epoch": 0.8891279165977164, "grad_norm": 0.5140111446380615, "learning_rate": 8.871173531112733e-06, "loss": 0.3691, "step": 1791 }, { "epoch": 0.8896243587622042, "grad_norm": 0.49266213178634644, "learning_rate": 8.86934438621887e-06, "loss": 0.4219, "step": 1792 }, { "epoch": 0.890120800926692, "grad_norm": 0.5339646935462952, "learning_rate": 8.86751394945608e-06, "loss": 0.4013, "step": 1793 }, { "epoch": 0.8906172430911798, "grad_norm": 0.5845082402229309, "learning_rate": 8.865682221435495e-06, "loss": 0.4154, "step": 1794 }, { "epoch": 0.8911136852556677, "grad_norm": 0.5604977011680603, "learning_rate": 8.863849202768677e-06, "loss": 0.4243, "step": 1795 }, { "epoch": 0.8916101274201556, "grad_norm": 0.6648566722869873, "learning_rate": 8.862014894067627e-06, "loss": 0.4169, "step": 1796 }, { "epoch": 0.8921065695846434, "grad_norm": 0.5644614100456238, "learning_rate": 8.860179295944766e-06, "loss": 0.4216, "step": 1797 }, { "epoch": 0.8926030117491313, "grad_norm": 0.45960915088653564, "learning_rate": 8.858342409012953e-06, "loss": 0.418, "step": 1798 }, { "epoch": 0.8930994539136191, "grad_norm": 0.5232410430908203, "learning_rate": 8.856504233885473e-06, "loss": 0.3748, "step": 1799 }, { "epoch": 0.8935958960781069, "grad_norm": 0.564815104007721, "learning_rate": 8.854664771176044e-06, "loss": 0.4422, "step": 1800 }, { "epoch": 0.8940923382425947, "grad_norm": 0.5483181476593018, "learning_rate": 8.852824021498811e-06, "loss": 0.4093, "step": 1801 }, { "epoch": 0.8945887804070826, "grad_norm": 0.5440123677253723, "learning_rate": 8.850981985468351e-06, "loss": 0.4426, "step": 1802 }, { "epoch": 0.8950852225715704, "grad_norm": 0.5860922336578369, "learning_rate": 8.849138663699671e-06, "loss": 0.4518, "step": 1803 }, { "epoch": 0.8955816647360583, "grad_norm": 0.43092867732048035, "learning_rate": 8.847294056808204e-06, "loss": 0.4058, "step": 1804 }, { "epoch": 0.8960781069005461, "grad_norm": 0.4273878037929535, "learning_rate": 8.845448165409815e-06, "loss": 0.4264, "step": 1805 }, { "epoch": 0.896574549065034, "grad_norm": 0.5020388960838318, "learning_rate": 8.8436009901208e-06, "loss": 0.4295, "step": 1806 }, { "epoch": 0.8970709912295217, "grad_norm": 0.47464853525161743, "learning_rate": 8.841752531557875e-06, "loss": 0.4175, "step": 1807 }, { "epoch": 0.8975674333940096, "grad_norm": 0.47761070728302, "learning_rate": 8.839902790338193e-06, "loss": 0.4079, "step": 1808 }, { "epoch": 0.8980638755584974, "grad_norm": 0.45396384596824646, "learning_rate": 8.838051767079332e-06, "loss": 0.4248, "step": 1809 }, { "epoch": 0.8985603177229853, "grad_norm": 0.5196669697761536, "learning_rate": 8.836199462399298e-06, "loss": 0.4401, "step": 1810 }, { "epoch": 0.8990567598874731, "grad_norm": 0.46981653571128845, "learning_rate": 8.834345876916526e-06, "loss": 0.4008, "step": 1811 }, { "epoch": 0.899553202051961, "grad_norm": 0.4854908287525177, "learning_rate": 8.832491011249878e-06, "loss": 0.4448, "step": 1812 }, { "epoch": 0.9000496442164487, "grad_norm": 0.47720903158187866, "learning_rate": 8.830634866018641e-06, "loss": 0.3832, "step": 1813 }, { "epoch": 0.9005460863809366, "grad_norm": 0.5196309089660645, "learning_rate": 8.828777441842536e-06, "loss": 0.4173, "step": 1814 }, { "epoch": 0.9010425285454244, "grad_norm": 0.4125162363052368, "learning_rate": 8.826918739341701e-06, "loss": 0.386, "step": 1815 }, { "epoch": 0.9015389707099123, "grad_norm": 0.5106251239776611, "learning_rate": 8.82505875913671e-06, "loss": 0.3975, "step": 1816 }, { "epoch": 0.9020354128744001, "grad_norm": 0.44103872776031494, "learning_rate": 8.82319750184856e-06, "loss": 0.4256, "step": 1817 }, { "epoch": 0.902531855038888, "grad_norm": 0.4507336914539337, "learning_rate": 8.821334968098671e-06, "loss": 0.4195, "step": 1818 }, { "epoch": 0.9030282972033759, "grad_norm": 0.5946172475814819, "learning_rate": 8.819471158508894e-06, "loss": 0.4258, "step": 1819 }, { "epoch": 0.9035247393678636, "grad_norm": 0.47289878129959106, "learning_rate": 8.817606073701505e-06, "loss": 0.4533, "step": 1820 }, { "epoch": 0.9040211815323514, "grad_norm": 0.4842042922973633, "learning_rate": 8.815739714299206e-06, "loss": 0.4211, "step": 1821 }, { "epoch": 0.9045176236968393, "grad_norm": 0.462508887052536, "learning_rate": 8.813872080925122e-06, "loss": 0.4096, "step": 1822 }, { "epoch": 0.9050140658613272, "grad_norm": 0.4721945524215698, "learning_rate": 8.812003174202803e-06, "loss": 0.4022, "step": 1823 }, { "epoch": 0.905510508025815, "grad_norm": 0.46020299196243286, "learning_rate": 8.810132994756232e-06, "loss": 0.3953, "step": 1824 }, { "epoch": 0.9060069501903029, "grad_norm": 0.4870629608631134, "learning_rate": 8.808261543209807e-06, "loss": 0.4033, "step": 1825 }, { "epoch": 0.9065033923547907, "grad_norm": 0.4663901627063751, "learning_rate": 8.806388820188354e-06, "loss": 0.4023, "step": 1826 }, { "epoch": 0.9069998345192785, "grad_norm": 0.48873478174209595, "learning_rate": 8.804514826317125e-06, "loss": 0.4436, "step": 1827 }, { "epoch": 0.9074962766837663, "grad_norm": 0.47995492815971375, "learning_rate": 8.8026395622218e-06, "loss": 0.4431, "step": 1828 }, { "epoch": 0.9079927188482542, "grad_norm": 0.46264931559562683, "learning_rate": 8.800763028528472e-06, "loss": 0.3946, "step": 1829 }, { "epoch": 0.908489161012742, "grad_norm": 0.46530285477638245, "learning_rate": 8.79888522586367e-06, "loss": 0.3912, "step": 1830 }, { "epoch": 0.9089856031772299, "grad_norm": 0.48248887062072754, "learning_rate": 8.797006154854338e-06, "loss": 0.4373, "step": 1831 }, { "epoch": 0.9094820453417177, "grad_norm": 0.47299081087112427, "learning_rate": 8.795125816127849e-06, "loss": 0.4082, "step": 1832 }, { "epoch": 0.9099784875062056, "grad_norm": 0.46829649806022644, "learning_rate": 8.793244210311995e-06, "loss": 0.3919, "step": 1833 }, { "epoch": 0.9104749296706933, "grad_norm": 0.5163505673408508, "learning_rate": 8.791361338034993e-06, "loss": 0.4095, "step": 1834 }, { "epoch": 0.9109713718351812, "grad_norm": 0.46528899669647217, "learning_rate": 8.789477199925485e-06, "loss": 0.4184, "step": 1835 }, { "epoch": 0.911467813999669, "grad_norm": 0.4687691032886505, "learning_rate": 8.787591796612531e-06, "loss": 0.3908, "step": 1836 }, { "epoch": 0.9119642561641569, "grad_norm": 0.42522284388542175, "learning_rate": 8.785705128725618e-06, "loss": 0.4053, "step": 1837 }, { "epoch": 0.9124606983286447, "grad_norm": 0.538718044757843, "learning_rate": 8.783817196894652e-06, "loss": 0.4165, "step": 1838 }, { "epoch": 0.9129571404931326, "grad_norm": 0.5019674897193909, "learning_rate": 8.781928001749961e-06, "loss": 0.3972, "step": 1839 }, { "epoch": 0.9134535826576203, "grad_norm": 0.45102980732917786, "learning_rate": 8.780037543922299e-06, "loss": 0.411, "step": 1840 }, { "epoch": 0.9139500248221082, "grad_norm": 0.5113197565078735, "learning_rate": 8.778145824042838e-06, "loss": 0.3829, "step": 1841 }, { "epoch": 0.914446466986596, "grad_norm": 0.5215175747871399, "learning_rate": 8.776252842743169e-06, "loss": 0.4005, "step": 1842 }, { "epoch": 0.9149429091510839, "grad_norm": 0.46376290917396545, "learning_rate": 8.774358600655309e-06, "loss": 0.3877, "step": 1843 }, { "epoch": 0.9154393513155717, "grad_norm": 0.48933055996894836, "learning_rate": 8.772463098411694e-06, "loss": 0.4247, "step": 1844 }, { "epoch": 0.9159357934800596, "grad_norm": 0.48125559091567993, "learning_rate": 8.77056633664518e-06, "loss": 0.4343, "step": 1845 }, { "epoch": 0.9164322356445475, "grad_norm": 0.44430363178253174, "learning_rate": 8.768668315989045e-06, "loss": 0.3705, "step": 1846 }, { "epoch": 0.9169286778090352, "grad_norm": 0.47841647267341614, "learning_rate": 8.766769037076986e-06, "loss": 0.41, "step": 1847 }, { "epoch": 0.917425119973523, "grad_norm": 0.4537143409252167, "learning_rate": 8.76486850054312e-06, "loss": 0.4059, "step": 1848 }, { "epoch": 0.9179215621380109, "grad_norm": 0.4876398742198944, "learning_rate": 8.762966707021985e-06, "loss": 0.4123, "step": 1849 }, { "epoch": 0.9184180043024988, "grad_norm": 0.500105619430542, "learning_rate": 8.761063657148537e-06, "loss": 0.4188, "step": 1850 }, { "epoch": 0.9189144464669866, "grad_norm": 0.4654165506362915, "learning_rate": 8.759159351558155e-06, "loss": 0.4015, "step": 1851 }, { "epoch": 0.9194108886314745, "grad_norm": 0.45286285877227783, "learning_rate": 8.757253790886635e-06, "loss": 0.3995, "step": 1852 }, { "epoch": 0.9199073307959623, "grad_norm": 0.5433377623558044, "learning_rate": 8.75534697577019e-06, "loss": 0.3988, "step": 1853 }, { "epoch": 0.9204037729604501, "grad_norm": 0.5008939504623413, "learning_rate": 8.753438906845454e-06, "loss": 0.4073, "step": 1854 }, { "epoch": 0.9209002151249379, "grad_norm": 0.466042160987854, "learning_rate": 8.751529584749482e-06, "loss": 0.4157, "step": 1855 }, { "epoch": 0.9213966572894258, "grad_norm": 0.6000510454177856, "learning_rate": 8.749619010119738e-06, "loss": 0.428, "step": 1856 }, { "epoch": 0.9218930994539136, "grad_norm": 0.5030505657196045, "learning_rate": 8.74770718359412e-06, "loss": 0.4138, "step": 1857 }, { "epoch": 0.9223895416184015, "grad_norm": 0.512173593044281, "learning_rate": 8.745794105810928e-06, "loss": 0.3849, "step": 1858 }, { "epoch": 0.9228859837828893, "grad_norm": 0.5192985534667969, "learning_rate": 8.74387977740889e-06, "loss": 0.3781, "step": 1859 }, { "epoch": 0.9233824259473772, "grad_norm": 0.4938175082206726, "learning_rate": 8.741964199027147e-06, "loss": 0.416, "step": 1860 }, { "epoch": 0.9238788681118649, "grad_norm": 0.5401041507720947, "learning_rate": 8.740047371305259e-06, "loss": 0.4092, "step": 1861 }, { "epoch": 0.9243753102763528, "grad_norm": 0.5358916521072388, "learning_rate": 8.738129294883202e-06, "loss": 0.4005, "step": 1862 }, { "epoch": 0.9248717524408406, "grad_norm": 0.5519017577171326, "learning_rate": 8.73620997040137e-06, "loss": 0.4031, "step": 1863 }, { "epoch": 0.9253681946053285, "grad_norm": 0.45749226212501526, "learning_rate": 8.734289398500576e-06, "loss": 0.4113, "step": 1864 }, { "epoch": 0.9258646367698163, "grad_norm": 0.5364435315132141, "learning_rate": 8.732367579822043e-06, "loss": 0.4141, "step": 1865 }, { "epoch": 0.9263610789343042, "grad_norm": 0.5138802528381348, "learning_rate": 8.730444515007413e-06, "loss": 0.4247, "step": 1866 }, { "epoch": 0.9268575210987919, "grad_norm": 0.5038459897041321, "learning_rate": 8.72852020469875e-06, "loss": 0.3965, "step": 1867 }, { "epoch": 0.9273539632632798, "grad_norm": 0.5963847041130066, "learning_rate": 8.726594649538524e-06, "loss": 0.3694, "step": 1868 }, { "epoch": 0.9278504054277676, "grad_norm": 0.4983658194541931, "learning_rate": 8.72466785016963e-06, "loss": 0.3715, "step": 1869 }, { "epoch": 0.9283468475922555, "grad_norm": 0.47432941198349, "learning_rate": 8.72273980723537e-06, "loss": 0.3913, "step": 1870 }, { "epoch": 0.9288432897567434, "grad_norm": 0.5898661613464355, "learning_rate": 8.720810521379467e-06, "loss": 0.3967, "step": 1871 }, { "epoch": 0.9293397319212312, "grad_norm": 0.6252992153167725, "learning_rate": 8.718879993246058e-06, "loss": 0.3732, "step": 1872 }, { "epoch": 0.9298361740857191, "grad_norm": 0.4738160967826843, "learning_rate": 8.716948223479693e-06, "loss": 0.4095, "step": 1873 }, { "epoch": 0.9303326162502068, "grad_norm": 0.4118805527687073, "learning_rate": 8.715015212725336e-06, "loss": 0.4046, "step": 1874 }, { "epoch": 0.9308290584146947, "grad_norm": 0.5692242383956909, "learning_rate": 8.713080961628368e-06, "loss": 0.4183, "step": 1875 }, { "epoch": 0.9313255005791825, "grad_norm": 0.5263821482658386, "learning_rate": 8.711145470834584e-06, "loss": 0.4357, "step": 1876 }, { "epoch": 0.9318219427436704, "grad_norm": 0.49682146310806274, "learning_rate": 8.709208740990189e-06, "loss": 0.4162, "step": 1877 }, { "epoch": 0.9323183849081582, "grad_norm": 0.4743478298187256, "learning_rate": 8.707270772741807e-06, "loss": 0.4034, "step": 1878 }, { "epoch": 0.9328148270726461, "grad_norm": 0.4386703670024872, "learning_rate": 8.705331566736473e-06, "loss": 0.4226, "step": 1879 }, { "epoch": 0.9333112692371339, "grad_norm": 0.49264732003211975, "learning_rate": 8.703391123621632e-06, "loss": 0.3985, "step": 1880 }, { "epoch": 0.9338077114016217, "grad_norm": 0.46217694878578186, "learning_rate": 8.701449444045149e-06, "loss": 0.4213, "step": 1881 }, { "epoch": 0.9343041535661095, "grad_norm": 0.48830947279930115, "learning_rate": 8.699506528655297e-06, "loss": 0.4138, "step": 1882 }, { "epoch": 0.9348005957305974, "grad_norm": 0.5274256467819214, "learning_rate": 8.697562378100761e-06, "loss": 0.4367, "step": 1883 }, { "epoch": 0.9352970378950852, "grad_norm": 0.45420172810554504, "learning_rate": 8.695616993030642e-06, "loss": 0.4016, "step": 1884 }, { "epoch": 0.9357934800595731, "grad_norm": 0.5565884113311768, "learning_rate": 8.69367037409445e-06, "loss": 0.4082, "step": 1885 }, { "epoch": 0.9362899222240609, "grad_norm": 0.5523955821990967, "learning_rate": 8.691722521942107e-06, "loss": 0.3981, "step": 1886 }, { "epoch": 0.9367863643885488, "grad_norm": 0.4834287762641907, "learning_rate": 8.68977343722395e-06, "loss": 0.3881, "step": 1887 }, { "epoch": 0.9372828065530365, "grad_norm": 0.5071977376937866, "learning_rate": 8.687823120590727e-06, "loss": 0.4389, "step": 1888 }, { "epoch": 0.9377792487175244, "grad_norm": 0.547340452671051, "learning_rate": 8.685871572693592e-06, "loss": 0.4123, "step": 1889 }, { "epoch": 0.9382756908820122, "grad_norm": 0.5628478527069092, "learning_rate": 8.683918794184115e-06, "loss": 0.4124, "step": 1890 }, { "epoch": 0.9387721330465001, "grad_norm": 0.46965232491493225, "learning_rate": 8.681964785714275e-06, "loss": 0.4215, "step": 1891 }, { "epoch": 0.9392685752109879, "grad_norm": 0.5463140606880188, "learning_rate": 8.680009547936465e-06, "loss": 0.3942, "step": 1892 }, { "epoch": 0.9397650173754758, "grad_norm": 0.6247212886810303, "learning_rate": 8.678053081503484e-06, "loss": 0.4457, "step": 1893 }, { "epoch": 0.9402614595399635, "grad_norm": 0.4557335078716278, "learning_rate": 8.676095387068542e-06, "loss": 0.4056, "step": 1894 }, { "epoch": 0.9407579017044514, "grad_norm": 0.5569730401039124, "learning_rate": 8.674136465285261e-06, "loss": 0.3954, "step": 1895 }, { "epoch": 0.9412543438689392, "grad_norm": 0.5577400326728821, "learning_rate": 8.672176316807672e-06, "loss": 0.4107, "step": 1896 }, { "epoch": 0.9417507860334271, "grad_norm": 0.48726892471313477, "learning_rate": 8.670214942290215e-06, "loss": 0.396, "step": 1897 }, { "epoch": 0.942247228197915, "grad_norm": 0.5973776578903198, "learning_rate": 8.66825234238774e-06, "loss": 0.4271, "step": 1898 }, { "epoch": 0.9427436703624028, "grad_norm": 0.56330806016922, "learning_rate": 8.666288517755505e-06, "loss": 0.4089, "step": 1899 }, { "epoch": 0.9432401125268907, "grad_norm": 0.5830925107002258, "learning_rate": 8.66432346904918e-06, "loss": 0.394, "step": 1900 }, { "epoch": 0.9437365546913784, "grad_norm": 0.561930775642395, "learning_rate": 8.662357196924838e-06, "loss": 0.459, "step": 1901 }, { "epoch": 0.9442329968558663, "grad_norm": 0.5722702741622925, "learning_rate": 8.660389702038965e-06, "loss": 0.3758, "step": 1902 }, { "epoch": 0.9447294390203541, "grad_norm": 0.618113100528717, "learning_rate": 8.658420985048455e-06, "loss": 0.4077, "step": 1903 }, { "epoch": 0.945225881184842, "grad_norm": 0.6111602783203125, "learning_rate": 8.656451046610607e-06, "loss": 0.4152, "step": 1904 }, { "epoch": 0.9457223233493298, "grad_norm": 0.4765700101852417, "learning_rate": 8.654479887383134e-06, "loss": 0.4048, "step": 1905 }, { "epoch": 0.9462187655138177, "grad_norm": 0.6225451231002808, "learning_rate": 8.652507508024148e-06, "loss": 0.3933, "step": 1906 }, { "epoch": 0.9467152076783055, "grad_norm": 0.5625535249710083, "learning_rate": 8.650533909192174e-06, "loss": 0.4299, "step": 1907 }, { "epoch": 0.9472116498427933, "grad_norm": 0.5602776408195496, "learning_rate": 8.648559091546145e-06, "loss": 0.4252, "step": 1908 }, { "epoch": 0.9477080920072811, "grad_norm": 0.5519794821739197, "learning_rate": 8.646583055745398e-06, "loss": 0.4066, "step": 1909 }, { "epoch": 0.948204534171769, "grad_norm": 0.47594600915908813, "learning_rate": 8.644605802449677e-06, "loss": 0.4053, "step": 1910 }, { "epoch": 0.9487009763362568, "grad_norm": 0.58127760887146, "learning_rate": 8.642627332319133e-06, "loss": 0.4177, "step": 1911 }, { "epoch": 0.9491974185007447, "grad_norm": 0.4749923646450043, "learning_rate": 8.640647646014324e-06, "loss": 0.4113, "step": 1912 }, { "epoch": 0.9496938606652325, "grad_norm": 0.5497516989707947, "learning_rate": 8.638666744196213e-06, "loss": 0.4356, "step": 1913 }, { "epoch": 0.9501903028297204, "grad_norm": 0.5009453892707825, "learning_rate": 8.636684627526171e-06, "loss": 0.4173, "step": 1914 }, { "epoch": 0.9506867449942081, "grad_norm": 0.41165533661842346, "learning_rate": 8.63470129666597e-06, "loss": 0.4136, "step": 1915 }, { "epoch": 0.951183187158696, "grad_norm": 0.5484547019004822, "learning_rate": 8.632716752277792e-06, "loss": 0.3681, "step": 1916 }, { "epoch": 0.9516796293231838, "grad_norm": 0.4664028286933899, "learning_rate": 8.630730995024224e-06, "loss": 0.4083, "step": 1917 }, { "epoch": 0.9521760714876717, "grad_norm": 0.4595913589000702, "learning_rate": 8.628744025568252e-06, "loss": 0.4247, "step": 1918 }, { "epoch": 0.9526725136521595, "grad_norm": 0.4577116370201111, "learning_rate": 8.626755844573274e-06, "loss": 0.4155, "step": 1919 }, { "epoch": 0.9531689558166474, "grad_norm": 0.46842798590660095, "learning_rate": 8.62476645270309e-06, "loss": 0.4029, "step": 1920 }, { "epoch": 0.9536653979811351, "grad_norm": 0.48451828956604004, "learning_rate": 8.622775850621904e-06, "loss": 0.4339, "step": 1921 }, { "epoch": 0.954161840145623, "grad_norm": 0.4185126721858978, "learning_rate": 8.62078403899432e-06, "loss": 0.4262, "step": 1922 }, { "epoch": 0.9546582823101109, "grad_norm": 0.49697139859199524, "learning_rate": 8.618791018485357e-06, "loss": 0.4244, "step": 1923 }, { "epoch": 0.9551547244745987, "grad_norm": 0.440626859664917, "learning_rate": 8.616796789760424e-06, "loss": 0.3894, "step": 1924 }, { "epoch": 0.9556511666390866, "grad_norm": 0.3984962999820709, "learning_rate": 8.614801353485343e-06, "loss": 0.3796, "step": 1925 }, { "epoch": 0.9561476088035744, "grad_norm": 0.4690248668193817, "learning_rate": 8.612804710326332e-06, "loss": 0.4169, "step": 1926 }, { "epoch": 0.9566440509680623, "grad_norm": 0.48797208070755005, "learning_rate": 8.610806860950023e-06, "loss": 0.4006, "step": 1927 }, { "epoch": 0.95714049313255, "grad_norm": 0.4552616477012634, "learning_rate": 8.608807806023436e-06, "loss": 0.4185, "step": 1928 }, { "epoch": 0.9576369352970379, "grad_norm": 0.4672681987285614, "learning_rate": 8.606807546214007e-06, "loss": 0.3967, "step": 1929 }, { "epoch": 0.9581333774615257, "grad_norm": 0.4865618646144867, "learning_rate": 8.604806082189564e-06, "loss": 0.4057, "step": 1930 }, { "epoch": 0.9586298196260136, "grad_norm": 0.4318315386772156, "learning_rate": 8.602803414618343e-06, "loss": 0.3961, "step": 1931 }, { "epoch": 0.9591262617905014, "grad_norm": 0.46353837847709656, "learning_rate": 8.600799544168983e-06, "loss": 0.4449, "step": 1932 }, { "epoch": 0.9596227039549893, "grad_norm": 0.4515054225921631, "learning_rate": 8.598794471510519e-06, "loss": 0.4222, "step": 1933 }, { "epoch": 0.9601191461194771, "grad_norm": 0.4942565858364105, "learning_rate": 8.596788197312389e-06, "loss": 0.4238, "step": 1934 }, { "epoch": 0.9606155882839649, "grad_norm": 0.4304456412792206, "learning_rate": 8.594780722244436e-06, "loss": 0.407, "step": 1935 }, { "epoch": 0.9611120304484527, "grad_norm": 0.4556758999824524, "learning_rate": 8.592772046976901e-06, "loss": 0.4095, "step": 1936 }, { "epoch": 0.9616084726129406, "grad_norm": 0.4729948341846466, "learning_rate": 8.590762172180426e-06, "loss": 0.3962, "step": 1937 }, { "epoch": 0.9621049147774284, "grad_norm": 0.42960289120674133, "learning_rate": 8.588751098526053e-06, "loss": 0.4177, "step": 1938 }, { "epoch": 0.9626013569419163, "grad_norm": 0.48604536056518555, "learning_rate": 8.586738826685223e-06, "loss": 0.4166, "step": 1939 }, { "epoch": 0.9630977991064041, "grad_norm": 0.4614291191101074, "learning_rate": 8.584725357329784e-06, "loss": 0.4259, "step": 1940 }, { "epoch": 0.963594241270892, "grad_norm": 0.43939536809921265, "learning_rate": 8.582710691131975e-06, "loss": 0.4142, "step": 1941 }, { "epoch": 0.9640906834353797, "grad_norm": 0.405640572309494, "learning_rate": 8.580694828764438e-06, "loss": 0.3723, "step": 1942 }, { "epoch": 0.9645871255998676, "grad_norm": 0.47491544485092163, "learning_rate": 8.578677770900215e-06, "loss": 0.4103, "step": 1943 }, { "epoch": 0.9650835677643554, "grad_norm": 0.4323413074016571, "learning_rate": 8.57665951821275e-06, "loss": 0.3894, "step": 1944 }, { "epoch": 0.9655800099288433, "grad_norm": 0.436518132686615, "learning_rate": 8.574640071375877e-06, "loss": 0.4234, "step": 1945 }, { "epoch": 0.9660764520933312, "grad_norm": 0.507270336151123, "learning_rate": 8.572619431063839e-06, "loss": 0.402, "step": 1946 }, { "epoch": 0.966572894257819, "grad_norm": 0.4357856214046478, "learning_rate": 8.570597597951272e-06, "loss": 0.4007, "step": 1947 }, { "epoch": 0.9670693364223067, "grad_norm": 0.4481806755065918, "learning_rate": 8.568574572713208e-06, "loss": 0.3794, "step": 1948 }, { "epoch": 0.9675657785867946, "grad_norm": 0.48675641417503357, "learning_rate": 8.566550356025083e-06, "loss": 0.4172, "step": 1949 }, { "epoch": 0.9680622207512825, "grad_norm": 0.4157714247703552, "learning_rate": 8.56452494856273e-06, "loss": 0.3736, "step": 1950 }, { "epoch": 0.9685586629157703, "grad_norm": 0.46107053756713867, "learning_rate": 8.562498351002375e-06, "loss": 0.3932, "step": 1951 }, { "epoch": 0.9690551050802582, "grad_norm": 0.44594988226890564, "learning_rate": 8.560470564020642e-06, "loss": 0.4019, "step": 1952 }, { "epoch": 0.969551547244746, "grad_norm": 0.4910518527030945, "learning_rate": 8.558441588294556e-06, "loss": 0.4145, "step": 1953 }, { "epoch": 0.9700479894092339, "grad_norm": 0.5320470929145813, "learning_rate": 8.556411424501539e-06, "loss": 0.4203, "step": 1954 }, { "epoch": 0.9705444315737216, "grad_norm": 0.49482467770576477, "learning_rate": 8.554380073319403e-06, "loss": 0.3881, "step": 1955 }, { "epoch": 0.9710408737382095, "grad_norm": 0.4978364109992981, "learning_rate": 8.552347535426365e-06, "loss": 0.387, "step": 1956 }, { "epoch": 0.9715373159026973, "grad_norm": 0.542545735836029, "learning_rate": 8.55031381150103e-06, "loss": 0.443, "step": 1957 }, { "epoch": 0.9720337580671852, "grad_norm": 0.4508587121963501, "learning_rate": 8.548278902222408e-06, "loss": 0.3857, "step": 1958 }, { "epoch": 0.972530200231673, "grad_norm": 0.4547988772392273, "learning_rate": 8.546242808269895e-06, "loss": 0.4164, "step": 1959 }, { "epoch": 0.9730266423961609, "grad_norm": 0.5304481387138367, "learning_rate": 8.544205530323294e-06, "loss": 0.4168, "step": 1960 }, { "epoch": 0.9735230845606487, "grad_norm": 0.48629114031791687, "learning_rate": 8.542167069062788e-06, "loss": 0.4308, "step": 1961 }, { "epoch": 0.9740195267251365, "grad_norm": 0.5232369303703308, "learning_rate": 8.54012742516897e-06, "loss": 0.4285, "step": 1962 }, { "epoch": 0.9745159688896243, "grad_norm": 0.41860267519950867, "learning_rate": 8.538086599322821e-06, "loss": 0.4008, "step": 1963 }, { "epoch": 0.9750124110541122, "grad_norm": 0.4931492507457733, "learning_rate": 8.536044592205716e-06, "loss": 0.4423, "step": 1964 }, { "epoch": 0.9755088532186, "grad_norm": 0.47381100058555603, "learning_rate": 8.534001404499426e-06, "loss": 0.4131, "step": 1965 }, { "epoch": 0.9760052953830879, "grad_norm": 0.4969824552536011, "learning_rate": 8.531957036886114e-06, "loss": 0.3946, "step": 1966 }, { "epoch": 0.9765017375475757, "grad_norm": 0.46186360716819763, "learning_rate": 8.529911490048343e-06, "loss": 0.4502, "step": 1967 }, { "epoch": 0.9769981797120636, "grad_norm": 0.4313708543777466, "learning_rate": 8.527864764669063e-06, "loss": 0.4206, "step": 1968 }, { "epoch": 0.9774946218765513, "grad_norm": 0.388365775346756, "learning_rate": 8.525816861431617e-06, "loss": 0.3818, "step": 1969 }, { "epoch": 0.9779910640410392, "grad_norm": 0.4089495837688446, "learning_rate": 8.523767781019752e-06, "loss": 0.3962, "step": 1970 }, { "epoch": 0.978487506205527, "grad_norm": 0.44406622648239136, "learning_rate": 8.521717524117592e-06, "loss": 0.3998, "step": 1971 }, { "epoch": 0.9789839483700149, "grad_norm": 0.4504345655441284, "learning_rate": 8.519666091409669e-06, "loss": 0.4242, "step": 1972 }, { "epoch": 0.9794803905345028, "grad_norm": 0.4428735077381134, "learning_rate": 8.517613483580893e-06, "loss": 0.3954, "step": 1973 }, { "epoch": 0.9799768326989906, "grad_norm": 0.45324358344078064, "learning_rate": 8.515559701316583e-06, "loss": 0.3942, "step": 1974 }, { "epoch": 0.9804732748634784, "grad_norm": 0.43344277143478394, "learning_rate": 8.513504745302432e-06, "loss": 0.406, "step": 1975 }, { "epoch": 0.9809697170279662, "grad_norm": 0.4407244026660919, "learning_rate": 8.51144861622454e-06, "loss": 0.3981, "step": 1976 }, { "epoch": 0.9814661591924541, "grad_norm": 0.4405611753463745, "learning_rate": 8.509391314769394e-06, "loss": 0.392, "step": 1977 }, { "epoch": 0.9819626013569419, "grad_norm": 0.4940897524356842, "learning_rate": 8.507332841623862e-06, "loss": 0.4264, "step": 1978 }, { "epoch": 0.9824590435214298, "grad_norm": 0.4433237910270691, "learning_rate": 8.505273197475224e-06, "loss": 0.3988, "step": 1979 }, { "epoch": 0.9829554856859176, "grad_norm": 0.4208258092403412, "learning_rate": 8.50321238301113e-06, "loss": 0.4009, "step": 1980 }, { "epoch": 0.9834519278504055, "grad_norm": 0.5699224472045898, "learning_rate": 8.501150398919634e-06, "loss": 0.4059, "step": 1981 }, { "epoch": 0.9839483700148932, "grad_norm": 0.49822568893432617, "learning_rate": 8.499087245889176e-06, "loss": 0.4371, "step": 1982 }, { "epoch": 0.9844448121793811, "grad_norm": 0.444363534450531, "learning_rate": 8.497022924608587e-06, "loss": 0.4463, "step": 1983 }, { "epoch": 0.9849412543438689, "grad_norm": 0.4948594868183136, "learning_rate": 8.494957435767086e-06, "loss": 0.3811, "step": 1984 }, { "epoch": 0.9854376965083568, "grad_norm": 0.49296411871910095, "learning_rate": 8.492890780054285e-06, "loss": 0.4173, "step": 1985 }, { "epoch": 0.9859341386728446, "grad_norm": 0.49038344621658325, "learning_rate": 8.490822958160186e-06, "loss": 0.396, "step": 1986 }, { "epoch": 0.9864305808373325, "grad_norm": 0.45643866062164307, "learning_rate": 8.488753970775176e-06, "loss": 0.4017, "step": 1987 }, { "epoch": 0.9869270230018203, "grad_norm": 0.5680702328681946, "learning_rate": 8.486683818590033e-06, "loss": 0.4184, "step": 1988 }, { "epoch": 0.9874234651663081, "grad_norm": 0.5189527273178101, "learning_rate": 8.484612502295926e-06, "loss": 0.4128, "step": 1989 }, { "epoch": 0.9879199073307959, "grad_norm": 0.4349716007709503, "learning_rate": 8.48254002258441e-06, "loss": 0.4075, "step": 1990 }, { "epoch": 0.9884163494952838, "grad_norm": 0.5217468738555908, "learning_rate": 8.480466380147435e-06, "loss": 0.4364, "step": 1991 }, { "epoch": 0.9889127916597716, "grad_norm": 0.4090358018875122, "learning_rate": 8.478391575677325e-06, "loss": 0.3787, "step": 1992 }, { "epoch": 0.9894092338242595, "grad_norm": 0.5228391885757446, "learning_rate": 8.476315609866807e-06, "loss": 0.4235, "step": 1993 }, { "epoch": 0.9899056759887473, "grad_norm": 0.5290006995201111, "learning_rate": 8.474238483408987e-06, "loss": 0.3986, "step": 1994 }, { "epoch": 0.9904021181532352, "grad_norm": 0.45557376742362976, "learning_rate": 8.472160196997364e-06, "loss": 0.4077, "step": 1995 }, { "epoch": 0.9908985603177229, "grad_norm": 0.48027968406677246, "learning_rate": 8.470080751325816e-06, "loss": 0.3695, "step": 1996 }, { "epoch": 0.9913950024822108, "grad_norm": 0.4616791307926178, "learning_rate": 8.468000147088619e-06, "loss": 0.4036, "step": 1997 }, { "epoch": 0.9918914446466987, "grad_norm": 0.5227168202400208, "learning_rate": 8.465918384980429e-06, "loss": 0.4072, "step": 1998 }, { "epoch": 0.9923878868111865, "grad_norm": 0.4550807774066925, "learning_rate": 8.463835465696286e-06, "loss": 0.3537, "step": 1999 }, { "epoch": 0.9928843289756744, "grad_norm": 0.5018197894096375, "learning_rate": 8.461751389931624e-06, "loss": 0.4011, "step": 2000 }, { "epoch": 0.9933807711401622, "grad_norm": 0.5709302425384521, "learning_rate": 8.459666158382257e-06, "loss": 0.4104, "step": 2001 }, { "epoch": 0.99387721330465, "grad_norm": 0.46812933683395386, "learning_rate": 8.457579771744391e-06, "loss": 0.4098, "step": 2002 }, { "epoch": 0.9943736554691378, "grad_norm": 0.5349106788635254, "learning_rate": 8.455492230714611e-06, "loss": 0.4062, "step": 2003 }, { "epoch": 0.9948700976336257, "grad_norm": 0.48608529567718506, "learning_rate": 8.453403535989888e-06, "loss": 0.3988, "step": 2004 }, { "epoch": 0.9953665397981135, "grad_norm": 0.5203148126602173, "learning_rate": 8.451313688267582e-06, "loss": 0.3988, "step": 2005 }, { "epoch": 0.9958629819626014, "grad_norm": 0.4495261609554291, "learning_rate": 8.44922268824544e-06, "loss": 0.3754, "step": 2006 }, { "epoch": 0.9963594241270892, "grad_norm": 0.42970016598701477, "learning_rate": 8.447130536621584e-06, "loss": 0.3977, "step": 2007 }, { "epoch": 0.9968558662915771, "grad_norm": 0.5146880745887756, "learning_rate": 8.44503723409453e-06, "loss": 0.3716, "step": 2008 }, { "epoch": 0.9973523084560648, "grad_norm": 0.5144175887107849, "learning_rate": 8.442942781363177e-06, "loss": 0.4094, "step": 2009 }, { "epoch": 0.9978487506205527, "grad_norm": 0.48175784945487976, "learning_rate": 8.440847179126802e-06, "loss": 0.4098, "step": 2010 }, { "epoch": 0.9983451927850405, "grad_norm": 0.5336799025535583, "learning_rate": 8.43875042808507e-06, "loss": 0.4166, "step": 2011 }, { "epoch": 0.9988416349495284, "grad_norm": 0.7069742679595947, "learning_rate": 8.43665252893803e-06, "loss": 0.4116, "step": 2012 }, { "epoch": 0.9993380771140162, "grad_norm": 0.48014312982559204, "learning_rate": 8.434553482386116e-06, "loss": 0.3985, "step": 2013 }, { "epoch": 0.9998345192785041, "grad_norm": 0.4675745666027069, "learning_rate": 8.432453289130139e-06, "loss": 0.3896, "step": 2014 }, { "epoch": 1.0003309614429918, "grad_norm": 1.2231364250183105, "learning_rate": 8.430351949871298e-06, "loss": 0.5822, "step": 2015 }, { "epoch": 1.0008274036074798, "grad_norm": 0.4703157842159271, "learning_rate": 8.42824946531117e-06, "loss": 0.3178, "step": 2016 }, { "epoch": 1.0013238457719675, "grad_norm": 0.5114287734031677, "learning_rate": 8.426145836151723e-06, "loss": 0.3721, "step": 2017 }, { "epoch": 1.0018202879364555, "grad_norm": 0.5079691410064697, "learning_rate": 8.424041063095298e-06, "loss": 0.4271, "step": 2018 }, { "epoch": 1.0023167301009432, "grad_norm": 0.42704883217811584, "learning_rate": 8.421935146844622e-06, "loss": 0.3466, "step": 2019 }, { "epoch": 1.002813172265431, "grad_norm": 0.585361897945404, "learning_rate": 8.419828088102804e-06, "loss": 0.4106, "step": 2020 }, { "epoch": 1.003309614429919, "grad_norm": 0.4692299962043762, "learning_rate": 8.417719887573334e-06, "loss": 0.3602, "step": 2021 }, { "epoch": 1.0038060565944067, "grad_norm": 0.5147997140884399, "learning_rate": 8.41561054596008e-06, "loss": 0.3844, "step": 2022 }, { "epoch": 1.0043024987588947, "grad_norm": 0.42856481671333313, "learning_rate": 8.413500063967296e-06, "loss": 0.3691, "step": 2023 }, { "epoch": 1.0047989409233824, "grad_norm": 0.47356683015823364, "learning_rate": 8.411388442299617e-06, "loss": 0.4031, "step": 2024 }, { "epoch": 1.0052953830878704, "grad_norm": 0.5266690254211426, "learning_rate": 8.40927568166205e-06, "loss": 0.3661, "step": 2025 }, { "epoch": 1.005791825252358, "grad_norm": 0.5531805753707886, "learning_rate": 8.407161782759995e-06, "loss": 0.4136, "step": 2026 }, { "epoch": 1.0062882674168458, "grad_norm": 0.4809936583042145, "learning_rate": 8.405046746299221e-06, "loss": 0.3772, "step": 2027 }, { "epoch": 1.0067847095813338, "grad_norm": 0.4996362030506134, "learning_rate": 8.402930572985884e-06, "loss": 0.3744, "step": 2028 }, { "epoch": 1.0072811517458216, "grad_norm": 0.4794630706310272, "learning_rate": 8.400813263526512e-06, "loss": 0.3389, "step": 2029 }, { "epoch": 1.0077775939103095, "grad_norm": 0.5237377285957336, "learning_rate": 8.398694818628023e-06, "loss": 0.3815, "step": 2030 }, { "epoch": 1.0082740360747973, "grad_norm": 0.44274216890335083, "learning_rate": 8.396575238997704e-06, "loss": 0.3322, "step": 2031 }, { "epoch": 1.0087704782392852, "grad_norm": 0.5766563415527344, "learning_rate": 8.394454525343227e-06, "loss": 0.3988, "step": 2032 }, { "epoch": 1.009266920403773, "grad_norm": 0.471641480922699, "learning_rate": 8.39233267837264e-06, "loss": 0.3715, "step": 2033 }, { "epoch": 1.0097633625682607, "grad_norm": 0.4939892292022705, "learning_rate": 8.390209698794371e-06, "loss": 0.3973, "step": 2034 }, { "epoch": 1.0102598047327487, "grad_norm": 0.4350561201572418, "learning_rate": 8.388085587317224e-06, "loss": 0.3408, "step": 2035 }, { "epoch": 1.0107562468972364, "grad_norm": 0.5029792189598083, "learning_rate": 8.38596034465038e-06, "loss": 0.3553, "step": 2036 }, { "epoch": 1.0112526890617244, "grad_norm": 0.49405038356781006, "learning_rate": 8.383833971503405e-06, "loss": 0.3305, "step": 2037 }, { "epoch": 1.0117491312262121, "grad_norm": 0.5190739631652832, "learning_rate": 8.381706468586234e-06, "loss": 0.3728, "step": 2038 }, { "epoch": 1.0122455733907, "grad_norm": 0.5235628485679626, "learning_rate": 8.379577836609183e-06, "loss": 0.3848, "step": 2039 }, { "epoch": 1.0127420155551878, "grad_norm": 0.5675264000892639, "learning_rate": 8.377448076282942e-06, "loss": 0.4219, "step": 2040 }, { "epoch": 1.0132384577196756, "grad_norm": 0.5550568103790283, "learning_rate": 8.375317188318586e-06, "loss": 0.3696, "step": 2041 }, { "epoch": 1.0137348998841635, "grad_norm": 0.5178881287574768, "learning_rate": 8.373185173427553e-06, "loss": 0.387, "step": 2042 }, { "epoch": 1.0142313420486513, "grad_norm": 0.44530072808265686, "learning_rate": 8.371052032321672e-06, "loss": 0.3458, "step": 2043 }, { "epoch": 1.0147277842131393, "grad_norm": 0.5252585411071777, "learning_rate": 8.368917765713136e-06, "loss": 0.3826, "step": 2044 }, { "epoch": 1.015224226377627, "grad_norm": 0.5486982464790344, "learning_rate": 8.36678237431452e-06, "loss": 0.3881, "step": 2045 }, { "epoch": 1.0157206685421147, "grad_norm": 0.49466100335121155, "learning_rate": 8.364645858838773e-06, "loss": 0.345, "step": 2046 }, { "epoch": 1.0162171107066027, "grad_norm": 0.4710092544555664, "learning_rate": 8.362508219999222e-06, "loss": 0.4243, "step": 2047 }, { "epoch": 1.0167135528710904, "grad_norm": 0.5754542946815491, "learning_rate": 8.36036945850956e-06, "loss": 0.3806, "step": 2048 }, { "epoch": 1.0172099950355784, "grad_norm": 0.40360188484191895, "learning_rate": 8.35822957508387e-06, "loss": 0.3174, "step": 2049 }, { "epoch": 1.0177064372000661, "grad_norm": 0.5208351612091064, "learning_rate": 8.356088570436593e-06, "loss": 0.4517, "step": 2050 }, { "epoch": 1.0182028793645541, "grad_norm": 0.463577002286911, "learning_rate": 8.353946445282558e-06, "loss": 0.3491, "step": 2051 }, { "epoch": 1.0186993215290419, "grad_norm": 0.5190997123718262, "learning_rate": 8.35180320033696e-06, "loss": 0.3783, "step": 2052 }, { "epoch": 1.0191957636935296, "grad_norm": 0.4659409821033478, "learning_rate": 8.349658836315369e-06, "loss": 0.3704, "step": 2053 }, { "epoch": 1.0196922058580176, "grad_norm": 0.4428440034389496, "learning_rate": 8.347513353933733e-06, "loss": 0.3598, "step": 2054 }, { "epoch": 1.0201886480225053, "grad_norm": 0.5023168325424194, "learning_rate": 8.345366753908366e-06, "loss": 0.3889, "step": 2055 }, { "epoch": 1.0206850901869933, "grad_norm": 0.44394657015800476, "learning_rate": 8.343219036955965e-06, "loss": 0.3475, "step": 2056 }, { "epoch": 1.021181532351481, "grad_norm": 0.4853634536266327, "learning_rate": 8.34107020379359e-06, "loss": 0.4096, "step": 2057 }, { "epoch": 1.021677974515969, "grad_norm": 0.4530080258846283, "learning_rate": 8.338920255138679e-06, "loss": 0.3584, "step": 2058 }, { "epoch": 1.0221744166804567, "grad_norm": 0.4509251117706299, "learning_rate": 8.336769191709041e-06, "loss": 0.3909, "step": 2059 }, { "epoch": 1.0226708588449445, "grad_norm": 0.462624728679657, "learning_rate": 8.334617014222858e-06, "loss": 0.3775, "step": 2060 }, { "epoch": 1.0231673010094324, "grad_norm": 0.43549633026123047, "learning_rate": 8.332463723398684e-06, "loss": 0.3777, "step": 2061 }, { "epoch": 1.0236637431739202, "grad_norm": 0.48341792821884155, "learning_rate": 8.330309319955446e-06, "loss": 0.3533, "step": 2062 }, { "epoch": 1.0241601853384081, "grad_norm": 0.49403509497642517, "learning_rate": 8.328153804612437e-06, "loss": 0.3762, "step": 2063 }, { "epoch": 1.0246566275028959, "grad_norm": 0.43605130910873413, "learning_rate": 8.325997178089329e-06, "loss": 0.3642, "step": 2064 }, { "epoch": 1.0251530696673838, "grad_norm": 0.4686425030231476, "learning_rate": 8.323839441106156e-06, "loss": 0.3568, "step": 2065 }, { "epoch": 1.0256495118318716, "grad_norm": 0.47518813610076904, "learning_rate": 8.321680594383332e-06, "loss": 0.3841, "step": 2066 }, { "epoch": 1.0261459539963593, "grad_norm": 0.3993127644062042, "learning_rate": 8.319520638641636e-06, "loss": 0.3836, "step": 2067 }, { "epoch": 1.0266423961608473, "grad_norm": 0.4858883321285248, "learning_rate": 8.317359574602217e-06, "loss": 0.4229, "step": 2068 }, { "epoch": 1.027138838325335, "grad_norm": 0.44590088725090027, "learning_rate": 8.315197402986599e-06, "loss": 0.358, "step": 2069 }, { "epoch": 1.027635280489823, "grad_norm": 0.45984697341918945, "learning_rate": 8.313034124516668e-06, "loss": 0.3657, "step": 2070 }, { "epoch": 1.0281317226543107, "grad_norm": 0.42787933349609375, "learning_rate": 8.310869739914688e-06, "loss": 0.4032, "step": 2071 }, { "epoch": 1.0286281648187987, "grad_norm": 0.4354044795036316, "learning_rate": 8.308704249903286e-06, "loss": 0.3966, "step": 2072 }, { "epoch": 1.0291246069832864, "grad_norm": 0.48064377903938293, "learning_rate": 8.30653765520546e-06, "loss": 0.3776, "step": 2073 }, { "epoch": 1.0296210491477742, "grad_norm": 0.43805134296417236, "learning_rate": 8.304369956544576e-06, "loss": 0.3336, "step": 2074 }, { "epoch": 1.0301174913122622, "grad_norm": 0.43764588236808777, "learning_rate": 8.302201154644373e-06, "loss": 0.4054, "step": 2075 }, { "epoch": 1.03061393347675, "grad_norm": 0.4154440760612488, "learning_rate": 8.300031250228954e-06, "loss": 0.3328, "step": 2076 }, { "epoch": 1.0311103756412379, "grad_norm": 0.42976880073547363, "learning_rate": 8.29786024402279e-06, "loss": 0.3893, "step": 2077 }, { "epoch": 1.0316068178057256, "grad_norm": 0.4413272440433502, "learning_rate": 8.295688136750721e-06, "loss": 0.3424, "step": 2078 }, { "epoch": 1.0321032599702136, "grad_norm": 0.4166176915168762, "learning_rate": 8.293514929137954e-06, "loss": 0.3075, "step": 2079 }, { "epoch": 1.0325997021347013, "grad_norm": 0.44595396518707275, "learning_rate": 8.291340621910066e-06, "loss": 0.3641, "step": 2080 }, { "epoch": 1.033096144299189, "grad_norm": 0.4925845265388489, "learning_rate": 8.289165215792998e-06, "loss": 0.4309, "step": 2081 }, { "epoch": 1.033592586463677, "grad_norm": 0.4509740173816681, "learning_rate": 8.28698871151306e-06, "loss": 0.3758, "step": 2082 }, { "epoch": 1.0340890286281648, "grad_norm": 0.4699709117412567, "learning_rate": 8.284811109796926e-06, "loss": 0.3742, "step": 2083 }, { "epoch": 1.0345854707926527, "grad_norm": 0.45973485708236694, "learning_rate": 8.282632411371639e-06, "loss": 0.3912, "step": 2084 }, { "epoch": 1.0350819129571405, "grad_norm": 0.5867223739624023, "learning_rate": 8.280452616964604e-06, "loss": 0.4121, "step": 2085 }, { "epoch": 1.0355783551216284, "grad_norm": 0.3768344223499298, "learning_rate": 8.278271727303602e-06, "loss": 0.3221, "step": 2086 }, { "epoch": 1.0360747972861162, "grad_norm": 0.477211594581604, "learning_rate": 8.276089743116765e-06, "loss": 0.3741, "step": 2087 }, { "epoch": 1.036571239450604, "grad_norm": 0.5089483261108398, "learning_rate": 8.273906665132605e-06, "loss": 0.4005, "step": 2088 }, { "epoch": 1.0370676816150919, "grad_norm": 0.418852299451828, "learning_rate": 8.271722494079987e-06, "loss": 0.3281, "step": 2089 }, { "epoch": 1.0375641237795796, "grad_norm": 0.4801664352416992, "learning_rate": 8.26953723068815e-06, "loss": 0.4099, "step": 2090 }, { "epoch": 1.0380605659440676, "grad_norm": 0.49725309014320374, "learning_rate": 8.267350875686693e-06, "loss": 0.4027, "step": 2091 }, { "epoch": 1.0385570081085553, "grad_norm": 0.4489525258541107, "learning_rate": 8.26516342980558e-06, "loss": 0.3828, "step": 2092 }, { "epoch": 1.0390534502730433, "grad_norm": 0.4200659990310669, "learning_rate": 8.26297489377514e-06, "loss": 0.3301, "step": 2093 }, { "epoch": 1.039549892437531, "grad_norm": 0.5314984917640686, "learning_rate": 8.260785268326066e-06, "loss": 0.429, "step": 2094 }, { "epoch": 1.0400463346020188, "grad_norm": 0.4351838231086731, "learning_rate": 8.258594554189415e-06, "loss": 0.3485, "step": 2095 }, { "epoch": 1.0405427767665067, "grad_norm": 0.44463178515434265, "learning_rate": 8.256402752096603e-06, "loss": 0.3971, "step": 2096 }, { "epoch": 1.0410392189309945, "grad_norm": 0.40592196583747864, "learning_rate": 8.25420986277942e-06, "loss": 0.3137, "step": 2097 }, { "epoch": 1.0415356610954825, "grad_norm": 0.49466800689697266, "learning_rate": 8.252015886970005e-06, "loss": 0.4012, "step": 2098 }, { "epoch": 1.0420321032599702, "grad_norm": 0.5810598134994507, "learning_rate": 8.249820825400871e-06, "loss": 0.4168, "step": 2099 }, { "epoch": 1.042528545424458, "grad_norm": 0.4754047095775604, "learning_rate": 8.24762467880489e-06, "loss": 0.4017, "step": 2100 }, { "epoch": 1.043024987588946, "grad_norm": 0.46900513768196106, "learning_rate": 8.245427447915293e-06, "loss": 0.2779, "step": 2101 }, { "epoch": 1.0435214297534336, "grad_norm": 0.5667792558670044, "learning_rate": 8.243229133465677e-06, "loss": 0.4319, "step": 2102 }, { "epoch": 1.0440178719179216, "grad_norm": 0.4256795048713684, "learning_rate": 8.241029736190001e-06, "loss": 0.363, "step": 2103 }, { "epoch": 1.0445143140824094, "grad_norm": 0.4931028187274933, "learning_rate": 8.23882925682258e-06, "loss": 0.3781, "step": 2104 }, { "epoch": 1.0450107562468973, "grad_norm": 0.4173921048641205, "learning_rate": 8.236627696098099e-06, "loss": 0.2883, "step": 2105 }, { "epoch": 1.045507198411385, "grad_norm": 0.4408971965312958, "learning_rate": 8.234425054751595e-06, "loss": 0.407, "step": 2106 }, { "epoch": 1.0460036405758728, "grad_norm": 0.5244305729866028, "learning_rate": 8.232221333518474e-06, "loss": 0.4143, "step": 2107 }, { "epoch": 1.0465000827403608, "grad_norm": 0.44997304677963257, "learning_rate": 8.230016533134495e-06, "loss": 0.3996, "step": 2108 }, { "epoch": 1.0469965249048485, "grad_norm": 0.4167431592941284, "learning_rate": 8.227810654335784e-06, "loss": 0.3638, "step": 2109 }, { "epoch": 1.0474929670693365, "grad_norm": 0.5004698634147644, "learning_rate": 8.225603697858822e-06, "loss": 0.3487, "step": 2110 }, { "epoch": 1.0479894092338242, "grad_norm": 0.4114759862422943, "learning_rate": 8.223395664440451e-06, "loss": 0.3565, "step": 2111 }, { "epoch": 1.0484858513983122, "grad_norm": 0.5194129943847656, "learning_rate": 8.221186554817877e-06, "loss": 0.4145, "step": 2112 }, { "epoch": 1.0489822935628, "grad_norm": 0.43837982416152954, "learning_rate": 8.218976369728658e-06, "loss": 0.3166, "step": 2113 }, { "epoch": 1.0494787357272877, "grad_norm": 0.4437297582626343, "learning_rate": 8.216765109910716e-06, "loss": 0.3401, "step": 2114 }, { "epoch": 1.0499751778917756, "grad_norm": 0.49138307571411133, "learning_rate": 8.21455277610233e-06, "loss": 0.3482, "step": 2115 }, { "epoch": 1.0504716200562634, "grad_norm": 0.4445089101791382, "learning_rate": 8.212339369042139e-06, "loss": 0.3735, "step": 2116 }, { "epoch": 1.0509680622207513, "grad_norm": 0.5363616943359375, "learning_rate": 8.21012488946914e-06, "loss": 0.359, "step": 2117 }, { "epoch": 1.051464504385239, "grad_norm": 0.4741666615009308, "learning_rate": 8.207909338122687e-06, "loss": 0.348, "step": 2118 }, { "epoch": 1.051960946549727, "grad_norm": 0.4201664626598358, "learning_rate": 8.205692715742491e-06, "loss": 0.3499, "step": 2119 }, { "epoch": 1.0524573887142148, "grad_norm": 0.5436772704124451, "learning_rate": 8.203475023068624e-06, "loss": 0.3564, "step": 2120 }, { "epoch": 1.0529538308787025, "grad_norm": 0.4401492774486542, "learning_rate": 8.201256260841513e-06, "loss": 0.3912, "step": 2121 }, { "epoch": 1.0534502730431905, "grad_norm": 0.474699467420578, "learning_rate": 8.199036429801942e-06, "loss": 0.3843, "step": 2122 }, { "epoch": 1.0539467152076782, "grad_norm": 0.5225339531898499, "learning_rate": 8.19681553069105e-06, "loss": 0.3712, "step": 2123 }, { "epoch": 1.0544431573721662, "grad_norm": 0.4305037260055542, "learning_rate": 8.194593564250337e-06, "loss": 0.3703, "step": 2124 }, { "epoch": 1.054939599536654, "grad_norm": 0.5216330289840698, "learning_rate": 8.192370531221659e-06, "loss": 0.383, "step": 2125 }, { "epoch": 1.055436041701142, "grad_norm": 0.4917902946472168, "learning_rate": 8.190146432347223e-06, "loss": 0.346, "step": 2126 }, { "epoch": 1.0559324838656297, "grad_norm": 0.4213789701461792, "learning_rate": 8.187921268369598e-06, "loss": 0.3412, "step": 2127 }, { "epoch": 1.0564289260301174, "grad_norm": 0.49304908514022827, "learning_rate": 8.185695040031702e-06, "loss": 0.3419, "step": 2128 }, { "epoch": 1.0569253681946054, "grad_norm": 0.5795585513114929, "learning_rate": 8.183467748076817e-06, "loss": 0.3976, "step": 2129 }, { "epoch": 1.057421810359093, "grad_norm": 0.47409871220588684, "learning_rate": 8.181239393248572e-06, "loss": 0.3937, "step": 2130 }, { "epoch": 1.057918252523581, "grad_norm": 0.5262275338172913, "learning_rate": 8.179009976290955e-06, "loss": 0.377, "step": 2131 }, { "epoch": 1.0584146946880688, "grad_norm": 0.5343154668807983, "learning_rate": 8.176779497948308e-06, "loss": 0.3782, "step": 2132 }, { "epoch": 1.0589111368525568, "grad_norm": 0.5253915786743164, "learning_rate": 8.174547958965325e-06, "loss": 0.386, "step": 2133 }, { "epoch": 1.0594075790170445, "grad_norm": 0.44773003458976746, "learning_rate": 8.17231536008706e-06, "loss": 0.3762, "step": 2134 }, { "epoch": 1.0599040211815323, "grad_norm": 0.477754145860672, "learning_rate": 8.170081702058914e-06, "loss": 0.4, "step": 2135 }, { "epoch": 1.0604004633460202, "grad_norm": 0.4662474989891052, "learning_rate": 8.167846985626646e-06, "loss": 0.3442, "step": 2136 }, { "epoch": 1.060896905510508, "grad_norm": 0.4979279041290283, "learning_rate": 8.165611211536365e-06, "loss": 0.3971, "step": 2137 }, { "epoch": 1.061393347674996, "grad_norm": 0.44392815232276917, "learning_rate": 8.16337438053454e-06, "loss": 0.4322, "step": 2138 }, { "epoch": 1.0618897898394837, "grad_norm": 0.48140275478363037, "learning_rate": 8.161136493367983e-06, "loss": 0.397, "step": 2139 }, { "epoch": 1.0623862320039716, "grad_norm": 0.5236449241638184, "learning_rate": 8.158897550783868e-06, "loss": 0.3878, "step": 2140 }, { "epoch": 1.0628826741684594, "grad_norm": 0.3892556428909302, "learning_rate": 8.156657553529712e-06, "loss": 0.3476, "step": 2141 }, { "epoch": 1.0633791163329471, "grad_norm": 0.5381006598472595, "learning_rate": 8.154416502353394e-06, "loss": 0.4309, "step": 2142 }, { "epoch": 1.063875558497435, "grad_norm": 0.4726612865924835, "learning_rate": 8.152174398003138e-06, "loss": 0.3443, "step": 2143 }, { "epoch": 1.0643720006619228, "grad_norm": 0.4414731562137604, "learning_rate": 8.149931241227522e-06, "loss": 0.3763, "step": 2144 }, { "epoch": 1.0648684428264108, "grad_norm": 0.5111587047576904, "learning_rate": 8.147687032775473e-06, "loss": 0.4103, "step": 2145 }, { "epoch": 1.0653648849908985, "grad_norm": 0.4189874827861786, "learning_rate": 8.145441773396276e-06, "loss": 0.374, "step": 2146 }, { "epoch": 1.0658613271553863, "grad_norm": 0.5339285731315613, "learning_rate": 8.143195463839557e-06, "loss": 0.3922, "step": 2147 }, { "epoch": 1.0663577693198742, "grad_norm": 0.5404478311538696, "learning_rate": 8.140948104855301e-06, "loss": 0.3927, "step": 2148 }, { "epoch": 1.066854211484362, "grad_norm": 0.44829270243644714, "learning_rate": 8.13869969719384e-06, "loss": 0.3382, "step": 2149 }, { "epoch": 1.06735065364885, "grad_norm": 0.6064449548721313, "learning_rate": 8.136450241605854e-06, "loss": 0.376, "step": 2150 }, { "epoch": 1.0678470958133377, "grad_norm": 0.4999914765357971, "learning_rate": 8.134199738842376e-06, "loss": 0.3921, "step": 2151 }, { "epoch": 1.0683435379778257, "grad_norm": 0.4825313687324524, "learning_rate": 8.131948189654789e-06, "loss": 0.3726, "step": 2152 }, { "epoch": 1.0688399801423134, "grad_norm": 0.4793812334537506, "learning_rate": 8.129695594794822e-06, "loss": 0.3632, "step": 2153 }, { "epoch": 1.0693364223068014, "grad_norm": 0.5007010102272034, "learning_rate": 8.127441955014557e-06, "loss": 0.3959, "step": 2154 }, { "epoch": 1.0698328644712891, "grad_norm": 0.48736804723739624, "learning_rate": 8.12518727106642e-06, "loss": 0.3901, "step": 2155 }, { "epoch": 1.0703293066357769, "grad_norm": 0.5061100125312805, "learning_rate": 8.122931543703194e-06, "loss": 0.3606, "step": 2156 }, { "epoch": 1.0708257488002648, "grad_norm": 0.47136133909225464, "learning_rate": 8.120674773678e-06, "loss": 0.3097, "step": 2157 }, { "epoch": 1.0713221909647526, "grad_norm": 0.5899022817611694, "learning_rate": 8.118416961744318e-06, "loss": 0.4065, "step": 2158 }, { "epoch": 1.0718186331292405, "grad_norm": 0.5074941515922546, "learning_rate": 8.116158108655964e-06, "loss": 0.3714, "step": 2159 }, { "epoch": 1.0723150752937283, "grad_norm": 0.5060569047927856, "learning_rate": 8.113898215167109e-06, "loss": 0.3906, "step": 2160 }, { "epoch": 1.072811517458216, "grad_norm": 0.48275718092918396, "learning_rate": 8.111637282032273e-06, "loss": 0.3607, "step": 2161 }, { "epoch": 1.073307959622704, "grad_norm": 0.510860800743103, "learning_rate": 8.109375310006317e-06, "loss": 0.3754, "step": 2162 }, { "epoch": 1.0738044017871917, "grad_norm": 0.44155192375183105, "learning_rate": 8.107112299844453e-06, "loss": 0.395, "step": 2163 }, { "epoch": 1.0743008439516797, "grad_norm": 0.4252348244190216, "learning_rate": 8.10484825230224e-06, "loss": 0.3331, "step": 2164 }, { "epoch": 1.0747972861161674, "grad_norm": 0.5054444074630737, "learning_rate": 8.102583168135579e-06, "loss": 0.3559, "step": 2165 }, { "epoch": 1.0752937282806554, "grad_norm": 0.5058158040046692, "learning_rate": 8.100317048100722e-06, "loss": 0.3525, "step": 2166 }, { "epoch": 1.0757901704451431, "grad_norm": 0.411449134349823, "learning_rate": 8.098049892954264e-06, "loss": 0.3076, "step": 2167 }, { "epoch": 1.0762866126096309, "grad_norm": 0.5852285623550415, "learning_rate": 8.095781703453149e-06, "loss": 0.408, "step": 2168 }, { "epoch": 1.0767830547741188, "grad_norm": 0.5020864605903625, "learning_rate": 8.093512480354662e-06, "loss": 0.4277, "step": 2169 }, { "epoch": 1.0772794969386066, "grad_norm": 0.411857008934021, "learning_rate": 8.091242224416434e-06, "loss": 0.3551, "step": 2170 }, { "epoch": 1.0777759391030945, "grad_norm": 0.49334776401519775, "learning_rate": 8.08897093639644e-06, "loss": 0.3811, "step": 2171 }, { "epoch": 1.0782723812675823, "grad_norm": 0.46021562814712524, "learning_rate": 8.086698617053009e-06, "loss": 0.3543, "step": 2172 }, { "epoch": 1.0787688234320703, "grad_norm": 0.4459286630153656, "learning_rate": 8.084425267144798e-06, "loss": 0.364, "step": 2173 }, { "epoch": 1.079265265596558, "grad_norm": 0.4140971601009369, "learning_rate": 8.08215088743082e-06, "loss": 0.3483, "step": 2174 }, { "epoch": 1.0797617077610457, "grad_norm": 0.4309522211551666, "learning_rate": 8.079875478670431e-06, "loss": 0.3789, "step": 2175 }, { "epoch": 1.0802581499255337, "grad_norm": 0.4741068482398987, "learning_rate": 8.077599041623325e-06, "loss": 0.3449, "step": 2176 }, { "epoch": 1.0807545920900214, "grad_norm": 0.47122737765312195, "learning_rate": 8.075321577049545e-06, "loss": 0.3518, "step": 2177 }, { "epoch": 1.0812510342545094, "grad_norm": 0.46342340111732483, "learning_rate": 8.07304308570947e-06, "loss": 0.3684, "step": 2178 }, { "epoch": 1.0817474764189972, "grad_norm": 0.47760099172592163, "learning_rate": 8.07076356836383e-06, "loss": 0.408, "step": 2179 }, { "epoch": 1.0822439185834851, "grad_norm": 0.4274387061595917, "learning_rate": 8.068483025773694e-06, "loss": 0.2874, "step": 2180 }, { "epoch": 1.0827403607479729, "grad_norm": 0.5520479679107666, "learning_rate": 8.066201458700474e-06, "loss": 0.385, "step": 2181 }, { "epoch": 1.0832368029124606, "grad_norm": 0.4773663282394409, "learning_rate": 8.06391886790592e-06, "loss": 0.3682, "step": 2182 }, { "epoch": 1.0837332450769486, "grad_norm": 0.4511011838912964, "learning_rate": 8.061635254152129e-06, "loss": 0.4252, "step": 2183 }, { "epoch": 1.0842296872414363, "grad_norm": 0.5152929425239563, "learning_rate": 8.059350618201538e-06, "loss": 0.4017, "step": 2184 }, { "epoch": 1.0847261294059243, "grad_norm": 0.5075392723083496, "learning_rate": 8.057064960816924e-06, "loss": 0.3896, "step": 2185 }, { "epoch": 1.085222571570412, "grad_norm": 0.45015016198158264, "learning_rate": 8.054778282761405e-06, "loss": 0.351, "step": 2186 }, { "epoch": 1.0857190137349, "grad_norm": 0.4855548143386841, "learning_rate": 8.052490584798442e-06, "loss": 0.3528, "step": 2187 }, { "epoch": 1.0862154558993877, "grad_norm": 0.4736725986003876, "learning_rate": 8.050201867691836e-06, "loss": 0.3875, "step": 2188 }, { "epoch": 1.0867118980638755, "grad_norm": 0.5113661289215088, "learning_rate": 8.047912132205725e-06, "loss": 0.4054, "step": 2189 }, { "epoch": 1.0872083402283634, "grad_norm": 0.47401928901672363, "learning_rate": 8.045621379104592e-06, "loss": 0.3886, "step": 2190 }, { "epoch": 1.0877047823928512, "grad_norm": 0.4730497896671295, "learning_rate": 8.043329609153254e-06, "loss": 0.393, "step": 2191 }, { "epoch": 1.0882012245573391, "grad_norm": 0.4214080572128296, "learning_rate": 8.041036823116874e-06, "loss": 0.3617, "step": 2192 }, { "epoch": 1.0886976667218269, "grad_norm": 0.4551169276237488, "learning_rate": 8.038743021760948e-06, "loss": 0.369, "step": 2193 }, { "epoch": 1.0891941088863146, "grad_norm": 0.4199565351009369, "learning_rate": 8.036448205851316e-06, "loss": 0.3715, "step": 2194 }, { "epoch": 1.0896905510508026, "grad_norm": 0.4544364809989929, "learning_rate": 8.034152376154156e-06, "loss": 0.3505, "step": 2195 }, { "epoch": 1.0901869932152903, "grad_norm": 0.5132949352264404, "learning_rate": 8.031855533435979e-06, "loss": 0.4401, "step": 2196 }, { "epoch": 1.0906834353797783, "grad_norm": 0.40964001417160034, "learning_rate": 8.029557678463642e-06, "loss": 0.338, "step": 2197 }, { "epoch": 1.091179877544266, "grad_norm": 0.44715890288352966, "learning_rate": 8.027258812004335e-06, "loss": 0.3532, "step": 2198 }, { "epoch": 1.091676319708754, "grad_norm": 0.4586218297481537, "learning_rate": 8.024958934825587e-06, "loss": 0.3346, "step": 2199 }, { "epoch": 1.0921727618732417, "grad_norm": 0.4592781662940979, "learning_rate": 8.022658047695264e-06, "loss": 0.3888, "step": 2200 }, { "epoch": 1.0926692040377297, "grad_norm": 0.43645840883255005, "learning_rate": 8.020356151381569e-06, "loss": 0.3741, "step": 2201 }, { "epoch": 1.0931656462022175, "grad_norm": 0.44240838289260864, "learning_rate": 8.018053246653047e-06, "loss": 0.3719, "step": 2202 }, { "epoch": 1.0936620883667052, "grad_norm": 0.4429606795310974, "learning_rate": 8.015749334278569e-06, "loss": 0.3313, "step": 2203 }, { "epoch": 1.0941585305311932, "grad_norm": 0.5266910791397095, "learning_rate": 8.013444415027352e-06, "loss": 0.426, "step": 2204 }, { "epoch": 1.094654972695681, "grad_norm": 0.4608582854270935, "learning_rate": 8.011138489668948e-06, "loss": 0.3729, "step": 2205 }, { "epoch": 1.0951514148601689, "grad_norm": 0.4503844976425171, "learning_rate": 8.008831558973237e-06, "loss": 0.3523, "step": 2206 }, { "epoch": 1.0956478570246566, "grad_norm": 0.46421998739242554, "learning_rate": 8.006523623710449e-06, "loss": 0.3603, "step": 2207 }, { "epoch": 1.0961442991891444, "grad_norm": 0.4284246861934662, "learning_rate": 8.004214684651133e-06, "loss": 0.3704, "step": 2208 }, { "epoch": 1.0966407413536323, "grad_norm": 0.43185022473335266, "learning_rate": 8.001904742566183e-06, "loss": 0.3534, "step": 2209 }, { "epoch": 1.09713718351812, "grad_norm": 0.48349782824516296, "learning_rate": 7.999593798226827e-06, "loss": 0.4097, "step": 2210 }, { "epoch": 1.097633625682608, "grad_norm": 0.4026587903499603, "learning_rate": 7.997281852404629e-06, "loss": 0.3107, "step": 2211 }, { "epoch": 1.0981300678470958, "grad_norm": 0.5209432244300842, "learning_rate": 7.994968905871479e-06, "loss": 0.3994, "step": 2212 }, { "epoch": 1.0986265100115837, "grad_norm": 0.40025973320007324, "learning_rate": 7.992654959399611e-06, "loss": 0.326, "step": 2213 }, { "epoch": 1.0991229521760715, "grad_norm": 0.4958815276622772, "learning_rate": 7.990340013761587e-06, "loss": 0.4332, "step": 2214 }, { "epoch": 1.0996193943405594, "grad_norm": 0.4174710214138031, "learning_rate": 7.988024069730306e-06, "loss": 0.3319, "step": 2215 }, { "epoch": 1.1001158365050472, "grad_norm": 0.48667672276496887, "learning_rate": 7.985707128079e-06, "loss": 0.3934, "step": 2216 }, { "epoch": 1.100612278669535, "grad_norm": 0.4292362332344055, "learning_rate": 7.983389189581227e-06, "loss": 0.374, "step": 2217 }, { "epoch": 1.101108720834023, "grad_norm": 0.528611421585083, "learning_rate": 7.98107025501089e-06, "loss": 0.3314, "step": 2218 }, { "epoch": 1.1016051629985106, "grad_norm": 0.4795231223106384, "learning_rate": 7.978750325142217e-06, "loss": 0.3569, "step": 2219 }, { "epoch": 1.1021016051629986, "grad_norm": 0.47868427634239197, "learning_rate": 7.976429400749766e-06, "loss": 0.3787, "step": 2220 }, { "epoch": 1.1025980473274863, "grad_norm": 0.5478906035423279, "learning_rate": 7.974107482608434e-06, "loss": 0.4121, "step": 2221 }, { "epoch": 1.103094489491974, "grad_norm": 0.41643279790878296, "learning_rate": 7.971784571493446e-06, "loss": 0.3646, "step": 2222 }, { "epoch": 1.103590931656462, "grad_norm": 0.38744989037513733, "learning_rate": 7.969460668180358e-06, "loss": 0.3169, "step": 2223 }, { "epoch": 1.1040873738209498, "grad_norm": 0.5233725905418396, "learning_rate": 7.967135773445059e-06, "loss": 0.4267, "step": 2224 }, { "epoch": 1.1045838159854378, "grad_norm": 0.4897370934486389, "learning_rate": 7.964809888063765e-06, "loss": 0.3793, "step": 2225 }, { "epoch": 1.1050802581499255, "grad_norm": 0.4520472586154938, "learning_rate": 7.962483012813029e-06, "loss": 0.3651, "step": 2226 }, { "epoch": 1.1055767003144135, "grad_norm": 0.4306524991989136, "learning_rate": 7.960155148469733e-06, "loss": 0.3237, "step": 2227 }, { "epoch": 1.1060731424789012, "grad_norm": 0.5460237264633179, "learning_rate": 7.957826295811085e-06, "loss": 0.4068, "step": 2228 }, { "epoch": 1.106569584643389, "grad_norm": 0.46652451157569885, "learning_rate": 7.955496455614624e-06, "loss": 0.3784, "step": 2229 }, { "epoch": 1.107066026807877, "grad_norm": 0.47759974002838135, "learning_rate": 7.953165628658224e-06, "loss": 0.3555, "step": 2230 }, { "epoch": 1.1075624689723647, "grad_norm": 0.3979974687099457, "learning_rate": 7.950833815720083e-06, "loss": 0.368, "step": 2231 }, { "epoch": 1.1080589111368526, "grad_norm": 0.4611203968524933, "learning_rate": 7.948501017578728e-06, "loss": 0.3696, "step": 2232 }, { "epoch": 1.1085553533013404, "grad_norm": 0.4669390916824341, "learning_rate": 7.946167235013023e-06, "loss": 0.3675, "step": 2233 }, { "epoch": 1.1090517954658283, "grad_norm": 0.466389000415802, "learning_rate": 7.94383246880215e-06, "loss": 0.4124, "step": 2234 }, { "epoch": 1.109548237630316, "grad_norm": 0.4111332297325134, "learning_rate": 7.941496719725622e-06, "loss": 0.3501, "step": 2235 }, { "epoch": 1.1100446797948038, "grad_norm": 0.4854249656200409, "learning_rate": 7.939159988563286e-06, "loss": 0.4059, "step": 2236 }, { "epoch": 1.1105411219592918, "grad_norm": 0.4296104311943054, "learning_rate": 7.936822276095312e-06, "loss": 0.3655, "step": 2237 }, { "epoch": 1.1110375641237795, "grad_norm": 0.4432765245437622, "learning_rate": 7.934483583102197e-06, "loss": 0.3663, "step": 2238 }, { "epoch": 1.1115340062882675, "grad_norm": 0.4593694806098938, "learning_rate": 7.932143910364771e-06, "loss": 0.3903, "step": 2239 }, { "epoch": 1.1120304484527552, "grad_norm": 0.3995046615600586, "learning_rate": 7.929803258664182e-06, "loss": 0.3424, "step": 2240 }, { "epoch": 1.1125268906172432, "grad_norm": 0.48543405532836914, "learning_rate": 7.927461628781915e-06, "loss": 0.3834, "step": 2241 }, { "epoch": 1.113023332781731, "grad_norm": 0.4102519750595093, "learning_rate": 7.925119021499771e-06, "loss": 0.3842, "step": 2242 }, { "epoch": 1.1135197749462187, "grad_norm": 0.3855830430984497, "learning_rate": 7.92277543759989e-06, "loss": 0.3849, "step": 2243 }, { "epoch": 1.1140162171107066, "grad_norm": 0.4527967572212219, "learning_rate": 7.920430877864725e-06, "loss": 0.4215, "step": 2244 }, { "epoch": 1.1145126592751944, "grad_norm": 0.42437201738357544, "learning_rate": 7.918085343077062e-06, "loss": 0.3994, "step": 2245 }, { "epoch": 1.1150091014396823, "grad_norm": 0.46794918179512024, "learning_rate": 7.915738834020014e-06, "loss": 0.3776, "step": 2246 }, { "epoch": 1.11550554360417, "grad_norm": 0.49205613136291504, "learning_rate": 7.913391351477013e-06, "loss": 0.3865, "step": 2247 }, { "epoch": 1.116001985768658, "grad_norm": 0.4583798050880432, "learning_rate": 7.911042896231822e-06, "loss": 0.3641, "step": 2248 }, { "epoch": 1.1164984279331458, "grad_norm": 0.492125540971756, "learning_rate": 7.908693469068525e-06, "loss": 0.3045, "step": 2249 }, { "epoch": 1.1169948700976335, "grad_norm": 0.48693758249282837, "learning_rate": 7.906343070771534e-06, "loss": 0.412, "step": 2250 }, { "epoch": 1.1174913122621215, "grad_norm": 0.37780439853668213, "learning_rate": 7.903991702125583e-06, "loss": 0.3165, "step": 2251 }, { "epoch": 1.1179877544266092, "grad_norm": 0.5138712525367737, "learning_rate": 7.901639363915724e-06, "loss": 0.4407, "step": 2252 }, { "epoch": 1.1184841965910972, "grad_norm": 0.429067462682724, "learning_rate": 7.899286056927347e-06, "loss": 0.3584, "step": 2253 }, { "epoch": 1.118980638755585, "grad_norm": 0.44221264123916626, "learning_rate": 7.896931781946153e-06, "loss": 0.3712, "step": 2254 }, { "epoch": 1.1194770809200727, "grad_norm": 0.4297352433204651, "learning_rate": 7.894576539758173e-06, "loss": 0.3411, "step": 2255 }, { "epoch": 1.1199735230845607, "grad_norm": 0.4593808948993683, "learning_rate": 7.892220331149753e-06, "loss": 0.3815, "step": 2256 }, { "epoch": 1.1204699652490484, "grad_norm": 0.4692235291004181, "learning_rate": 7.889863156907574e-06, "loss": 0.3665, "step": 2257 }, { "epoch": 1.1209664074135364, "grad_norm": 0.3974051773548126, "learning_rate": 7.887505017818626e-06, "loss": 0.3685, "step": 2258 }, { "epoch": 1.1214628495780241, "grad_norm": 0.49319592118263245, "learning_rate": 7.885145914670234e-06, "loss": 0.4568, "step": 2259 }, { "epoch": 1.121959291742512, "grad_norm": 0.4300570487976074, "learning_rate": 7.882785848250033e-06, "loss": 0.3157, "step": 2260 }, { "epoch": 1.1224557339069998, "grad_norm": 0.464566707611084, "learning_rate": 7.880424819345987e-06, "loss": 0.3862, "step": 2261 }, { "epoch": 1.1229521760714878, "grad_norm": 0.47330373525619507, "learning_rate": 7.87806282874638e-06, "loss": 0.3584, "step": 2262 }, { "epoch": 1.1234486182359755, "grad_norm": 0.44961753487586975, "learning_rate": 7.875699877239815e-06, "loss": 0.3765, "step": 2263 }, { "epoch": 1.1239450604004633, "grad_norm": 0.4254727363586426, "learning_rate": 7.873335965615219e-06, "loss": 0.3585, "step": 2264 }, { "epoch": 1.1244415025649512, "grad_norm": 0.4926202893257141, "learning_rate": 7.870971094661836e-06, "loss": 0.417, "step": 2265 }, { "epoch": 1.124937944729439, "grad_norm": 0.4544256329536438, "learning_rate": 7.868605265169236e-06, "loss": 0.3983, "step": 2266 }, { "epoch": 1.125434386893927, "grad_norm": 0.4378160536289215, "learning_rate": 7.8662384779273e-06, "loss": 0.3578, "step": 2267 }, { "epoch": 1.1259308290584147, "grad_norm": 0.446018248796463, "learning_rate": 7.863870733726237e-06, "loss": 0.3957, "step": 2268 }, { "epoch": 1.1264272712229024, "grad_norm": 0.4196249544620514, "learning_rate": 7.861502033356572e-06, "loss": 0.4111, "step": 2269 }, { "epoch": 1.1269237133873904, "grad_norm": 0.4358330965042114, "learning_rate": 7.859132377609146e-06, "loss": 0.3915, "step": 2270 }, { "epoch": 1.1274201555518781, "grad_norm": 0.4084653854370117, "learning_rate": 7.85676176727513e-06, "loss": 0.3416, "step": 2271 }, { "epoch": 1.127916597716366, "grad_norm": 0.407000333070755, "learning_rate": 7.854390203146e-06, "loss": 0.3582, "step": 2272 }, { "epoch": 1.1284130398808538, "grad_norm": 0.49834296107292175, "learning_rate": 7.852017686013561e-06, "loss": 0.4083, "step": 2273 }, { "epoch": 1.1289094820453418, "grad_norm": 0.43483537435531616, "learning_rate": 7.849644216669929e-06, "loss": 0.3648, "step": 2274 }, { "epoch": 1.1294059242098295, "grad_norm": 0.4243127405643463, "learning_rate": 7.847269795907543e-06, "loss": 0.3067, "step": 2275 }, { "epoch": 1.1299023663743175, "grad_norm": 0.4894692897796631, "learning_rate": 7.844894424519156e-06, "loss": 0.3739, "step": 2276 }, { "epoch": 1.1303988085388053, "grad_norm": 0.4782841205596924, "learning_rate": 7.842518103297842e-06, "loss": 0.4214, "step": 2277 }, { "epoch": 1.130895250703293, "grad_norm": 0.38419827818870544, "learning_rate": 7.840140833036987e-06, "loss": 0.3084, "step": 2278 }, { "epoch": 1.131391692867781, "grad_norm": 0.4710744619369507, "learning_rate": 7.8377626145303e-06, "loss": 0.3769, "step": 2279 }, { "epoch": 1.1318881350322687, "grad_norm": 0.4180232882499695, "learning_rate": 7.835383448571801e-06, "loss": 0.3458, "step": 2280 }, { "epoch": 1.1323845771967567, "grad_norm": 0.43119266629219055, "learning_rate": 7.83300333595583e-06, "loss": 0.3614, "step": 2281 }, { "epoch": 1.1328810193612444, "grad_norm": 0.4655759334564209, "learning_rate": 7.830622277477042e-06, "loss": 0.4032, "step": 2282 }, { "epoch": 1.1333774615257322, "grad_norm": 0.4514656960964203, "learning_rate": 7.828240273930408e-06, "loss": 0.3552, "step": 2283 }, { "epoch": 1.1338739036902201, "grad_norm": 0.4258189797401428, "learning_rate": 7.825857326111213e-06, "loss": 0.3915, "step": 2284 }, { "epoch": 1.1343703458547079, "grad_norm": 0.43263328075408936, "learning_rate": 7.82347343481506e-06, "loss": 0.3338, "step": 2285 }, { "epoch": 1.1348667880191958, "grad_norm": 0.443960577249527, "learning_rate": 7.821088600837865e-06, "loss": 0.369, "step": 2286 }, { "epoch": 1.1353632301836836, "grad_norm": 0.449949711561203, "learning_rate": 7.81870282497586e-06, "loss": 0.3618, "step": 2287 }, { "epoch": 1.1358596723481715, "grad_norm": 0.5815337300300598, "learning_rate": 7.816316108025588e-06, "loss": 0.4194, "step": 2288 }, { "epoch": 1.1363561145126593, "grad_norm": 0.459582656621933, "learning_rate": 7.81392845078391e-06, "loss": 0.3029, "step": 2289 }, { "epoch": 1.136852556677147, "grad_norm": 0.44244226813316345, "learning_rate": 7.811539854048003e-06, "loss": 0.3785, "step": 2290 }, { "epoch": 1.137348998841635, "grad_norm": 0.453203946352005, "learning_rate": 7.809150318615351e-06, "loss": 0.3646, "step": 2291 }, { "epoch": 1.1378454410061227, "grad_norm": 0.5639541745185852, "learning_rate": 7.806759845283755e-06, "loss": 0.3766, "step": 2292 }, { "epoch": 1.1383418831706107, "grad_norm": 0.37797847390174866, "learning_rate": 7.804368434851333e-06, "loss": 0.3465, "step": 2293 }, { "epoch": 1.1388383253350984, "grad_norm": 0.5298764109611511, "learning_rate": 7.801976088116507e-06, "loss": 0.4083, "step": 2294 }, { "epoch": 1.1393347674995864, "grad_norm": 0.4164845645427704, "learning_rate": 7.799582805878022e-06, "loss": 0.3355, "step": 2295 }, { "epoch": 1.1398312096640741, "grad_norm": 0.3912183344364166, "learning_rate": 7.797188588934921e-06, "loss": 0.3665, "step": 2296 }, { "epoch": 1.1403276518285619, "grad_norm": 0.44354742765426636, "learning_rate": 7.794793438086578e-06, "loss": 0.3612, "step": 2297 }, { "epoch": 1.1408240939930498, "grad_norm": 0.4576334059238434, "learning_rate": 7.792397354132661e-06, "loss": 0.3387, "step": 2298 }, { "epoch": 1.1413205361575376, "grad_norm": 0.4898923933506012, "learning_rate": 7.790000337873162e-06, "loss": 0.3879, "step": 2299 }, { "epoch": 1.1418169783220256, "grad_norm": 0.3950931429862976, "learning_rate": 7.78760239010838e-06, "loss": 0.3818, "step": 2300 }, { "epoch": 1.1423134204865133, "grad_norm": 0.42925214767456055, "learning_rate": 7.78520351163892e-06, "loss": 0.3653, "step": 2301 }, { "epoch": 1.142809862651001, "grad_norm": 0.42684975266456604, "learning_rate": 7.782803703265707e-06, "loss": 0.364, "step": 2302 }, { "epoch": 1.143306304815489, "grad_norm": 0.48592349886894226, "learning_rate": 7.780402965789968e-06, "loss": 0.4176, "step": 2303 }, { "epoch": 1.1438027469799767, "grad_norm": 0.4994070529937744, "learning_rate": 7.778001300013248e-06, "loss": 0.4203, "step": 2304 }, { "epoch": 1.1442991891444647, "grad_norm": 0.44008028507232666, "learning_rate": 7.775598706737395e-06, "loss": 0.3446, "step": 2305 }, { "epoch": 1.1447956313089525, "grad_norm": 0.4737931191921234, "learning_rate": 7.77319518676457e-06, "loss": 0.3881, "step": 2306 }, { "epoch": 1.1452920734734404, "grad_norm": 0.5133823156356812, "learning_rate": 7.770790740897245e-06, "loss": 0.3878, "step": 2307 }, { "epoch": 1.1457885156379282, "grad_norm": 0.4722055196762085, "learning_rate": 7.768385369938196e-06, "loss": 0.3739, "step": 2308 }, { "epoch": 1.1462849578024161, "grad_norm": 0.41011688113212585, "learning_rate": 7.765979074690512e-06, "loss": 0.3569, "step": 2309 }, { "epoch": 1.1467813999669039, "grad_norm": 0.5157063603401184, "learning_rate": 7.763571855957592e-06, "loss": 0.3868, "step": 2310 }, { "epoch": 1.1472778421313916, "grad_norm": 0.46075186133384705, "learning_rate": 7.761163714543137e-06, "loss": 0.4021, "step": 2311 }, { "epoch": 1.1477742842958796, "grad_norm": 0.4221634268760681, "learning_rate": 7.758754651251163e-06, "loss": 0.3784, "step": 2312 }, { "epoch": 1.1482707264603673, "grad_norm": 0.5752555727958679, "learning_rate": 7.75634466688599e-06, "loss": 0.4198, "step": 2313 }, { "epoch": 1.1487671686248553, "grad_norm": 0.39917659759521484, "learning_rate": 7.753933762252246e-06, "loss": 0.3147, "step": 2314 }, { "epoch": 1.149263610789343, "grad_norm": 0.4162612855434418, "learning_rate": 7.751521938154867e-06, "loss": 0.3365, "step": 2315 }, { "epoch": 1.1497600529538308, "grad_norm": 0.4733930230140686, "learning_rate": 7.749109195399093e-06, "loss": 0.4153, "step": 2316 }, { "epoch": 1.1502564951183187, "grad_norm": 0.4203830361366272, "learning_rate": 7.746695534790477e-06, "loss": 0.3256, "step": 2317 }, { "epoch": 1.1507529372828065, "grad_norm": 0.4243525564670563, "learning_rate": 7.744280957134872e-06, "loss": 0.4039, "step": 2318 }, { "epoch": 1.1512493794472944, "grad_norm": 0.4051527976989746, "learning_rate": 7.741865463238442e-06, "loss": 0.3128, "step": 2319 }, { "epoch": 1.1517458216117822, "grad_norm": 0.48211073875427246, "learning_rate": 7.739449053907653e-06, "loss": 0.4166, "step": 2320 }, { "epoch": 1.1522422637762701, "grad_norm": 0.4426801800727844, "learning_rate": 7.737031729949279e-06, "loss": 0.398, "step": 2321 }, { "epoch": 1.152738705940758, "grad_norm": 0.4438612759113312, "learning_rate": 7.7346134921704e-06, "loss": 0.3606, "step": 2322 }, { "epoch": 1.1532351481052459, "grad_norm": 0.48513635993003845, "learning_rate": 7.732194341378397e-06, "loss": 0.3677, "step": 2323 }, { "epoch": 1.1537315902697336, "grad_norm": 0.3809352219104767, "learning_rate": 7.72977427838096e-06, "loss": 0.3365, "step": 2324 }, { "epoch": 1.1542280324342213, "grad_norm": 0.5574825406074524, "learning_rate": 7.727353303986084e-06, "loss": 0.4293, "step": 2325 }, { "epoch": 1.1547244745987093, "grad_norm": 0.4040408730506897, "learning_rate": 7.724931419002063e-06, "loss": 0.3254, "step": 2326 }, { "epoch": 1.155220916763197, "grad_norm": 0.42839956283569336, "learning_rate": 7.722508624237503e-06, "loss": 0.3339, "step": 2327 }, { "epoch": 1.155717358927685, "grad_norm": 0.5418014526367188, "learning_rate": 7.720084920501306e-06, "loss": 0.3996, "step": 2328 }, { "epoch": 1.1562138010921728, "grad_norm": 0.4669536054134369, "learning_rate": 7.717660308602681e-06, "loss": 0.3609, "step": 2329 }, { "epoch": 1.1567102432566605, "grad_norm": 0.446336567401886, "learning_rate": 7.715234789351144e-06, "loss": 0.3476, "step": 2330 }, { "epoch": 1.1572066854211485, "grad_norm": 0.4501841068267822, "learning_rate": 7.712808363556504e-06, "loss": 0.3807, "step": 2331 }, { "epoch": 1.1577031275856362, "grad_norm": 0.41991737484931946, "learning_rate": 7.710381032028882e-06, "loss": 0.3371, "step": 2332 }, { "epoch": 1.1581995697501242, "grad_norm": 0.48912695050239563, "learning_rate": 7.707952795578698e-06, "loss": 0.3563, "step": 2333 }, { "epoch": 1.158696011914612, "grad_norm": 0.39969030022621155, "learning_rate": 7.705523655016674e-06, "loss": 0.3375, "step": 2334 }, { "epoch": 1.1591924540790999, "grad_norm": 0.41734302043914795, "learning_rate": 7.703093611153833e-06, "loss": 0.3473, "step": 2335 }, { "epoch": 1.1596888962435876, "grad_norm": 0.46954113245010376, "learning_rate": 7.700662664801501e-06, "loss": 0.3857, "step": 2336 }, { "epoch": 1.1601853384080756, "grad_norm": 0.5048080682754517, "learning_rate": 7.698230816771307e-06, "loss": 0.3452, "step": 2337 }, { "epoch": 1.1606817805725633, "grad_norm": 0.46426787972450256, "learning_rate": 7.695798067875174e-06, "loss": 0.3912, "step": 2338 }, { "epoch": 1.161178222737051, "grad_norm": 0.4425739049911499, "learning_rate": 7.693364418925335e-06, "loss": 0.3738, "step": 2339 }, { "epoch": 1.161674664901539, "grad_norm": 0.4138091802597046, "learning_rate": 7.690929870734319e-06, "loss": 0.361, "step": 2340 }, { "epoch": 1.1621711070660268, "grad_norm": 0.4899192452430725, "learning_rate": 7.688494424114954e-06, "loss": 0.4172, "step": 2341 }, { "epoch": 1.1626675492305147, "grad_norm": 0.5160532593727112, "learning_rate": 7.686058079880371e-06, "loss": 0.4112, "step": 2342 }, { "epoch": 1.1631639913950025, "grad_norm": 0.40692269802093506, "learning_rate": 7.683620838843997e-06, "loss": 0.3613, "step": 2343 }, { "epoch": 1.1636604335594902, "grad_norm": 0.46925848722457886, "learning_rate": 7.681182701819563e-06, "loss": 0.4488, "step": 2344 }, { "epoch": 1.1641568757239782, "grad_norm": 0.40495193004608154, "learning_rate": 7.678743669621094e-06, "loss": 0.3537, "step": 2345 }, { "epoch": 1.164653317888466, "grad_norm": 0.4747593104839325, "learning_rate": 7.676303743062917e-06, "loss": 0.3808, "step": 2346 }, { "epoch": 1.165149760052954, "grad_norm": 0.4353421926498413, "learning_rate": 7.67386292295966e-06, "loss": 0.314, "step": 2347 }, { "epoch": 1.1656462022174416, "grad_norm": 0.5408320426940918, "learning_rate": 7.671421210126245e-06, "loss": 0.3651, "step": 2348 }, { "epoch": 1.1661426443819294, "grad_norm": 0.4384588897228241, "learning_rate": 7.668978605377892e-06, "loss": 0.378, "step": 2349 }, { "epoch": 1.1666390865464173, "grad_norm": 0.4791626036167145, "learning_rate": 7.666535109530121e-06, "loss": 0.3515, "step": 2350 }, { "epoch": 1.167135528710905, "grad_norm": 0.5721251368522644, "learning_rate": 7.66409072339875e-06, "loss": 0.4281, "step": 2351 }, { "epoch": 1.167631970875393, "grad_norm": 0.4832015633583069, "learning_rate": 7.661645447799893e-06, "loss": 0.3245, "step": 2352 }, { "epoch": 1.1681284130398808, "grad_norm": 0.5681409239768982, "learning_rate": 7.65919928354996e-06, "loss": 0.4042, "step": 2353 }, { "epoch": 1.1686248552043688, "grad_norm": 0.4460003077983856, "learning_rate": 7.656752231465659e-06, "loss": 0.3748, "step": 2354 }, { "epoch": 1.1691212973688565, "grad_norm": 0.545538604259491, "learning_rate": 7.654304292363993e-06, "loss": 0.4099, "step": 2355 }, { "epoch": 1.1696177395333445, "grad_norm": 0.4680410921573639, "learning_rate": 7.651855467062265e-06, "loss": 0.3476, "step": 2356 }, { "epoch": 1.1701141816978322, "grad_norm": 0.4829736351966858, "learning_rate": 7.649405756378072e-06, "loss": 0.407, "step": 2357 }, { "epoch": 1.17061062386232, "grad_norm": 0.4664817154407501, "learning_rate": 7.646955161129302e-06, "loss": 0.377, "step": 2358 }, { "epoch": 1.171107066026808, "grad_norm": 0.40486952662467957, "learning_rate": 7.644503682134143e-06, "loss": 0.3464, "step": 2359 }, { "epoch": 1.1716035081912957, "grad_norm": 0.44941776990890503, "learning_rate": 7.642051320211082e-06, "loss": 0.3944, "step": 2360 }, { "epoch": 1.1720999503557836, "grad_norm": 0.4177255630493164, "learning_rate": 7.639598076178887e-06, "loss": 0.3681, "step": 2361 }, { "epoch": 1.1725963925202714, "grad_norm": 0.4446326196193695, "learning_rate": 7.637143950856638e-06, "loss": 0.3898, "step": 2362 }, { "epoch": 1.1730928346847591, "grad_norm": 0.44905877113342285, "learning_rate": 7.634688945063696e-06, "loss": 0.3425, "step": 2363 }, { "epoch": 1.173589276849247, "grad_norm": 0.5092370510101318, "learning_rate": 7.632233059619723e-06, "loss": 0.3796, "step": 2364 }, { "epoch": 1.1740857190137348, "grad_norm": 0.4926864206790924, "learning_rate": 7.629776295344672e-06, "loss": 0.4434, "step": 2365 }, { "epoch": 1.1745821611782228, "grad_norm": 0.5456365942955017, "learning_rate": 7.627318653058789e-06, "loss": 0.3785, "step": 2366 }, { "epoch": 1.1750786033427105, "grad_norm": 0.5017502307891846, "learning_rate": 7.624860133582612e-06, "loss": 0.3761, "step": 2367 }, { "epoch": 1.1755750455071985, "grad_norm": 0.5004932284355164, "learning_rate": 7.622400737736978e-06, "loss": 0.3691, "step": 2368 }, { "epoch": 1.1760714876716862, "grad_norm": 0.4964345097541809, "learning_rate": 7.61994046634301e-06, "loss": 0.327, "step": 2369 }, { "epoch": 1.1765679298361742, "grad_norm": 0.49957379698753357, "learning_rate": 7.6174793202221275e-06, "loss": 0.3926, "step": 2370 }, { "epoch": 1.177064372000662, "grad_norm": 0.48547375202178955, "learning_rate": 7.615017300196038e-06, "loss": 0.372, "step": 2371 }, { "epoch": 1.1775608141651497, "grad_norm": 0.49307212233543396, "learning_rate": 7.6125544070867456e-06, "loss": 0.3647, "step": 2372 }, { "epoch": 1.1780572563296376, "grad_norm": 0.47325485944747925, "learning_rate": 7.610090641716541e-06, "loss": 0.3916, "step": 2373 }, { "epoch": 1.1785536984941254, "grad_norm": 0.421774685382843, "learning_rate": 7.607626004908009e-06, "loss": 0.3785, "step": 2374 }, { "epoch": 1.1790501406586134, "grad_norm": 0.4343548119068146, "learning_rate": 7.605160497484027e-06, "loss": 0.3734, "step": 2375 }, { "epoch": 1.179546582823101, "grad_norm": 0.409554123878479, "learning_rate": 7.602694120267757e-06, "loss": 0.3376, "step": 2376 }, { "epoch": 1.1800430249875888, "grad_norm": 0.4628521502017975, "learning_rate": 7.600226874082659e-06, "loss": 0.3531, "step": 2377 }, { "epoch": 1.1805394671520768, "grad_norm": 0.4191890358924866, "learning_rate": 7.597758759752476e-06, "loss": 0.3453, "step": 2378 }, { "epoch": 1.1810359093165645, "grad_norm": 0.5072941184043884, "learning_rate": 7.595289778101249e-06, "loss": 0.4063, "step": 2379 }, { "epoch": 1.1815323514810525, "grad_norm": 0.4979794919490814, "learning_rate": 7.592819929953299e-06, "loss": 0.4326, "step": 2380 }, { "epoch": 1.1820287936455403, "grad_norm": 0.42829015851020813, "learning_rate": 7.590349216133245e-06, "loss": 0.3348, "step": 2381 }, { "epoch": 1.1825252358100282, "grad_norm": 0.5008774399757385, "learning_rate": 7.587877637465989e-06, "loss": 0.3536, "step": 2382 }, { "epoch": 1.183021677974516, "grad_norm": 0.47311970591545105, "learning_rate": 7.5854051947767235e-06, "loss": 0.3443, "step": 2383 }, { "epoch": 1.183518120139004, "grad_norm": 0.47271212935447693, "learning_rate": 7.582931888890933e-06, "loss": 0.3676, "step": 2384 }, { "epoch": 1.1840145623034917, "grad_norm": 0.45941412448883057, "learning_rate": 7.580457720634383e-06, "loss": 0.4009, "step": 2385 }, { "epoch": 1.1845110044679794, "grad_norm": 0.4313892722129822, "learning_rate": 7.577982690833135e-06, "loss": 0.3731, "step": 2386 }, { "epoch": 1.1850074466324674, "grad_norm": 0.5326284170150757, "learning_rate": 7.575506800313529e-06, "loss": 0.3748, "step": 2387 }, { "epoch": 1.1855038887969551, "grad_norm": 0.5055777430534363, "learning_rate": 7.573030049902204e-06, "loss": 0.3735, "step": 2388 }, { "epoch": 1.186000330961443, "grad_norm": 0.5303782820701599, "learning_rate": 7.570552440426075e-06, "loss": 0.3676, "step": 2389 }, { "epoch": 1.1864967731259308, "grad_norm": 0.42851078510284424, "learning_rate": 7.56807397271235e-06, "loss": 0.3515, "step": 2390 }, { "epoch": 1.1869932152904186, "grad_norm": 0.5200013518333435, "learning_rate": 7.565594647588521e-06, "loss": 0.3914, "step": 2391 }, { "epoch": 1.1874896574549065, "grad_norm": 0.44073250889778137, "learning_rate": 7.563114465882369e-06, "loss": 0.3094, "step": 2392 }, { "epoch": 1.1879860996193943, "grad_norm": 0.4426272511482239, "learning_rate": 7.5606334284219586e-06, "loss": 0.3785, "step": 2393 }, { "epoch": 1.1884825417838822, "grad_norm": 0.44661247730255127, "learning_rate": 7.558151536035641e-06, "loss": 0.3941, "step": 2394 }, { "epoch": 1.18897898394837, "grad_norm": 0.4922610819339752, "learning_rate": 7.555668789552051e-06, "loss": 0.3303, "step": 2395 }, { "epoch": 1.189475426112858, "grad_norm": 0.5564074516296387, "learning_rate": 7.553185189800112e-06, "loss": 0.3598, "step": 2396 }, { "epoch": 1.1899718682773457, "grad_norm": 0.4367702007293701, "learning_rate": 7.550700737609031e-06, "loss": 0.3513, "step": 2397 }, { "epoch": 1.1904683104418334, "grad_norm": 0.49204686284065247, "learning_rate": 7.548215433808297e-06, "loss": 0.3445, "step": 2398 }, { "epoch": 1.1909647526063214, "grad_norm": 0.5725414752960205, "learning_rate": 7.545729279227687e-06, "loss": 0.4331, "step": 2399 }, { "epoch": 1.1914611947708091, "grad_norm": 0.4542293846607208, "learning_rate": 7.543242274697258e-06, "loss": 0.342, "step": 2400 }, { "epoch": 1.191957636935297, "grad_norm": 0.5240516662597656, "learning_rate": 7.540754421047356e-06, "loss": 0.4411, "step": 2401 }, { "epoch": 1.1924540790997848, "grad_norm": 0.45452404022216797, "learning_rate": 7.538265719108606e-06, "loss": 0.3444, "step": 2402 }, { "epoch": 1.1929505212642728, "grad_norm": 0.47297465801239014, "learning_rate": 7.5357761697119195e-06, "loss": 0.4306, "step": 2403 }, { "epoch": 1.1934469634287606, "grad_norm": 0.41522881388664246, "learning_rate": 7.533285773688488e-06, "loss": 0.3385, "step": 2404 }, { "epoch": 1.1939434055932483, "grad_norm": 0.4761541187763214, "learning_rate": 7.53079453186979e-06, "loss": 0.3862, "step": 2405 }, { "epoch": 1.1944398477577363, "grad_norm": 0.5642068982124329, "learning_rate": 7.528302445087577e-06, "loss": 0.4065, "step": 2406 }, { "epoch": 1.194936289922224, "grad_norm": 0.47464102506637573, "learning_rate": 7.525809514173896e-06, "loss": 0.3877, "step": 2407 }, { "epoch": 1.195432732086712, "grad_norm": 0.4719201624393463, "learning_rate": 7.523315739961065e-06, "loss": 0.3656, "step": 2408 }, { "epoch": 1.1959291742511997, "grad_norm": 0.470625102519989, "learning_rate": 7.5208211232816864e-06, "loss": 0.3587, "step": 2409 }, { "epoch": 1.1964256164156875, "grad_norm": 0.42122411727905273, "learning_rate": 7.518325664968649e-06, "loss": 0.3387, "step": 2410 }, { "epoch": 1.1969220585801754, "grad_norm": 0.43924039602279663, "learning_rate": 7.515829365855116e-06, "loss": 0.3794, "step": 2411 }, { "epoch": 1.1974185007446632, "grad_norm": 0.4865008592605591, "learning_rate": 7.513332226774535e-06, "loss": 0.3587, "step": 2412 }, { "epoch": 1.1979149429091511, "grad_norm": 0.4491385817527771, "learning_rate": 7.51083424856063e-06, "loss": 0.3855, "step": 2413 }, { "epoch": 1.1984113850736389, "grad_norm": 0.43620774149894714, "learning_rate": 7.508335432047412e-06, "loss": 0.375, "step": 2414 }, { "epoch": 1.1989078272381268, "grad_norm": 0.4251222014427185, "learning_rate": 7.505835778069166e-06, "loss": 0.3643, "step": 2415 }, { "epoch": 1.1994042694026146, "grad_norm": 0.41284313797950745, "learning_rate": 7.503335287460456e-06, "loss": 0.3657, "step": 2416 }, { "epoch": 1.1999007115671025, "grad_norm": 0.4308205842971802, "learning_rate": 7.500833961056133e-06, "loss": 0.3597, "step": 2417 }, { "epoch": 1.2003971537315903, "grad_norm": 0.45096150040626526, "learning_rate": 7.498331799691318e-06, "loss": 0.4077, "step": 2418 }, { "epoch": 1.200893595896078, "grad_norm": 0.4519040882587433, "learning_rate": 7.495828804201417e-06, "loss": 0.3782, "step": 2419 }, { "epoch": 1.201390038060566, "grad_norm": 0.4197423756122589, "learning_rate": 7.493324975422112e-06, "loss": 0.3411, "step": 2420 }, { "epoch": 1.2018864802250537, "grad_norm": 0.42298436164855957, "learning_rate": 7.4908203141893594e-06, "loss": 0.3553, "step": 2421 }, { "epoch": 1.2023829223895417, "grad_norm": 0.45123082399368286, "learning_rate": 7.488314821339403e-06, "loss": 0.3786, "step": 2422 }, { "epoch": 1.2028793645540294, "grad_norm": 0.46729928255081177, "learning_rate": 7.485808497708757e-06, "loss": 0.3912, "step": 2423 }, { "epoch": 1.2033758067185172, "grad_norm": 0.47860953211784363, "learning_rate": 7.483301344134213e-06, "loss": 0.3717, "step": 2424 }, { "epoch": 1.2038722488830051, "grad_norm": 0.4199686348438263, "learning_rate": 7.480793361452842e-06, "loss": 0.3651, "step": 2425 }, { "epoch": 1.204368691047493, "grad_norm": 0.4651489555835724, "learning_rate": 7.478284550501992e-06, "loss": 0.3617, "step": 2426 }, { "epoch": 1.2048651332119809, "grad_norm": 0.4891713857650757, "learning_rate": 7.475774912119287e-06, "loss": 0.3835, "step": 2427 }, { "epoch": 1.2053615753764686, "grad_norm": 0.447856605052948, "learning_rate": 7.473264447142626e-06, "loss": 0.3486, "step": 2428 }, { "epoch": 1.2058580175409566, "grad_norm": 0.4851597845554352, "learning_rate": 7.470753156410188e-06, "loss": 0.3957, "step": 2429 }, { "epoch": 1.2063544597054443, "grad_norm": 0.4494684338569641, "learning_rate": 7.46824104076042e-06, "loss": 0.3568, "step": 2430 }, { "epoch": 1.2068509018699323, "grad_norm": 0.4363216161727905, "learning_rate": 7.465728101032052e-06, "loss": 0.3432, "step": 2431 }, { "epoch": 1.20734734403442, "grad_norm": 0.46131497621536255, "learning_rate": 7.4632143380640875e-06, "loss": 0.3734, "step": 2432 }, { "epoch": 1.2078437861989078, "grad_norm": 0.45943018794059753, "learning_rate": 7.460699752695801e-06, "loss": 0.3942, "step": 2433 }, { "epoch": 1.2083402283633957, "grad_norm": 0.4332441985607147, "learning_rate": 7.458184345766744e-06, "loss": 0.3425, "step": 2434 }, { "epoch": 1.2088366705278835, "grad_norm": 0.47426170110702515, "learning_rate": 7.455668118116746e-06, "loss": 0.3775, "step": 2435 }, { "epoch": 1.2093331126923714, "grad_norm": 0.4703926146030426, "learning_rate": 7.453151070585903e-06, "loss": 0.3911, "step": 2436 }, { "epoch": 1.2098295548568592, "grad_norm": 0.49946263432502747, "learning_rate": 7.45063320401459e-06, "loss": 0.3517, "step": 2437 }, { "epoch": 1.210325997021347, "grad_norm": 0.4603123366832733, "learning_rate": 7.448114519243456e-06, "loss": 0.3603, "step": 2438 }, { "epoch": 1.2108224391858349, "grad_norm": 0.475078284740448, "learning_rate": 7.445595017113418e-06, "loss": 0.3931, "step": 2439 }, { "epoch": 1.2113188813503226, "grad_norm": 0.48516401648521423, "learning_rate": 7.4430746984656736e-06, "loss": 0.3515, "step": 2440 }, { "epoch": 1.2118153235148106, "grad_norm": 0.5204970836639404, "learning_rate": 7.440553564141686e-06, "loss": 0.4173, "step": 2441 }, { "epoch": 1.2123117656792983, "grad_norm": 0.4383276104927063, "learning_rate": 7.438031614983195e-06, "loss": 0.3458, "step": 2442 }, { "epoch": 1.2128082078437863, "grad_norm": 0.6076882481575012, "learning_rate": 7.4355088518322076e-06, "loss": 0.44, "step": 2443 }, { "epoch": 1.213304650008274, "grad_norm": 0.4292065501213074, "learning_rate": 7.432985275531009e-06, "loss": 0.3736, "step": 2444 }, { "epoch": 1.213801092172762, "grad_norm": 0.45195427536964417, "learning_rate": 7.430460886922152e-06, "loss": 0.3209, "step": 2445 }, { "epoch": 1.2142975343372497, "grad_norm": 0.5971665382385254, "learning_rate": 7.427935686848461e-06, "loss": 0.4144, "step": 2446 }, { "epoch": 1.2147939765017375, "grad_norm": 0.418998658657074, "learning_rate": 7.425409676153032e-06, "loss": 0.3357, "step": 2447 }, { "epoch": 1.2152904186662254, "grad_norm": 0.61359703540802, "learning_rate": 7.42288285567923e-06, "loss": 0.3837, "step": 2448 }, { "epoch": 1.2157868608307132, "grad_norm": 0.5112248659133911, "learning_rate": 7.420355226270693e-06, "loss": 0.4131, "step": 2449 }, { "epoch": 1.2162833029952012, "grad_norm": 0.45184069871902466, "learning_rate": 7.417826788771327e-06, "loss": 0.3726, "step": 2450 }, { "epoch": 1.216779745159689, "grad_norm": 0.5965778827667236, "learning_rate": 7.415297544025311e-06, "loss": 0.3647, "step": 2451 }, { "epoch": 1.2172761873241766, "grad_norm": 0.5160964131355286, "learning_rate": 7.412767492877089e-06, "loss": 0.3651, "step": 2452 }, { "epoch": 1.2177726294886646, "grad_norm": 0.5234505534172058, "learning_rate": 7.410236636171376e-06, "loss": 0.3806, "step": 2453 }, { "epoch": 1.2182690716531523, "grad_norm": 0.6424894332885742, "learning_rate": 7.407704974753157e-06, "loss": 0.4066, "step": 2454 }, { "epoch": 1.2187655138176403, "grad_norm": 0.4045441150665283, "learning_rate": 7.405172509467685e-06, "loss": 0.323, "step": 2455 }, { "epoch": 1.219261955982128, "grad_norm": 0.5840656757354736, "learning_rate": 7.402639241160479e-06, "loss": 0.4446, "step": 2456 }, { "epoch": 1.2197583981466158, "grad_norm": 0.5419365167617798, "learning_rate": 7.400105170677333e-06, "loss": 0.4139, "step": 2457 }, { "epoch": 1.2202548403111038, "grad_norm": 0.4173157215118408, "learning_rate": 7.3975702988643e-06, "loss": 0.3549, "step": 2458 }, { "epoch": 1.2207512824755915, "grad_norm": 0.49910762906074524, "learning_rate": 7.395034626567709e-06, "loss": 0.3732, "step": 2459 }, { "epoch": 1.2212477246400795, "grad_norm": 0.4677479565143585, "learning_rate": 7.392498154634147e-06, "loss": 0.3888, "step": 2460 }, { "epoch": 1.2217441668045672, "grad_norm": 0.4439126253128052, "learning_rate": 7.3899608839104775e-06, "loss": 0.4146, "step": 2461 }, { "epoch": 1.2222406089690552, "grad_norm": 0.4259490668773651, "learning_rate": 7.3874228152438236e-06, "loss": 0.369, "step": 2462 }, { "epoch": 1.222737051133543, "grad_norm": 0.4107957184314728, "learning_rate": 7.3848839494815775e-06, "loss": 0.3685, "step": 2463 }, { "epoch": 1.2232334932980309, "grad_norm": 0.43709081411361694, "learning_rate": 7.382344287471398e-06, "loss": 0.3451, "step": 2464 }, { "epoch": 1.2237299354625186, "grad_norm": 0.4899658262729645, "learning_rate": 7.379803830061211e-06, "loss": 0.4029, "step": 2465 }, { "epoch": 1.2242263776270064, "grad_norm": 0.48049047589302063, "learning_rate": 7.377262578099204e-06, "loss": 0.392, "step": 2466 }, { "epoch": 1.2247228197914943, "grad_norm": 0.4249815344810486, "learning_rate": 7.374720532433832e-06, "loss": 0.3448, "step": 2467 }, { "epoch": 1.225219261955982, "grad_norm": 0.5210739374160767, "learning_rate": 7.372177693913817e-06, "loss": 0.3949, "step": 2468 }, { "epoch": 1.22571570412047, "grad_norm": 0.44736218452453613, "learning_rate": 7.36963406338814e-06, "loss": 0.3789, "step": 2469 }, { "epoch": 1.2262121462849578, "grad_norm": 0.448662668466568, "learning_rate": 7.3670896417060555e-06, "loss": 0.3769, "step": 2470 }, { "epoch": 1.2267085884494455, "grad_norm": 0.49790850281715393, "learning_rate": 7.364544429717071e-06, "loss": 0.375, "step": 2471 }, { "epoch": 1.2272050306139335, "grad_norm": 0.4815690815448761, "learning_rate": 7.3619984282709665e-06, "loss": 0.3439, "step": 2472 }, { "epoch": 1.2277014727784212, "grad_norm": 0.44563281536102295, "learning_rate": 7.359451638217783e-06, "loss": 0.3493, "step": 2473 }, { "epoch": 1.2281979149429092, "grad_norm": 0.44755294919013977, "learning_rate": 7.356904060407823e-06, "loss": 0.324, "step": 2474 }, { "epoch": 1.228694357107397, "grad_norm": 0.49688637256622314, "learning_rate": 7.354355695691655e-06, "loss": 0.4083, "step": 2475 }, { "epoch": 1.229190799271885, "grad_norm": 0.4447444677352905, "learning_rate": 7.3518065449201095e-06, "loss": 0.3266, "step": 2476 }, { "epoch": 1.2296872414363726, "grad_norm": 0.43501606583595276, "learning_rate": 7.349256608944275e-06, "loss": 0.4083, "step": 2477 }, { "epoch": 1.2301836836008606, "grad_norm": 0.5211775302886963, "learning_rate": 7.346705888615509e-06, "loss": 0.3863, "step": 2478 }, { "epoch": 1.2306801257653484, "grad_norm": 0.43633025884628296, "learning_rate": 7.344154384785426e-06, "loss": 0.3208, "step": 2479 }, { "epoch": 1.231176567929836, "grad_norm": 0.45935729146003723, "learning_rate": 7.341602098305904e-06, "loss": 0.3874, "step": 2480 }, { "epoch": 1.231673010094324, "grad_norm": 0.4447050988674164, "learning_rate": 7.339049030029084e-06, "loss": 0.3531, "step": 2481 }, { "epoch": 1.2321694522588118, "grad_norm": 0.48731184005737305, "learning_rate": 7.336495180807364e-06, "loss": 0.3649, "step": 2482 }, { "epoch": 1.2326658944232998, "grad_norm": 0.45499303936958313, "learning_rate": 7.333940551493406e-06, "loss": 0.3808, "step": 2483 }, { "epoch": 1.2331623365877875, "grad_norm": 0.4297926723957062, "learning_rate": 7.331385142940131e-06, "loss": 0.3658, "step": 2484 }, { "epoch": 1.2336587787522753, "grad_norm": 0.40719810128211975, "learning_rate": 7.32882895600072e-06, "loss": 0.3456, "step": 2485 }, { "epoch": 1.2341552209167632, "grad_norm": 0.4255826771259308, "learning_rate": 7.326271991528614e-06, "loss": 0.3594, "step": 2486 }, { "epoch": 1.234651663081251, "grad_norm": 0.38139742612838745, "learning_rate": 7.323714250377515e-06, "loss": 0.3503, "step": 2487 }, { "epoch": 1.235148105245739, "grad_norm": 0.48240652680397034, "learning_rate": 7.321155733401382e-06, "loss": 0.3731, "step": 2488 }, { "epoch": 1.2356445474102267, "grad_norm": 0.4642679691314697, "learning_rate": 7.318596441454437e-06, "loss": 0.3557, "step": 2489 }, { "epoch": 1.2361409895747146, "grad_norm": 0.49490028619766235, "learning_rate": 7.316036375391156e-06, "loss": 0.4189, "step": 2490 }, { "epoch": 1.2366374317392024, "grad_norm": 0.4076560437679291, "learning_rate": 7.313475536066275e-06, "loss": 0.3352, "step": 2491 }, { "epoch": 1.2371338739036903, "grad_norm": 0.4406645596027374, "learning_rate": 7.31091392433479e-06, "loss": 0.3642, "step": 2492 }, { "epoch": 1.237630316068178, "grad_norm": 0.41539305448532104, "learning_rate": 7.3083515410519516e-06, "loss": 0.3852, "step": 2493 }, { "epoch": 1.2381267582326658, "grad_norm": 0.45313704013824463, "learning_rate": 7.305788387073272e-06, "loss": 0.385, "step": 2494 }, { "epoch": 1.2386232003971538, "grad_norm": 0.4441068768501282, "learning_rate": 7.303224463254517e-06, "loss": 0.3334, "step": 2495 }, { "epoch": 1.2391196425616415, "grad_norm": 0.4522591531276703, "learning_rate": 7.3006597704517115e-06, "loss": 0.3714, "step": 2496 }, { "epoch": 1.2396160847261295, "grad_norm": 0.4423907697200775, "learning_rate": 7.298094309521138e-06, "loss": 0.3559, "step": 2497 }, { "epoch": 1.2401125268906172, "grad_norm": 0.46095722913742065, "learning_rate": 7.295528081319334e-06, "loss": 0.3476, "step": 2498 }, { "epoch": 1.240608969055105, "grad_norm": 0.5034470558166504, "learning_rate": 7.292961086703091e-06, "loss": 0.4134, "step": 2499 }, { "epoch": 1.241105411219593, "grad_norm": 0.4412003457546234, "learning_rate": 7.290393326529463e-06, "loss": 0.3382, "step": 2500 }, { "epoch": 1.2416018533840807, "grad_norm": 0.45673736929893494, "learning_rate": 7.28782480165575e-06, "loss": 0.3708, "step": 2501 }, { "epoch": 1.2420982955485687, "grad_norm": 0.43862998485565186, "learning_rate": 7.285255512939516e-06, "loss": 0.3548, "step": 2502 }, { "epoch": 1.2425947377130564, "grad_norm": 0.3908565938472748, "learning_rate": 7.2826854612385756e-06, "loss": 0.3915, "step": 2503 }, { "epoch": 1.2430911798775444, "grad_norm": 0.47612464427948, "learning_rate": 7.280114647411001e-06, "loss": 0.4214, "step": 2504 }, { "epoch": 1.243587622042032, "grad_norm": 0.40879538655281067, "learning_rate": 7.2775430723151155e-06, "loss": 0.357, "step": 2505 }, { "epoch": 1.2440840642065198, "grad_norm": 0.47185882925987244, "learning_rate": 7.274970736809497e-06, "loss": 0.3894, "step": 2506 }, { "epoch": 1.2445805063710078, "grad_norm": 0.448064923286438, "learning_rate": 7.272397641752982e-06, "loss": 0.3023, "step": 2507 }, { "epoch": 1.2450769485354956, "grad_norm": 0.46386516094207764, "learning_rate": 7.269823788004653e-06, "loss": 0.3612, "step": 2508 }, { "epoch": 1.2455733906999835, "grad_norm": 0.4222564697265625, "learning_rate": 7.267249176423852e-06, "loss": 0.365, "step": 2509 }, { "epoch": 1.2460698328644713, "grad_norm": 0.4819212555885315, "learning_rate": 7.264673807870172e-06, "loss": 0.3905, "step": 2510 }, { "epoch": 1.2465662750289592, "grad_norm": 0.4523613154888153, "learning_rate": 7.262097683203456e-06, "loss": 0.3589, "step": 2511 }, { "epoch": 1.247062717193447, "grad_norm": 0.5093353390693665, "learning_rate": 7.259520803283806e-06, "loss": 0.361, "step": 2512 }, { "epoch": 1.2475591593579347, "grad_norm": 0.459796667098999, "learning_rate": 7.2569431689715695e-06, "loss": 0.3608, "step": 2513 }, { "epoch": 1.2480556015224227, "grad_norm": 0.47508886456489563, "learning_rate": 7.25436478112735e-06, "loss": 0.3888, "step": 2514 }, { "epoch": 1.2485520436869104, "grad_norm": 0.47870391607284546, "learning_rate": 7.251785640611999e-06, "loss": 0.3738, "step": 2515 }, { "epoch": 1.2490484858513984, "grad_norm": 0.48516330122947693, "learning_rate": 7.249205748286623e-06, "loss": 0.4446, "step": 2516 }, { "epoch": 1.2495449280158861, "grad_norm": 0.44432154297828674, "learning_rate": 7.246625105012579e-06, "loss": 0.3685, "step": 2517 }, { "epoch": 1.2500413701803739, "grad_norm": 0.4764401316642761, "learning_rate": 7.244043711651472e-06, "loss": 0.3831, "step": 2518 }, { "epoch": 1.2505378123448618, "grad_norm": 0.3864271342754364, "learning_rate": 7.241461569065158e-06, "loss": 0.2947, "step": 2519 }, { "epoch": 1.2510342545093498, "grad_norm": 0.5040217638015747, "learning_rate": 7.238878678115746e-06, "loss": 0.3227, "step": 2520 }, { "epoch": 1.2515306966738375, "grad_norm": 0.4411095082759857, "learning_rate": 7.2362950396655925e-06, "loss": 0.3369, "step": 2521 }, { "epoch": 1.2520271388383253, "grad_norm": 0.4647572636604309, "learning_rate": 7.233710654577306e-06, "loss": 0.3634, "step": 2522 }, { "epoch": 1.2525235810028132, "grad_norm": 0.43290236592292786, "learning_rate": 7.231125523713739e-06, "loss": 0.3474, "step": 2523 }, { "epoch": 1.253020023167301, "grad_norm": 0.489779531955719, "learning_rate": 7.228539647938e-06, "loss": 0.4346, "step": 2524 }, { "epoch": 1.253516465331789, "grad_norm": 0.447338342666626, "learning_rate": 7.225953028113439e-06, "loss": 0.367, "step": 2525 }, { "epoch": 1.2540129074962767, "grad_norm": 0.4512540102005005, "learning_rate": 7.223365665103662e-06, "loss": 0.3539, "step": 2526 }, { "epoch": 1.2545093496607644, "grad_norm": 0.4568824768066406, "learning_rate": 7.220777559772515e-06, "loss": 0.3882, "step": 2527 }, { "epoch": 1.2550057918252524, "grad_norm": 0.431236207485199, "learning_rate": 7.2181887129841e-06, "loss": 0.327, "step": 2528 }, { "epoch": 1.2555022339897401, "grad_norm": 0.43751397728919983, "learning_rate": 7.215599125602759e-06, "loss": 0.3852, "step": 2529 }, { "epoch": 1.255998676154228, "grad_norm": 0.4518450200557709, "learning_rate": 7.2130087984930885e-06, "loss": 0.3772, "step": 2530 }, { "epoch": 1.2564951183187159, "grad_norm": 0.4595933258533478, "learning_rate": 7.210417732519926e-06, "loss": 0.374, "step": 2531 }, { "epoch": 1.2569915604832036, "grad_norm": 0.45206156373023987, "learning_rate": 7.207825928548358e-06, "loss": 0.3667, "step": 2532 }, { "epoch": 1.2574880026476916, "grad_norm": 0.45643991231918335, "learning_rate": 7.2052333874437175e-06, "loss": 0.3648, "step": 2533 }, { "epoch": 1.2579844448121793, "grad_norm": 0.43160778284072876, "learning_rate": 7.202640110071584e-06, "loss": 0.3455, "step": 2534 }, { "epoch": 1.2584808869766673, "grad_norm": 0.4488155245780945, "learning_rate": 7.200046097297782e-06, "loss": 0.4019, "step": 2535 }, { "epoch": 1.258977329141155, "grad_norm": 0.44770383834838867, "learning_rate": 7.197451349988382e-06, "loss": 0.3766, "step": 2536 }, { "epoch": 1.259473771305643, "grad_norm": 0.46931561827659607, "learning_rate": 7.194855869009701e-06, "loss": 0.4077, "step": 2537 }, { "epoch": 1.2599702134701307, "grad_norm": 0.45346561074256897, "learning_rate": 7.192259655228298e-06, "loss": 0.3813, "step": 2538 }, { "epoch": 1.2604666556346187, "grad_norm": 0.41467586159706116, "learning_rate": 7.189662709510977e-06, "loss": 0.3874, "step": 2539 }, { "epoch": 1.2609630977991064, "grad_norm": 0.45600953698158264, "learning_rate": 7.1870650327247895e-06, "loss": 0.3482, "step": 2540 }, { "epoch": 1.2614595399635942, "grad_norm": 0.5105767250061035, "learning_rate": 7.1844666257370296e-06, "loss": 0.3983, "step": 2541 }, { "epoch": 1.2619559821280821, "grad_norm": 0.46266502141952515, "learning_rate": 7.181867489415233e-06, "loss": 0.3671, "step": 2542 }, { "epoch": 1.2624524242925699, "grad_norm": 0.4311472177505493, "learning_rate": 7.179267624627182e-06, "loss": 0.3254, "step": 2543 }, { "epoch": 1.2629488664570578, "grad_norm": 0.4673173725605011, "learning_rate": 7.1766670322409005e-06, "loss": 0.3403, "step": 2544 }, { "epoch": 1.2634453086215456, "grad_norm": 0.48472896218299866, "learning_rate": 7.1740657131246545e-06, "loss": 0.3692, "step": 2545 }, { "epoch": 1.2639417507860333, "grad_norm": 0.5226826071739197, "learning_rate": 7.171463668146957e-06, "loss": 0.3908, "step": 2546 }, { "epoch": 1.2644381929505213, "grad_norm": 0.4722929000854492, "learning_rate": 7.168860898176555e-06, "loss": 0.3782, "step": 2547 }, { "epoch": 1.264934635115009, "grad_norm": 0.4712083637714386, "learning_rate": 7.166257404082446e-06, "loss": 0.374, "step": 2548 }, { "epoch": 1.265431077279497, "grad_norm": 0.4747667908668518, "learning_rate": 7.163653186733867e-06, "loss": 0.394, "step": 2549 }, { "epoch": 1.2659275194439847, "grad_norm": 0.42555752396583557, "learning_rate": 7.161048247000292e-06, "loss": 0.354, "step": 2550 }, { "epoch": 1.2664239616084725, "grad_norm": 0.46204042434692383, "learning_rate": 7.158442585751442e-06, "loss": 0.3857, "step": 2551 }, { "epoch": 1.2669204037729604, "grad_norm": 0.4941166639328003, "learning_rate": 7.155836203857276e-06, "loss": 0.3807, "step": 2552 }, { "epoch": 1.2674168459374484, "grad_norm": 0.3808303475379944, "learning_rate": 7.153229102187994e-06, "loss": 0.3074, "step": 2553 }, { "epoch": 1.2679132881019362, "grad_norm": 0.48071038722991943, "learning_rate": 7.150621281614036e-06, "loss": 0.3763, "step": 2554 }, { "epoch": 1.268409730266424, "grad_norm": 0.4878233075141907, "learning_rate": 7.148012743006083e-06, "loss": 0.4039, "step": 2555 }, { "epoch": 1.2689061724309119, "grad_norm": 0.4420846700668335, "learning_rate": 7.145403487235057e-06, "loss": 0.36, "step": 2556 }, { "epoch": 1.2694026145953996, "grad_norm": 0.39321020245552063, "learning_rate": 7.142793515172112e-06, "loss": 0.3144, "step": 2557 }, { "epoch": 1.2698990567598876, "grad_norm": 0.5145472288131714, "learning_rate": 7.140182827688651e-06, "loss": 0.3946, "step": 2558 }, { "epoch": 1.2703954989243753, "grad_norm": 0.4533396065235138, "learning_rate": 7.137571425656311e-06, "loss": 0.3742, "step": 2559 }, { "epoch": 1.270891941088863, "grad_norm": 0.411055326461792, "learning_rate": 7.1349593099469676e-06, "loss": 0.3133, "step": 2560 }, { "epoch": 1.271388383253351, "grad_norm": 0.49590596556663513, "learning_rate": 7.132346481432737e-06, "loss": 0.3495, "step": 2561 }, { "epoch": 1.2718848254178388, "grad_norm": 0.5420020818710327, "learning_rate": 7.129732940985969e-06, "loss": 0.4042, "step": 2562 }, { "epoch": 1.2723812675823267, "grad_norm": 0.4519781470298767, "learning_rate": 7.127118689479256e-06, "loss": 0.3948, "step": 2563 }, { "epoch": 1.2728777097468145, "grad_norm": 0.46683427691459656, "learning_rate": 7.124503727785424e-06, "loss": 0.3728, "step": 2564 }, { "epoch": 1.2733741519113022, "grad_norm": 0.5520554780960083, "learning_rate": 7.121888056777538e-06, "loss": 0.4099, "step": 2565 }, { "epoch": 1.2738705940757902, "grad_norm": 0.42682793736457825, "learning_rate": 7.1192716773289e-06, "loss": 0.3377, "step": 2566 }, { "epoch": 1.2743670362402781, "grad_norm": 0.5201363563537598, "learning_rate": 7.116654590313045e-06, "loss": 0.3835, "step": 2567 }, { "epoch": 1.2748634784047659, "grad_norm": 0.48595064878463745, "learning_rate": 7.114036796603752e-06, "loss": 0.3693, "step": 2568 }, { "epoch": 1.2753599205692536, "grad_norm": 0.41082119941711426, "learning_rate": 7.11141829707503e-06, "loss": 0.3837, "step": 2569 }, { "epoch": 1.2758563627337416, "grad_norm": 0.47386544942855835, "learning_rate": 7.108799092601122e-06, "loss": 0.3707, "step": 2570 }, { "epoch": 1.2763528048982293, "grad_norm": 0.4675799310207367, "learning_rate": 7.106179184056512e-06, "loss": 0.3733, "step": 2571 }, { "epoch": 1.2768492470627173, "grad_norm": 0.43778279423713684, "learning_rate": 7.103558572315914e-06, "loss": 0.3866, "step": 2572 }, { "epoch": 1.277345689227205, "grad_norm": 0.4518514573574066, "learning_rate": 7.100937258254281e-06, "loss": 0.3365, "step": 2573 }, { "epoch": 1.2778421313916928, "grad_norm": 0.4588170051574707, "learning_rate": 7.098315242746797e-06, "loss": 0.3881, "step": 2574 }, { "epoch": 1.2783385735561807, "grad_norm": 0.40618979930877686, "learning_rate": 7.095692526668882e-06, "loss": 0.3328, "step": 2575 }, { "epoch": 1.2788350157206685, "grad_norm": 0.48340868949890137, "learning_rate": 7.093069110896194e-06, "loss": 0.36, "step": 2576 }, { "epoch": 1.2793314578851565, "grad_norm": 0.5234968662261963, "learning_rate": 7.090444996304613e-06, "loss": 0.3816, "step": 2577 }, { "epoch": 1.2798279000496442, "grad_norm": 0.4438231587409973, "learning_rate": 7.087820183770264e-06, "loss": 0.3799, "step": 2578 }, { "epoch": 1.280324342214132, "grad_norm": 0.4760311543941498, "learning_rate": 7.0851946741694975e-06, "loss": 0.3464, "step": 2579 }, { "epoch": 1.28082078437862, "grad_norm": 0.4759739935398102, "learning_rate": 7.082568468378905e-06, "loss": 0.3776, "step": 2580 }, { "epoch": 1.2813172265431076, "grad_norm": 0.5037909746170044, "learning_rate": 7.079941567275299e-06, "loss": 0.3923, "step": 2581 }, { "epoch": 1.2818136687075956, "grad_norm": 0.42776790261268616, "learning_rate": 7.077313971735735e-06, "loss": 0.3257, "step": 2582 }, { "epoch": 1.2823101108720834, "grad_norm": 0.44375109672546387, "learning_rate": 7.074685682637493e-06, "loss": 0.3644, "step": 2583 }, { "epoch": 1.2828065530365713, "grad_norm": 0.42405420541763306, "learning_rate": 7.07205670085809e-06, "loss": 0.392, "step": 2584 }, { "epoch": 1.283302995201059, "grad_norm": 0.4779243767261505, "learning_rate": 7.069427027275268e-06, "loss": 0.3315, "step": 2585 }, { "epoch": 1.283799437365547, "grad_norm": 0.45551711320877075, "learning_rate": 7.0667966627670085e-06, "loss": 0.3886, "step": 2586 }, { "epoch": 1.2842958795300348, "grad_norm": 0.44629189372062683, "learning_rate": 7.064165608211513e-06, "loss": 0.3658, "step": 2587 }, { "epoch": 1.2847923216945225, "grad_norm": 0.44934117794036865, "learning_rate": 7.061533864487222e-06, "loss": 0.3616, "step": 2588 }, { "epoch": 1.2852887638590105, "grad_norm": 0.434653639793396, "learning_rate": 7.058901432472805e-06, "loss": 0.3659, "step": 2589 }, { "epoch": 1.2857852060234982, "grad_norm": 0.45636841654777527, "learning_rate": 7.056268313047155e-06, "loss": 0.3454, "step": 2590 }, { "epoch": 1.2862816481879862, "grad_norm": 0.5297374725341797, "learning_rate": 7.053634507089402e-06, "loss": 0.3983, "step": 2591 }, { "epoch": 1.286778090352474, "grad_norm": 0.39058107137680054, "learning_rate": 7.051000015478903e-06, "loss": 0.3316, "step": 2592 }, { "epoch": 1.2872745325169617, "grad_norm": 0.5266125798225403, "learning_rate": 7.048364839095242e-06, "loss": 0.3593, "step": 2593 }, { "epoch": 1.2877709746814496, "grad_norm": 0.5115868449211121, "learning_rate": 7.045728978818231e-06, "loss": 0.3832, "step": 2594 }, { "epoch": 1.2882674168459374, "grad_norm": 0.45315229892730713, "learning_rate": 7.043092435527916e-06, "loss": 0.3968, "step": 2595 }, { "epoch": 1.2887638590104253, "grad_norm": 0.42696037888526917, "learning_rate": 7.040455210104564e-06, "loss": 0.3171, "step": 2596 }, { "epoch": 1.289260301174913, "grad_norm": 0.4612913727760315, "learning_rate": 7.037817303428674e-06, "loss": 0.3436, "step": 2597 }, { "epoch": 1.2897567433394008, "grad_norm": 0.4535265862941742, "learning_rate": 7.0351787163809695e-06, "loss": 0.3619, "step": 2598 }, { "epoch": 1.2902531855038888, "grad_norm": 0.5129346251487732, "learning_rate": 7.032539449842407e-06, "loss": 0.42, "step": 2599 }, { "epoch": 1.2907496276683768, "grad_norm": 0.4326920509338379, "learning_rate": 7.029899504694162e-06, "loss": 0.3507, "step": 2600 }, { "epoch": 1.2912460698328645, "grad_norm": 0.4557824432849884, "learning_rate": 7.0272588818176425e-06, "loss": 0.3332, "step": 2601 }, { "epoch": 1.2917425119973522, "grad_norm": 0.454269140958786, "learning_rate": 7.0246175820944815e-06, "loss": 0.3122, "step": 2602 }, { "epoch": 1.2922389541618402, "grad_norm": 0.5633291006088257, "learning_rate": 7.021975606406534e-06, "loss": 0.4718, "step": 2603 }, { "epoch": 1.292735396326328, "grad_norm": 0.4140956699848175, "learning_rate": 7.019332955635887e-06, "loss": 0.3734, "step": 2604 }, { "epoch": 1.293231838490816, "grad_norm": 0.5049256682395935, "learning_rate": 7.016689630664848e-06, "loss": 0.3618, "step": 2605 }, { "epoch": 1.2937282806553037, "grad_norm": 0.5748286247253418, "learning_rate": 7.014045632375952e-06, "loss": 0.4082, "step": 2606 }, { "epoch": 1.2942247228197914, "grad_norm": 0.4157061278820038, "learning_rate": 7.011400961651958e-06, "loss": 0.346, "step": 2607 }, { "epoch": 1.2947211649842794, "grad_norm": 0.5023199319839478, "learning_rate": 7.00875561937585e-06, "loss": 0.363, "step": 2608 }, { "epoch": 1.295217607148767, "grad_norm": 0.45741695165634155, "learning_rate": 7.006109606430836e-06, "loss": 0.3506, "step": 2609 }, { "epoch": 1.295714049313255, "grad_norm": 0.4909187853336334, "learning_rate": 7.003462923700346e-06, "loss": 0.3439, "step": 2610 }, { "epoch": 1.2962104914777428, "grad_norm": 0.5988786816596985, "learning_rate": 7.000815572068038e-06, "loss": 0.4219, "step": 2611 }, { "epoch": 1.2967069336422306, "grad_norm": 0.54853355884552, "learning_rate": 6.998167552417789e-06, "loss": 0.4239, "step": 2612 }, { "epoch": 1.2972033758067185, "grad_norm": 0.37832850217819214, "learning_rate": 6.995518865633703e-06, "loss": 0.3216, "step": 2613 }, { "epoch": 1.2976998179712065, "grad_norm": 0.5493369102478027, "learning_rate": 6.992869512600101e-06, "loss": 0.3648, "step": 2614 }, { "epoch": 1.2981962601356942, "grad_norm": 0.613169252872467, "learning_rate": 6.990219494201532e-06, "loss": 0.3726, "step": 2615 }, { "epoch": 1.298692702300182, "grad_norm": 0.410123735666275, "learning_rate": 6.9875688113227656e-06, "loss": 0.3559, "step": 2616 }, { "epoch": 1.29918914446467, "grad_norm": 0.44954559206962585, "learning_rate": 6.984917464848793e-06, "loss": 0.3636, "step": 2617 }, { "epoch": 1.2996855866291577, "grad_norm": 0.45685237646102905, "learning_rate": 6.982265455664825e-06, "loss": 0.3192, "step": 2618 }, { "epoch": 1.3001820287936456, "grad_norm": 0.473676860332489, "learning_rate": 6.979612784656298e-06, "loss": 0.3526, "step": 2619 }, { "epoch": 1.3006784709581334, "grad_norm": 0.48080015182495117, "learning_rate": 6.9769594527088625e-06, "loss": 0.4178, "step": 2620 }, { "epoch": 1.3011749131226211, "grad_norm": 0.44235387444496155, "learning_rate": 6.974305460708398e-06, "loss": 0.371, "step": 2621 }, { "epoch": 1.301671355287109, "grad_norm": 0.4325422942638397, "learning_rate": 6.9716508095409985e-06, "loss": 0.3391, "step": 2622 }, { "epoch": 1.3021677974515968, "grad_norm": 0.47832536697387695, "learning_rate": 6.968995500092981e-06, "loss": 0.352, "step": 2623 }, { "epoch": 1.3026642396160848, "grad_norm": 0.4703321158885956, "learning_rate": 6.966339533250879e-06, "loss": 0.3887, "step": 2624 }, { "epoch": 1.3031606817805725, "grad_norm": 0.4422002136707306, "learning_rate": 6.96368290990145e-06, "loss": 0.4147, "step": 2625 }, { "epoch": 1.3036571239450603, "grad_norm": 0.5275622606277466, "learning_rate": 6.961025630931667e-06, "loss": 0.3425, "step": 2626 }, { "epoch": 1.3041535661095482, "grad_norm": 0.4253457188606262, "learning_rate": 6.958367697228725e-06, "loss": 0.3403, "step": 2627 }, { "epoch": 1.3046500082740362, "grad_norm": 0.4934968054294586, "learning_rate": 6.955709109680032e-06, "loss": 0.3886, "step": 2628 }, { "epoch": 1.305146450438524, "grad_norm": 0.45170268416404724, "learning_rate": 6.9530498691732205e-06, "loss": 0.4483, "step": 2629 }, { "epoch": 1.3056428926030117, "grad_norm": 0.46395599842071533, "learning_rate": 6.9503899765961406e-06, "loss": 0.4079, "step": 2630 }, { "epoch": 1.3061393347674997, "grad_norm": 0.488667756319046, "learning_rate": 6.947729432836854e-06, "loss": 0.3276, "step": 2631 }, { "epoch": 1.3066357769319874, "grad_norm": 0.41679883003234863, "learning_rate": 6.945068238783648e-06, "loss": 0.3421, "step": 2632 }, { "epoch": 1.3071322190964754, "grad_norm": 0.46238020062446594, "learning_rate": 6.942406395325021e-06, "loss": 0.3835, "step": 2633 }, { "epoch": 1.307628661260963, "grad_norm": 0.4883055090904236, "learning_rate": 6.9397439033496894e-06, "loss": 0.3377, "step": 2634 }, { "epoch": 1.3081251034254509, "grad_norm": 0.4286307692527771, "learning_rate": 6.937080763746587e-06, "loss": 0.4043, "step": 2635 }, { "epoch": 1.3086215455899388, "grad_norm": 0.4356077015399933, "learning_rate": 6.9344169774048675e-06, "loss": 0.408, "step": 2636 }, { "epoch": 1.3091179877544266, "grad_norm": 0.4727447032928467, "learning_rate": 6.9317525452138915e-06, "loss": 0.352, "step": 2637 }, { "epoch": 1.3096144299189145, "grad_norm": 0.4045937657356262, "learning_rate": 6.929087468063242e-06, "loss": 0.3385, "step": 2638 }, { "epoch": 1.3101108720834023, "grad_norm": 0.465507447719574, "learning_rate": 6.9264217468427175e-06, "loss": 0.3894, "step": 2639 }, { "epoch": 1.31060731424789, "grad_norm": 0.4236934781074524, "learning_rate": 6.92375538244233e-06, "loss": 0.4204, "step": 2640 }, { "epoch": 1.311103756412378, "grad_norm": 0.40108513832092285, "learning_rate": 6.921088375752304e-06, "loss": 0.3474, "step": 2641 }, { "epoch": 1.3116001985768657, "grad_norm": 0.4405807852745056, "learning_rate": 6.918420727663084e-06, "loss": 0.3898, "step": 2642 }, { "epoch": 1.3120966407413537, "grad_norm": 0.44105738401412964, "learning_rate": 6.91575243906532e-06, "loss": 0.334, "step": 2643 }, { "epoch": 1.3125930829058414, "grad_norm": 0.45778152346611023, "learning_rate": 6.913083510849884e-06, "loss": 0.3692, "step": 2644 }, { "epoch": 1.3130895250703294, "grad_norm": 0.4708387851715088, "learning_rate": 6.910413943907859e-06, "loss": 0.3946, "step": 2645 }, { "epoch": 1.3135859672348171, "grad_norm": 0.46708986163139343, "learning_rate": 6.907743739130539e-06, "loss": 0.35, "step": 2646 }, { "epoch": 1.314082409399305, "grad_norm": 0.42406487464904785, "learning_rate": 6.905072897409436e-06, "loss": 0.3187, "step": 2647 }, { "epoch": 1.3145788515637928, "grad_norm": 0.500463604927063, "learning_rate": 6.902401419636269e-06, "loss": 0.4231, "step": 2648 }, { "epoch": 1.3150752937282806, "grad_norm": 0.4828680753707886, "learning_rate": 6.899729306702973e-06, "loss": 0.3922, "step": 2649 }, { "epoch": 1.3155717358927685, "grad_norm": 0.4060370624065399, "learning_rate": 6.897056559501693e-06, "loss": 0.3713, "step": 2650 }, { "epoch": 1.3160681780572563, "grad_norm": 0.4076467454433441, "learning_rate": 6.894383178924787e-06, "loss": 0.37, "step": 2651 }, { "epoch": 1.3165646202217443, "grad_norm": 0.45806884765625, "learning_rate": 6.891709165864824e-06, "loss": 0.3853, "step": 2652 }, { "epoch": 1.317061062386232, "grad_norm": 0.546109676361084, "learning_rate": 6.889034521214583e-06, "loss": 0.4067, "step": 2653 }, { "epoch": 1.3175575045507197, "grad_norm": 0.37518247961997986, "learning_rate": 6.886359245867057e-06, "loss": 0.2994, "step": 2654 }, { "epoch": 1.3180539467152077, "grad_norm": 0.4717075526714325, "learning_rate": 6.883683340715448e-06, "loss": 0.3713, "step": 2655 }, { "epoch": 1.3185503888796954, "grad_norm": 0.5149049162864685, "learning_rate": 6.881006806653167e-06, "loss": 0.3538, "step": 2656 }, { "epoch": 1.3190468310441834, "grad_norm": 0.48770368099212646, "learning_rate": 6.878329644573835e-06, "loss": 0.4014, "step": 2657 }, { "epoch": 1.3195432732086712, "grad_norm": 0.37870532274246216, "learning_rate": 6.875651855371287e-06, "loss": 0.3315, "step": 2658 }, { "epoch": 1.320039715373159, "grad_norm": 0.47378310561180115, "learning_rate": 6.872973439939561e-06, "loss": 0.3725, "step": 2659 }, { "epoch": 1.3205361575376469, "grad_norm": 0.4847486615180969, "learning_rate": 6.870294399172908e-06, "loss": 0.4075, "step": 2660 }, { "epoch": 1.3210325997021348, "grad_norm": 0.4782734215259552, "learning_rate": 6.867614733965786e-06, "loss": 0.3374, "step": 2661 }, { "epoch": 1.3215290418666226, "grad_norm": 0.45402151346206665, "learning_rate": 6.864934445212864e-06, "loss": 0.3426, "step": 2662 }, { "epoch": 1.3220254840311103, "grad_norm": 0.47395509481430054, "learning_rate": 6.862253533809017e-06, "loss": 0.3762, "step": 2663 }, { "epoch": 1.3225219261955983, "grad_norm": 0.5881268382072449, "learning_rate": 6.859572000649328e-06, "loss": 0.4245, "step": 2664 }, { "epoch": 1.323018368360086, "grad_norm": 0.408945232629776, "learning_rate": 6.856889846629089e-06, "loss": 0.3274, "step": 2665 }, { "epoch": 1.323514810524574, "grad_norm": 0.5390070080757141, "learning_rate": 6.854207072643797e-06, "loss": 0.3924, "step": 2666 }, { "epoch": 1.3240112526890617, "grad_norm": 0.4034024477005005, "learning_rate": 6.851523679589158e-06, "loss": 0.3375, "step": 2667 }, { "epoch": 1.3245076948535495, "grad_norm": 0.4620567858219147, "learning_rate": 6.848839668361085e-06, "loss": 0.3851, "step": 2668 }, { "epoch": 1.3250041370180374, "grad_norm": 0.45568907260894775, "learning_rate": 6.846155039855693e-06, "loss": 0.368, "step": 2669 }, { "epoch": 1.3255005791825252, "grad_norm": 0.43324604630470276, "learning_rate": 6.843469794969311e-06, "loss": 0.3917, "step": 2670 }, { "epoch": 1.3259970213470131, "grad_norm": 0.45262157917022705, "learning_rate": 6.840783934598467e-06, "loss": 0.3596, "step": 2671 }, { "epoch": 1.3264934635115009, "grad_norm": 0.46702566742897034, "learning_rate": 6.838097459639896e-06, "loss": 0.3587, "step": 2672 }, { "epoch": 1.3269899056759886, "grad_norm": 0.4292459189891815, "learning_rate": 6.8354103709905415e-06, "loss": 0.3561, "step": 2673 }, { "epoch": 1.3274863478404766, "grad_norm": 0.5086069703102112, "learning_rate": 6.8327226695475464e-06, "loss": 0.4134, "step": 2674 }, { "epoch": 1.3279827900049646, "grad_norm": 0.4345073699951172, "learning_rate": 6.830034356208264e-06, "loss": 0.3295, "step": 2675 }, { "epoch": 1.3284792321694523, "grad_norm": 0.46440112590789795, "learning_rate": 6.827345431870247e-06, "loss": 0.3651, "step": 2676 }, { "epoch": 1.32897567433394, "grad_norm": 0.4416405260562897, "learning_rate": 6.824655897431254e-06, "loss": 0.3675, "step": 2677 }, { "epoch": 1.329472116498428, "grad_norm": 0.41631585359573364, "learning_rate": 6.821965753789248e-06, "loss": 0.3524, "step": 2678 }, { "epoch": 1.3299685586629157, "grad_norm": 0.48350265622138977, "learning_rate": 6.819275001842397e-06, "loss": 0.4013, "step": 2679 }, { "epoch": 1.3304650008274037, "grad_norm": 0.4288997948169708, "learning_rate": 6.8165836424890665e-06, "loss": 0.3555, "step": 2680 }, { "epoch": 1.3309614429918915, "grad_norm": 0.43020549416542053, "learning_rate": 6.813891676627831e-06, "loss": 0.3337, "step": 2681 }, { "epoch": 1.3314578851563792, "grad_norm": 0.4604046046733856, "learning_rate": 6.811199105157462e-06, "loss": 0.3579, "step": 2682 }, { "epoch": 1.3319543273208672, "grad_norm": 0.4408448040485382, "learning_rate": 6.808505928976939e-06, "loss": 0.3575, "step": 2683 }, { "epoch": 1.332450769485355, "grad_norm": 0.4076693654060364, "learning_rate": 6.805812148985438e-06, "loss": 0.3562, "step": 2684 }, { "epoch": 1.3329472116498429, "grad_norm": 0.4306597411632538, "learning_rate": 6.803117766082339e-06, "loss": 0.3561, "step": 2685 }, { "epoch": 1.3334436538143306, "grad_norm": 0.5067901015281677, "learning_rate": 6.800422781167224e-06, "loss": 0.3927, "step": 2686 }, { "epoch": 1.3339400959788184, "grad_norm": 0.38089531660079956, "learning_rate": 6.797727195139876e-06, "loss": 0.314, "step": 2687 }, { "epoch": 1.3344365381433063, "grad_norm": 0.49155500531196594, "learning_rate": 6.795031008900277e-06, "loss": 0.3827, "step": 2688 }, { "epoch": 1.334932980307794, "grad_norm": 0.44685712456703186, "learning_rate": 6.792334223348609e-06, "loss": 0.3504, "step": 2689 }, { "epoch": 1.335429422472282, "grad_norm": 0.4385834336280823, "learning_rate": 6.78963683938526e-06, "loss": 0.397, "step": 2690 }, { "epoch": 1.3359258646367698, "grad_norm": 0.4348820745944977, "learning_rate": 6.786938857910806e-06, "loss": 0.3566, "step": 2691 }, { "epoch": 1.3364223068012577, "grad_norm": 0.43718576431274414, "learning_rate": 6.784240279826035e-06, "loss": 0.3625, "step": 2692 }, { "epoch": 1.3369187489657455, "grad_norm": 0.3824494779109955, "learning_rate": 6.781541106031928e-06, "loss": 0.3101, "step": 2693 }, { "epoch": 1.3374151911302334, "grad_norm": 0.4111395478248596, "learning_rate": 6.7788413374296665e-06, "loss": 0.3435, "step": 2694 }, { "epoch": 1.3379116332947212, "grad_norm": 0.4269261956214905, "learning_rate": 6.776140974920627e-06, "loss": 0.346, "step": 2695 }, { "epoch": 1.338408075459209, "grad_norm": 0.426853209733963, "learning_rate": 6.77344001940639e-06, "loss": 0.3499, "step": 2696 }, { "epoch": 1.3389045176236969, "grad_norm": 0.42341211438179016, "learning_rate": 6.770738471788729e-06, "loss": 0.3193, "step": 2697 }, { "epoch": 1.3394009597881846, "grad_norm": 0.4534164071083069, "learning_rate": 6.7680363329696184e-06, "loss": 0.4095, "step": 2698 }, { "epoch": 1.3398974019526726, "grad_norm": 0.4048832952976227, "learning_rate": 6.7653336038512294e-06, "loss": 0.3797, "step": 2699 }, { "epoch": 1.3403938441171603, "grad_norm": 0.39397212862968445, "learning_rate": 6.762630285335929e-06, "loss": 0.3292, "step": 2700 }, { "epoch": 1.340890286281648, "grad_norm": 0.47821322083473206, "learning_rate": 6.759926378326281e-06, "loss": 0.4361, "step": 2701 }, { "epoch": 1.341386728446136, "grad_norm": 0.43333151936531067, "learning_rate": 6.757221883725048e-06, "loss": 0.3916, "step": 2702 }, { "epoch": 1.3418831706106238, "grad_norm": 0.4449414610862732, "learning_rate": 6.754516802435187e-06, "loss": 0.3239, "step": 2703 }, { "epoch": 1.3423796127751118, "grad_norm": 0.42254242300987244, "learning_rate": 6.751811135359851e-06, "loss": 0.3494, "step": 2704 }, { "epoch": 1.3428760549395995, "grad_norm": 0.4693250358104706, "learning_rate": 6.7491048834023884e-06, "loss": 0.4063, "step": 2705 }, { "epoch": 1.3433724971040872, "grad_norm": 0.38476210832595825, "learning_rate": 6.746398047466343e-06, "loss": 0.3286, "step": 2706 }, { "epoch": 1.3438689392685752, "grad_norm": 0.4371069371700287, "learning_rate": 6.7436906284554545e-06, "loss": 0.3902, "step": 2707 }, { "epoch": 1.3443653814330632, "grad_norm": 0.4368372857570648, "learning_rate": 6.740982627273655e-06, "loss": 0.4047, "step": 2708 }, { "epoch": 1.344861823597551, "grad_norm": 0.4275979995727539, "learning_rate": 6.738274044825074e-06, "loss": 0.3589, "step": 2709 }, { "epoch": 1.3453582657620387, "grad_norm": 0.3859672248363495, "learning_rate": 6.735564882014032e-06, "loss": 0.3564, "step": 2710 }, { "epoch": 1.3458547079265266, "grad_norm": 0.44915783405303955, "learning_rate": 6.732855139745047e-06, "loss": 0.4258, "step": 2711 }, { "epoch": 1.3463511500910144, "grad_norm": 0.4173596501350403, "learning_rate": 6.730144818922828e-06, "loss": 0.3277, "step": 2712 }, { "epoch": 1.3468475922555023, "grad_norm": 0.37757718563079834, "learning_rate": 6.727433920452275e-06, "loss": 0.3548, "step": 2713 }, { "epoch": 1.34734403441999, "grad_norm": 0.3799987733364105, "learning_rate": 6.724722445238487e-06, "loss": 0.3499, "step": 2714 }, { "epoch": 1.3478404765844778, "grad_norm": 0.5116890072822571, "learning_rate": 6.722010394186748e-06, "loss": 0.3982, "step": 2715 }, { "epoch": 1.3483369187489658, "grad_norm": 0.45695140957832336, "learning_rate": 6.719297768202541e-06, "loss": 0.4355, "step": 2716 }, { "epoch": 1.3488333609134535, "grad_norm": 0.4457035958766937, "learning_rate": 6.716584568191538e-06, "loss": 0.3369, "step": 2717 }, { "epoch": 1.3493298030779415, "grad_norm": 0.5188837051391602, "learning_rate": 6.713870795059601e-06, "loss": 0.3968, "step": 2718 }, { "epoch": 1.3498262452424292, "grad_norm": 0.43511778116226196, "learning_rate": 6.711156449712786e-06, "loss": 0.3433, "step": 2719 }, { "epoch": 1.350322687406917, "grad_norm": 0.4984835684299469, "learning_rate": 6.70844153305734e-06, "loss": 0.4133, "step": 2720 }, { "epoch": 1.350819129571405, "grad_norm": 0.4243808388710022, "learning_rate": 6.705726045999697e-06, "loss": 0.3324, "step": 2721 }, { "epoch": 1.351315571735893, "grad_norm": 0.4294620454311371, "learning_rate": 6.703009989446487e-06, "loss": 0.3793, "step": 2722 }, { "epoch": 1.3518120139003806, "grad_norm": 0.4268113076686859, "learning_rate": 6.700293364304528e-06, "loss": 0.3931, "step": 2723 }, { "epoch": 1.3523084560648684, "grad_norm": 0.5017472505569458, "learning_rate": 6.697576171480824e-06, "loss": 0.4124, "step": 2724 }, { "epoch": 1.3528048982293563, "grad_norm": 0.4547306299209595, "learning_rate": 6.6948584118825745e-06, "loss": 0.348, "step": 2725 }, { "epoch": 1.353301340393844, "grad_norm": 0.4662696123123169, "learning_rate": 6.692140086417165e-06, "loss": 0.4072, "step": 2726 }, { "epoch": 1.353797782558332, "grad_norm": 0.47194546461105347, "learning_rate": 6.689421195992172e-06, "loss": 0.3307, "step": 2727 }, { "epoch": 1.3542942247228198, "grad_norm": 0.5228870511054993, "learning_rate": 6.686701741515355e-06, "loss": 0.3973, "step": 2728 }, { "epoch": 1.3547906668873075, "grad_norm": 0.41070470213890076, "learning_rate": 6.683981723894672e-06, "loss": 0.3751, "step": 2729 }, { "epoch": 1.3552871090517955, "grad_norm": 0.40111044049263, "learning_rate": 6.681261144038257e-06, "loss": 0.3618, "step": 2730 }, { "epoch": 1.3557835512162832, "grad_norm": 0.4132111072540283, "learning_rate": 6.678540002854441e-06, "loss": 0.3314, "step": 2731 }, { "epoch": 1.3562799933807712, "grad_norm": 0.4599568545818329, "learning_rate": 6.675818301251737e-06, "loss": 0.393, "step": 2732 }, { "epoch": 1.356776435545259, "grad_norm": 0.3884027898311615, "learning_rate": 6.6730960401388504e-06, "loss": 0.316, "step": 2733 }, { "epoch": 1.3572728777097467, "grad_norm": 0.41320544481277466, "learning_rate": 6.670373220424666e-06, "loss": 0.3376, "step": 2734 }, { "epoch": 1.3577693198742347, "grad_norm": 0.47948476672172546, "learning_rate": 6.6676498430182646e-06, "loss": 0.3717, "step": 2735 }, { "epoch": 1.3582657620387226, "grad_norm": 0.39805367588996887, "learning_rate": 6.664925908828902e-06, "loss": 0.3541, "step": 2736 }, { "epoch": 1.3587622042032104, "grad_norm": 0.43372204899787903, "learning_rate": 6.66220141876603e-06, "loss": 0.3903, "step": 2737 }, { "epoch": 1.359258646367698, "grad_norm": 0.4231403172016144, "learning_rate": 6.6594763737392794e-06, "loss": 0.3551, "step": 2738 }, { "epoch": 1.359755088532186, "grad_norm": 0.39570796489715576, "learning_rate": 6.656750774658471e-06, "loss": 0.3363, "step": 2739 }, { "epoch": 1.3602515306966738, "grad_norm": 0.4095968008041382, "learning_rate": 6.6540246224336045e-06, "loss": 0.4036, "step": 2740 }, { "epoch": 1.3607479728611618, "grad_norm": 0.4105670750141144, "learning_rate": 6.651297917974872e-06, "loss": 0.3975, "step": 2741 }, { "epoch": 1.3612444150256495, "grad_norm": 0.4876120388507843, "learning_rate": 6.648570662192646e-06, "loss": 0.4092, "step": 2742 }, { "epoch": 1.3617408571901373, "grad_norm": 0.3748372495174408, "learning_rate": 6.64584285599748e-06, "loss": 0.333, "step": 2743 }, { "epoch": 1.3622372993546252, "grad_norm": 0.3782437741756439, "learning_rate": 6.643114500300116e-06, "loss": 0.3558, "step": 2744 }, { "epoch": 1.362733741519113, "grad_norm": 0.44858452677726746, "learning_rate": 6.640385596011478e-06, "loss": 0.3784, "step": 2745 }, { "epoch": 1.363230183683601, "grad_norm": 0.4118614196777344, "learning_rate": 6.637656144042672e-06, "loss": 0.3903, "step": 2746 }, { "epoch": 1.3637266258480887, "grad_norm": 0.3945225179195404, "learning_rate": 6.6349261453049895e-06, "loss": 0.3466, "step": 2747 }, { "epoch": 1.3642230680125764, "grad_norm": 0.5345625281333923, "learning_rate": 6.632195600709901e-06, "loss": 0.3832, "step": 2748 }, { "epoch": 1.3647195101770644, "grad_norm": 0.43603745102882385, "learning_rate": 6.629464511169062e-06, "loss": 0.3579, "step": 2749 }, { "epoch": 1.3652159523415521, "grad_norm": 0.3716999888420105, "learning_rate": 6.626732877594311e-06, "loss": 0.3255, "step": 2750 }, { "epoch": 1.36571239450604, "grad_norm": 0.45054659247398376, "learning_rate": 6.624000700897662e-06, "loss": 0.3901, "step": 2751 }, { "epoch": 1.3662088366705278, "grad_norm": 0.4151257276535034, "learning_rate": 6.6212679819913185e-06, "loss": 0.3828, "step": 2752 }, { "epoch": 1.3667052788350158, "grad_norm": 0.44233566522598267, "learning_rate": 6.618534721787658e-06, "loss": 0.3957, "step": 2753 }, { "epoch": 1.3672017209995035, "grad_norm": 0.4031870365142822, "learning_rate": 6.615800921199245e-06, "loss": 0.3291, "step": 2754 }, { "epoch": 1.3676981631639915, "grad_norm": 0.446389764547348, "learning_rate": 6.613066581138819e-06, "loss": 0.3578, "step": 2755 }, { "epoch": 1.3681946053284793, "grad_norm": 0.4432697594165802, "learning_rate": 6.610331702519299e-06, "loss": 0.3718, "step": 2756 }, { "epoch": 1.368691047492967, "grad_norm": 0.49310368299484253, "learning_rate": 6.6075962862537934e-06, "loss": 0.4227, "step": 2757 }, { "epoch": 1.369187489657455, "grad_norm": 0.46096518635749817, "learning_rate": 6.6048603332555796e-06, "loss": 0.4111, "step": 2758 }, { "epoch": 1.3696839318219427, "grad_norm": 0.3813116252422333, "learning_rate": 6.602123844438117e-06, "loss": 0.3647, "step": 2759 }, { "epoch": 1.3701803739864307, "grad_norm": 0.4673171937465668, "learning_rate": 6.5993868207150465e-06, "loss": 0.3706, "step": 2760 }, { "epoch": 1.3706768161509184, "grad_norm": 0.5275506377220154, "learning_rate": 6.596649263000187e-06, "loss": 0.3694, "step": 2761 }, { "epoch": 1.3711732583154062, "grad_norm": 0.45668482780456543, "learning_rate": 6.593911172207532e-06, "loss": 0.3773, "step": 2762 }, { "epoch": 1.3716697004798941, "grad_norm": 0.5259037613868713, "learning_rate": 6.591172549251255e-06, "loss": 0.3981, "step": 2763 }, { "epoch": 1.3721661426443819, "grad_norm": 0.4520167410373688, "learning_rate": 6.588433395045711e-06, "loss": 0.3761, "step": 2764 }, { "epoch": 1.3726625848088698, "grad_norm": 0.46842536330223083, "learning_rate": 6.5856937105054285e-06, "loss": 0.365, "step": 2765 }, { "epoch": 1.3731590269733576, "grad_norm": 0.46221697330474854, "learning_rate": 6.582953496545112e-06, "loss": 0.3529, "step": 2766 }, { "epoch": 1.3736554691378453, "grad_norm": 0.42122682929039, "learning_rate": 6.580212754079644e-06, "loss": 0.3418, "step": 2767 }, { "epoch": 1.3741519113023333, "grad_norm": 0.4663468301296234, "learning_rate": 6.5774714840240875e-06, "loss": 0.3902, "step": 2768 }, { "epoch": 1.3746483534668212, "grad_norm": 0.47317928075790405, "learning_rate": 6.574729687293675e-06, "loss": 0.3645, "step": 2769 }, { "epoch": 1.375144795631309, "grad_norm": 0.5238403081893921, "learning_rate": 6.571987364803819e-06, "loss": 0.3797, "step": 2770 }, { "epoch": 1.3756412377957967, "grad_norm": 0.4729701578617096, "learning_rate": 6.569244517470105e-06, "loss": 0.3569, "step": 2771 }, { "epoch": 1.3761376799602847, "grad_norm": 0.5675663352012634, "learning_rate": 6.5665011462082975e-06, "loss": 0.3772, "step": 2772 }, { "epoch": 1.3766341221247724, "grad_norm": 0.4850043058395386, "learning_rate": 6.5637572519343305e-06, "loss": 0.3458, "step": 2773 }, { "epoch": 1.3771305642892604, "grad_norm": 0.4338068664073944, "learning_rate": 6.56101283556432e-06, "loss": 0.3898, "step": 2774 }, { "epoch": 1.3776270064537481, "grad_norm": 0.5448220372200012, "learning_rate": 6.5582678980145476e-06, "loss": 0.4051, "step": 2775 }, { "epoch": 1.3781234486182359, "grad_norm": 0.580350399017334, "learning_rate": 6.555522440201477e-06, "loss": 0.3879, "step": 2776 }, { "epoch": 1.3786198907827238, "grad_norm": 0.45247000455856323, "learning_rate": 6.55277646304174e-06, "loss": 0.3611, "step": 2777 }, { "epoch": 1.3791163329472116, "grad_norm": 0.4837375581264496, "learning_rate": 6.550029967452145e-06, "loss": 0.3309, "step": 2778 }, { "epoch": 1.3796127751116996, "grad_norm": 0.49499964714050293, "learning_rate": 6.547282954349669e-06, "loss": 0.3974, "step": 2779 }, { "epoch": 1.3801092172761873, "grad_norm": 0.4467627704143524, "learning_rate": 6.544535424651468e-06, "loss": 0.3712, "step": 2780 }, { "epoch": 1.380605659440675, "grad_norm": 0.4424203634262085, "learning_rate": 6.541787379274869e-06, "loss": 0.3587, "step": 2781 }, { "epoch": 1.381102101605163, "grad_norm": 0.46571898460388184, "learning_rate": 6.539038819137364e-06, "loss": 0.3703, "step": 2782 }, { "epoch": 1.381598543769651, "grad_norm": 0.44776126742362976, "learning_rate": 6.53628974515663e-06, "loss": 0.3435, "step": 2783 }, { "epoch": 1.3820949859341387, "grad_norm": 0.4512685239315033, "learning_rate": 6.533540158250502e-06, "loss": 0.3555, "step": 2784 }, { "epoch": 1.3825914280986265, "grad_norm": 0.4336758852005005, "learning_rate": 6.530790059336995e-06, "loss": 0.3678, "step": 2785 }, { "epoch": 1.3830878702631144, "grad_norm": 0.411609023809433, "learning_rate": 6.528039449334291e-06, "loss": 0.3591, "step": 2786 }, { "epoch": 1.3835843124276022, "grad_norm": 0.500707745552063, "learning_rate": 6.525288329160745e-06, "loss": 0.3868, "step": 2787 }, { "epoch": 1.3840807545920901, "grad_norm": 0.4606240689754486, "learning_rate": 6.522536699734881e-06, "loss": 0.3331, "step": 2788 }, { "epoch": 1.3845771967565779, "grad_norm": 0.4536358416080475, "learning_rate": 6.519784561975393e-06, "loss": 0.3372, "step": 2789 }, { "epoch": 1.3850736389210656, "grad_norm": 0.45296528935432434, "learning_rate": 6.5170319168011455e-06, "loss": 0.3506, "step": 2790 }, { "epoch": 1.3855700810855536, "grad_norm": 0.4871671199798584, "learning_rate": 6.514278765131172e-06, "loss": 0.371, "step": 2791 }, { "epoch": 1.3860665232500413, "grad_norm": 0.4494132995605469, "learning_rate": 6.511525107884674e-06, "loss": 0.3523, "step": 2792 }, { "epoch": 1.3865629654145293, "grad_norm": 0.44935861229896545, "learning_rate": 6.5087709459810245e-06, "loss": 0.3879, "step": 2793 }, { "epoch": 1.387059407579017, "grad_norm": 0.37875303626060486, "learning_rate": 6.506016280339762e-06, "loss": 0.3062, "step": 2794 }, { "epoch": 1.3875558497435048, "grad_norm": 0.3909286558628082, "learning_rate": 6.503261111880593e-06, "loss": 0.3424, "step": 2795 }, { "epoch": 1.3880522919079927, "grad_norm": 0.4856712520122528, "learning_rate": 6.500505441523396e-06, "loss": 0.3745, "step": 2796 }, { "epoch": 1.3885487340724805, "grad_norm": 0.5385920405387878, "learning_rate": 6.497749270188214e-06, "loss": 0.4061, "step": 2797 }, { "epoch": 1.3890451762369684, "grad_norm": 0.4610712230205536, "learning_rate": 6.494992598795258e-06, "loss": 0.3691, "step": 2798 }, { "epoch": 1.3895416184014562, "grad_norm": 0.38353338837623596, "learning_rate": 6.492235428264903e-06, "loss": 0.3327, "step": 2799 }, { "epoch": 1.3900380605659441, "grad_norm": 0.41884052753448486, "learning_rate": 6.489477759517697e-06, "loss": 0.3407, "step": 2800 }, { "epoch": 1.3905345027304319, "grad_norm": 0.4902685880661011, "learning_rate": 6.486719593474347e-06, "loss": 0.3949, "step": 2801 }, { "epoch": 1.3910309448949199, "grad_norm": 0.5424469709396362, "learning_rate": 6.483960931055735e-06, "loss": 0.3748, "step": 2802 }, { "epoch": 1.3915273870594076, "grad_norm": 0.41129186749458313, "learning_rate": 6.481201773182896e-06, "loss": 0.3284, "step": 2803 }, { "epoch": 1.3920238292238953, "grad_norm": 0.4602360725402832, "learning_rate": 6.478442120777044e-06, "loss": 0.3646, "step": 2804 }, { "epoch": 1.3925202713883833, "grad_norm": 0.40765008330345154, "learning_rate": 6.4756819747595486e-06, "loss": 0.3869, "step": 2805 }, { "epoch": 1.393016713552871, "grad_norm": 0.4212961196899414, "learning_rate": 6.472921336051949e-06, "loss": 0.3363, "step": 2806 }, { "epoch": 1.393513155717359, "grad_norm": 0.40995001792907715, "learning_rate": 6.4701602055759475e-06, "loss": 0.3476, "step": 2807 }, { "epoch": 1.3940095978818468, "grad_norm": 0.4108290374279022, "learning_rate": 6.4673985842534094e-06, "loss": 0.3615, "step": 2808 }, { "epoch": 1.3945060400463345, "grad_norm": 0.42506924271583557, "learning_rate": 6.464636473006367e-06, "loss": 0.3484, "step": 2809 }, { "epoch": 1.3950024822108225, "grad_norm": 0.4409545361995697, "learning_rate": 6.461873872757012e-06, "loss": 0.4145, "step": 2810 }, { "epoch": 1.3954989243753102, "grad_norm": 0.4252682626247406, "learning_rate": 6.4591107844277015e-06, "loss": 0.325, "step": 2811 }, { "epoch": 1.3959953665397982, "grad_norm": 0.41473162174224854, "learning_rate": 6.456347208940956e-06, "loss": 0.3375, "step": 2812 }, { "epoch": 1.396491808704286, "grad_norm": 0.49783000349998474, "learning_rate": 6.453583147219462e-06, "loss": 0.411, "step": 2813 }, { "epoch": 1.3969882508687737, "grad_norm": 0.4215382933616638, "learning_rate": 6.45081860018606e-06, "loss": 0.3359, "step": 2814 }, { "epoch": 1.3974846930332616, "grad_norm": 0.46508100628852844, "learning_rate": 6.448053568763757e-06, "loss": 0.3673, "step": 2815 }, { "epoch": 1.3979811351977496, "grad_norm": 0.49133074283599854, "learning_rate": 6.445288053875724e-06, "loss": 0.3574, "step": 2816 }, { "epoch": 1.3984775773622373, "grad_norm": 0.4353756308555603, "learning_rate": 6.442522056445292e-06, "loss": 0.3646, "step": 2817 }, { "epoch": 1.398974019526725, "grad_norm": 0.43570172786712646, "learning_rate": 6.43975557739595e-06, "loss": 0.3264, "step": 2818 }, { "epoch": 1.399470461691213, "grad_norm": 0.5154541730880737, "learning_rate": 6.43698861765135e-06, "loss": 0.4033, "step": 2819 }, { "epoch": 1.3999669038557008, "grad_norm": 0.43559888005256653, "learning_rate": 6.434221178135306e-06, "loss": 0.3676, "step": 2820 }, { "epoch": 1.4004633460201887, "grad_norm": 0.4478338658809662, "learning_rate": 6.431453259771792e-06, "loss": 0.3757, "step": 2821 }, { "epoch": 1.4009597881846765, "grad_norm": 0.4406794607639313, "learning_rate": 6.428684863484937e-06, "loss": 0.3886, "step": 2822 }, { "epoch": 1.4014562303491642, "grad_norm": 0.43514013290405273, "learning_rate": 6.425915990199038e-06, "loss": 0.3657, "step": 2823 }, { "epoch": 1.4019526725136522, "grad_norm": 0.5361088514328003, "learning_rate": 6.423146640838543e-06, "loss": 0.401, "step": 2824 }, { "epoch": 1.40244911467814, "grad_norm": 0.3890208303928375, "learning_rate": 6.4203768163280645e-06, "loss": 0.3277, "step": 2825 }, { "epoch": 1.402945556842628, "grad_norm": 0.41233518719673157, "learning_rate": 6.417606517592371e-06, "loss": 0.3747, "step": 2826 }, { "epoch": 1.4034419990071156, "grad_norm": 0.4915914535522461, "learning_rate": 6.414835745556387e-06, "loss": 0.4425, "step": 2827 }, { "epoch": 1.4039384411716034, "grad_norm": 0.42793408036231995, "learning_rate": 6.412064501145203e-06, "loss": 0.3401, "step": 2828 }, { "epoch": 1.4044348833360913, "grad_norm": 0.45271456241607666, "learning_rate": 6.409292785284058e-06, "loss": 0.3295, "step": 2829 }, { "epoch": 1.4049313255005793, "grad_norm": 0.4423424005508423, "learning_rate": 6.406520598898357e-06, "loss": 0.3769, "step": 2830 }, { "epoch": 1.405427767665067, "grad_norm": 0.4808540940284729, "learning_rate": 6.403747942913654e-06, "loss": 0.3922, "step": 2831 }, { "epoch": 1.4059242098295548, "grad_norm": 0.39557144045829773, "learning_rate": 6.400974818255665e-06, "loss": 0.3274, "step": 2832 }, { "epoch": 1.4064206519940428, "grad_norm": 0.4178733825683594, "learning_rate": 6.398201225850259e-06, "loss": 0.337, "step": 2833 }, { "epoch": 1.4069170941585305, "grad_norm": 0.4536067247390747, "learning_rate": 6.395427166623466e-06, "loss": 0.4317, "step": 2834 }, { "epoch": 1.4074135363230185, "grad_norm": 0.40786805748939514, "learning_rate": 6.392652641501467e-06, "loss": 0.3748, "step": 2835 }, { "epoch": 1.4079099784875062, "grad_norm": 0.4031201899051666, "learning_rate": 6.389877651410601e-06, "loss": 0.2829, "step": 2836 }, { "epoch": 1.408406420651994, "grad_norm": 0.4646221697330475, "learning_rate": 6.387102197277364e-06, "loss": 0.3716, "step": 2837 }, { "epoch": 1.408902862816482, "grad_norm": 0.4514201879501343, "learning_rate": 6.3843262800284e-06, "loss": 0.3892, "step": 2838 }, { "epoch": 1.4093993049809697, "grad_norm": 0.40900084376335144, "learning_rate": 6.381549900590517e-06, "loss": 0.3757, "step": 2839 }, { "epoch": 1.4098957471454576, "grad_norm": 0.535298228263855, "learning_rate": 6.378773059890669e-06, "loss": 0.3928, "step": 2840 }, { "epoch": 1.4103921893099454, "grad_norm": 0.3884401321411133, "learning_rate": 6.375995758855971e-06, "loss": 0.321, "step": 2841 }, { "epoch": 1.410888631474433, "grad_norm": 0.42571139335632324, "learning_rate": 6.3732179984136855e-06, "loss": 0.3947, "step": 2842 }, { "epoch": 1.411385073638921, "grad_norm": 0.43353986740112305, "learning_rate": 6.370439779491233e-06, "loss": 0.4188, "step": 2843 }, { "epoch": 1.411881515803409, "grad_norm": 0.4116382598876953, "learning_rate": 6.367661103016183e-06, "loss": 0.3397, "step": 2844 }, { "epoch": 1.4123779579678968, "grad_norm": 0.40868547558784485, "learning_rate": 6.3648819699162634e-06, "loss": 0.3218, "step": 2845 }, { "epoch": 1.4128744001323845, "grad_norm": 0.42996707558631897, "learning_rate": 6.362102381119349e-06, "loss": 0.3587, "step": 2846 }, { "epoch": 1.4133708422968725, "grad_norm": 0.4231809675693512, "learning_rate": 6.359322337553471e-06, "loss": 0.3728, "step": 2847 }, { "epoch": 1.4138672844613602, "grad_norm": 0.43802115321159363, "learning_rate": 6.356541840146806e-06, "loss": 0.3646, "step": 2848 }, { "epoch": 1.4143637266258482, "grad_norm": 0.5279556512832642, "learning_rate": 6.35376088982769e-06, "loss": 0.3707, "step": 2849 }, { "epoch": 1.414860168790336, "grad_norm": 0.4436330199241638, "learning_rate": 6.350979487524607e-06, "loss": 0.3599, "step": 2850 }, { "epoch": 1.4153566109548237, "grad_norm": 0.504306435585022, "learning_rate": 6.34819763416619e-06, "loss": 0.3741, "step": 2851 }, { "epoch": 1.4158530531193116, "grad_norm": 0.43956977128982544, "learning_rate": 6.345415330681226e-06, "loss": 0.3723, "step": 2852 }, { "epoch": 1.4163494952837994, "grad_norm": 0.40212753415107727, "learning_rate": 6.342632577998648e-06, "loss": 0.334, "step": 2853 }, { "epoch": 1.4168459374482874, "grad_norm": 0.551528811454773, "learning_rate": 6.3398493770475445e-06, "loss": 0.3756, "step": 2854 }, { "epoch": 1.417342379612775, "grad_norm": 0.4656948149204254, "learning_rate": 6.337065728757148e-06, "loss": 0.3903, "step": 2855 }, { "epoch": 1.4178388217772628, "grad_norm": 0.44498899579048157, "learning_rate": 6.334281634056845e-06, "loss": 0.367, "step": 2856 }, { "epoch": 1.4183352639417508, "grad_norm": 0.457407683134079, "learning_rate": 6.3314970938761664e-06, "loss": 0.384, "step": 2857 }, { "epoch": 1.4188317061062385, "grad_norm": 0.45287585258483887, "learning_rate": 6.328712109144798e-06, "loss": 0.3554, "step": 2858 }, { "epoch": 1.4193281482707265, "grad_norm": 0.40868526697158813, "learning_rate": 6.325926680792567e-06, "loss": 0.3519, "step": 2859 }, { "epoch": 1.4198245904352143, "grad_norm": 0.5020957589149475, "learning_rate": 6.323140809749456e-06, "loss": 0.3726, "step": 2860 }, { "epoch": 1.4203210325997022, "grad_norm": 0.49714022874832153, "learning_rate": 6.320354496945588e-06, "loss": 0.3945, "step": 2861 }, { "epoch": 1.42081747476419, "grad_norm": 0.3535408675670624, "learning_rate": 6.31756774331124e-06, "loss": 0.3295, "step": 2862 }, { "epoch": 1.421313916928678, "grad_norm": 0.5570643544197083, "learning_rate": 6.3147805497768314e-06, "loss": 0.3783, "step": 2863 }, { "epoch": 1.4218103590931657, "grad_norm": 0.48964837193489075, "learning_rate": 6.311992917272931e-06, "loss": 0.3798, "step": 2864 }, { "epoch": 1.4223068012576534, "grad_norm": 0.37364569306373596, "learning_rate": 6.309204846730254e-06, "loss": 0.3267, "step": 2865 }, { "epoch": 1.4228032434221414, "grad_norm": 0.5678357481956482, "learning_rate": 6.30641633907966e-06, "loss": 0.3897, "step": 2866 }, { "epoch": 1.4232996855866291, "grad_norm": 0.5307788848876953, "learning_rate": 6.303627395252156e-06, "loss": 0.3661, "step": 2867 }, { "epoch": 1.423796127751117, "grad_norm": 0.4314323663711548, "learning_rate": 6.3008380161788965e-06, "loss": 0.3488, "step": 2868 }, { "epoch": 1.4242925699156048, "grad_norm": 0.4565404951572418, "learning_rate": 6.298048202791179e-06, "loss": 0.3463, "step": 2869 }, { "epoch": 1.4247890120800926, "grad_norm": 0.570925772190094, "learning_rate": 6.295257956020444e-06, "loss": 0.3814, "step": 2870 }, { "epoch": 1.4252854542445805, "grad_norm": 0.4347098767757416, "learning_rate": 6.2924672767982834e-06, "loss": 0.3485, "step": 2871 }, { "epoch": 1.4257818964090683, "grad_norm": 0.5504652261734009, "learning_rate": 6.2896761660564245e-06, "loss": 0.4092, "step": 2872 }, { "epoch": 1.4262783385735562, "grad_norm": 0.5694171190261841, "learning_rate": 6.286884624726746e-06, "loss": 0.3672, "step": 2873 }, { "epoch": 1.426774780738044, "grad_norm": 0.35360831022262573, "learning_rate": 6.284092653741264e-06, "loss": 0.3067, "step": 2874 }, { "epoch": 1.4272712229025317, "grad_norm": 0.4886743128299713, "learning_rate": 6.281300254032148e-06, "loss": 0.4036, "step": 2875 }, { "epoch": 1.4277676650670197, "grad_norm": 0.4501666724681854, "learning_rate": 6.278507426531698e-06, "loss": 0.3119, "step": 2876 }, { "epoch": 1.4282641072315077, "grad_norm": 0.4993211328983307, "learning_rate": 6.275714172172368e-06, "loss": 0.4055, "step": 2877 }, { "epoch": 1.4287605493959954, "grad_norm": 0.5025604367256165, "learning_rate": 6.272920491886748e-06, "loss": 0.3845, "step": 2878 }, { "epoch": 1.4292569915604831, "grad_norm": 0.4254264533519745, "learning_rate": 6.270126386607571e-06, "loss": 0.3516, "step": 2879 }, { "epoch": 1.429753433724971, "grad_norm": 0.5458546280860901, "learning_rate": 6.267331857267716e-06, "loss": 0.4347, "step": 2880 }, { "epoch": 1.4302498758894588, "grad_norm": 0.4506624639034271, "learning_rate": 6.264536904800196e-06, "loss": 0.3697, "step": 2881 }, { "epoch": 1.4307463180539468, "grad_norm": 0.4226951599121094, "learning_rate": 6.261741530138172e-06, "loss": 0.3696, "step": 2882 }, { "epoch": 1.4312427602184346, "grad_norm": 0.508124589920044, "learning_rate": 6.258945734214942e-06, "loss": 0.386, "step": 2883 }, { "epoch": 1.4317392023829223, "grad_norm": 0.4291067123413086, "learning_rate": 6.25614951796395e-06, "loss": 0.3503, "step": 2884 }, { "epoch": 1.4322356445474103, "grad_norm": 0.44818979501724243, "learning_rate": 6.2533528823187725e-06, "loss": 0.3911, "step": 2885 }, { "epoch": 1.432732086711898, "grad_norm": 0.4772303104400635, "learning_rate": 6.250555828213133e-06, "loss": 0.3795, "step": 2886 }, { "epoch": 1.433228528876386, "grad_norm": 0.4721188545227051, "learning_rate": 6.24775835658089e-06, "loss": 0.3924, "step": 2887 }, { "epoch": 1.4337249710408737, "grad_norm": 0.39368125796318054, "learning_rate": 6.244960468356044e-06, "loss": 0.3447, "step": 2888 }, { "epoch": 1.4342214132053615, "grad_norm": 0.3885250687599182, "learning_rate": 6.242162164472734e-06, "loss": 0.338, "step": 2889 }, { "epoch": 1.4347178553698494, "grad_norm": 0.5212715268135071, "learning_rate": 6.239363445865237e-06, "loss": 0.3918, "step": 2890 }, { "epoch": 1.4352142975343374, "grad_norm": 0.4918370246887207, "learning_rate": 6.236564313467969e-06, "loss": 0.4223, "step": 2891 }, { "epoch": 1.4357107396988251, "grad_norm": 0.38370954990386963, "learning_rate": 6.233764768215485e-06, "loss": 0.3271, "step": 2892 }, { "epoch": 1.4362071818633129, "grad_norm": 0.4532518982887268, "learning_rate": 6.230964811042477e-06, "loss": 0.3737, "step": 2893 }, { "epoch": 1.4367036240278008, "grad_norm": 0.5329140424728394, "learning_rate": 6.228164442883775e-06, "loss": 0.3771, "step": 2894 }, { "epoch": 1.4372000661922886, "grad_norm": 0.46147361397743225, "learning_rate": 6.225363664674345e-06, "loss": 0.3877, "step": 2895 }, { "epoch": 1.4376965083567765, "grad_norm": 0.4279319941997528, "learning_rate": 6.22256247734929e-06, "loss": 0.407, "step": 2896 }, { "epoch": 1.4381929505212643, "grad_norm": 0.5776951313018799, "learning_rate": 6.2197608818438515e-06, "loss": 0.3953, "step": 2897 }, { "epoch": 1.438689392685752, "grad_norm": 0.45464950799942017, "learning_rate": 6.216958879093405e-06, "loss": 0.3564, "step": 2898 }, { "epoch": 1.43918583485024, "grad_norm": 0.49621257185935974, "learning_rate": 6.214156470033467e-06, "loss": 0.3774, "step": 2899 }, { "epoch": 1.4396822770147277, "grad_norm": 0.5221878886222839, "learning_rate": 6.211353655599679e-06, "loss": 0.3734, "step": 2900 }, { "epoch": 1.4401787191792157, "grad_norm": 0.44852039217948914, "learning_rate": 6.208550436727831e-06, "loss": 0.3402, "step": 2901 }, { "epoch": 1.4406751613437034, "grad_norm": 0.47436270117759705, "learning_rate": 6.2057468143538365e-06, "loss": 0.391, "step": 2902 }, { "epoch": 1.4411716035081912, "grad_norm": 0.42725762724876404, "learning_rate": 6.202942789413753e-06, "loss": 0.3428, "step": 2903 }, { "epoch": 1.4416680456726791, "grad_norm": 0.4203522205352783, "learning_rate": 6.200138362843765e-06, "loss": 0.4173, "step": 2904 }, { "epoch": 1.442164487837167, "grad_norm": 0.4445686936378479, "learning_rate": 6.197333535580196e-06, "loss": 0.3435, "step": 2905 }, { "epoch": 1.4426609300016549, "grad_norm": 0.5531978607177734, "learning_rate": 6.194528308559501e-06, "loss": 0.416, "step": 2906 }, { "epoch": 1.4431573721661426, "grad_norm": 0.4822254180908203, "learning_rate": 6.191722682718269e-06, "loss": 0.3872, "step": 2907 }, { "epoch": 1.4436538143306306, "grad_norm": 0.4216371178627014, "learning_rate": 6.188916658993223e-06, "loss": 0.3245, "step": 2908 }, { "epoch": 1.4441502564951183, "grad_norm": 0.4761943817138672, "learning_rate": 6.186110238321217e-06, "loss": 0.4024, "step": 2909 }, { "epoch": 1.4446466986596063, "grad_norm": 0.462619423866272, "learning_rate": 6.18330342163924e-06, "loss": 0.3987, "step": 2910 }, { "epoch": 1.445143140824094, "grad_norm": 0.41312238574028015, "learning_rate": 6.1804962098844105e-06, "loss": 0.326, "step": 2911 }, { "epoch": 1.4456395829885818, "grad_norm": 0.49275368452072144, "learning_rate": 6.177688603993981e-06, "loss": 0.3778, "step": 2912 }, { "epoch": 1.4461360251530697, "grad_norm": 0.40190204977989197, "learning_rate": 6.174880604905334e-06, "loss": 0.3441, "step": 2913 }, { "epoch": 1.4466324673175575, "grad_norm": 0.4399977922439575, "learning_rate": 6.1720722135559844e-06, "loss": 0.3527, "step": 2914 }, { "epoch": 1.4471289094820454, "grad_norm": 0.5164320468902588, "learning_rate": 6.1692634308835766e-06, "loss": 0.3358, "step": 2915 }, { "epoch": 1.4476253516465332, "grad_norm": 0.43708938360214233, "learning_rate": 6.16645425782589e-06, "loss": 0.3764, "step": 2916 }, { "epoch": 1.448121793811021, "grad_norm": 0.4584163725376129, "learning_rate": 6.163644695320829e-06, "loss": 0.3822, "step": 2917 }, { "epoch": 1.4486182359755089, "grad_norm": 0.44861093163490295, "learning_rate": 6.160834744306429e-06, "loss": 0.3526, "step": 2918 }, { "epoch": 1.4491146781399966, "grad_norm": 0.5252468585968018, "learning_rate": 6.158024405720859e-06, "loss": 0.4219, "step": 2919 }, { "epoch": 1.4496111203044846, "grad_norm": 0.43593835830688477, "learning_rate": 6.155213680502412e-06, "loss": 0.3819, "step": 2920 }, { "epoch": 1.4501075624689723, "grad_norm": 0.400981068611145, "learning_rate": 6.1524025695895155e-06, "loss": 0.3463, "step": 2921 }, { "epoch": 1.45060400463346, "grad_norm": 0.43295174837112427, "learning_rate": 6.14959107392072e-06, "loss": 0.3224, "step": 2922 }, { "epoch": 1.451100446797948, "grad_norm": 0.5765234231948853, "learning_rate": 6.146779194434711e-06, "loss": 0.3865, "step": 2923 }, { "epoch": 1.451596888962436, "grad_norm": 0.351360559463501, "learning_rate": 6.143966932070295e-06, "loss": 0.3395, "step": 2924 }, { "epoch": 1.4520933311269237, "grad_norm": 0.4521978795528412, "learning_rate": 6.141154287766413e-06, "loss": 0.3799, "step": 2925 }, { "epoch": 1.4525897732914115, "grad_norm": 0.48558515310287476, "learning_rate": 6.138341262462129e-06, "loss": 0.4141, "step": 2926 }, { "epoch": 1.4530862154558994, "grad_norm": 0.43512406945228577, "learning_rate": 6.135527857096635e-06, "loss": 0.3617, "step": 2927 }, { "epoch": 1.4535826576203872, "grad_norm": 0.411963552236557, "learning_rate": 6.132714072609251e-06, "loss": 0.3804, "step": 2928 }, { "epoch": 1.4540790997848752, "grad_norm": 0.4240255057811737, "learning_rate": 6.1298999099394256e-06, "loss": 0.3844, "step": 2929 }, { "epoch": 1.454575541949363, "grad_norm": 0.44861119985580444, "learning_rate": 6.1270853700267275e-06, "loss": 0.366, "step": 2930 }, { "epoch": 1.4550719841138506, "grad_norm": 0.3815794289112091, "learning_rate": 6.124270453810858e-06, "loss": 0.3477, "step": 2931 }, { "epoch": 1.4555684262783386, "grad_norm": 0.4740927219390869, "learning_rate": 6.1214551622316385e-06, "loss": 0.3526, "step": 2932 }, { "epoch": 1.4560648684428263, "grad_norm": 0.4641697108745575, "learning_rate": 6.118639496229021e-06, "loss": 0.3942, "step": 2933 }, { "epoch": 1.4565613106073143, "grad_norm": 0.40029942989349365, "learning_rate": 6.115823456743079e-06, "loss": 0.3457, "step": 2934 }, { "epoch": 1.457057752771802, "grad_norm": 0.4153989851474762, "learning_rate": 6.11300704471401e-06, "loss": 0.3582, "step": 2935 }, { "epoch": 1.4575541949362898, "grad_norm": 0.4690137505531311, "learning_rate": 6.11019026108214e-06, "loss": 0.3853, "step": 2936 }, { "epoch": 1.4580506371007778, "grad_norm": 0.367489218711853, "learning_rate": 6.107373106787914e-06, "loss": 0.3352, "step": 2937 }, { "epoch": 1.4585470792652657, "grad_norm": 0.43339303135871887, "learning_rate": 6.104555582771904e-06, "loss": 0.3904, "step": 2938 }, { "epoch": 1.4590435214297535, "grad_norm": 0.3871965706348419, "learning_rate": 6.101737689974805e-06, "loss": 0.3191, "step": 2939 }, { "epoch": 1.4595399635942412, "grad_norm": 0.3694908320903778, "learning_rate": 6.098919429337436e-06, "loss": 0.3577, "step": 2940 }, { "epoch": 1.4600364057587292, "grad_norm": 0.47464263439178467, "learning_rate": 6.0961008018007365e-06, "loss": 0.405, "step": 2941 }, { "epoch": 1.460532847923217, "grad_norm": 0.38684698939323425, "learning_rate": 6.09328180830577e-06, "loss": 0.3414, "step": 2942 }, { "epoch": 1.4610292900877049, "grad_norm": 0.4137904644012451, "learning_rate": 6.090462449793721e-06, "loss": 0.3699, "step": 2943 }, { "epoch": 1.4615257322521926, "grad_norm": 0.42093202471733093, "learning_rate": 6.0876427272058955e-06, "loss": 0.3757, "step": 2944 }, { "epoch": 1.4620221744166804, "grad_norm": 0.4019743800163269, "learning_rate": 6.084822641483725e-06, "loss": 0.3281, "step": 2945 }, { "epoch": 1.4625186165811683, "grad_norm": 0.4547860622406006, "learning_rate": 6.082002193568759e-06, "loss": 0.4173, "step": 2946 }, { "epoch": 1.463015058745656, "grad_norm": 0.4032314419746399, "learning_rate": 6.079181384402667e-06, "loss": 0.3564, "step": 2947 }, { "epoch": 1.463511500910144, "grad_norm": 0.4073452055454254, "learning_rate": 6.076360214927242e-06, "loss": 0.3718, "step": 2948 }, { "epoch": 1.4640079430746318, "grad_norm": 0.39136117696762085, "learning_rate": 6.0735386860843944e-06, "loss": 0.3524, "step": 2949 }, { "epoch": 1.4645043852391195, "grad_norm": 0.4113900065422058, "learning_rate": 6.070716798816157e-06, "loss": 0.3468, "step": 2950 }, { "epoch": 1.4650008274036075, "grad_norm": 0.4182823896408081, "learning_rate": 6.0678945540646815e-06, "loss": 0.349, "step": 2951 }, { "epoch": 1.4654972695680955, "grad_norm": 0.4477221369743347, "learning_rate": 6.065071952772238e-06, "loss": 0.4198, "step": 2952 }, { "epoch": 1.4659937117325832, "grad_norm": 0.395856112241745, "learning_rate": 6.062248995881216e-06, "loss": 0.3782, "step": 2953 }, { "epoch": 1.466490153897071, "grad_norm": 0.46256282925605774, "learning_rate": 6.0594256843341235e-06, "loss": 0.3748, "step": 2954 }, { "epoch": 1.466986596061559, "grad_norm": 0.5042625069618225, "learning_rate": 6.056602019073591e-06, "loss": 0.3605, "step": 2955 }, { "epoch": 1.4674830382260466, "grad_norm": 0.40807655453681946, "learning_rate": 6.05377800104236e-06, "loss": 0.3678, "step": 2956 }, { "epoch": 1.4679794803905346, "grad_norm": 0.4666697084903717, "learning_rate": 6.050953631183295e-06, "loss": 0.3679, "step": 2957 }, { "epoch": 1.4684759225550224, "grad_norm": 0.5077946186065674, "learning_rate": 6.048128910439374e-06, "loss": 0.3643, "step": 2958 }, { "epoch": 1.46897236471951, "grad_norm": 0.4219443202018738, "learning_rate": 6.045303839753699e-06, "loss": 0.341, "step": 2959 }, { "epoch": 1.469468806883998, "grad_norm": 0.4478159546852112, "learning_rate": 6.042478420069481e-06, "loss": 0.4011, "step": 2960 }, { "epoch": 1.4699652490484858, "grad_norm": 0.4537481665611267, "learning_rate": 6.03965265233005e-06, "loss": 0.3821, "step": 2961 }, { "epoch": 1.4704616912129738, "grad_norm": 0.3947198688983917, "learning_rate": 6.036826537478856e-06, "loss": 0.3555, "step": 2962 }, { "epoch": 1.4709581333774615, "grad_norm": 0.41239088773727417, "learning_rate": 6.0340000764594595e-06, "loss": 0.3845, "step": 2963 }, { "epoch": 1.4714545755419493, "grad_norm": 0.4174197316169739, "learning_rate": 6.031173270215541e-06, "loss": 0.3923, "step": 2964 }, { "epoch": 1.4719510177064372, "grad_norm": 0.4443199932575226, "learning_rate": 6.028346119690893e-06, "loss": 0.3703, "step": 2965 }, { "epoch": 1.472447459870925, "grad_norm": 0.460604727268219, "learning_rate": 6.025518625829425e-06, "loss": 0.3969, "step": 2966 }, { "epoch": 1.472943902035413, "grad_norm": 0.4368669092655182, "learning_rate": 6.022690789575159e-06, "loss": 0.3332, "step": 2967 }, { "epoch": 1.4734403441999007, "grad_norm": 0.4222120940685272, "learning_rate": 6.019862611872234e-06, "loss": 0.362, "step": 2968 }, { "epoch": 1.4739367863643886, "grad_norm": 0.4268454313278198, "learning_rate": 6.017034093664901e-06, "loss": 0.3284, "step": 2969 }, { "epoch": 1.4744332285288764, "grad_norm": 0.40898099541664124, "learning_rate": 6.014205235897526e-06, "loss": 0.3709, "step": 2970 }, { "epoch": 1.4749296706933643, "grad_norm": 0.4588741660118103, "learning_rate": 6.011376039514587e-06, "loss": 0.3912, "step": 2971 }, { "epoch": 1.475426112857852, "grad_norm": 0.4082125723361969, "learning_rate": 6.008546505460677e-06, "loss": 0.3442, "step": 2972 }, { "epoch": 1.4759225550223398, "grad_norm": 0.4934128224849701, "learning_rate": 6.005716634680499e-06, "loss": 0.3976, "step": 2973 }, { "epoch": 1.4764189971868278, "grad_norm": 0.44860151410102844, "learning_rate": 6.002886428118869e-06, "loss": 0.3991, "step": 2974 }, { "epoch": 1.4769154393513155, "grad_norm": 0.3917906582355499, "learning_rate": 6.000055886720719e-06, "loss": 0.3615, "step": 2975 }, { "epoch": 1.4774118815158035, "grad_norm": 0.3949987590312958, "learning_rate": 5.997225011431089e-06, "loss": 0.335, "step": 2976 }, { "epoch": 1.4779083236802912, "grad_norm": 0.40368029475212097, "learning_rate": 5.994393803195129e-06, "loss": 0.3396, "step": 2977 }, { "epoch": 1.478404765844779, "grad_norm": 0.4296615719795227, "learning_rate": 5.991562262958105e-06, "loss": 0.3585, "step": 2978 }, { "epoch": 1.478901208009267, "grad_norm": 0.3959008753299713, "learning_rate": 5.9887303916653916e-06, "loss": 0.3158, "step": 2979 }, { "epoch": 1.4793976501737547, "grad_norm": 0.46235930919647217, "learning_rate": 5.985898190262471e-06, "loss": 0.3786, "step": 2980 }, { "epoch": 1.4798940923382427, "grad_norm": 0.36681291460990906, "learning_rate": 5.983065659694942e-06, "loss": 0.3272, "step": 2981 }, { "epoch": 1.4803905345027304, "grad_norm": 0.41831129789352417, "learning_rate": 5.980232800908507e-06, "loss": 0.34, "step": 2982 }, { "epoch": 1.4808869766672181, "grad_norm": 0.43600404262542725, "learning_rate": 5.97739961484898e-06, "loss": 0.3894, "step": 2983 }, { "epoch": 1.481383418831706, "grad_norm": 0.38908350467681885, "learning_rate": 5.974566102462286e-06, "loss": 0.3777, "step": 2984 }, { "epoch": 1.481879860996194, "grad_norm": 0.4091688096523285, "learning_rate": 5.971732264694458e-06, "loss": 0.3679, "step": 2985 }, { "epoch": 1.4823763031606818, "grad_norm": 0.43297865986824036, "learning_rate": 5.9688981024916355e-06, "loss": 0.331, "step": 2986 }, { "epoch": 1.4828727453251696, "grad_norm": 0.4597755968570709, "learning_rate": 5.966063616800072e-06, "loss": 0.3963, "step": 2987 }, { "epoch": 1.4833691874896575, "grad_norm": 0.41396066546440125, "learning_rate": 5.9632288085661215e-06, "loss": 0.4282, "step": 2988 }, { "epoch": 1.4838656296541453, "grad_norm": 0.4593130946159363, "learning_rate": 5.960393678736252e-06, "loss": 0.3281, "step": 2989 }, { "epoch": 1.4843620718186332, "grad_norm": 0.4225709140300751, "learning_rate": 5.9575582282570356e-06, "loss": 0.3697, "step": 2990 }, { "epoch": 1.484858513983121, "grad_norm": 0.42987433075904846, "learning_rate": 5.95472245807515e-06, "loss": 0.4083, "step": 2991 }, { "epoch": 1.4853549561476087, "grad_norm": 0.4430399537086487, "learning_rate": 5.951886369137384e-06, "loss": 0.3881, "step": 2992 }, { "epoch": 1.4858513983120967, "grad_norm": 0.4370708167552948, "learning_rate": 5.94904996239063e-06, "loss": 0.366, "step": 2993 }, { "epoch": 1.4863478404765844, "grad_norm": 0.4012905955314636, "learning_rate": 5.946213238781889e-06, "loss": 0.3744, "step": 2994 }, { "epoch": 1.4868442826410724, "grad_norm": 0.38869142532348633, "learning_rate": 5.943376199258264e-06, "loss": 0.3629, "step": 2995 }, { "epoch": 1.4873407248055601, "grad_norm": 0.4781147837638855, "learning_rate": 5.9405388447669655e-06, "loss": 0.3905, "step": 2996 }, { "epoch": 1.4878371669700479, "grad_norm": 0.4393221437931061, "learning_rate": 5.9377011762553075e-06, "loss": 0.4102, "step": 2997 }, { "epoch": 1.4883336091345358, "grad_norm": 0.44763684272766113, "learning_rate": 5.9348631946707135e-06, "loss": 0.3677, "step": 2998 }, { "epoch": 1.4888300512990238, "grad_norm": 0.4430927336215973, "learning_rate": 5.932024900960707e-06, "loss": 0.3295, "step": 2999 }, { "epoch": 1.4893264934635115, "grad_norm": 0.41705137491226196, "learning_rate": 5.929186296072915e-06, "loss": 0.3931, "step": 3000 }, { "epoch": 1.4898229356279993, "grad_norm": 0.4386925995349884, "learning_rate": 5.926347380955074e-06, "loss": 0.4251, "step": 3001 }, { "epoch": 1.4903193777924872, "grad_norm": 0.44126296043395996, "learning_rate": 5.9235081565550205e-06, "loss": 0.3488, "step": 3002 }, { "epoch": 1.490815819956975, "grad_norm": 0.44367387890815735, "learning_rate": 5.920668623820692e-06, "loss": 0.3617, "step": 3003 }, { "epoch": 1.491312262121463, "grad_norm": 0.46277692914009094, "learning_rate": 5.917828783700132e-06, "loss": 0.4049, "step": 3004 }, { "epoch": 1.4918087042859507, "grad_norm": 0.4157758355140686, "learning_rate": 5.914988637141488e-06, "loss": 0.3154, "step": 3005 }, { "epoch": 1.4923051464504384, "grad_norm": 0.5002428293228149, "learning_rate": 5.912148185093004e-06, "loss": 0.3917, "step": 3006 }, { "epoch": 1.4928015886149264, "grad_norm": 0.41591474413871765, "learning_rate": 5.909307428503033e-06, "loss": 0.3421, "step": 3007 }, { "epoch": 1.4932980307794141, "grad_norm": 0.44445839524269104, "learning_rate": 5.906466368320025e-06, "loss": 0.341, "step": 3008 }, { "epoch": 1.493794472943902, "grad_norm": 0.4971355199813843, "learning_rate": 5.903625005492532e-06, "loss": 0.3865, "step": 3009 }, { "epoch": 1.4942909151083899, "grad_norm": 0.4633837640285492, "learning_rate": 5.9007833409692094e-06, "loss": 0.3426, "step": 3010 }, { "epoch": 1.4947873572728776, "grad_norm": 0.505354106426239, "learning_rate": 5.897941375698812e-06, "loss": 0.3226, "step": 3011 }, { "epoch": 1.4952837994373656, "grad_norm": 0.43259546160697937, "learning_rate": 5.895099110630193e-06, "loss": 0.4114, "step": 3012 }, { "epoch": 1.4957802416018535, "grad_norm": 0.45838993787765503, "learning_rate": 5.892256546712311e-06, "loss": 0.3251, "step": 3013 }, { "epoch": 1.4962766837663413, "grad_norm": 0.4791962206363678, "learning_rate": 5.889413684894215e-06, "loss": 0.3417, "step": 3014 }, { "epoch": 1.496773125930829, "grad_norm": 0.38927188515663147, "learning_rate": 5.886570526125064e-06, "loss": 0.3717, "step": 3015 }, { "epoch": 1.497269568095317, "grad_norm": 0.3960299491882324, "learning_rate": 5.883727071354109e-06, "loss": 0.3958, "step": 3016 }, { "epoch": 1.4977660102598047, "grad_norm": 0.45992323756217957, "learning_rate": 5.880883321530702e-06, "loss": 0.3846, "step": 3017 }, { "epoch": 1.4982624524242927, "grad_norm": 0.3964899480342865, "learning_rate": 5.878039277604298e-06, "loss": 0.2914, "step": 3018 }, { "epoch": 1.4987588945887804, "grad_norm": 0.4494767189025879, "learning_rate": 5.875194940524442e-06, "loss": 0.3581, "step": 3019 }, { "epoch": 1.4992553367532682, "grad_norm": 0.41567373275756836, "learning_rate": 5.872350311240782e-06, "loss": 0.3582, "step": 3020 }, { "epoch": 1.4997517789177561, "grad_norm": 0.41856837272644043, "learning_rate": 5.869505390703062e-06, "loss": 0.3383, "step": 3021 }, { "epoch": 1.5002482210822439, "grad_norm": 0.4053589105606079, "learning_rate": 5.866660179861125e-06, "loss": 0.3538, "step": 3022 }, { "epoch": 1.5007446632467318, "grad_norm": 0.4620252847671509, "learning_rate": 5.8638146796649065e-06, "loss": 0.384, "step": 3023 }, { "epoch": 1.5012411054112196, "grad_norm": 0.4552159309387207, "learning_rate": 5.860968891064445e-06, "loss": 0.4208, "step": 3024 }, { "epoch": 1.5017375475757073, "grad_norm": 0.39452168345451355, "learning_rate": 5.858122815009869e-06, "loss": 0.3526, "step": 3025 }, { "epoch": 1.5022339897401953, "grad_norm": 0.4029759466648102, "learning_rate": 5.8552764524514095e-06, "loss": 0.3036, "step": 3026 }, { "epoch": 1.5027304319046833, "grad_norm": 0.48161229491233826, "learning_rate": 5.852429804339386e-06, "loss": 0.3811, "step": 3027 }, { "epoch": 1.503226874069171, "grad_norm": 0.42338380217552185, "learning_rate": 5.84958287162422e-06, "loss": 0.3334, "step": 3028 }, { "epoch": 1.5037233162336587, "grad_norm": 0.409802109003067, "learning_rate": 5.846735655256423e-06, "loss": 0.376, "step": 3029 }, { "epoch": 1.5042197583981465, "grad_norm": 0.5199187994003296, "learning_rate": 5.843888156186604e-06, "loss": 0.4437, "step": 3030 }, { "epoch": 1.5047162005626344, "grad_norm": 0.6799866557121277, "learning_rate": 5.841040375365464e-06, "loss": 0.357, "step": 3031 }, { "epoch": 1.5052126427271224, "grad_norm": 0.40382999181747437, "learning_rate": 5.838192313743802e-06, "loss": 0.3242, "step": 3032 }, { "epoch": 1.5057090848916101, "grad_norm": 0.47412925958633423, "learning_rate": 5.835343972272507e-06, "loss": 0.366, "step": 3033 }, { "epoch": 1.506205527056098, "grad_norm": 0.4346078336238861, "learning_rate": 5.832495351902563e-06, "loss": 0.3511, "step": 3034 }, { "epoch": 1.5067019692205859, "grad_norm": 0.4684021770954132, "learning_rate": 5.829646453585047e-06, "loss": 0.3661, "step": 3035 }, { "epoch": 1.5071984113850736, "grad_norm": 0.4598635137081146, "learning_rate": 5.826797278271128e-06, "loss": 0.3172, "step": 3036 }, { "epoch": 1.5076948535495616, "grad_norm": 0.41721245646476746, "learning_rate": 5.8239478269120706e-06, "loss": 0.3432, "step": 3037 }, { "epoch": 1.5081912957140493, "grad_norm": 0.4254276156425476, "learning_rate": 5.821098100459226e-06, "loss": 0.3859, "step": 3038 }, { "epoch": 1.508687737878537, "grad_norm": 0.3991883397102356, "learning_rate": 5.818248099864042e-06, "loss": 0.349, "step": 3039 }, { "epoch": 1.509184180043025, "grad_norm": 0.43890705704689026, "learning_rate": 5.815397826078056e-06, "loss": 0.3511, "step": 3040 }, { "epoch": 1.509680622207513, "grad_norm": 0.4250529110431671, "learning_rate": 5.812547280052899e-06, "loss": 0.3656, "step": 3041 }, { "epoch": 1.5101770643720007, "grad_norm": 0.42761802673339844, "learning_rate": 5.809696462740287e-06, "loss": 0.4056, "step": 3042 }, { "epoch": 1.5106735065364885, "grad_norm": 0.4161491096019745, "learning_rate": 5.806845375092033e-06, "loss": 0.3972, "step": 3043 }, { "epoch": 1.5111699487009762, "grad_norm": 0.43089455366134644, "learning_rate": 5.803994018060038e-06, "loss": 0.3408, "step": 3044 }, { "epoch": 1.5116663908654642, "grad_norm": 0.4841177463531494, "learning_rate": 5.801142392596291e-06, "loss": 0.3809, "step": 3045 }, { "epoch": 1.5121628330299521, "grad_norm": 0.41773074865341187, "learning_rate": 5.798290499652873e-06, "loss": 0.3906, "step": 3046 }, { "epoch": 1.5126592751944399, "grad_norm": 0.4137495160102844, "learning_rate": 5.795438340181954e-06, "loss": 0.3724, "step": 3047 }, { "epoch": 1.5131557173589276, "grad_norm": 0.4132876396179199, "learning_rate": 5.79258591513579e-06, "loss": 0.3404, "step": 3048 }, { "epoch": 1.5136521595234154, "grad_norm": 0.4589814245700836, "learning_rate": 5.789733225466732e-06, "loss": 0.3712, "step": 3049 }, { "epoch": 1.5141486016879033, "grad_norm": 0.4139741361141205, "learning_rate": 5.786880272127213e-06, "loss": 0.346, "step": 3050 }, { "epoch": 1.5146450438523913, "grad_norm": 0.4641878306865692, "learning_rate": 5.784027056069757e-06, "loss": 0.379, "step": 3051 }, { "epoch": 1.515141486016879, "grad_norm": 0.4551846385002136, "learning_rate": 5.781173578246978e-06, "loss": 0.3518, "step": 3052 }, { "epoch": 1.5156379281813668, "grad_norm": 0.4429434835910797, "learning_rate": 5.77831983961157e-06, "loss": 0.3602, "step": 3053 }, { "epoch": 1.5161343703458547, "grad_norm": 0.41403883695602417, "learning_rate": 5.775465841116323e-06, "loss": 0.3489, "step": 3054 }, { "epoch": 1.5166308125103427, "grad_norm": 0.4450613558292389, "learning_rate": 5.772611583714106e-06, "loss": 0.3822, "step": 3055 }, { "epoch": 1.5171272546748304, "grad_norm": 0.39542025327682495, "learning_rate": 5.769757068357878e-06, "loss": 0.3335, "step": 3056 }, { "epoch": 1.5176236968393182, "grad_norm": 0.4393356144428253, "learning_rate": 5.766902296000689e-06, "loss": 0.3691, "step": 3057 }, { "epoch": 1.518120139003806, "grad_norm": 0.4293613135814667, "learning_rate": 5.7640472675956664e-06, "loss": 0.3412, "step": 3058 }, { "epoch": 1.518616581168294, "grad_norm": 0.42213183641433716, "learning_rate": 5.761191984096026e-06, "loss": 0.3663, "step": 3059 }, { "epoch": 1.5191130233327819, "grad_norm": 0.4117438793182373, "learning_rate": 5.758336446455069e-06, "loss": 0.3768, "step": 3060 }, { "epoch": 1.5196094654972696, "grad_norm": 0.40208157896995544, "learning_rate": 5.755480655626185e-06, "loss": 0.3706, "step": 3061 }, { "epoch": 1.5201059076617573, "grad_norm": 0.4075888395309448, "learning_rate": 5.752624612562841e-06, "loss": 0.3713, "step": 3062 }, { "epoch": 1.520602349826245, "grad_norm": 0.43917128443717957, "learning_rate": 5.749768318218595e-06, "loss": 0.3596, "step": 3063 }, { "epoch": 1.521098791990733, "grad_norm": 0.4218132793903351, "learning_rate": 5.746911773547084e-06, "loss": 0.3729, "step": 3064 }, { "epoch": 1.521595234155221, "grad_norm": 0.4419352114200592, "learning_rate": 5.744054979502035e-06, "loss": 0.3405, "step": 3065 }, { "epoch": 1.5220916763197088, "grad_norm": 0.4390243589878082, "learning_rate": 5.741197937037248e-06, "loss": 0.3987, "step": 3066 }, { "epoch": 1.5225881184841965, "grad_norm": 0.4071175158023834, "learning_rate": 5.738340647106615e-06, "loss": 0.3727, "step": 3067 }, { "epoch": 1.5230845606486845, "grad_norm": 0.3990076780319214, "learning_rate": 5.735483110664107e-06, "loss": 0.2992, "step": 3068 }, { "epoch": 1.5235810028131722, "grad_norm": 0.5133251547813416, "learning_rate": 5.732625328663777e-06, "loss": 0.3845, "step": 3069 }, { "epoch": 1.5240774449776602, "grad_norm": 0.43988656997680664, "learning_rate": 5.729767302059763e-06, "loss": 0.3661, "step": 3070 }, { "epoch": 1.524573887142148, "grad_norm": 0.4412538409233093, "learning_rate": 5.726909031806279e-06, "loss": 0.3424, "step": 3071 }, { "epoch": 1.5250703293066357, "grad_norm": 0.5149542093276978, "learning_rate": 5.724050518857627e-06, "loss": 0.4181, "step": 3072 }, { "epoch": 1.5255667714711236, "grad_norm": 0.439865380525589, "learning_rate": 5.721191764168183e-06, "loss": 0.3023, "step": 3073 }, { "epoch": 1.5260632136356116, "grad_norm": 0.4641745090484619, "learning_rate": 5.718332768692413e-06, "loss": 0.3957, "step": 3074 }, { "epoch": 1.5265596558000993, "grad_norm": 0.45759937167167664, "learning_rate": 5.715473533384853e-06, "loss": 0.3659, "step": 3075 }, { "epoch": 1.527056097964587, "grad_norm": 0.4194805324077606, "learning_rate": 5.712614059200126e-06, "loss": 0.3481, "step": 3076 }, { "epoch": 1.5275525401290748, "grad_norm": 0.46575021743774414, "learning_rate": 5.709754347092933e-06, "loss": 0.3787, "step": 3077 }, { "epoch": 1.5280489822935628, "grad_norm": 0.5052847862243652, "learning_rate": 5.706894398018053e-06, "loss": 0.3456, "step": 3078 }, { "epoch": 1.5285454244580507, "grad_norm": 0.44287413358688354, "learning_rate": 5.704034212930346e-06, "loss": 0.3478, "step": 3079 }, { "epoch": 1.5290418666225385, "grad_norm": 0.42556890845298767, "learning_rate": 5.7011737927847484e-06, "loss": 0.3492, "step": 3080 }, { "epoch": 1.5295383087870262, "grad_norm": 0.5199395418167114, "learning_rate": 5.69831313853628e-06, "loss": 0.4252, "step": 3081 }, { "epoch": 1.5300347509515142, "grad_norm": 0.43490099906921387, "learning_rate": 5.695452251140034e-06, "loss": 0.303, "step": 3082 }, { "epoch": 1.530531193116002, "grad_norm": 0.4171132445335388, "learning_rate": 5.692591131551182e-06, "loss": 0.3446, "step": 3083 }, { "epoch": 1.53102763528049, "grad_norm": 0.47057124972343445, "learning_rate": 5.689729780724974e-06, "loss": 0.3362, "step": 3084 }, { "epoch": 1.5315240774449776, "grad_norm": 0.3862575590610504, "learning_rate": 5.68686819961674e-06, "loss": 0.3393, "step": 3085 }, { "epoch": 1.5320205196094654, "grad_norm": 0.4219917356967926, "learning_rate": 5.6840063891818795e-06, "loss": 0.3687, "step": 3086 }, { "epoch": 1.5325169617739534, "grad_norm": 0.40286874771118164, "learning_rate": 5.681144350375877e-06, "loss": 0.3413, "step": 3087 }, { "epoch": 1.5330134039384413, "grad_norm": 0.489506334066391, "learning_rate": 5.678282084154289e-06, "loss": 0.4174, "step": 3088 }, { "epoch": 1.533509846102929, "grad_norm": 0.4186706840991974, "learning_rate": 5.675419591472747e-06, "loss": 0.3495, "step": 3089 }, { "epoch": 1.5340062882674168, "grad_norm": 0.48508918285369873, "learning_rate": 5.672556873286961e-06, "loss": 0.3818, "step": 3090 }, { "epoch": 1.5345027304319045, "grad_norm": 0.46306684613227844, "learning_rate": 5.669693930552714e-06, "loss": 0.3672, "step": 3091 }, { "epoch": 1.5349991725963925, "grad_norm": 0.4371658265590668, "learning_rate": 5.6668307642258655e-06, "loss": 0.3751, "step": 3092 }, { "epoch": 1.5354956147608805, "grad_norm": 0.5143643021583557, "learning_rate": 5.663967375262348e-06, "loss": 0.387, "step": 3093 }, { "epoch": 1.5359920569253682, "grad_norm": 0.4252530634403229, "learning_rate": 5.6611037646181684e-06, "loss": 0.3534, "step": 3094 }, { "epoch": 1.536488499089856, "grad_norm": 0.43253856897354126, "learning_rate": 5.65823993324941e-06, "loss": 0.3324, "step": 3095 }, { "epoch": 1.5369849412543437, "grad_norm": 0.4425477087497711, "learning_rate": 5.655375882112228e-06, "loss": 0.335, "step": 3096 }, { "epoch": 1.5374813834188317, "grad_norm": 0.4803096055984497, "learning_rate": 5.652511612162851e-06, "loss": 0.3641, "step": 3097 }, { "epoch": 1.5379778255833196, "grad_norm": 0.5102413296699524, "learning_rate": 5.649647124357582e-06, "loss": 0.3991, "step": 3098 }, { "epoch": 1.5384742677478074, "grad_norm": 0.42686885595321655, "learning_rate": 5.646782419652793e-06, "loss": 0.3571, "step": 3099 }, { "epoch": 1.5389707099122951, "grad_norm": 0.4795965850353241, "learning_rate": 5.643917499004934e-06, "loss": 0.35, "step": 3100 }, { "epoch": 1.539467152076783, "grad_norm": 0.5249106884002686, "learning_rate": 5.641052363370523e-06, "loss": 0.4284, "step": 3101 }, { "epoch": 1.539963594241271, "grad_norm": 0.40259093046188354, "learning_rate": 5.63818701370615e-06, "loss": 0.3557, "step": 3102 }, { "epoch": 1.5404600364057588, "grad_norm": 0.4909491240978241, "learning_rate": 5.635321450968476e-06, "loss": 0.4203, "step": 3103 }, { "epoch": 1.5409564785702465, "grad_norm": 0.4746449291706085, "learning_rate": 5.63245567611424e-06, "loss": 0.3737, "step": 3104 }, { "epoch": 1.5414529207347343, "grad_norm": 0.4191553592681885, "learning_rate": 5.629589690100241e-06, "loss": 0.3363, "step": 3105 }, { "epoch": 1.5419493628992222, "grad_norm": 0.46407440304756165, "learning_rate": 5.626723493883357e-06, "loss": 0.3951, "step": 3106 }, { "epoch": 1.5424458050637102, "grad_norm": 0.4325239956378937, "learning_rate": 5.623857088420531e-06, "loss": 0.3732, "step": 3107 }, { "epoch": 1.542942247228198, "grad_norm": 0.5249986052513123, "learning_rate": 5.620990474668779e-06, "loss": 0.394, "step": 3108 }, { "epoch": 1.5434386893926857, "grad_norm": 0.4250940978527069, "learning_rate": 5.618123653585184e-06, "loss": 0.3658, "step": 3109 }, { "epoch": 1.5439351315571734, "grad_norm": 0.475229412317276, "learning_rate": 5.615256626126903e-06, "loss": 0.3459, "step": 3110 }, { "epoch": 1.5444315737216614, "grad_norm": 0.4623008370399475, "learning_rate": 5.612389393251154e-06, "loss": 0.3562, "step": 3111 }, { "epoch": 1.5449280158861494, "grad_norm": 0.4264439344406128, "learning_rate": 5.609521955915231e-06, "loss": 0.3697, "step": 3112 }, { "epoch": 1.545424458050637, "grad_norm": 0.5008541941642761, "learning_rate": 5.606654315076494e-06, "loss": 0.3678, "step": 3113 }, { "epoch": 1.5459209002151248, "grad_norm": 0.42835503816604614, "learning_rate": 5.6037864716923675e-06, "loss": 0.3158, "step": 3114 }, { "epoch": 1.5464173423796128, "grad_norm": 0.3999284505844116, "learning_rate": 5.60091842672035e-06, "loss": 0.3711, "step": 3115 }, { "epoch": 1.5469137845441008, "grad_norm": 0.5222602486610413, "learning_rate": 5.5980501811179996e-06, "loss": 0.4128, "step": 3116 }, { "epoch": 1.5474102267085885, "grad_norm": 0.5368857383728027, "learning_rate": 5.595181735842951e-06, "loss": 0.3461, "step": 3117 }, { "epoch": 1.5479066688730763, "grad_norm": 0.43991926312446594, "learning_rate": 5.592313091852894e-06, "loss": 0.3186, "step": 3118 }, { "epoch": 1.548403111037564, "grad_norm": 0.5529372096061707, "learning_rate": 5.589444250105595e-06, "loss": 0.3793, "step": 3119 }, { "epoch": 1.548899553202052, "grad_norm": 0.44661644101142883, "learning_rate": 5.58657521155888e-06, "loss": 0.3274, "step": 3120 }, { "epoch": 1.54939599536654, "grad_norm": 0.4121370315551758, "learning_rate": 5.583705977170646e-06, "loss": 0.3566, "step": 3121 }, { "epoch": 1.5498924375310277, "grad_norm": 0.521033525466919, "learning_rate": 5.580836547898849e-06, "loss": 0.3911, "step": 3122 }, { "epoch": 1.5503888796955154, "grad_norm": 0.42375195026397705, "learning_rate": 5.577966924701516e-06, "loss": 0.3417, "step": 3123 }, { "epoch": 1.5508853218600032, "grad_norm": 0.4747629463672638, "learning_rate": 5.575097108536735e-06, "loss": 0.3703, "step": 3124 }, { "epoch": 1.5513817640244911, "grad_norm": 0.42270413041114807, "learning_rate": 5.572227100362658e-06, "loss": 0.3095, "step": 3125 }, { "epoch": 1.551878206188979, "grad_norm": 0.5284610390663147, "learning_rate": 5.569356901137506e-06, "loss": 0.4061, "step": 3126 }, { "epoch": 1.5523746483534668, "grad_norm": 0.42840179800987244, "learning_rate": 5.566486511819558e-06, "loss": 0.3158, "step": 3127 }, { "epoch": 1.5528710905179546, "grad_norm": 0.43169471621513367, "learning_rate": 5.563615933367161e-06, "loss": 0.3889, "step": 3128 }, { "epoch": 1.5533675326824425, "grad_norm": 0.426999568939209, "learning_rate": 5.560745166738722e-06, "loss": 0.3567, "step": 3129 }, { "epoch": 1.5538639748469303, "grad_norm": 0.5135005712509155, "learning_rate": 5.557874212892711e-06, "loss": 0.3534, "step": 3130 }, { "epoch": 1.5543604170114182, "grad_norm": 0.44674739241600037, "learning_rate": 5.555003072787664e-06, "loss": 0.374, "step": 3131 }, { "epoch": 1.554856859175906, "grad_norm": 0.4760531187057495, "learning_rate": 5.552131747382174e-06, "loss": 0.3822, "step": 3132 }, { "epoch": 1.5553533013403937, "grad_norm": 0.40499237179756165, "learning_rate": 5.5492602376349e-06, "loss": 0.3338, "step": 3133 }, { "epoch": 1.5558497435048817, "grad_norm": 0.5250628590583801, "learning_rate": 5.5463885445045605e-06, "loss": 0.4303, "step": 3134 }, { "epoch": 1.5563461856693697, "grad_norm": 0.46638163924217224, "learning_rate": 5.543516668949935e-06, "loss": 0.3887, "step": 3135 }, { "epoch": 1.5568426278338574, "grad_norm": 0.4522458612918854, "learning_rate": 5.540644611929869e-06, "loss": 0.3888, "step": 3136 }, { "epoch": 1.5573390699983451, "grad_norm": 0.41548827290534973, "learning_rate": 5.5377723744032585e-06, "loss": 0.3609, "step": 3137 }, { "epoch": 1.557835512162833, "grad_norm": 0.4017617702484131, "learning_rate": 5.534899957329067e-06, "loss": 0.3443, "step": 3138 }, { "epoch": 1.5583319543273209, "grad_norm": 0.4548313021659851, "learning_rate": 5.53202736166632e-06, "loss": 0.3541, "step": 3139 }, { "epoch": 1.5588283964918088, "grad_norm": 0.5217058062553406, "learning_rate": 5.529154588374096e-06, "loss": 0.3939, "step": 3140 }, { "epoch": 1.5593248386562966, "grad_norm": 0.4115477502346039, "learning_rate": 5.526281638411537e-06, "loss": 0.3243, "step": 3141 }, { "epoch": 1.5598212808207843, "grad_norm": 0.49381306767463684, "learning_rate": 5.523408512737841e-06, "loss": 0.3655, "step": 3142 }, { "epoch": 1.5603177229852723, "grad_norm": 0.4360875189304352, "learning_rate": 5.520535212312268e-06, "loss": 0.3414, "step": 3143 }, { "epoch": 1.56081416514976, "grad_norm": 0.4755643308162689, "learning_rate": 5.5176617380941355e-06, "loss": 0.3725, "step": 3144 }, { "epoch": 1.561310607314248, "grad_norm": 0.48230454325675964, "learning_rate": 5.514788091042819e-06, "loss": 0.3818, "step": 3145 }, { "epoch": 1.5618070494787357, "grad_norm": 0.4218543469905853, "learning_rate": 5.511914272117748e-06, "loss": 0.3691, "step": 3146 }, { "epoch": 1.5623034916432235, "grad_norm": 0.45021089911460876, "learning_rate": 5.5090402822784175e-06, "loss": 0.3867, "step": 3147 }, { "epoch": 1.5627999338077114, "grad_norm": 0.4852950870990753, "learning_rate": 5.506166122484369e-06, "loss": 0.398, "step": 3148 }, { "epoch": 1.5632963759721994, "grad_norm": 0.38887354731559753, "learning_rate": 5.503291793695211e-06, "loss": 0.3392, "step": 3149 }, { "epoch": 1.5637928181366871, "grad_norm": 0.44610822200775146, "learning_rate": 5.500417296870599e-06, "loss": 0.3496, "step": 3150 }, { "epoch": 1.5642892603011749, "grad_norm": 0.46489793062210083, "learning_rate": 5.497542632970255e-06, "loss": 0.3473, "step": 3151 }, { "epoch": 1.5647857024656626, "grad_norm": 0.4392590820789337, "learning_rate": 5.494667802953947e-06, "loss": 0.3312, "step": 3152 }, { "epoch": 1.5652821446301506, "grad_norm": 0.433235228061676, "learning_rate": 5.4917928077815034e-06, "loss": 0.3632, "step": 3153 }, { "epoch": 1.5657785867946385, "grad_norm": 0.4575008153915405, "learning_rate": 5.488917648412809e-06, "loss": 0.3509, "step": 3154 }, { "epoch": 1.5662750289591263, "grad_norm": 0.4858499765396118, "learning_rate": 5.486042325807799e-06, "loss": 0.404, "step": 3155 }, { "epoch": 1.566771471123614, "grad_norm": 0.3814159035682678, "learning_rate": 5.483166840926467e-06, "loss": 0.346, "step": 3156 }, { "epoch": 1.5672679132881018, "grad_norm": 0.39744916558265686, "learning_rate": 5.480291194728857e-06, "loss": 0.3513, "step": 3157 }, { "epoch": 1.5677643554525897, "grad_norm": 0.4430543780326843, "learning_rate": 5.477415388175071e-06, "loss": 0.3642, "step": 3158 }, { "epoch": 1.5682607976170777, "grad_norm": 0.4773925542831421, "learning_rate": 5.474539422225263e-06, "loss": 0.3737, "step": 3159 }, { "epoch": 1.5687572397815654, "grad_norm": 0.45602989196777344, "learning_rate": 5.47166329783964e-06, "loss": 0.3995, "step": 3160 }, { "epoch": 1.5692536819460532, "grad_norm": 0.4588109254837036, "learning_rate": 5.4687870159784595e-06, "loss": 0.3407, "step": 3161 }, { "epoch": 1.5697501241105412, "grad_norm": 0.40121763944625854, "learning_rate": 5.465910577602037e-06, "loss": 0.3299, "step": 3162 }, { "epoch": 1.5702465662750291, "grad_norm": 0.4105336368083954, "learning_rate": 5.463033983670733e-06, "loss": 0.3262, "step": 3163 }, { "epoch": 1.5707430084395169, "grad_norm": 0.43732064962387085, "learning_rate": 5.4601572351449695e-06, "loss": 0.373, "step": 3164 }, { "epoch": 1.5712394506040046, "grad_norm": 0.387247234582901, "learning_rate": 5.457280332985209e-06, "loss": 0.343, "step": 3165 }, { "epoch": 1.5717358927684923, "grad_norm": 0.47753584384918213, "learning_rate": 5.454403278151974e-06, "loss": 0.3957, "step": 3166 }, { "epoch": 1.5722323349329803, "grad_norm": 0.45690497756004333, "learning_rate": 5.451526071605835e-06, "loss": 0.3661, "step": 3167 }, { "epoch": 1.5727287770974683, "grad_norm": 0.40391552448272705, "learning_rate": 5.44864871430741e-06, "loss": 0.3487, "step": 3168 }, { "epoch": 1.573225219261956, "grad_norm": 0.4252486824989319, "learning_rate": 5.445771207217377e-06, "loss": 0.3602, "step": 3169 }, { "epoch": 1.5737216614264438, "grad_norm": 0.403161883354187, "learning_rate": 5.4428935512964505e-06, "loss": 0.3399, "step": 3170 }, { "epoch": 1.5742181035909315, "grad_norm": 0.4477198123931885, "learning_rate": 5.440015747505406e-06, "loss": 0.3656, "step": 3171 }, { "epoch": 1.5747145457554195, "grad_norm": 0.42814821004867554, "learning_rate": 5.437137796805062e-06, "loss": 0.3562, "step": 3172 }, { "epoch": 1.5752109879199074, "grad_norm": 0.4377986490726471, "learning_rate": 5.434259700156288e-06, "loss": 0.3663, "step": 3173 }, { "epoch": 1.5757074300843952, "grad_norm": 0.4016132950782776, "learning_rate": 5.431381458520002e-06, "loss": 0.3835, "step": 3174 }, { "epoch": 1.576203872248883, "grad_norm": 0.43031421303749084, "learning_rate": 5.428503072857172e-06, "loss": 0.3012, "step": 3175 }, { "epoch": 1.5767003144133709, "grad_norm": 0.5062369704246521, "learning_rate": 5.425624544128813e-06, "loss": 0.3946, "step": 3176 }, { "epoch": 1.5771967565778586, "grad_norm": 0.3793363869190216, "learning_rate": 5.422745873295985e-06, "loss": 0.3575, "step": 3177 }, { "epoch": 1.5776931987423466, "grad_norm": 0.402389794588089, "learning_rate": 5.4198670613198e-06, "loss": 0.3815, "step": 3178 }, { "epoch": 1.5781896409068343, "grad_norm": 0.43076613545417786, "learning_rate": 5.416988109161414e-06, "loss": 0.3725, "step": 3179 }, { "epoch": 1.578686083071322, "grad_norm": 0.4742351174354553, "learning_rate": 5.414109017782033e-06, "loss": 0.4063, "step": 3180 }, { "epoch": 1.57918252523581, "grad_norm": 0.4107184410095215, "learning_rate": 5.411229788142905e-06, "loss": 0.3269, "step": 3181 }, { "epoch": 1.579678967400298, "grad_norm": 0.4817928075790405, "learning_rate": 5.408350421205326e-06, "loss": 0.4206, "step": 3182 }, { "epoch": 1.5801754095647857, "grad_norm": 0.410040944814682, "learning_rate": 5.405470917930641e-06, "loss": 0.3461, "step": 3183 }, { "epoch": 1.5806718517292735, "grad_norm": 0.4296213686466217, "learning_rate": 5.4025912792802374e-06, "loss": 0.3646, "step": 3184 }, { "epoch": 1.5811682938937612, "grad_norm": 0.4126681983470917, "learning_rate": 5.3997115062155455e-06, "loss": 0.3576, "step": 3185 }, { "epoch": 1.5816647360582492, "grad_norm": 0.45395055413246155, "learning_rate": 5.396831599698048e-06, "loss": 0.3339, "step": 3186 }, { "epoch": 1.5821611782227372, "grad_norm": 0.4884850084781647, "learning_rate": 5.393951560689262e-06, "loss": 0.3683, "step": 3187 }, { "epoch": 1.582657620387225, "grad_norm": 0.4268726706504822, "learning_rate": 5.39107139015076e-06, "loss": 0.3466, "step": 3188 }, { "epoch": 1.5831540625517126, "grad_norm": 0.3903951346874237, "learning_rate": 5.388191089044146e-06, "loss": 0.3057, "step": 3189 }, { "epoch": 1.5836505047162006, "grad_norm": 0.5149285793304443, "learning_rate": 5.385310658331079e-06, "loss": 0.4215, "step": 3190 }, { "epoch": 1.5841469468806884, "grad_norm": 0.443257600069046, "learning_rate": 5.382430098973256e-06, "loss": 0.3841, "step": 3191 }, { "epoch": 1.5846433890451763, "grad_norm": 0.4146920144557953, "learning_rate": 5.379549411932417e-06, "loss": 0.3442, "step": 3192 }, { "epoch": 1.585139831209664, "grad_norm": 0.4526948630809784, "learning_rate": 5.376668598170344e-06, "loss": 0.3303, "step": 3193 }, { "epoch": 1.5856362733741518, "grad_norm": 0.45280104875564575, "learning_rate": 5.373787658648864e-06, "loss": 0.3234, "step": 3194 }, { "epoch": 1.5861327155386398, "grad_norm": 0.45164424180984497, "learning_rate": 5.370906594329844e-06, "loss": 0.3824, "step": 3195 }, { "epoch": 1.5866291577031277, "grad_norm": 0.3918415307998657, "learning_rate": 5.368025406175191e-06, "loss": 0.3561, "step": 3196 }, { "epoch": 1.5871255998676155, "grad_norm": 0.3992595970630646, "learning_rate": 5.365144095146858e-06, "loss": 0.3413, "step": 3197 }, { "epoch": 1.5876220420321032, "grad_norm": 0.42491695284843445, "learning_rate": 5.362262662206837e-06, "loss": 0.3895, "step": 3198 }, { "epoch": 1.588118484196591, "grad_norm": 0.3964380621910095, "learning_rate": 5.359381108317159e-06, "loss": 0.3688, "step": 3199 }, { "epoch": 1.588614926361079, "grad_norm": 0.46167588233947754, "learning_rate": 5.3564994344398944e-06, "loss": 0.3824, "step": 3200 }, { "epoch": 1.589111368525567, "grad_norm": 0.37183329463005066, "learning_rate": 5.35361764153716e-06, "loss": 0.3402, "step": 3201 }, { "epoch": 1.5896078106900546, "grad_norm": 0.4740125238895416, "learning_rate": 5.350735730571104e-06, "loss": 0.3872, "step": 3202 }, { "epoch": 1.5901042528545424, "grad_norm": 0.45222190022468567, "learning_rate": 5.347853702503921e-06, "loss": 0.3572, "step": 3203 }, { "epoch": 1.5906006950190301, "grad_norm": 0.44121742248535156, "learning_rate": 5.344971558297841e-06, "loss": 0.367, "step": 3204 }, { "epoch": 1.591097137183518, "grad_norm": 0.40521958470344543, "learning_rate": 5.342089298915133e-06, "loss": 0.356, "step": 3205 }, { "epoch": 1.591593579348006, "grad_norm": 0.432704895734787, "learning_rate": 5.339206925318106e-06, "loss": 0.3678, "step": 3206 }, { "epoch": 1.5920900215124938, "grad_norm": 0.45848748087882996, "learning_rate": 5.336324438469104e-06, "loss": 0.4037, "step": 3207 }, { "epoch": 1.5925864636769815, "grad_norm": 0.40478813648223877, "learning_rate": 5.333441839330515e-06, "loss": 0.322, "step": 3208 }, { "epoch": 1.5930829058414695, "grad_norm": 0.4696842133998871, "learning_rate": 5.330559128864757e-06, "loss": 0.388, "step": 3209 }, { "epoch": 1.5935793480059575, "grad_norm": 0.44098928570747375, "learning_rate": 5.327676308034292e-06, "loss": 0.347, "step": 3210 }, { "epoch": 1.5940757901704452, "grad_norm": 0.412954181432724, "learning_rate": 5.324793377801611e-06, "loss": 0.384, "step": 3211 }, { "epoch": 1.594572232334933, "grad_norm": 0.44687169790267944, "learning_rate": 5.321910339129251e-06, "loss": 0.3962, "step": 3212 }, { "epoch": 1.5950686744994207, "grad_norm": 0.45302921533584595, "learning_rate": 5.3190271929797755e-06, "loss": 0.3507, "step": 3213 }, { "epoch": 1.5955651166639087, "grad_norm": 0.41430386900901794, "learning_rate": 5.316143940315792e-06, "loss": 0.3618, "step": 3214 }, { "epoch": 1.5960615588283966, "grad_norm": 0.4152604341506958, "learning_rate": 5.313260582099938e-06, "loss": 0.3289, "step": 3215 }, { "epoch": 1.5965580009928844, "grad_norm": 0.42199981212615967, "learning_rate": 5.310377119294892e-06, "loss": 0.3887, "step": 3216 }, { "epoch": 1.597054443157372, "grad_norm": 0.3654882609844208, "learning_rate": 5.307493552863359e-06, "loss": 0.3509, "step": 3217 }, { "epoch": 1.5975508853218598, "grad_norm": 0.44000351428985596, "learning_rate": 5.304609883768088e-06, "loss": 0.4296, "step": 3218 }, { "epoch": 1.5980473274863478, "grad_norm": 0.43635159730911255, "learning_rate": 5.3017261129718545e-06, "loss": 0.3321, "step": 3219 }, { "epoch": 1.5985437696508358, "grad_norm": 0.4285506308078766, "learning_rate": 5.298842241437473e-06, "loss": 0.3457, "step": 3220 }, { "epoch": 1.5990402118153235, "grad_norm": 0.4258693754673004, "learning_rate": 5.295958270127787e-06, "loss": 0.3644, "step": 3221 }, { "epoch": 1.5995366539798113, "grad_norm": 0.4234961271286011, "learning_rate": 5.293074200005679e-06, "loss": 0.3462, "step": 3222 }, { "epoch": 1.6000330961442992, "grad_norm": 0.48818543553352356, "learning_rate": 5.290190032034063e-06, "loss": 0.3589, "step": 3223 }, { "epoch": 1.6005295383087872, "grad_norm": 0.4437236785888672, "learning_rate": 5.287305767175881e-06, "loss": 0.3178, "step": 3224 }, { "epoch": 1.601025980473275, "grad_norm": 0.465088814496994, "learning_rate": 5.284421406394112e-06, "loss": 0.4222, "step": 3225 }, { "epoch": 1.6015224226377627, "grad_norm": 0.40048742294311523, "learning_rate": 5.281536950651765e-06, "loss": 0.3327, "step": 3226 }, { "epoch": 1.6020188648022504, "grad_norm": 0.44157010316848755, "learning_rate": 5.2786524009118836e-06, "loss": 0.3459, "step": 3227 }, { "epoch": 1.6025153069667384, "grad_norm": 0.4832254648208618, "learning_rate": 5.2757677581375375e-06, "loss": 0.3694, "step": 3228 }, { "epoch": 1.6030117491312263, "grad_norm": 0.4584333598613739, "learning_rate": 5.2728830232918315e-06, "loss": 0.3411, "step": 3229 }, { "epoch": 1.603508191295714, "grad_norm": 0.4558500051498413, "learning_rate": 5.269998197337901e-06, "loss": 0.3818, "step": 3230 }, { "epoch": 1.6040046334602018, "grad_norm": 0.43860408663749695, "learning_rate": 5.267113281238912e-06, "loss": 0.3552, "step": 3231 }, { "epoch": 1.6045010756246896, "grad_norm": 0.49000224471092224, "learning_rate": 5.264228275958056e-06, "loss": 0.3838, "step": 3232 }, { "epoch": 1.6049975177891775, "grad_norm": 0.41862982511520386, "learning_rate": 5.261343182458562e-06, "loss": 0.3392, "step": 3233 }, { "epoch": 1.6054939599536655, "grad_norm": 0.4331575632095337, "learning_rate": 5.25845800170368e-06, "loss": 0.3276, "step": 3234 }, { "epoch": 1.6059904021181532, "grad_norm": 0.4579720199108124, "learning_rate": 5.255572734656697e-06, "loss": 0.4038, "step": 3235 }, { "epoch": 1.606486844282641, "grad_norm": 0.46797505021095276, "learning_rate": 5.252687382280924e-06, "loss": 0.3566, "step": 3236 }, { "epoch": 1.606983286447129, "grad_norm": 0.5082241296768188, "learning_rate": 5.249801945539701e-06, "loss": 0.3491, "step": 3237 }, { "epoch": 1.6074797286116167, "grad_norm": 0.4121003746986389, "learning_rate": 5.246916425396398e-06, "loss": 0.3468, "step": 3238 }, { "epoch": 1.6079761707761047, "grad_norm": 0.4453255236148834, "learning_rate": 5.244030822814411e-06, "loss": 0.386, "step": 3239 }, { "epoch": 1.6084726129405924, "grad_norm": 0.47088074684143066, "learning_rate": 5.241145138757167e-06, "loss": 0.3616, "step": 3240 }, { "epoch": 1.6089690551050801, "grad_norm": 0.5044232606887817, "learning_rate": 5.238259374188113e-06, "loss": 0.3731, "step": 3241 }, { "epoch": 1.6094654972695681, "grad_norm": 0.43711376190185547, "learning_rate": 5.23537353007073e-06, "loss": 0.3881, "step": 3242 }, { "epoch": 1.609961939434056, "grad_norm": 0.42551133036613464, "learning_rate": 5.232487607368522e-06, "loss": 0.351, "step": 3243 }, { "epoch": 1.6104583815985438, "grad_norm": 0.4109075367450714, "learning_rate": 5.229601607045021e-06, "loss": 0.3373, "step": 3244 }, { "epoch": 1.6109548237630316, "grad_norm": 0.5495297312736511, "learning_rate": 5.226715530063782e-06, "loss": 0.3721, "step": 3245 }, { "epoch": 1.6114512659275193, "grad_norm": 0.4175966680049896, "learning_rate": 5.223829377388392e-06, "loss": 0.3845, "step": 3246 }, { "epoch": 1.6119477080920073, "grad_norm": 0.46949392557144165, "learning_rate": 5.220943149982455e-06, "loss": 0.3728, "step": 3247 }, { "epoch": 1.6124441502564952, "grad_norm": 0.3968791663646698, "learning_rate": 5.218056848809604e-06, "loss": 0.3603, "step": 3248 }, { "epoch": 1.612940592420983, "grad_norm": 0.43506377935409546, "learning_rate": 5.2151704748335e-06, "loss": 0.3772, "step": 3249 }, { "epoch": 1.6134370345854707, "grad_norm": 0.39271077513694763, "learning_rate": 5.21228402901782e-06, "loss": 0.292, "step": 3250 }, { "epoch": 1.6139334767499587, "grad_norm": 0.5298523902893066, "learning_rate": 5.2093975123262745e-06, "loss": 0.3648, "step": 3251 }, { "epoch": 1.6144299189144464, "grad_norm": 0.4620070159435272, "learning_rate": 5.20651092572259e-06, "loss": 0.3902, "step": 3252 }, { "epoch": 1.6149263610789344, "grad_norm": 0.386776328086853, "learning_rate": 5.2036242701705185e-06, "loss": 0.364, "step": 3253 }, { "epoch": 1.6154228032434221, "grad_norm": 0.5012742877006531, "learning_rate": 5.200737546633839e-06, "loss": 0.3944, "step": 3254 }, { "epoch": 1.6159192454079099, "grad_norm": 0.4277174174785614, "learning_rate": 5.197850756076348e-06, "loss": 0.2867, "step": 3255 }, { "epoch": 1.6164156875723978, "grad_norm": 0.5403633713722229, "learning_rate": 5.1949638994618666e-06, "loss": 0.3923, "step": 3256 }, { "epoch": 1.6169121297368858, "grad_norm": 0.41005682945251465, "learning_rate": 5.192076977754239e-06, "loss": 0.3013, "step": 3257 }, { "epoch": 1.6174085719013735, "grad_norm": 0.5315666198730469, "learning_rate": 5.189189991917328e-06, "loss": 0.3925, "step": 3258 }, { "epoch": 1.6179050140658613, "grad_norm": 0.4641391932964325, "learning_rate": 5.186302942915021e-06, "loss": 0.3618, "step": 3259 }, { "epoch": 1.618401456230349, "grad_norm": 0.39210060238838196, "learning_rate": 5.1834158317112245e-06, "loss": 0.3147, "step": 3260 }, { "epoch": 1.618897898394837, "grad_norm": 0.4848231375217438, "learning_rate": 5.180528659269867e-06, "loss": 0.4111, "step": 3261 }, { "epoch": 1.619394340559325, "grad_norm": 0.4182208776473999, "learning_rate": 5.177641426554896e-06, "loss": 0.304, "step": 3262 }, { "epoch": 1.6198907827238127, "grad_norm": 0.40944525599479675, "learning_rate": 5.174754134530281e-06, "loss": 0.3535, "step": 3263 }, { "epoch": 1.6203872248883004, "grad_norm": 0.5346323847770691, "learning_rate": 5.1718667841600115e-06, "loss": 0.4331, "step": 3264 }, { "epoch": 1.6208836670527882, "grad_norm": 0.4503125250339508, "learning_rate": 5.168979376408092e-06, "loss": 0.3631, "step": 3265 }, { "epoch": 1.6213801092172762, "grad_norm": 0.3832566738128662, "learning_rate": 5.166091912238552e-06, "loss": 0.3577, "step": 3266 }, { "epoch": 1.6218765513817641, "grad_norm": 0.3969564735889435, "learning_rate": 5.163204392615436e-06, "loss": 0.3501, "step": 3267 }, { "epoch": 1.6223729935462519, "grad_norm": 0.4606688618659973, "learning_rate": 5.16031681850281e-06, "loss": 0.396, "step": 3268 }, { "epoch": 1.6228694357107396, "grad_norm": 0.4778660535812378, "learning_rate": 5.157429190864755e-06, "loss": 0.3836, "step": 3269 }, { "epoch": 1.6233658778752276, "grad_norm": 0.4140864312648773, "learning_rate": 5.154541510665372e-06, "loss": 0.393, "step": 3270 }, { "epoch": 1.6238623200397155, "grad_norm": 0.3788284659385681, "learning_rate": 5.151653778868778e-06, "loss": 0.3666, "step": 3271 }, { "epoch": 1.6243587622042033, "grad_norm": 0.42532795667648315, "learning_rate": 5.14876599643911e-06, "loss": 0.3685, "step": 3272 }, { "epoch": 1.624855204368691, "grad_norm": 0.5081415176391602, "learning_rate": 5.145878164340518e-06, "loss": 0.345, "step": 3273 }, { "epoch": 1.6253516465331788, "grad_norm": 0.48284006118774414, "learning_rate": 5.142990283537174e-06, "loss": 0.4193, "step": 3274 }, { "epoch": 1.6258480886976667, "grad_norm": 0.42454811930656433, "learning_rate": 5.140102354993258e-06, "loss": 0.3339, "step": 3275 }, { "epoch": 1.6263445308621547, "grad_norm": 0.5374150276184082, "learning_rate": 5.137214379672975e-06, "loss": 0.3786, "step": 3276 }, { "epoch": 1.6268409730266424, "grad_norm": 0.4596374034881592, "learning_rate": 5.134326358540538e-06, "loss": 0.3718, "step": 3277 }, { "epoch": 1.6273374151911302, "grad_norm": 0.40326279401779175, "learning_rate": 5.131438292560181e-06, "loss": 0.3717, "step": 3278 }, { "epoch": 1.627833857355618, "grad_norm": 0.41383156180381775, "learning_rate": 5.128550182696153e-06, "loss": 0.3855, "step": 3279 }, { "epoch": 1.6283302995201059, "grad_norm": 0.43393635749816895, "learning_rate": 5.12566202991271e-06, "loss": 0.3284, "step": 3280 }, { "epoch": 1.6288267416845938, "grad_norm": 0.4471795856952667, "learning_rate": 5.1227738351741326e-06, "loss": 0.383, "step": 3281 }, { "epoch": 1.6293231838490816, "grad_norm": 0.4688468873500824, "learning_rate": 5.119885599444707e-06, "loss": 0.3673, "step": 3282 }, { "epoch": 1.6298196260135693, "grad_norm": 0.46593886613845825, "learning_rate": 5.1169973236887394e-06, "loss": 0.4023, "step": 3283 }, { "epoch": 1.6303160681780573, "grad_norm": 0.4763087034225464, "learning_rate": 5.1141090088705436e-06, "loss": 0.3699, "step": 3284 }, { "epoch": 1.630812510342545, "grad_norm": 0.4078397750854492, "learning_rate": 5.111220655954452e-06, "loss": 0.3482, "step": 3285 }, { "epoch": 1.631308952507033, "grad_norm": 0.41637226939201355, "learning_rate": 5.108332265904805e-06, "loss": 0.3751, "step": 3286 }, { "epoch": 1.6318053946715207, "grad_norm": 0.5035280585289001, "learning_rate": 5.105443839685961e-06, "loss": 0.44, "step": 3287 }, { "epoch": 1.6323018368360085, "grad_norm": 0.3847520053386688, "learning_rate": 5.102555378262283e-06, "loss": 0.3411, "step": 3288 }, { "epoch": 1.6327982790004965, "grad_norm": 0.40028098225593567, "learning_rate": 5.099666882598152e-06, "loss": 0.3463, "step": 3289 }, { "epoch": 1.6332947211649844, "grad_norm": 0.4281407296657562, "learning_rate": 5.096778353657957e-06, "loss": 0.3457, "step": 3290 }, { "epoch": 1.6337911633294722, "grad_norm": 0.3850270211696625, "learning_rate": 5.093889792406101e-06, "loss": 0.3497, "step": 3291 }, { "epoch": 1.63428760549396, "grad_norm": 0.434452623128891, "learning_rate": 5.091001199806994e-06, "loss": 0.4133, "step": 3292 }, { "epoch": 1.6347840476584476, "grad_norm": 0.36209556460380554, "learning_rate": 5.08811257682506e-06, "loss": 0.3133, "step": 3293 }, { "epoch": 1.6352804898229356, "grad_norm": 0.43082162737846375, "learning_rate": 5.085223924424733e-06, "loss": 0.3545, "step": 3294 }, { "epoch": 1.6357769319874236, "grad_norm": 0.3871895372867584, "learning_rate": 5.082335243570452e-06, "loss": 0.3691, "step": 3295 }, { "epoch": 1.6362733741519113, "grad_norm": 0.4338257610797882, "learning_rate": 5.079446535226673e-06, "loss": 0.4162, "step": 3296 }, { "epoch": 1.636769816316399, "grad_norm": 0.3639349341392517, "learning_rate": 5.076557800357853e-06, "loss": 0.3259, "step": 3297 }, { "epoch": 1.637266258480887, "grad_norm": 0.3951907753944397, "learning_rate": 5.073669039928466e-06, "loss": 0.3641, "step": 3298 }, { "epoch": 1.6377627006453748, "grad_norm": 0.4058045744895935, "learning_rate": 5.0707802549029875e-06, "loss": 0.3488, "step": 3299 }, { "epoch": 1.6382591428098627, "grad_norm": 0.40873822569847107, "learning_rate": 5.067891446245905e-06, "loss": 0.3241, "step": 3300 }, { "epoch": 1.6387555849743505, "grad_norm": 0.45503994822502136, "learning_rate": 5.0650026149217135e-06, "loss": 0.4005, "step": 3301 }, { "epoch": 1.6392520271388382, "grad_norm": 0.4269391894340515, "learning_rate": 5.062113761894918e-06, "loss": 0.3416, "step": 3302 }, { "epoch": 1.6397484693033262, "grad_norm": 0.4686501622200012, "learning_rate": 5.059224888130023e-06, "loss": 0.3541, "step": 3303 }, { "epoch": 1.6402449114678141, "grad_norm": 0.41750431060791016, "learning_rate": 5.056335994591549e-06, "loss": 0.3275, "step": 3304 }, { "epoch": 1.640741353632302, "grad_norm": 0.41713836789131165, "learning_rate": 5.0534470822440176e-06, "loss": 0.3963, "step": 3305 }, { "epoch": 1.6412377957967896, "grad_norm": 0.40166300535202026, "learning_rate": 5.050558152051957e-06, "loss": 0.3775, "step": 3306 }, { "epoch": 1.6417342379612774, "grad_norm": 0.48994654417037964, "learning_rate": 5.047669204979906e-06, "loss": 0.4349, "step": 3307 }, { "epoch": 1.6422306801257653, "grad_norm": 0.36273255944252014, "learning_rate": 5.0447802419924e-06, "loss": 0.361, "step": 3308 }, { "epoch": 1.6427271222902533, "grad_norm": 0.4200311005115509, "learning_rate": 5.0418912640539895e-06, "loss": 0.3734, "step": 3309 }, { "epoch": 1.643223564454741, "grad_norm": 0.43085798621177673, "learning_rate": 5.039002272129224e-06, "loss": 0.3539, "step": 3310 }, { "epoch": 1.6437200066192288, "grad_norm": 0.3979569971561432, "learning_rate": 5.036113267182661e-06, "loss": 0.317, "step": 3311 }, { "epoch": 1.6442164487837165, "grad_norm": 0.4517953097820282, "learning_rate": 5.033224250178859e-06, "loss": 0.3778, "step": 3312 }, { "epoch": 1.6447128909482045, "grad_norm": 0.41006195545196533, "learning_rate": 5.030335222082383e-06, "loss": 0.3399, "step": 3313 }, { "epoch": 1.6452093331126925, "grad_norm": 0.3910154402256012, "learning_rate": 5.0274461838578e-06, "loss": 0.33, "step": 3314 }, { "epoch": 1.6457057752771802, "grad_norm": 0.45607709884643555, "learning_rate": 5.024557136469682e-06, "loss": 0.3831, "step": 3315 }, { "epoch": 1.646202217441668, "grad_norm": 0.36274781823158264, "learning_rate": 5.021668080882605e-06, "loss": 0.2978, "step": 3316 }, { "epoch": 1.646698659606156, "grad_norm": 0.4347727298736572, "learning_rate": 5.018779018061143e-06, "loss": 0.3821, "step": 3317 }, { "epoch": 1.6471951017706439, "grad_norm": 0.468761146068573, "learning_rate": 5.015889948969879e-06, "loss": 0.4587, "step": 3318 }, { "epoch": 1.6476915439351316, "grad_norm": 0.41933897137641907, "learning_rate": 5.013000874573392e-06, "loss": 0.3757, "step": 3319 }, { "epoch": 1.6481879860996194, "grad_norm": 0.40766578912734985, "learning_rate": 5.0101117958362665e-06, "loss": 0.349, "step": 3320 }, { "epoch": 1.648684428264107, "grad_norm": 0.43067261576652527, "learning_rate": 5.007222713723086e-06, "loss": 0.3947, "step": 3321 }, { "epoch": 1.649180870428595, "grad_norm": 0.38979753851890564, "learning_rate": 5.00433362919844e-06, "loss": 0.3208, "step": 3322 }, { "epoch": 1.649677312593083, "grad_norm": 0.44227588176727295, "learning_rate": 5.001444543226912e-06, "loss": 0.3832, "step": 3323 }, { "epoch": 1.6501737547575708, "grad_norm": 0.417404443025589, "learning_rate": 4.99855545677309e-06, "loss": 0.3519, "step": 3324 }, { "epoch": 1.6506701969220585, "grad_norm": 0.43218111991882324, "learning_rate": 4.995666370801563e-06, "loss": 0.3784, "step": 3325 }, { "epoch": 1.6511666390865463, "grad_norm": 0.38061830401420593, "learning_rate": 4.9927772862769136e-06, "loss": 0.3388, "step": 3326 }, { "epoch": 1.6516630812510342, "grad_norm": 0.42385223507881165, "learning_rate": 4.989888204163735e-06, "loss": 0.338, "step": 3327 }, { "epoch": 1.6521595234155222, "grad_norm": 0.4000215232372284, "learning_rate": 4.98699912542661e-06, "loss": 0.3423, "step": 3328 }, { "epoch": 1.65265596558001, "grad_norm": 0.4993458390235901, "learning_rate": 4.9841100510301234e-06, "loss": 0.376, "step": 3329 }, { "epoch": 1.6531524077444977, "grad_norm": 0.4490346610546112, "learning_rate": 4.981220981938858e-06, "loss": 0.3445, "step": 3330 }, { "epoch": 1.6536488499089856, "grad_norm": 0.47514674067497253, "learning_rate": 4.978331919117398e-06, "loss": 0.3815, "step": 3331 }, { "epoch": 1.6541452920734736, "grad_norm": 0.5392583608627319, "learning_rate": 4.975442863530319e-06, "loss": 0.3916, "step": 3332 }, { "epoch": 1.6546417342379613, "grad_norm": 0.3991836607456207, "learning_rate": 4.9725538161422005e-06, "loss": 0.3133, "step": 3333 }, { "epoch": 1.655138176402449, "grad_norm": 0.47018033266067505, "learning_rate": 4.969664777917619e-06, "loss": 0.3638, "step": 3334 }, { "epoch": 1.6556346185669368, "grad_norm": 0.44524461030960083, "learning_rate": 4.966775749821143e-06, "loss": 0.3838, "step": 3335 }, { "epoch": 1.6561310607314248, "grad_norm": 0.4410572648048401, "learning_rate": 4.963886732817342e-06, "loss": 0.3652, "step": 3336 }, { "epoch": 1.6566275028959128, "grad_norm": 0.44426068663597107, "learning_rate": 4.9609977278707765e-06, "loss": 0.3628, "step": 3337 }, { "epoch": 1.6571239450604005, "grad_norm": 0.5085585713386536, "learning_rate": 4.958108735946012e-06, "loss": 0.3728, "step": 3338 }, { "epoch": 1.6576203872248882, "grad_norm": 0.3785998523235321, "learning_rate": 4.955219758007601e-06, "loss": 0.2845, "step": 3339 }, { "epoch": 1.658116829389376, "grad_norm": 0.4377411901950836, "learning_rate": 4.9523307950200976e-06, "loss": 0.3606, "step": 3340 }, { "epoch": 1.658613271553864, "grad_norm": 0.391598105430603, "learning_rate": 4.949441847948043e-06, "loss": 0.3313, "step": 3341 }, { "epoch": 1.659109713718352, "grad_norm": 0.4078366160392761, "learning_rate": 4.946552917755983e-06, "loss": 0.3432, "step": 3342 }, { "epoch": 1.6596061558828397, "grad_norm": 0.44623661041259766, "learning_rate": 4.943664005408453e-06, "loss": 0.3468, "step": 3343 }, { "epoch": 1.6601025980473274, "grad_norm": 0.4097582995891571, "learning_rate": 4.9407751118699784e-06, "loss": 0.3426, "step": 3344 }, { "epoch": 1.6605990402118154, "grad_norm": 0.40833115577697754, "learning_rate": 4.937886238105084e-06, "loss": 0.3565, "step": 3345 }, { "epoch": 1.6610954823763031, "grad_norm": 0.4574689567089081, "learning_rate": 4.934997385078287e-06, "loss": 0.3765, "step": 3346 }, { "epoch": 1.661591924540791, "grad_norm": 0.4034993648529053, "learning_rate": 4.932108553754097e-06, "loss": 0.3294, "step": 3347 }, { "epoch": 1.6620883667052788, "grad_norm": 0.3920203149318695, "learning_rate": 4.929219745097015e-06, "loss": 0.3809, "step": 3348 }, { "epoch": 1.6625848088697666, "grad_norm": 0.49911442399024963, "learning_rate": 4.9263309600715356e-06, "loss": 0.3639, "step": 3349 }, { "epoch": 1.6630812510342545, "grad_norm": 0.3919394612312317, "learning_rate": 4.923442199642148e-06, "loss": 0.3636, "step": 3350 }, { "epoch": 1.6635776931987425, "grad_norm": 0.3904232382774353, "learning_rate": 4.92055346477333e-06, "loss": 0.3654, "step": 3351 }, { "epoch": 1.6640741353632302, "grad_norm": 0.4002526104450226, "learning_rate": 4.917664756429548e-06, "loss": 0.3659, "step": 3352 }, { "epoch": 1.664570577527718, "grad_norm": 0.4118606150150299, "learning_rate": 4.914776075575268e-06, "loss": 0.3903, "step": 3353 }, { "epoch": 1.6650670196922057, "grad_norm": 0.376006156206131, "learning_rate": 4.91188742317494e-06, "loss": 0.3539, "step": 3354 }, { "epoch": 1.6655634618566937, "grad_norm": 0.4019266664981842, "learning_rate": 4.9089988001930064e-06, "loss": 0.3533, "step": 3355 }, { "epoch": 1.6660599040211816, "grad_norm": 0.48489582538604736, "learning_rate": 4.9061102075939e-06, "loss": 0.4448, "step": 3356 }, { "epoch": 1.6665563461856694, "grad_norm": 0.37315627932548523, "learning_rate": 4.903221646342044e-06, "loss": 0.3188, "step": 3357 }, { "epoch": 1.6670527883501571, "grad_norm": 0.44153928756713867, "learning_rate": 4.9003331174018494e-06, "loss": 0.3951, "step": 3358 }, { "epoch": 1.667549230514645, "grad_norm": 0.41642993688583374, "learning_rate": 4.897444621737717e-06, "loss": 0.3474, "step": 3359 }, { "epoch": 1.6680456726791328, "grad_norm": 0.4099167585372925, "learning_rate": 4.894556160314041e-06, "loss": 0.362, "step": 3360 }, { "epoch": 1.6685421148436208, "grad_norm": 0.4254894554615021, "learning_rate": 4.8916677340951965e-06, "loss": 0.3451, "step": 3361 }, { "epoch": 1.6690385570081085, "grad_norm": 0.48179808259010315, "learning_rate": 4.888779344045549e-06, "loss": 0.3715, "step": 3362 }, { "epoch": 1.6695349991725963, "grad_norm": 0.42364197969436646, "learning_rate": 4.885890991129458e-06, "loss": 0.3515, "step": 3363 }, { "epoch": 1.6700314413370843, "grad_norm": 0.40611758828163147, "learning_rate": 4.883002676311262e-06, "loss": 0.3846, "step": 3364 }, { "epoch": 1.6705278835015722, "grad_norm": 0.3863654136657715, "learning_rate": 4.880114400555294e-06, "loss": 0.3601, "step": 3365 }, { "epoch": 1.67102432566606, "grad_norm": 0.48632195591926575, "learning_rate": 4.87722616482587e-06, "loss": 0.3949, "step": 3366 }, { "epoch": 1.6715207678305477, "grad_norm": 0.4305301308631897, "learning_rate": 4.87433797008729e-06, "loss": 0.3673, "step": 3367 }, { "epoch": 1.6720172099950354, "grad_norm": 0.4465939700603485, "learning_rate": 4.871449817303849e-06, "loss": 0.311, "step": 3368 }, { "epoch": 1.6725136521595234, "grad_norm": 0.4288612902164459, "learning_rate": 4.86856170743982e-06, "loss": 0.3422, "step": 3369 }, { "epoch": 1.6730100943240114, "grad_norm": 0.46125996112823486, "learning_rate": 4.865673641459463e-06, "loss": 0.4249, "step": 3370 }, { "epoch": 1.6735065364884991, "grad_norm": 0.44992774724960327, "learning_rate": 4.862785620327028e-06, "loss": 0.3386, "step": 3371 }, { "epoch": 1.6740029786529869, "grad_norm": 0.4646962583065033, "learning_rate": 4.859897645006743e-06, "loss": 0.3352, "step": 3372 }, { "epoch": 1.6744994208174746, "grad_norm": 0.48936501145362854, "learning_rate": 4.8570097164628285e-06, "loss": 0.3223, "step": 3373 }, { "epoch": 1.6749958629819626, "grad_norm": 0.48332908749580383, "learning_rate": 4.854121835659482e-06, "loss": 0.3324, "step": 3374 }, { "epoch": 1.6754923051464505, "grad_norm": 0.4086475074291229, "learning_rate": 4.851234003560891e-06, "loss": 0.3381, "step": 3375 }, { "epoch": 1.6759887473109383, "grad_norm": 0.4568094313144684, "learning_rate": 4.848346221131223e-06, "loss": 0.4264, "step": 3376 }, { "epoch": 1.676485189475426, "grad_norm": 0.3928796648979187, "learning_rate": 4.845458489334631e-06, "loss": 0.3126, "step": 3377 }, { "epoch": 1.676981631639914, "grad_norm": 0.4872033894062042, "learning_rate": 4.842570809135246e-06, "loss": 0.393, "step": 3378 }, { "epoch": 1.677478073804402, "grad_norm": 0.4875142276287079, "learning_rate": 4.839683181497192e-06, "loss": 0.4024, "step": 3379 }, { "epoch": 1.6779745159688897, "grad_norm": 0.3977273404598236, "learning_rate": 4.8367956073845655e-06, "loss": 0.3334, "step": 3380 }, { "epoch": 1.6784709581333774, "grad_norm": 0.46419402956962585, "learning_rate": 4.83390808776145e-06, "loss": 0.3475, "step": 3381 }, { "epoch": 1.6789674002978652, "grad_norm": 0.39664095640182495, "learning_rate": 4.831020623591909e-06, "loss": 0.3166, "step": 3382 }, { "epoch": 1.6794638424623531, "grad_norm": 0.5020340085029602, "learning_rate": 4.828133215839991e-06, "loss": 0.4206, "step": 3383 }, { "epoch": 1.679960284626841, "grad_norm": 0.39476868510246277, "learning_rate": 4.82524586546972e-06, "loss": 0.385, "step": 3384 }, { "epoch": 1.6804567267913288, "grad_norm": 0.49324700236320496, "learning_rate": 4.822358573445106e-06, "loss": 0.3907, "step": 3385 }, { "epoch": 1.6809531689558166, "grad_norm": 0.35731884837150574, "learning_rate": 4.819471340730135e-06, "loss": 0.2801, "step": 3386 }, { "epoch": 1.6814496111203043, "grad_norm": 0.5644133687019348, "learning_rate": 4.816584168288776e-06, "loss": 0.4416, "step": 3387 }, { "epoch": 1.6819460532847923, "grad_norm": 0.4125045835971832, "learning_rate": 4.81369705708498e-06, "loss": 0.333, "step": 3388 }, { "epoch": 1.6824424954492803, "grad_norm": 0.405013769865036, "learning_rate": 4.810810008082672e-06, "loss": 0.3327, "step": 3389 }, { "epoch": 1.682938937613768, "grad_norm": 0.44208043813705444, "learning_rate": 4.8079230222457616e-06, "loss": 0.3818, "step": 3390 }, { "epoch": 1.6834353797782557, "grad_norm": 0.47806641459465027, "learning_rate": 4.805036100538134e-06, "loss": 0.3676, "step": 3391 }, { "epoch": 1.6839318219427437, "grad_norm": 0.4681016206741333, "learning_rate": 4.802149243923655e-06, "loss": 0.3903, "step": 3392 }, { "epoch": 1.6844282641072315, "grad_norm": 0.38934525847435, "learning_rate": 4.799262453366162e-06, "loss": 0.2894, "step": 3393 }, { "epoch": 1.6849247062717194, "grad_norm": 0.4354211986064911, "learning_rate": 4.796375729829483e-06, "loss": 0.3479, "step": 3394 }, { "epoch": 1.6854211484362072, "grad_norm": 0.39751675724983215, "learning_rate": 4.793489074277412e-06, "loss": 0.3359, "step": 3395 }, { "epoch": 1.685917590600695, "grad_norm": 0.44665083289146423, "learning_rate": 4.790602487673728e-06, "loss": 0.3816, "step": 3396 }, { "epoch": 1.6864140327651829, "grad_norm": 0.4121491014957428, "learning_rate": 4.7877159709821805e-06, "loss": 0.3389, "step": 3397 }, { "epoch": 1.6869104749296708, "grad_norm": 0.4115191102027893, "learning_rate": 4.784829525166502e-06, "loss": 0.3666, "step": 3398 }, { "epoch": 1.6874069170941586, "grad_norm": 0.4080761969089508, "learning_rate": 4.781943151190397e-06, "loss": 0.3474, "step": 3399 }, { "epoch": 1.6879033592586463, "grad_norm": 0.40126317739486694, "learning_rate": 4.779056850017546e-06, "loss": 0.3399, "step": 3400 }, { "epoch": 1.688399801423134, "grad_norm": 0.41977250576019287, "learning_rate": 4.77617062261161e-06, "loss": 0.4217, "step": 3401 }, { "epoch": 1.688896243587622, "grad_norm": 0.39270445704460144, "learning_rate": 4.773284469936219e-06, "loss": 0.3216, "step": 3402 }, { "epoch": 1.68939268575211, "grad_norm": 0.41435113549232483, "learning_rate": 4.7703983929549816e-06, "loss": 0.3313, "step": 3403 }, { "epoch": 1.6898891279165977, "grad_norm": 0.3944357633590698, "learning_rate": 4.767512392631479e-06, "loss": 0.3397, "step": 3404 }, { "epoch": 1.6903855700810855, "grad_norm": 0.4311476945877075, "learning_rate": 4.764626469929272e-06, "loss": 0.3813, "step": 3405 }, { "epoch": 1.6908820122455734, "grad_norm": 0.43280622363090515, "learning_rate": 4.7617406258118895e-06, "loss": 0.342, "step": 3406 }, { "epoch": 1.6913784544100612, "grad_norm": 0.4266560971736908, "learning_rate": 4.758854861242837e-06, "loss": 0.3744, "step": 3407 }, { "epoch": 1.6918748965745491, "grad_norm": 0.42958223819732666, "learning_rate": 4.755969177185589e-06, "loss": 0.375, "step": 3408 }, { "epoch": 1.692371338739037, "grad_norm": 0.4242185354232788, "learning_rate": 4.753083574603603e-06, "loss": 0.3894, "step": 3409 }, { "epoch": 1.6928677809035246, "grad_norm": 0.39466020464897156, "learning_rate": 4.7501980544602995e-06, "loss": 0.3145, "step": 3410 }, { "epoch": 1.6933642230680126, "grad_norm": 0.4050382673740387, "learning_rate": 4.747312617719079e-06, "loss": 0.3322, "step": 3411 }, { "epoch": 1.6938606652325006, "grad_norm": 0.43111810088157654, "learning_rate": 4.744427265343304e-06, "loss": 0.387, "step": 3412 }, { "epoch": 1.6943571073969883, "grad_norm": 0.4212215542793274, "learning_rate": 4.741541998296321e-06, "loss": 0.3485, "step": 3413 }, { "epoch": 1.694853549561476, "grad_norm": 0.4235689043998718, "learning_rate": 4.738656817541441e-06, "loss": 0.373, "step": 3414 }, { "epoch": 1.6953499917259638, "grad_norm": 0.4185439348220825, "learning_rate": 4.735771724041945e-06, "loss": 0.3353, "step": 3415 }, { "epoch": 1.6958464338904518, "grad_norm": 0.4526943564414978, "learning_rate": 4.732886718761091e-06, "loss": 0.373, "step": 3416 }, { "epoch": 1.6963428760549397, "grad_norm": 0.449454665184021, "learning_rate": 4.730001802662101e-06, "loss": 0.3914, "step": 3417 }, { "epoch": 1.6968393182194275, "grad_norm": 0.3969576060771942, "learning_rate": 4.72711697670817e-06, "loss": 0.3865, "step": 3418 }, { "epoch": 1.6973357603839152, "grad_norm": 0.4319426715373993, "learning_rate": 4.724232241862464e-06, "loss": 0.3616, "step": 3419 }, { "epoch": 1.697832202548403, "grad_norm": 0.431548148393631, "learning_rate": 4.721347599088118e-06, "loss": 0.3355, "step": 3420 }, { "epoch": 1.698328644712891, "grad_norm": 0.37374404072761536, "learning_rate": 4.7184630493482355e-06, "loss": 0.313, "step": 3421 }, { "epoch": 1.6988250868773789, "grad_norm": 0.47353240847587585, "learning_rate": 4.71557859360589e-06, "loss": 0.3879, "step": 3422 }, { "epoch": 1.6993215290418666, "grad_norm": 0.3952730894088745, "learning_rate": 4.71269423282412e-06, "loss": 0.3539, "step": 3423 }, { "epoch": 1.6998179712063544, "grad_norm": 0.4190680682659149, "learning_rate": 4.709809967965939e-06, "loss": 0.3536, "step": 3424 }, { "epoch": 1.7003144133708423, "grad_norm": 0.42486143112182617, "learning_rate": 4.706925799994322e-06, "loss": 0.3285, "step": 3425 }, { "epoch": 1.7008108555353303, "grad_norm": 0.4452935457229614, "learning_rate": 4.704041729872215e-06, "loss": 0.3706, "step": 3426 }, { "epoch": 1.701307297699818, "grad_norm": 0.3718862533569336, "learning_rate": 4.701157758562528e-06, "loss": 0.3222, "step": 3427 }, { "epoch": 1.7018037398643058, "grad_norm": 0.41999101638793945, "learning_rate": 4.698273887028147e-06, "loss": 0.3682, "step": 3428 }, { "epoch": 1.7023001820287935, "grad_norm": 0.4588022530078888, "learning_rate": 4.695390116231915e-06, "loss": 0.3615, "step": 3429 }, { "epoch": 1.7027966241932815, "grad_norm": 0.42990338802337646, "learning_rate": 4.692506447136641e-06, "loss": 0.3517, "step": 3430 }, { "epoch": 1.7032930663577694, "grad_norm": 0.4056645631790161, "learning_rate": 4.68962288070511e-06, "loss": 0.3322, "step": 3431 }, { "epoch": 1.7037895085222572, "grad_norm": 0.4211224913597107, "learning_rate": 4.686739417900063e-06, "loss": 0.3331, "step": 3432 }, { "epoch": 1.704285950686745, "grad_norm": 0.4721679389476776, "learning_rate": 4.68385605968421e-06, "loss": 0.3698, "step": 3433 }, { "epoch": 1.7047823928512327, "grad_norm": 0.4711633622646332, "learning_rate": 4.680972807020226e-06, "loss": 0.3679, "step": 3434 }, { "epoch": 1.7052788350157206, "grad_norm": 0.41089141368865967, "learning_rate": 4.67808966087075e-06, "loss": 0.3891, "step": 3435 }, { "epoch": 1.7057752771802086, "grad_norm": 0.38763493299484253, "learning_rate": 4.67520662219839e-06, "loss": 0.3088, "step": 3436 }, { "epoch": 1.7062717193446963, "grad_norm": 0.462736040353775, "learning_rate": 4.672323691965711e-06, "loss": 0.3763, "step": 3437 }, { "epoch": 1.706768161509184, "grad_norm": 0.39708778262138367, "learning_rate": 4.669440871135243e-06, "loss": 0.3348, "step": 3438 }, { "epoch": 1.707264603673672, "grad_norm": 0.39904558658599854, "learning_rate": 4.666558160669486e-06, "loss": 0.3258, "step": 3439 }, { "epoch": 1.70776104583816, "grad_norm": 0.4381767809391022, "learning_rate": 4.663675561530897e-06, "loss": 0.4131, "step": 3440 }, { "epoch": 1.7082574880026478, "grad_norm": 0.4085589647293091, "learning_rate": 4.660793074681895e-06, "loss": 0.3904, "step": 3441 }, { "epoch": 1.7087539301671355, "grad_norm": 0.4074671268463135, "learning_rate": 4.657910701084869e-06, "loss": 0.3353, "step": 3442 }, { "epoch": 1.7092503723316232, "grad_norm": 0.47265589237213135, "learning_rate": 4.655028441702161e-06, "loss": 0.3893, "step": 3443 }, { "epoch": 1.7097468144961112, "grad_norm": 0.430618554353714, "learning_rate": 4.6521462974960805e-06, "loss": 0.3755, "step": 3444 }, { "epoch": 1.7102432566605992, "grad_norm": 0.42276567220687866, "learning_rate": 4.649264269428896e-06, "loss": 0.3457, "step": 3445 }, { "epoch": 1.710739698825087, "grad_norm": 0.4188057482242584, "learning_rate": 4.6463823584628415e-06, "loss": 0.3492, "step": 3446 }, { "epoch": 1.7112361409895747, "grad_norm": 0.3961176574230194, "learning_rate": 4.643500565560106e-06, "loss": 0.3228, "step": 3447 }, { "epoch": 1.7117325831540624, "grad_norm": 0.416866660118103, "learning_rate": 4.640618891682844e-06, "loss": 0.3759, "step": 3448 }, { "epoch": 1.7122290253185504, "grad_norm": 0.4514777660369873, "learning_rate": 4.637737337793164e-06, "loss": 0.3681, "step": 3449 }, { "epoch": 1.7127254674830383, "grad_norm": 0.4216620624065399, "learning_rate": 4.634855904853143e-06, "loss": 0.3781, "step": 3450 }, { "epoch": 1.713221909647526, "grad_norm": 0.43369248509407043, "learning_rate": 4.63197459382481e-06, "loss": 0.3282, "step": 3451 }, { "epoch": 1.7137183518120138, "grad_norm": 0.4346970319747925, "learning_rate": 4.629093405670159e-06, "loss": 0.3795, "step": 3452 }, { "epoch": 1.7142147939765018, "grad_norm": 0.4283752739429474, "learning_rate": 4.626212341351137e-06, "loss": 0.3918, "step": 3453 }, { "epoch": 1.7147112361409895, "grad_norm": 0.41232728958129883, "learning_rate": 4.623331401829658e-06, "loss": 0.3484, "step": 3454 }, { "epoch": 1.7152076783054775, "grad_norm": 0.379302978515625, "learning_rate": 4.6204505880675856e-06, "loss": 0.3633, "step": 3455 }, { "epoch": 1.7157041204699652, "grad_norm": 0.4148751497268677, "learning_rate": 4.617569901026745e-06, "loss": 0.4151, "step": 3456 }, { "epoch": 1.716200562634453, "grad_norm": 0.3872535824775696, "learning_rate": 4.614689341668922e-06, "loss": 0.3244, "step": 3457 }, { "epoch": 1.716697004798941, "grad_norm": 0.3826894462108612, "learning_rate": 4.611808910955855e-06, "loss": 0.3577, "step": 3458 }, { "epoch": 1.717193446963429, "grad_norm": 0.4121086895465851, "learning_rate": 4.608928609849244e-06, "loss": 0.4039, "step": 3459 }, { "epoch": 1.7176898891279166, "grad_norm": 0.39683032035827637, "learning_rate": 4.606048439310738e-06, "loss": 0.3218, "step": 3460 }, { "epoch": 1.7181863312924044, "grad_norm": 0.4559135437011719, "learning_rate": 4.603168400301954e-06, "loss": 0.3932, "step": 3461 }, { "epoch": 1.7186827734568921, "grad_norm": 0.3660829961299896, "learning_rate": 4.600288493784455e-06, "loss": 0.3118, "step": 3462 }, { "epoch": 1.71917921562138, "grad_norm": 0.37852099537849426, "learning_rate": 4.597408720719765e-06, "loss": 0.3837, "step": 3463 }, { "epoch": 1.719675657785868, "grad_norm": 0.4370517134666443, "learning_rate": 4.5945290820693585e-06, "loss": 0.406, "step": 3464 }, { "epoch": 1.7201720999503558, "grad_norm": 0.3840208649635315, "learning_rate": 4.591649578794675e-06, "loss": 0.3695, "step": 3465 }, { "epoch": 1.7206685421148435, "grad_norm": 0.40988150238990784, "learning_rate": 4.588770211857096e-06, "loss": 0.3591, "step": 3466 }, { "epoch": 1.7211649842793315, "grad_norm": 0.3722843527793884, "learning_rate": 4.58589098221797e-06, "loss": 0.3749, "step": 3467 }, { "epoch": 1.7216614264438193, "grad_norm": 0.39053860306739807, "learning_rate": 4.583011890838586e-06, "loss": 0.3505, "step": 3468 }, { "epoch": 1.7221578686083072, "grad_norm": 0.41890954971313477, "learning_rate": 4.580132938680202e-06, "loss": 0.3439, "step": 3469 }, { "epoch": 1.722654310772795, "grad_norm": 0.38969677686691284, "learning_rate": 4.577254126704017e-06, "loss": 0.357, "step": 3470 }, { "epoch": 1.7231507529372827, "grad_norm": 0.39882007241249084, "learning_rate": 4.574375455871188e-06, "loss": 0.372, "step": 3471 }, { "epoch": 1.7236471951017707, "grad_norm": 0.43373286724090576, "learning_rate": 4.571496927142829e-06, "loss": 0.3848, "step": 3472 }, { "epoch": 1.7241436372662586, "grad_norm": 0.39338088035583496, "learning_rate": 4.56861854148e-06, "loss": 0.3138, "step": 3473 }, { "epoch": 1.7246400794307464, "grad_norm": 0.4682184159755707, "learning_rate": 4.565740299843714e-06, "loss": 0.3678, "step": 3474 }, { "epoch": 1.7251365215952341, "grad_norm": 0.38133060932159424, "learning_rate": 4.562862203194939e-06, "loss": 0.2927, "step": 3475 }, { "epoch": 1.7256329637597219, "grad_norm": 0.41281524300575256, "learning_rate": 4.559984252494595e-06, "loss": 0.3905, "step": 3476 }, { "epoch": 1.7261294059242098, "grad_norm": 0.43895962834358215, "learning_rate": 4.55710644870355e-06, "loss": 0.3724, "step": 3477 }, { "epoch": 1.7266258480886978, "grad_norm": 0.40703558921813965, "learning_rate": 4.554228792782626e-06, "loss": 0.3878, "step": 3478 }, { "epoch": 1.7271222902531855, "grad_norm": 0.42586997151374817, "learning_rate": 4.551351285692589e-06, "loss": 0.3939, "step": 3479 }, { "epoch": 1.7276187324176733, "grad_norm": 0.4255748689174652, "learning_rate": 4.548473928394167e-06, "loss": 0.364, "step": 3480 }, { "epoch": 1.728115174582161, "grad_norm": 0.4221493899822235, "learning_rate": 4.545596721848027e-06, "loss": 0.3527, "step": 3481 }, { "epoch": 1.728611616746649, "grad_norm": 0.4173060953617096, "learning_rate": 4.542719667014792e-06, "loss": 0.3552, "step": 3482 }, { "epoch": 1.729108058911137, "grad_norm": 0.4109133183956146, "learning_rate": 4.539842764855032e-06, "loss": 0.3576, "step": 3483 }, { "epoch": 1.7296045010756247, "grad_norm": 0.4029085636138916, "learning_rate": 4.5369660163292674e-06, "loss": 0.3364, "step": 3484 }, { "epoch": 1.7301009432401124, "grad_norm": 0.4252501130104065, "learning_rate": 4.534089422397965e-06, "loss": 0.376, "step": 3485 }, { "epoch": 1.7305973854046004, "grad_norm": 0.4562428891658783, "learning_rate": 4.5312129840215405e-06, "loss": 0.3447, "step": 3486 }, { "epoch": 1.7310938275690884, "grad_norm": 0.4350196123123169, "learning_rate": 4.528336702160361e-06, "loss": 0.3598, "step": 3487 }, { "epoch": 1.731590269733576, "grad_norm": 0.4179953336715698, "learning_rate": 4.5254605777747376e-06, "loss": 0.3346, "step": 3488 }, { "epoch": 1.7320867118980638, "grad_norm": 0.4997890889644623, "learning_rate": 4.5225846118249295e-06, "loss": 0.3859, "step": 3489 }, { "epoch": 1.7325831540625516, "grad_norm": 0.43896976113319397, "learning_rate": 4.519708805271144e-06, "loss": 0.3612, "step": 3490 }, { "epoch": 1.7330795962270396, "grad_norm": 0.407652884721756, "learning_rate": 4.5168331590735345e-06, "loss": 0.3244, "step": 3491 }, { "epoch": 1.7335760383915275, "grad_norm": 0.42112112045288086, "learning_rate": 4.513957674192203e-06, "loss": 0.3666, "step": 3492 }, { "epoch": 1.7340724805560153, "grad_norm": 0.40093737840652466, "learning_rate": 4.511082351587194e-06, "loss": 0.2892, "step": 3493 }, { "epoch": 1.734568922720503, "grad_norm": 0.4580364525318146, "learning_rate": 4.5082071922184965e-06, "loss": 0.3337, "step": 3494 }, { "epoch": 1.7350653648849907, "grad_norm": 0.4633478820323944, "learning_rate": 4.505332197046055e-06, "loss": 0.3682, "step": 3495 }, { "epoch": 1.7355618070494787, "grad_norm": 0.4182467758655548, "learning_rate": 4.5024573670297475e-06, "loss": 0.347, "step": 3496 }, { "epoch": 1.7360582492139667, "grad_norm": 0.43714725971221924, "learning_rate": 4.499582703129402e-06, "loss": 0.3721, "step": 3497 }, { "epoch": 1.7365546913784544, "grad_norm": 0.39639389514923096, "learning_rate": 4.49670820630479e-06, "loss": 0.3479, "step": 3498 }, { "epoch": 1.7370511335429422, "grad_norm": 0.3376375734806061, "learning_rate": 4.493833877515632e-06, "loss": 0.2791, "step": 3499 }, { "epoch": 1.7375475757074301, "grad_norm": 0.5395908355712891, "learning_rate": 4.490959717721586e-06, "loss": 0.4141, "step": 3500 }, { "epoch": 1.7380440178719179, "grad_norm": 0.4188658595085144, "learning_rate": 4.4880857278822524e-06, "loss": 0.3554, "step": 3501 }, { "epoch": 1.7385404600364058, "grad_norm": 0.4572022259235382, "learning_rate": 4.485211908957183e-06, "loss": 0.3941, "step": 3502 }, { "epoch": 1.7390369022008936, "grad_norm": 0.4223828613758087, "learning_rate": 4.482338261905866e-06, "loss": 0.3598, "step": 3503 }, { "epoch": 1.7395333443653813, "grad_norm": 0.4202645719051361, "learning_rate": 4.4794647876877335e-06, "loss": 0.3317, "step": 3504 }, { "epoch": 1.7400297865298693, "grad_norm": 0.4672909379005432, "learning_rate": 4.476591487262161e-06, "loss": 0.3608, "step": 3505 }, { "epoch": 1.7405262286943572, "grad_norm": 0.43829846382141113, "learning_rate": 4.473718361588465e-06, "loss": 0.3393, "step": 3506 }, { "epoch": 1.741022670858845, "grad_norm": 0.3823731243610382, "learning_rate": 4.470845411625906e-06, "loss": 0.3616, "step": 3507 }, { "epoch": 1.7415191130233327, "grad_norm": 0.43867725133895874, "learning_rate": 4.467972638333682e-06, "loss": 0.3188, "step": 3508 }, { "epoch": 1.7420155551878205, "grad_norm": 0.46032825112342834, "learning_rate": 4.465100042670933e-06, "loss": 0.4066, "step": 3509 }, { "epoch": 1.7425119973523084, "grad_norm": 0.3955080509185791, "learning_rate": 4.462227625596743e-06, "loss": 0.2976, "step": 3510 }, { "epoch": 1.7430084395167964, "grad_norm": 0.5174143314361572, "learning_rate": 4.459355388070134e-06, "loss": 0.3561, "step": 3511 }, { "epoch": 1.7435048816812841, "grad_norm": 0.38736116886138916, "learning_rate": 4.456483331050064e-06, "loss": 0.3531, "step": 3512 }, { "epoch": 1.744001323845772, "grad_norm": 0.38521745800971985, "learning_rate": 4.453611455495441e-06, "loss": 0.3407, "step": 3513 }, { "epoch": 1.7444977660102599, "grad_norm": 0.441607803106308, "learning_rate": 4.450739762365101e-06, "loss": 0.3887, "step": 3514 }, { "epoch": 1.7449942081747476, "grad_norm": 0.48877283930778503, "learning_rate": 4.447868252617828e-06, "loss": 0.3861, "step": 3515 }, { "epoch": 1.7454906503392356, "grad_norm": 0.37354832887649536, "learning_rate": 4.444996927212337e-06, "loss": 0.27, "step": 3516 }, { "epoch": 1.7459870925037233, "grad_norm": 0.43533000349998474, "learning_rate": 4.44212578710729e-06, "loss": 0.3977, "step": 3517 }, { "epoch": 1.746483534668211, "grad_norm": 0.41942861676216125, "learning_rate": 4.439254833261281e-06, "loss": 0.3604, "step": 3518 }, { "epoch": 1.746979976832699, "grad_norm": 0.43625956773757935, "learning_rate": 4.436384066632842e-06, "loss": 0.3655, "step": 3519 }, { "epoch": 1.747476418997187, "grad_norm": 0.40300214290618896, "learning_rate": 4.433513488180443e-06, "loss": 0.3418, "step": 3520 }, { "epoch": 1.7479728611616747, "grad_norm": 0.36206138134002686, "learning_rate": 4.4306430988624945e-06, "loss": 0.341, "step": 3521 }, { "epoch": 1.7484693033261625, "grad_norm": 0.4058184027671814, "learning_rate": 4.427772899637343e-06, "loss": 0.3786, "step": 3522 }, { "epoch": 1.7489657454906502, "grad_norm": 0.45525509119033813, "learning_rate": 4.424902891463269e-06, "loss": 0.3771, "step": 3523 }, { "epoch": 1.7494621876551382, "grad_norm": 0.44668740034103394, "learning_rate": 4.422033075298485e-06, "loss": 0.3688, "step": 3524 }, { "epoch": 1.7499586298196261, "grad_norm": 0.4630526304244995, "learning_rate": 4.419163452101153e-06, "loss": 0.3979, "step": 3525 }, { "epoch": 1.7504550719841139, "grad_norm": 0.3961310088634491, "learning_rate": 4.416294022829356e-06, "loss": 0.344, "step": 3526 }, { "epoch": 1.7509515141486016, "grad_norm": 0.3983076512813568, "learning_rate": 4.41342478844112e-06, "loss": 0.3579, "step": 3527 }, { "epoch": 1.7514479563130894, "grad_norm": 0.3971560597419739, "learning_rate": 4.410555749894407e-06, "loss": 0.3595, "step": 3528 }, { "epoch": 1.7519443984775773, "grad_norm": 0.41794800758361816, "learning_rate": 4.407686908147107e-06, "loss": 0.3595, "step": 3529 }, { "epoch": 1.7524408406420653, "grad_norm": 0.4270433187484741, "learning_rate": 4.404818264157052e-06, "loss": 0.3166, "step": 3530 }, { "epoch": 1.752937282806553, "grad_norm": 0.4714721739292145, "learning_rate": 4.4019498188819996e-06, "loss": 0.3985, "step": 3531 }, { "epoch": 1.7534337249710408, "grad_norm": 0.40725672245025635, "learning_rate": 4.399081573279651e-06, "loss": 0.367, "step": 3532 }, { "epoch": 1.7539301671355287, "grad_norm": 0.3868299722671509, "learning_rate": 4.396213528307633e-06, "loss": 0.3296, "step": 3533 }, { "epoch": 1.7544266093000167, "grad_norm": 0.387024849653244, "learning_rate": 4.393345684923508e-06, "loss": 0.337, "step": 3534 }, { "epoch": 1.7549230514645044, "grad_norm": 0.44280004501342773, "learning_rate": 4.3904780440847695e-06, "loss": 0.3603, "step": 3535 }, { "epoch": 1.7554194936289922, "grad_norm": 0.44075992703437805, "learning_rate": 4.387610606748847e-06, "loss": 0.355, "step": 3536 }, { "epoch": 1.75591593579348, "grad_norm": 0.370971143245697, "learning_rate": 4.384743373873099e-06, "loss": 0.3532, "step": 3537 }, { "epoch": 1.756412377957968, "grad_norm": 0.436681866645813, "learning_rate": 4.3818763464148165e-06, "loss": 0.3851, "step": 3538 }, { "epoch": 1.7569088201224559, "grad_norm": 0.37362024188041687, "learning_rate": 4.379009525331222e-06, "loss": 0.3275, "step": 3539 }, { "epoch": 1.7574052622869436, "grad_norm": 0.4035605490207672, "learning_rate": 4.37614291157947e-06, "loss": 0.3168, "step": 3540 }, { "epoch": 1.7579017044514313, "grad_norm": 0.45498576760292053, "learning_rate": 4.373276506116645e-06, "loss": 0.3766, "step": 3541 }, { "epoch": 1.758398146615919, "grad_norm": 0.4041537642478943, "learning_rate": 4.370410309899759e-06, "loss": 0.3587, "step": 3542 }, { "epoch": 1.758894588780407, "grad_norm": 0.4470871090888977, "learning_rate": 4.367544323885762e-06, "loss": 0.384, "step": 3543 }, { "epoch": 1.759391030944895, "grad_norm": 0.41390088200569153, "learning_rate": 4.364678549031525e-06, "loss": 0.3996, "step": 3544 }, { "epoch": 1.7598874731093828, "grad_norm": 0.37815022468566895, "learning_rate": 4.3618129862938525e-06, "loss": 0.3142, "step": 3545 }, { "epoch": 1.7603839152738705, "grad_norm": 0.4085596799850464, "learning_rate": 4.358947636629478e-06, "loss": 0.3348, "step": 3546 }, { "epoch": 1.7608803574383585, "grad_norm": 0.4037335515022278, "learning_rate": 4.3560825009950665e-06, "loss": 0.3237, "step": 3547 }, { "epoch": 1.7613767996028464, "grad_norm": 0.45882540941238403, "learning_rate": 4.353217580347208e-06, "loss": 0.3947, "step": 3548 }, { "epoch": 1.7618732417673342, "grad_norm": 0.4050838053226471, "learning_rate": 4.3503528756424204e-06, "loss": 0.399, "step": 3549 }, { "epoch": 1.762369683931822, "grad_norm": 0.447048157453537, "learning_rate": 4.3474883878371496e-06, "loss": 0.3798, "step": 3550 }, { "epoch": 1.7628661260963097, "grad_norm": 0.4181094169616699, "learning_rate": 4.3446241178877735e-06, "loss": 0.3962, "step": 3551 }, { "epoch": 1.7633625682607976, "grad_norm": 0.4188656508922577, "learning_rate": 4.341760066750591e-06, "loss": 0.3232, "step": 3552 }, { "epoch": 1.7638590104252856, "grad_norm": 0.42014241218566895, "learning_rate": 4.338896235381832e-06, "loss": 0.4071, "step": 3553 }, { "epoch": 1.7643554525897733, "grad_norm": 0.4172673523426056, "learning_rate": 4.336032624737653e-06, "loss": 0.3095, "step": 3554 }, { "epoch": 1.764851894754261, "grad_norm": 0.4352819323539734, "learning_rate": 4.333169235774136e-06, "loss": 0.3693, "step": 3555 }, { "epoch": 1.7653483369187488, "grad_norm": 0.3698497414588928, "learning_rate": 4.330306069447287e-06, "loss": 0.3036, "step": 3556 }, { "epoch": 1.7658447790832368, "grad_norm": 0.404302716255188, "learning_rate": 4.327443126713039e-06, "loss": 0.3584, "step": 3557 }, { "epoch": 1.7663412212477247, "grad_norm": 0.4196814298629761, "learning_rate": 4.324580408527254e-06, "loss": 0.3671, "step": 3558 }, { "epoch": 1.7668376634122125, "grad_norm": 0.38172799348831177, "learning_rate": 4.321717915845713e-06, "loss": 0.3498, "step": 3559 }, { "epoch": 1.7673341055767002, "grad_norm": 0.3968551754951477, "learning_rate": 4.318855649624124e-06, "loss": 0.3409, "step": 3560 }, { "epoch": 1.7678305477411882, "grad_norm": 0.40258553624153137, "learning_rate": 4.315993610818121e-06, "loss": 0.3669, "step": 3561 }, { "epoch": 1.768326989905676, "grad_norm": 0.35236701369285583, "learning_rate": 4.3131318003832625e-06, "loss": 0.2896, "step": 3562 }, { "epoch": 1.768823432070164, "grad_norm": 0.42061665654182434, "learning_rate": 4.310270219275028e-06, "loss": 0.3676, "step": 3563 }, { "epoch": 1.7693198742346516, "grad_norm": 0.379690557718277, "learning_rate": 4.307408868448822e-06, "loss": 0.3653, "step": 3564 }, { "epoch": 1.7698163163991394, "grad_norm": 0.4287635087966919, "learning_rate": 4.304547748859967e-06, "loss": 0.3617, "step": 3565 }, { "epoch": 1.7703127585636274, "grad_norm": 0.3948676884174347, "learning_rate": 4.301686861463722e-06, "loss": 0.3759, "step": 3566 }, { "epoch": 1.7708092007281153, "grad_norm": 0.38777267932891846, "learning_rate": 4.298826207215254e-06, "loss": 0.3133, "step": 3567 }, { "epoch": 1.771305642892603, "grad_norm": 0.4192628860473633, "learning_rate": 4.2959657870696555e-06, "loss": 0.3471, "step": 3568 }, { "epoch": 1.7718020850570908, "grad_norm": 0.4588128626346588, "learning_rate": 4.293105601981948e-06, "loss": 0.3839, "step": 3569 }, { "epoch": 1.7722985272215785, "grad_norm": 0.3768344223499298, "learning_rate": 4.290245652907069e-06, "loss": 0.3538, "step": 3570 }, { "epoch": 1.7727949693860665, "grad_norm": 0.391674667596817, "learning_rate": 4.287385940799876e-06, "loss": 0.3638, "step": 3571 }, { "epoch": 1.7732914115505545, "grad_norm": 0.41087451577186584, "learning_rate": 4.284526466615148e-06, "loss": 0.3347, "step": 3572 }, { "epoch": 1.7737878537150422, "grad_norm": 0.4243941009044647, "learning_rate": 4.281667231307588e-06, "loss": 0.4463, "step": 3573 }, { "epoch": 1.77428429587953, "grad_norm": 0.3812597990036011, "learning_rate": 4.278808235831818e-06, "loss": 0.3323, "step": 3574 }, { "epoch": 1.774780738044018, "grad_norm": 0.44965648651123047, "learning_rate": 4.2759494811423755e-06, "loss": 0.378, "step": 3575 }, { "epoch": 1.7752771802085057, "grad_norm": 0.4544672667980194, "learning_rate": 4.2730909681937224e-06, "loss": 0.378, "step": 3576 }, { "epoch": 1.7757736223729936, "grad_norm": 0.3871768116950989, "learning_rate": 4.2702326979402385e-06, "loss": 0.3197, "step": 3577 }, { "epoch": 1.7762700645374814, "grad_norm": 0.4456430971622467, "learning_rate": 4.267374671336224e-06, "loss": 0.3654, "step": 3578 }, { "epoch": 1.7767665067019691, "grad_norm": 0.48051995038986206, "learning_rate": 4.264516889335894e-06, "loss": 0.3906, "step": 3579 }, { "epoch": 1.777262948866457, "grad_norm": 0.3784416615962982, "learning_rate": 4.261659352893386e-06, "loss": 0.3338, "step": 3580 }, { "epoch": 1.777759391030945, "grad_norm": 0.4810880124568939, "learning_rate": 4.258802062962754e-06, "loss": 0.3844, "step": 3581 }, { "epoch": 1.7782558331954328, "grad_norm": 0.41664934158325195, "learning_rate": 4.255945020497968e-06, "loss": 0.2951, "step": 3582 }, { "epoch": 1.7787522753599205, "grad_norm": 0.43161922693252563, "learning_rate": 4.253088226452915e-06, "loss": 0.3844, "step": 3583 }, { "epoch": 1.7792487175244083, "grad_norm": 0.4180920720100403, "learning_rate": 4.250231681781406e-06, "loss": 0.3311, "step": 3584 }, { "epoch": 1.7797451596888962, "grad_norm": 0.42827293276786804, "learning_rate": 4.24737538743716e-06, "loss": 0.4061, "step": 3585 }, { "epoch": 1.7802416018533842, "grad_norm": 0.4029332399368286, "learning_rate": 4.244519344373817e-06, "loss": 0.3343, "step": 3586 }, { "epoch": 1.780738044017872, "grad_norm": 0.4849119186401367, "learning_rate": 4.241663553544931e-06, "loss": 0.401, "step": 3587 }, { "epoch": 1.7812344861823597, "grad_norm": 0.4084133803844452, "learning_rate": 4.2388080159039755e-06, "loss": 0.329, "step": 3588 }, { "epoch": 1.7817309283468474, "grad_norm": 0.4575764834880829, "learning_rate": 4.235952732404336e-06, "loss": 0.4488, "step": 3589 }, { "epoch": 1.7822273705113354, "grad_norm": 0.38959163427352905, "learning_rate": 4.233097703999313e-06, "loss": 0.308, "step": 3590 }, { "epoch": 1.7827238126758234, "grad_norm": 0.4343438446521759, "learning_rate": 4.230242931642121e-06, "loss": 0.3923, "step": 3591 }, { "epoch": 1.783220254840311, "grad_norm": 0.4238279461860657, "learning_rate": 4.2273884162858955e-06, "loss": 0.3575, "step": 3592 }, { "epoch": 1.7837166970047988, "grad_norm": 0.38193830847740173, "learning_rate": 4.224534158883679e-06, "loss": 0.3335, "step": 3593 }, { "epoch": 1.7842131391692868, "grad_norm": 0.46659404039382935, "learning_rate": 4.22168016038843e-06, "loss": 0.3723, "step": 3594 }, { "epoch": 1.7847095813337748, "grad_norm": 0.43169909715652466, "learning_rate": 4.2188264217530235e-06, "loss": 0.332, "step": 3595 }, { "epoch": 1.7852060234982625, "grad_norm": 0.4559648036956787, "learning_rate": 4.2159729439302435e-06, "loss": 0.3954, "step": 3596 }, { "epoch": 1.7857024656627503, "grad_norm": 0.3893948793411255, "learning_rate": 4.213119727872789e-06, "loss": 0.3117, "step": 3597 }, { "epoch": 1.786198907827238, "grad_norm": 0.45693227648735046, "learning_rate": 4.210266774533269e-06, "loss": 0.3559, "step": 3598 }, { "epoch": 1.786695349991726, "grad_norm": 0.47173526883125305, "learning_rate": 4.207414084864211e-06, "loss": 0.3653, "step": 3599 }, { "epoch": 1.787191792156214, "grad_norm": 0.439274400472641, "learning_rate": 4.204561659818049e-06, "loss": 0.3488, "step": 3600 }, { "epoch": 1.7876882343207017, "grad_norm": 0.4375983476638794, "learning_rate": 4.2017095003471294e-06, "loss": 0.3359, "step": 3601 }, { "epoch": 1.7881846764851894, "grad_norm": 0.42190635204315186, "learning_rate": 4.19885760740371e-06, "loss": 0.3419, "step": 3602 }, { "epoch": 1.7886811186496772, "grad_norm": 0.4293259382247925, "learning_rate": 4.196005981939963e-06, "loss": 0.3724, "step": 3603 }, { "epoch": 1.7891775608141651, "grad_norm": 0.44344478845596313, "learning_rate": 4.193154624907968e-06, "loss": 0.3283, "step": 3604 }, { "epoch": 1.789674002978653, "grad_norm": 0.39100202918052673, "learning_rate": 4.1903035372597155e-06, "loss": 0.3398, "step": 3605 }, { "epoch": 1.7901704451431408, "grad_norm": 0.4298996925354004, "learning_rate": 4.1874527199471025e-06, "loss": 0.3697, "step": 3606 }, { "epoch": 1.7906668873076286, "grad_norm": 0.4405331611633301, "learning_rate": 4.184602173921945e-06, "loss": 0.3727, "step": 3607 }, { "epoch": 1.7911633294721165, "grad_norm": 0.43319928646087646, "learning_rate": 4.181751900135959e-06, "loss": 0.4158, "step": 3608 }, { "epoch": 1.7916597716366043, "grad_norm": 0.380766898393631, "learning_rate": 4.178901899540775e-06, "loss": 0.3325, "step": 3609 }, { "epoch": 1.7921562138010922, "grad_norm": 0.4534495174884796, "learning_rate": 4.17605217308793e-06, "loss": 0.4134, "step": 3610 }, { "epoch": 1.79265265596558, "grad_norm": 0.3929918706417084, "learning_rate": 4.173202721728873e-06, "loss": 0.3244, "step": 3611 }, { "epoch": 1.7931490981300677, "grad_norm": 0.4277290999889374, "learning_rate": 4.170353546414955e-06, "loss": 0.3873, "step": 3612 }, { "epoch": 1.7936455402945557, "grad_norm": 0.43446677923202515, "learning_rate": 4.167504648097438e-06, "loss": 0.3536, "step": 3613 }, { "epoch": 1.7941419824590437, "grad_norm": 0.3905675411224365, "learning_rate": 4.164656027727495e-06, "loss": 0.3444, "step": 3614 }, { "epoch": 1.7946384246235314, "grad_norm": 0.48642170429229736, "learning_rate": 4.161807686256199e-06, "loss": 0.3821, "step": 3615 }, { "epoch": 1.7951348667880191, "grad_norm": 0.4071691334247589, "learning_rate": 4.158959624634537e-06, "loss": 0.3729, "step": 3616 }, { "epoch": 1.7956313089525069, "grad_norm": 0.4642496109008789, "learning_rate": 4.156111843813397e-06, "loss": 0.3765, "step": 3617 }, { "epoch": 1.7961277511169949, "grad_norm": 0.4061780869960785, "learning_rate": 4.153264344743578e-06, "loss": 0.3283, "step": 3618 }, { "epoch": 1.7966241932814828, "grad_norm": 0.45099401473999023, "learning_rate": 4.150417128375782e-06, "loss": 0.3831, "step": 3619 }, { "epoch": 1.7971206354459706, "grad_norm": 0.4328870177268982, "learning_rate": 4.147570195660614e-06, "loss": 0.3611, "step": 3620 }, { "epoch": 1.7976170776104583, "grad_norm": 0.426016241312027, "learning_rate": 4.144723547548592e-06, "loss": 0.352, "step": 3621 }, { "epoch": 1.7981135197749463, "grad_norm": 0.465628057718277, "learning_rate": 4.141877184990133e-06, "loss": 0.384, "step": 3622 }, { "epoch": 1.798609961939434, "grad_norm": 0.45441317558288574, "learning_rate": 4.1390311089355575e-06, "loss": 0.3712, "step": 3623 }, { "epoch": 1.799106404103922, "grad_norm": 0.4318005442619324, "learning_rate": 4.136185320335095e-06, "loss": 0.3995, "step": 3624 }, { "epoch": 1.7996028462684097, "grad_norm": 0.3752419054508209, "learning_rate": 4.133339820138876e-06, "loss": 0.3108, "step": 3625 }, { "epoch": 1.8000992884328975, "grad_norm": 0.4171358644962311, "learning_rate": 4.130494609296939e-06, "loss": 0.3686, "step": 3626 }, { "epoch": 1.8005957305973854, "grad_norm": 0.4029138386249542, "learning_rate": 4.12764968875922e-06, "loss": 0.3677, "step": 3627 }, { "epoch": 1.8010921727618734, "grad_norm": 0.4019133150577545, "learning_rate": 4.124805059475559e-06, "loss": 0.3493, "step": 3628 }, { "epoch": 1.8015886149263611, "grad_norm": 0.3973993957042694, "learning_rate": 4.1219607223957026e-06, "loss": 0.3373, "step": 3629 }, { "epoch": 1.8020850570908489, "grad_norm": 0.4471695125102997, "learning_rate": 4.119116678469298e-06, "loss": 0.3654, "step": 3630 }, { "epoch": 1.8025814992553366, "grad_norm": 0.41173145174980164, "learning_rate": 4.116272928645893e-06, "loss": 0.3578, "step": 3631 }, { "epoch": 1.8030779414198246, "grad_norm": 0.3895764946937561, "learning_rate": 4.113429473874938e-06, "loss": 0.3677, "step": 3632 }, { "epoch": 1.8035743835843125, "grad_norm": 0.4370647966861725, "learning_rate": 4.1105863151057865e-06, "loss": 0.3704, "step": 3633 }, { "epoch": 1.8040708257488003, "grad_norm": 0.47146061062812805, "learning_rate": 4.107743453287693e-06, "loss": 0.3726, "step": 3634 }, { "epoch": 1.804567267913288, "grad_norm": 0.39836588501930237, "learning_rate": 4.1049008893698066e-06, "loss": 0.3079, "step": 3635 }, { "epoch": 1.8050637100777758, "grad_norm": 0.43896055221557617, "learning_rate": 4.102058624301189e-06, "loss": 0.3767, "step": 3636 }, { "epoch": 1.8055601522422637, "grad_norm": 0.4088461995124817, "learning_rate": 4.099216659030792e-06, "loss": 0.3542, "step": 3637 }, { "epoch": 1.8060565944067517, "grad_norm": 0.3995796740055084, "learning_rate": 4.09637499450747e-06, "loss": 0.3543, "step": 3638 }, { "epoch": 1.8065530365712394, "grad_norm": 0.41806069016456604, "learning_rate": 4.0935336316799764e-06, "loss": 0.3803, "step": 3639 }, { "epoch": 1.8070494787357272, "grad_norm": 0.40099024772644043, "learning_rate": 4.090692571496968e-06, "loss": 0.3176, "step": 3640 }, { "epoch": 1.8075459209002152, "grad_norm": 0.39964759349823, "learning_rate": 4.087851814906997e-06, "loss": 0.2859, "step": 3641 }, { "epoch": 1.8080423630647031, "grad_norm": 0.3943895399570465, "learning_rate": 4.0850113628585155e-06, "loss": 0.3543, "step": 3642 }, { "epoch": 1.8085388052291909, "grad_norm": 0.4103333652019501, "learning_rate": 4.0821712162998686e-06, "loss": 0.3418, "step": 3643 }, { "epoch": 1.8090352473936786, "grad_norm": 0.4095951020717621, "learning_rate": 4.07933137617931e-06, "loss": 0.3542, "step": 3644 }, { "epoch": 1.8095316895581663, "grad_norm": 0.41218483448028564, "learning_rate": 4.076491843444982e-06, "loss": 0.3362, "step": 3645 }, { "epoch": 1.8100281317226543, "grad_norm": 0.4291670322418213, "learning_rate": 4.0736526190449264e-06, "loss": 0.3728, "step": 3646 }, { "epoch": 1.8105245738871423, "grad_norm": 0.36682215332984924, "learning_rate": 4.0708137039270855e-06, "loss": 0.3448, "step": 3647 }, { "epoch": 1.81102101605163, "grad_norm": 0.3995973467826843, "learning_rate": 4.067975099039295e-06, "loss": 0.3717, "step": 3648 }, { "epoch": 1.8115174582161178, "grad_norm": 0.40094950795173645, "learning_rate": 4.065136805329289e-06, "loss": 0.312, "step": 3649 }, { "epoch": 1.8120139003806055, "grad_norm": 0.4105813503265381, "learning_rate": 4.0622988237446924e-06, "loss": 0.3531, "step": 3650 }, { "epoch": 1.8125103425450935, "grad_norm": 0.4212915897369385, "learning_rate": 4.059461155233036e-06, "loss": 0.3859, "step": 3651 }, { "epoch": 1.8130067847095814, "grad_norm": 0.4015576243400574, "learning_rate": 4.056623800741738e-06, "loss": 0.3497, "step": 3652 }, { "epoch": 1.8135032268740692, "grad_norm": 0.3545067608356476, "learning_rate": 4.053786761218113e-06, "loss": 0.2949, "step": 3653 }, { "epoch": 1.813999669038557, "grad_norm": 0.45494306087493896, "learning_rate": 4.05095003760937e-06, "loss": 0.3886, "step": 3654 }, { "epoch": 1.8144961112030449, "grad_norm": 0.4600226879119873, "learning_rate": 4.048113630862617e-06, "loss": 0.3666, "step": 3655 }, { "epoch": 1.8149925533675328, "grad_norm": 0.37804457545280457, "learning_rate": 4.045277541924851e-06, "loss": 0.3704, "step": 3656 }, { "epoch": 1.8154889955320206, "grad_norm": 0.42182886600494385, "learning_rate": 4.042441771742967e-06, "loss": 0.3483, "step": 3657 }, { "epoch": 1.8159854376965083, "grad_norm": 0.4031047224998474, "learning_rate": 4.039606321263748e-06, "loss": 0.3514, "step": 3658 }, { "epoch": 1.816481879860996, "grad_norm": 0.4714978337287903, "learning_rate": 4.036771191433879e-06, "loss": 0.3369, "step": 3659 }, { "epoch": 1.816978322025484, "grad_norm": 0.4064290225505829, "learning_rate": 4.03393638319993e-06, "loss": 0.347, "step": 3660 }, { "epoch": 1.817474764189972, "grad_norm": 0.3941013216972351, "learning_rate": 4.0311018975083644e-06, "loss": 0.3584, "step": 3661 }, { "epoch": 1.8179712063544597, "grad_norm": 0.41731664538383484, "learning_rate": 4.028267735305544e-06, "loss": 0.3579, "step": 3662 }, { "epoch": 1.8184676485189475, "grad_norm": 0.3701015114784241, "learning_rate": 4.025433897537715e-06, "loss": 0.3329, "step": 3663 }, { "epoch": 1.8189640906834352, "grad_norm": 0.42086073756217957, "learning_rate": 4.022600385151022e-06, "loss": 0.3741, "step": 3664 }, { "epoch": 1.8194605328479232, "grad_norm": 0.44228607416152954, "learning_rate": 4.019767199091494e-06, "loss": 0.3865, "step": 3665 }, { "epoch": 1.8199569750124112, "grad_norm": 0.46301931142807007, "learning_rate": 4.016934340305059e-06, "loss": 0.3553, "step": 3666 }, { "epoch": 1.820453417176899, "grad_norm": 0.38023656606674194, "learning_rate": 4.01410180973753e-06, "loss": 0.3678, "step": 3667 }, { "epoch": 1.8209498593413866, "grad_norm": 0.37920647859573364, "learning_rate": 4.01126960833461e-06, "loss": 0.33, "step": 3668 }, { "epoch": 1.8214463015058746, "grad_norm": 0.429909348487854, "learning_rate": 4.008437737041895e-06, "loss": 0.3965, "step": 3669 }, { "epoch": 1.8219427436703624, "grad_norm": 0.40733641386032104, "learning_rate": 4.005606196804872e-06, "loss": 0.3244, "step": 3670 }, { "epoch": 1.8224391858348503, "grad_norm": 0.46461957693099976, "learning_rate": 4.0027749885689126e-06, "loss": 0.3653, "step": 3671 }, { "epoch": 1.822935627999338, "grad_norm": 0.3841169774532318, "learning_rate": 3.999944113279283e-06, "loss": 0.3465, "step": 3672 }, { "epoch": 1.8234320701638258, "grad_norm": 0.42150428891181946, "learning_rate": 3.9971135718811315e-06, "loss": 0.381, "step": 3673 }, { "epoch": 1.8239285123283138, "grad_norm": 0.42088937759399414, "learning_rate": 3.994283365319503e-06, "loss": 0.3736, "step": 3674 }, { "epoch": 1.8244249544928017, "grad_norm": 0.41163375973701477, "learning_rate": 3.991453494539326e-06, "loss": 0.3599, "step": 3675 }, { "epoch": 1.8249213966572895, "grad_norm": 0.47824206948280334, "learning_rate": 3.988623960485414e-06, "loss": 0.356, "step": 3676 }, { "epoch": 1.8254178388217772, "grad_norm": 0.42388179898262024, "learning_rate": 3.985794764102475e-06, "loss": 0.3209, "step": 3677 }, { "epoch": 1.825914280986265, "grad_norm": 0.42294660210609436, "learning_rate": 3.9829659063351e-06, "loss": 0.3535, "step": 3678 }, { "epoch": 1.826410723150753, "grad_norm": 0.4530671238899231, "learning_rate": 3.980137388127768e-06, "loss": 0.3539, "step": 3679 }, { "epoch": 1.8269071653152409, "grad_norm": 0.4087166488170624, "learning_rate": 3.977309210424841e-06, "loss": 0.3501, "step": 3680 }, { "epoch": 1.8274036074797286, "grad_norm": 0.3949061632156372, "learning_rate": 3.9744813741705766e-06, "loss": 0.3301, "step": 3681 }, { "epoch": 1.8279000496442164, "grad_norm": 0.4570569097995758, "learning_rate": 3.971653880309109e-06, "loss": 0.3594, "step": 3682 }, { "epoch": 1.8283964918087043, "grad_norm": 0.4067571461200714, "learning_rate": 3.968826729784462e-06, "loss": 0.3378, "step": 3683 }, { "epoch": 1.828892933973192, "grad_norm": 0.3863314688205719, "learning_rate": 3.965999923540541e-06, "loss": 0.3252, "step": 3684 }, { "epoch": 1.82938937613768, "grad_norm": 0.43870171904563904, "learning_rate": 3.963173462521146e-06, "loss": 0.3875, "step": 3685 }, { "epoch": 1.8298858183021678, "grad_norm": 0.4463542699813843, "learning_rate": 3.960347347669951e-06, "loss": 0.3584, "step": 3686 }, { "epoch": 1.8303822604666555, "grad_norm": 0.45017874240875244, "learning_rate": 3.957521579930522e-06, "loss": 0.4037, "step": 3687 }, { "epoch": 1.8308787026311435, "grad_norm": 0.37489837408065796, "learning_rate": 3.954696160246302e-06, "loss": 0.3494, "step": 3688 }, { "epoch": 1.8313751447956315, "grad_norm": 0.42770224809646606, "learning_rate": 3.951871089560626e-06, "loss": 0.359, "step": 3689 }, { "epoch": 1.8318715869601192, "grad_norm": 0.4543986916542053, "learning_rate": 3.949046368816708e-06, "loss": 0.364, "step": 3690 }, { "epoch": 1.832368029124607, "grad_norm": 0.41941139101982117, "learning_rate": 3.94622199895764e-06, "loss": 0.3819, "step": 3691 }, { "epoch": 1.8328644712890947, "grad_norm": 0.42580652236938477, "learning_rate": 3.94339798092641e-06, "loss": 0.3558, "step": 3692 }, { "epoch": 1.8333609134535827, "grad_norm": 0.3484592139720917, "learning_rate": 3.940574315665877e-06, "loss": 0.3243, "step": 3693 }, { "epoch": 1.8338573556180706, "grad_norm": 0.4323218762874603, "learning_rate": 3.937751004118786e-06, "loss": 0.3746, "step": 3694 }, { "epoch": 1.8343537977825584, "grad_norm": 0.4081248641014099, "learning_rate": 3.934928047227764e-06, "loss": 0.3557, "step": 3695 }, { "epoch": 1.834850239947046, "grad_norm": 0.4271438717842102, "learning_rate": 3.932105445935319e-06, "loss": 0.3451, "step": 3696 }, { "epoch": 1.8353466821115338, "grad_norm": 0.4509774446487427, "learning_rate": 3.929283201183844e-06, "loss": 0.3845, "step": 3697 }, { "epoch": 1.8358431242760218, "grad_norm": 0.408656507730484, "learning_rate": 3.926461313915607e-06, "loss": 0.3641, "step": 3698 }, { "epoch": 1.8363395664405098, "grad_norm": 0.4115462601184845, "learning_rate": 3.923639785072759e-06, "loss": 0.3763, "step": 3699 }, { "epoch": 1.8368360086049975, "grad_norm": 0.3784303069114685, "learning_rate": 3.920818615597334e-06, "loss": 0.3433, "step": 3700 }, { "epoch": 1.8373324507694853, "grad_norm": 0.3981471359729767, "learning_rate": 3.9179978064312426e-06, "loss": 0.3159, "step": 3701 }, { "epoch": 1.8378288929339732, "grad_norm": 0.44000232219696045, "learning_rate": 3.915177358516276e-06, "loss": 0.3812, "step": 3702 }, { "epoch": 1.8383253350984612, "grad_norm": 0.4098576307296753, "learning_rate": 3.912357272794105e-06, "loss": 0.3568, "step": 3703 }, { "epoch": 1.838821777262949, "grad_norm": 0.3623220920562744, "learning_rate": 3.909537550206281e-06, "loss": 0.2983, "step": 3704 }, { "epoch": 1.8393182194274367, "grad_norm": 0.402554988861084, "learning_rate": 3.906718191694232e-06, "loss": 0.3555, "step": 3705 }, { "epoch": 1.8398146615919244, "grad_norm": 0.42059800028800964, "learning_rate": 3.903899198199264e-06, "loss": 0.362, "step": 3706 }, { "epoch": 1.8403111037564124, "grad_norm": 0.40331193804740906, "learning_rate": 3.901080570662565e-06, "loss": 0.3747, "step": 3707 }, { "epoch": 1.8408075459209003, "grad_norm": 0.38816016912460327, "learning_rate": 3.898262310025196e-06, "loss": 0.3613, "step": 3708 }, { "epoch": 1.841303988085388, "grad_norm": 0.4042571187019348, "learning_rate": 3.895444417228097e-06, "loss": 0.3848, "step": 3709 }, { "epoch": 1.8418004302498758, "grad_norm": 0.36610591411590576, "learning_rate": 3.892626893212088e-06, "loss": 0.2809, "step": 3710 }, { "epoch": 1.8422968724143636, "grad_norm": 0.4027213454246521, "learning_rate": 3.889809738917862e-06, "loss": 0.4228, "step": 3711 }, { "epoch": 1.8427933145788515, "grad_norm": 0.3677351474761963, "learning_rate": 3.8869929552859915e-06, "loss": 0.3474, "step": 3712 }, { "epoch": 1.8432897567433395, "grad_norm": 0.3754594027996063, "learning_rate": 3.884176543256924e-06, "loss": 0.3268, "step": 3713 }, { "epoch": 1.8437861989078272, "grad_norm": 0.4218599796295166, "learning_rate": 3.88136050377098e-06, "loss": 0.3843, "step": 3714 }, { "epoch": 1.844282641072315, "grad_norm": 0.39397311210632324, "learning_rate": 3.878544837768362e-06, "loss": 0.3486, "step": 3715 }, { "epoch": 1.844779083236803, "grad_norm": 0.3827308118343353, "learning_rate": 3.875729546189144e-06, "loss": 0.3719, "step": 3716 }, { "epoch": 1.8452755254012907, "grad_norm": 0.3930080533027649, "learning_rate": 3.872914629973273e-06, "loss": 0.381, "step": 3717 }, { "epoch": 1.8457719675657787, "grad_norm": 0.3885324001312256, "learning_rate": 3.870100090060577e-06, "loss": 0.3393, "step": 3718 }, { "epoch": 1.8462684097302664, "grad_norm": 0.3929230570793152, "learning_rate": 3.8672859273907495e-06, "loss": 0.3833, "step": 3719 }, { "epoch": 1.8467648518947541, "grad_norm": 0.42035284638404846, "learning_rate": 3.864472142903367e-06, "loss": 0.4073, "step": 3720 }, { "epoch": 1.847261294059242, "grad_norm": 0.4243932068347931, "learning_rate": 3.861658737537872e-06, "loss": 0.3818, "step": 3721 }, { "epoch": 1.84775773622373, "grad_norm": 0.4314197897911072, "learning_rate": 3.858845712233588e-06, "loss": 0.3223, "step": 3722 }, { "epoch": 1.8482541783882178, "grad_norm": 0.4450163245201111, "learning_rate": 3.8560330679297065e-06, "loss": 0.3498, "step": 3723 }, { "epoch": 1.8487506205527056, "grad_norm": 0.3768727481365204, "learning_rate": 3.853220805565292e-06, "loss": 0.3085, "step": 3724 }, { "epoch": 1.8492470627171933, "grad_norm": 0.49368715286254883, "learning_rate": 3.850408926079281e-06, "loss": 0.4001, "step": 3725 }, { "epoch": 1.8497435048816813, "grad_norm": 0.400199830532074, "learning_rate": 3.847597430410486e-06, "loss": 0.331, "step": 3726 }, { "epoch": 1.8502399470461692, "grad_norm": 0.4567776024341583, "learning_rate": 3.844786319497589e-06, "loss": 0.3591, "step": 3727 }, { "epoch": 1.850736389210657, "grad_norm": 0.4144534170627594, "learning_rate": 3.841975594279144e-06, "loss": 0.3999, "step": 3728 }, { "epoch": 1.8512328313751447, "grad_norm": 0.4840797781944275, "learning_rate": 3.839165255693571e-06, "loss": 0.3682, "step": 3729 }, { "epoch": 1.8517292735396327, "grad_norm": 0.43588608503341675, "learning_rate": 3.836355304679173e-06, "loss": 0.3422, "step": 3730 }, { "epoch": 1.8522257157041204, "grad_norm": 0.444418340921402, "learning_rate": 3.833545742174113e-06, "loss": 0.3137, "step": 3731 }, { "epoch": 1.8527221578686084, "grad_norm": 0.41212958097457886, "learning_rate": 3.830736569116423e-06, "loss": 0.3449, "step": 3732 }, { "epoch": 1.8532186000330961, "grad_norm": 0.46278128027915955, "learning_rate": 3.827927786444018e-06, "loss": 0.3329, "step": 3733 }, { "epoch": 1.8537150421975839, "grad_norm": 0.4767743945121765, "learning_rate": 3.825119395094668e-06, "loss": 0.3583, "step": 3734 }, { "epoch": 1.8542114843620718, "grad_norm": 0.49137383699417114, "learning_rate": 3.822311396006022e-06, "loss": 0.3794, "step": 3735 }, { "epoch": 1.8547079265265598, "grad_norm": 0.41783052682876587, "learning_rate": 3.81950379011559e-06, "loss": 0.3199, "step": 3736 }, { "epoch": 1.8552043686910475, "grad_norm": 0.42172345519065857, "learning_rate": 3.816696578360761e-06, "loss": 0.3802, "step": 3737 }, { "epoch": 1.8557008108555353, "grad_norm": 0.5185666680335999, "learning_rate": 3.8138897616787847e-06, "loss": 0.4232, "step": 3738 }, { "epoch": 1.856197253020023, "grad_norm": 0.3962355852127075, "learning_rate": 3.8110833410067795e-06, "loss": 0.3248, "step": 3739 }, { "epoch": 1.856693695184511, "grad_norm": 0.39393356442451477, "learning_rate": 3.808277317281732e-06, "loss": 0.385, "step": 3740 }, { "epoch": 1.857190137348999, "grad_norm": 0.40017279982566833, "learning_rate": 3.805471691440501e-06, "loss": 0.3908, "step": 3741 }, { "epoch": 1.8576865795134867, "grad_norm": 0.379621684551239, "learning_rate": 3.802666464419806e-06, "loss": 0.3257, "step": 3742 }, { "epoch": 1.8581830216779744, "grad_norm": 0.4181431233882904, "learning_rate": 3.7998616371562377e-06, "loss": 0.3156, "step": 3743 }, { "epoch": 1.8586794638424622, "grad_norm": 0.41362565755844116, "learning_rate": 3.797057210586248e-06, "loss": 0.3666, "step": 3744 }, { "epoch": 1.8591759060069502, "grad_norm": 0.4295709431171417, "learning_rate": 3.7942531856461643e-06, "loss": 0.3929, "step": 3745 }, { "epoch": 1.8596723481714381, "grad_norm": 0.44645804166793823, "learning_rate": 3.7914495632721713e-06, "loss": 0.3765, "step": 3746 }, { "epoch": 1.8601687903359259, "grad_norm": 0.3906530439853668, "learning_rate": 3.788646344400321e-06, "loss": 0.2999, "step": 3747 }, { "epoch": 1.8606652325004136, "grad_norm": 0.41080984473228455, "learning_rate": 3.7858435299665354e-06, "loss": 0.3274, "step": 3748 }, { "epoch": 1.8611616746649016, "grad_norm": 0.5203538537025452, "learning_rate": 3.783041120906596e-06, "loss": 0.3715, "step": 3749 }, { "epoch": 1.8616581168293895, "grad_norm": 0.41548776626586914, "learning_rate": 3.7802391181561497e-06, "loss": 0.343, "step": 3750 }, { "epoch": 1.8621545589938773, "grad_norm": 0.48938268423080444, "learning_rate": 3.7774375226507106e-06, "loss": 0.3807, "step": 3751 }, { "epoch": 1.862651001158365, "grad_norm": 0.4407116174697876, "learning_rate": 3.7746363353256567e-06, "loss": 0.3773, "step": 3752 }, { "epoch": 1.8631474433228528, "grad_norm": 0.4703051745891571, "learning_rate": 3.7718355571162266e-06, "loss": 0.3645, "step": 3753 }, { "epoch": 1.8636438854873407, "grad_norm": 0.436048299074173, "learning_rate": 3.769035188957525e-06, "loss": 0.3468, "step": 3754 }, { "epoch": 1.8641403276518287, "grad_norm": 0.46135708689689636, "learning_rate": 3.766235231784515e-06, "loss": 0.3852, "step": 3755 }, { "epoch": 1.8646367698163164, "grad_norm": 0.41353848576545715, "learning_rate": 3.7634356865320327e-06, "loss": 0.3389, "step": 3756 }, { "epoch": 1.8651332119808042, "grad_norm": 0.4289407432079315, "learning_rate": 3.760636554134765e-06, "loss": 0.3758, "step": 3757 }, { "epoch": 1.865629654145292, "grad_norm": 0.40735286474227905, "learning_rate": 3.757837835527268e-06, "loss": 0.3385, "step": 3758 }, { "epoch": 1.8661260963097799, "grad_norm": 0.4384831190109253, "learning_rate": 3.7550395316439568e-06, "loss": 0.3408, "step": 3759 }, { "epoch": 1.8666225384742678, "grad_norm": 0.46599748730659485, "learning_rate": 3.7522416434191117e-06, "loss": 0.3773, "step": 3760 }, { "epoch": 1.8671189806387556, "grad_norm": 0.38384348154067993, "learning_rate": 3.7494441717868698e-06, "loss": 0.3364, "step": 3761 }, { "epoch": 1.8676154228032433, "grad_norm": 0.46559086441993713, "learning_rate": 3.746647117681228e-06, "loss": 0.3504, "step": 3762 }, { "epoch": 1.8681118649677313, "grad_norm": 0.4454149603843689, "learning_rate": 3.7438504820360523e-06, "loss": 0.3645, "step": 3763 }, { "epoch": 1.8686083071322193, "grad_norm": 0.39693742990493774, "learning_rate": 3.741054265785059e-06, "loss": 0.376, "step": 3764 }, { "epoch": 1.869104749296707, "grad_norm": 0.3691561818122864, "learning_rate": 3.738258469861831e-06, "loss": 0.326, "step": 3765 }, { "epoch": 1.8696011914611947, "grad_norm": 0.40285664796829224, "learning_rate": 3.7354630951998063e-06, "loss": 0.3881, "step": 3766 }, { "epoch": 1.8700976336256825, "grad_norm": 0.36452382802963257, "learning_rate": 3.732668142732286e-06, "loss": 0.3225, "step": 3767 }, { "epoch": 1.8705940757901705, "grad_norm": 0.46272727847099304, "learning_rate": 3.7298736133924295e-06, "loss": 0.3471, "step": 3768 }, { "epoch": 1.8710905179546584, "grad_norm": 0.47204190492630005, "learning_rate": 3.727079508113254e-06, "loss": 0.3796, "step": 3769 }, { "epoch": 1.8715869601191462, "grad_norm": 0.4030991494655609, "learning_rate": 3.724285827827633e-06, "loss": 0.3298, "step": 3770 }, { "epoch": 1.872083402283634, "grad_norm": 0.40564775466918945, "learning_rate": 3.721492573468303e-06, "loss": 0.4131, "step": 3771 }, { "epoch": 1.8725798444481216, "grad_norm": 0.37550219893455505, "learning_rate": 3.7186997459678553e-06, "loss": 0.3278, "step": 3772 }, { "epoch": 1.8730762866126096, "grad_norm": 0.4050162732601166, "learning_rate": 3.715907346258737e-06, "loss": 0.3762, "step": 3773 }, { "epoch": 1.8735727287770976, "grad_norm": 0.42306581139564514, "learning_rate": 3.7131153752732563e-06, "loss": 0.3866, "step": 3774 }, { "epoch": 1.8740691709415853, "grad_norm": 0.4486246407032013, "learning_rate": 3.7103238339435776e-06, "loss": 0.4146, "step": 3775 }, { "epoch": 1.874565613106073, "grad_norm": 0.42517557740211487, "learning_rate": 3.7075327232017195e-06, "loss": 0.3526, "step": 3776 }, { "epoch": 1.875062055270561, "grad_norm": 0.3755309283733368, "learning_rate": 3.7047420439795555e-06, "loss": 0.3425, "step": 3777 }, { "epoch": 1.8755584974350488, "grad_norm": 0.4013522267341614, "learning_rate": 3.701951797208822e-06, "loss": 0.3047, "step": 3778 }, { "epoch": 1.8760549395995367, "grad_norm": 0.4139203131198883, "learning_rate": 3.6991619838211048e-06, "loss": 0.3621, "step": 3779 }, { "epoch": 1.8765513817640245, "grad_norm": 0.39541733264923096, "learning_rate": 3.696372604747845e-06, "loss": 0.3312, "step": 3780 }, { "epoch": 1.8770478239285122, "grad_norm": 0.4006558358669281, "learning_rate": 3.6935836609203412e-06, "loss": 0.3189, "step": 3781 }, { "epoch": 1.8775442660930002, "grad_norm": 0.46403422951698303, "learning_rate": 3.6907951532697474e-06, "loss": 0.3861, "step": 3782 }, { "epoch": 1.8780407082574881, "grad_norm": 0.4763590097427368, "learning_rate": 3.688007082727071e-06, "loss": 0.4154, "step": 3783 }, { "epoch": 1.8785371504219759, "grad_norm": 0.40079817175865173, "learning_rate": 3.6852194502231707e-06, "loss": 0.3278, "step": 3784 }, { "epoch": 1.8790335925864636, "grad_norm": 0.5011786818504333, "learning_rate": 3.682432256688761e-06, "loss": 0.3953, "step": 3785 }, { "epoch": 1.8795300347509514, "grad_norm": 0.3907761573791504, "learning_rate": 3.6796455030544133e-06, "loss": 0.3314, "step": 3786 }, { "epoch": 1.8800264769154393, "grad_norm": 0.4490252733230591, "learning_rate": 3.6768591902505467e-06, "loss": 0.3389, "step": 3787 }, { "epoch": 1.8805229190799273, "grad_norm": 0.3935225307941437, "learning_rate": 3.674073319207433e-06, "loss": 0.3622, "step": 3788 }, { "epoch": 1.881019361244415, "grad_norm": 0.3636181950569153, "learning_rate": 3.671287890855204e-06, "loss": 0.3505, "step": 3789 }, { "epoch": 1.8815158034089028, "grad_norm": 0.42891207337379456, "learning_rate": 3.6685029061238344e-06, "loss": 0.4075, "step": 3790 }, { "epoch": 1.8820122455733908, "grad_norm": 0.38206037878990173, "learning_rate": 3.665718365943158e-06, "loss": 0.3184, "step": 3791 }, { "epoch": 1.8825086877378785, "grad_norm": 0.46474823355674744, "learning_rate": 3.662934271242853e-06, "loss": 0.3771, "step": 3792 }, { "epoch": 1.8830051299023665, "grad_norm": 0.43936222791671753, "learning_rate": 3.6601506229524576e-06, "loss": 0.3554, "step": 3793 }, { "epoch": 1.8835015720668542, "grad_norm": 0.42463546991348267, "learning_rate": 3.6573674220013532e-06, "loss": 0.3405, "step": 3794 }, { "epoch": 1.883998014231342, "grad_norm": 0.40428218245506287, "learning_rate": 3.654584669318777e-06, "loss": 0.3302, "step": 3795 }, { "epoch": 1.88449445639583, "grad_norm": 0.39962905645370483, "learning_rate": 3.6518023658338107e-06, "loss": 0.3618, "step": 3796 }, { "epoch": 1.8849908985603179, "grad_norm": 0.4546661078929901, "learning_rate": 3.6490205124753947e-06, "loss": 0.3861, "step": 3797 }, { "epoch": 1.8854873407248056, "grad_norm": 0.39910995960235596, "learning_rate": 3.646239110172311e-06, "loss": 0.3568, "step": 3798 }, { "epoch": 1.8859837828892934, "grad_norm": 0.39987674355506897, "learning_rate": 3.6434581598531937e-06, "loss": 0.3894, "step": 3799 }, { "epoch": 1.886480225053781, "grad_norm": 0.39167076349258423, "learning_rate": 3.640677662446531e-06, "loss": 0.3614, "step": 3800 }, { "epoch": 1.886976667218269, "grad_norm": 0.40267062187194824, "learning_rate": 3.6378976188806525e-06, "loss": 0.3684, "step": 3801 }, { "epoch": 1.887473109382757, "grad_norm": 0.3688230514526367, "learning_rate": 3.6351180300837386e-06, "loss": 0.3587, "step": 3802 }, { "epoch": 1.8879695515472448, "grad_norm": 0.4090246558189392, "learning_rate": 3.632338896983817e-06, "loss": 0.367, "step": 3803 }, { "epoch": 1.8884659937117325, "grad_norm": 0.4321516454219818, "learning_rate": 3.6295602205087687e-06, "loss": 0.3397, "step": 3804 }, { "epoch": 1.8889624358762203, "grad_norm": 0.4954165816307068, "learning_rate": 3.6267820015863153e-06, "loss": 0.4135, "step": 3805 }, { "epoch": 1.8894588780407082, "grad_norm": 0.38898757100105286, "learning_rate": 3.624004241144031e-06, "loss": 0.3616, "step": 3806 }, { "epoch": 1.8899553202051962, "grad_norm": 0.36894524097442627, "learning_rate": 3.621226940109331e-06, "loss": 0.3541, "step": 3807 }, { "epoch": 1.890451762369684, "grad_norm": 0.43413183093070984, "learning_rate": 3.618450099409484e-06, "loss": 0.3683, "step": 3808 }, { "epoch": 1.8909482045341717, "grad_norm": 0.43377965688705444, "learning_rate": 3.6156737199716014e-06, "loss": 0.3476, "step": 3809 }, { "epoch": 1.8914446466986596, "grad_norm": 0.409308522939682, "learning_rate": 3.612897802722639e-06, "loss": 0.3671, "step": 3810 }, { "epoch": 1.8919410888631476, "grad_norm": 0.39241307973861694, "learning_rate": 3.6101223485893995e-06, "loss": 0.3467, "step": 3811 }, { "epoch": 1.8924375310276353, "grad_norm": 0.4025750160217285, "learning_rate": 3.6073473584985346e-06, "loss": 0.3863, "step": 3812 }, { "epoch": 1.892933973192123, "grad_norm": 0.45454296469688416, "learning_rate": 3.6045728333765356e-06, "loss": 0.3663, "step": 3813 }, { "epoch": 1.8934304153566108, "grad_norm": 0.40694907307624817, "learning_rate": 3.601798774149742e-06, "loss": 0.3633, "step": 3814 }, { "epoch": 1.8939268575210988, "grad_norm": 0.43182435631752014, "learning_rate": 3.5990251817443365e-06, "loss": 0.3315, "step": 3815 }, { "epoch": 1.8944232996855868, "grad_norm": 0.40815678238868713, "learning_rate": 3.596252057086348e-06, "loss": 0.332, "step": 3816 }, { "epoch": 1.8949197418500745, "grad_norm": 0.40779274702072144, "learning_rate": 3.593479401101645e-06, "loss": 0.3648, "step": 3817 }, { "epoch": 1.8954161840145622, "grad_norm": 0.4551762044429779, "learning_rate": 3.590707214715942e-06, "loss": 0.3698, "step": 3818 }, { "epoch": 1.89591262617905, "grad_norm": 0.4099257290363312, "learning_rate": 3.5879354988547988e-06, "loss": 0.3691, "step": 3819 }, { "epoch": 1.896409068343538, "grad_norm": 0.3563299775123596, "learning_rate": 3.585164254443615e-06, "loss": 0.3237, "step": 3820 }, { "epoch": 1.896905510508026, "grad_norm": 0.4181298315525055, "learning_rate": 3.582393482407632e-06, "loss": 0.3569, "step": 3821 }, { "epoch": 1.8974019526725137, "grad_norm": 0.3937072157859802, "learning_rate": 3.5796231836719363e-06, "loss": 0.3632, "step": 3822 }, { "epoch": 1.8978983948370014, "grad_norm": 0.44624850153923035, "learning_rate": 3.5768533591614575e-06, "loss": 0.4051, "step": 3823 }, { "epoch": 1.8983948370014894, "grad_norm": 0.4275226294994354, "learning_rate": 3.5740840098009634e-06, "loss": 0.359, "step": 3824 }, { "epoch": 1.898891279165977, "grad_norm": 0.37934622168540955, "learning_rate": 3.5713151365150645e-06, "loss": 0.3008, "step": 3825 }, { "epoch": 1.899387721330465, "grad_norm": 0.399919331073761, "learning_rate": 3.5685467402282093e-06, "loss": 0.3073, "step": 3826 }, { "epoch": 1.8998841634949528, "grad_norm": 0.44184207916259766, "learning_rate": 3.565778821864695e-06, "loss": 0.4506, "step": 3827 }, { "epoch": 1.9003806056594406, "grad_norm": 0.3700285851955414, "learning_rate": 3.563011382348651e-06, "loss": 0.3283, "step": 3828 }, { "epoch": 1.9008770478239285, "grad_norm": 0.4238802194595337, "learning_rate": 3.560244422604052e-06, "loss": 0.4374, "step": 3829 }, { "epoch": 1.9013734899884165, "grad_norm": 0.3842369616031647, "learning_rate": 3.557477943554709e-06, "loss": 0.3271, "step": 3830 }, { "epoch": 1.9018699321529042, "grad_norm": 0.3801615536212921, "learning_rate": 3.5547119461242766e-06, "loss": 0.3293, "step": 3831 }, { "epoch": 1.902366374317392, "grad_norm": 0.3840852677822113, "learning_rate": 3.551946431236245e-06, "loss": 0.3932, "step": 3832 }, { "epoch": 1.9028628164818797, "grad_norm": 0.3841366469860077, "learning_rate": 3.5491813998139413e-06, "loss": 0.3579, "step": 3833 }, { "epoch": 1.9033592586463677, "grad_norm": 0.41289812326431274, "learning_rate": 3.5464168527805398e-06, "loss": 0.3461, "step": 3834 }, { "epoch": 1.9038557008108556, "grad_norm": 0.4528470039367676, "learning_rate": 3.5436527910590446e-06, "loss": 0.3411, "step": 3835 }, { "epoch": 1.9043521429753434, "grad_norm": 0.4060024321079254, "learning_rate": 3.5408892155723e-06, "loss": 0.3395, "step": 3836 }, { "epoch": 1.9048485851398311, "grad_norm": 0.38938674330711365, "learning_rate": 3.53812612724299e-06, "loss": 0.3028, "step": 3837 }, { "epoch": 1.905345027304319, "grad_norm": 0.43814241886138916, "learning_rate": 3.535363526993635e-06, "loss": 0.4426, "step": 3838 }, { "epoch": 1.9058414694688068, "grad_norm": 0.37660571932792664, "learning_rate": 3.5326014157465922e-06, "loss": 0.3588, "step": 3839 }, { "epoch": 1.9063379116332948, "grad_norm": 0.40397265553474426, "learning_rate": 3.5298397944240524e-06, "loss": 0.3927, "step": 3840 }, { "epoch": 1.9068343537977825, "grad_norm": 0.3628499209880829, "learning_rate": 3.5270786639480512e-06, "loss": 0.3565, "step": 3841 }, { "epoch": 1.9073307959622703, "grad_norm": 0.3642476797103882, "learning_rate": 3.524318025240453e-06, "loss": 0.3496, "step": 3842 }, { "epoch": 1.9078272381267583, "grad_norm": 0.4308866858482361, "learning_rate": 3.5215578792229586e-06, "loss": 0.4131, "step": 3843 }, { "epoch": 1.9083236802912462, "grad_norm": 0.36139360070228577, "learning_rate": 3.518798226817105e-06, "loss": 0.3425, "step": 3844 }, { "epoch": 1.908820122455734, "grad_norm": 0.4188520908355713, "learning_rate": 3.516039068944267e-06, "loss": 0.3911, "step": 3845 }, { "epoch": 1.9093165646202217, "grad_norm": 0.40044525265693665, "learning_rate": 3.513280406525653e-06, "loss": 0.3604, "step": 3846 }, { "epoch": 1.9098130067847094, "grad_norm": 0.3975870609283447, "learning_rate": 3.510522240482305e-06, "loss": 0.3222, "step": 3847 }, { "epoch": 1.9103094489491974, "grad_norm": 0.4060112237930298, "learning_rate": 3.507764571735097e-06, "loss": 0.382, "step": 3848 }, { "epoch": 1.9108058911136854, "grad_norm": 0.42826804518699646, "learning_rate": 3.5050074012047443e-06, "loss": 0.3567, "step": 3849 }, { "epoch": 1.9113023332781731, "grad_norm": 0.44441843032836914, "learning_rate": 3.5022507298117873e-06, "loss": 0.3643, "step": 3850 }, { "epoch": 1.9117987754426609, "grad_norm": 0.44076868891716003, "learning_rate": 3.4994945584766048e-06, "loss": 0.3755, "step": 3851 }, { "epoch": 1.9122952176071486, "grad_norm": 0.42749685049057007, "learning_rate": 3.4967388881194083e-06, "loss": 0.3289, "step": 3852 }, { "epoch": 1.9127916597716366, "grad_norm": 0.41305476427078247, "learning_rate": 3.49398371966024e-06, "loss": 0.3737, "step": 3853 }, { "epoch": 1.9132881019361245, "grad_norm": 0.3943822383880615, "learning_rate": 3.4912290540189776e-06, "loss": 0.3554, "step": 3854 }, { "epoch": 1.9137845441006123, "grad_norm": 0.40059396624565125, "learning_rate": 3.4884748921153253e-06, "loss": 0.3531, "step": 3855 }, { "epoch": 1.9142809862651, "grad_norm": 0.4246278405189514, "learning_rate": 3.4857212348688285e-06, "loss": 0.4051, "step": 3856 }, { "epoch": 1.914777428429588, "grad_norm": 0.37170132994651794, "learning_rate": 3.4829680831988557e-06, "loss": 0.2914, "step": 3857 }, { "epoch": 1.915273870594076, "grad_norm": 0.44957807660102844, "learning_rate": 3.480215438024609e-06, "loss": 0.3627, "step": 3858 }, { "epoch": 1.9157703127585637, "grad_norm": 0.3946690559387207, "learning_rate": 3.4774633002651196e-06, "loss": 0.3503, "step": 3859 }, { "epoch": 1.9162667549230514, "grad_norm": 0.4512956440448761, "learning_rate": 3.4747116708392565e-06, "loss": 0.3664, "step": 3860 }, { "epoch": 1.9167631970875392, "grad_norm": 0.38141703605651855, "learning_rate": 3.4719605506657105e-06, "loss": 0.2759, "step": 3861 }, { "epoch": 1.9172596392520271, "grad_norm": 0.4123358428478241, "learning_rate": 3.4692099406630076e-06, "loss": 0.3235, "step": 3862 }, { "epoch": 1.917756081416515, "grad_norm": 0.4124756455421448, "learning_rate": 3.466459841749499e-06, "loss": 0.3716, "step": 3863 }, { "epoch": 1.9182525235810028, "grad_norm": 0.4352026879787445, "learning_rate": 3.463710254843372e-06, "loss": 0.3416, "step": 3864 }, { "epoch": 1.9187489657454906, "grad_norm": 0.42675265669822693, "learning_rate": 3.4609611808626363e-06, "loss": 0.3848, "step": 3865 }, { "epoch": 1.9192454079099783, "grad_norm": 0.4536471664905548, "learning_rate": 3.458212620725134e-06, "loss": 0.3666, "step": 3866 }, { "epoch": 1.9197418500744663, "grad_norm": 0.44038042426109314, "learning_rate": 3.4554645753485326e-06, "loss": 0.3752, "step": 3867 }, { "epoch": 1.9202382922389543, "grad_norm": 0.4278239905834198, "learning_rate": 3.452717045650332e-06, "loss": 0.3546, "step": 3868 }, { "epoch": 1.920734734403442, "grad_norm": 0.36406317353248596, "learning_rate": 3.449970032547858e-06, "loss": 0.3812, "step": 3869 }, { "epoch": 1.9212311765679297, "grad_norm": 0.40049856901168823, "learning_rate": 3.4472235369582603e-06, "loss": 0.412, "step": 3870 }, { "epoch": 1.9217276187324177, "grad_norm": 0.42951175570487976, "learning_rate": 3.4444775597985236e-06, "loss": 0.3087, "step": 3871 }, { "epoch": 1.9222240608969057, "grad_norm": 0.48859903216362, "learning_rate": 3.4417321019854533e-06, "loss": 0.3888, "step": 3872 }, { "epoch": 1.9227205030613934, "grad_norm": 0.42986801266670227, "learning_rate": 3.4389871644356825e-06, "loss": 0.3457, "step": 3873 }, { "epoch": 1.9232169452258812, "grad_norm": 0.41486939787864685, "learning_rate": 3.4362427480656703e-06, "loss": 0.3624, "step": 3874 }, { "epoch": 1.923713387390369, "grad_norm": 0.48225048184394836, "learning_rate": 3.4334988537917045e-06, "loss": 0.3667, "step": 3875 }, { "epoch": 1.9242098295548569, "grad_norm": 0.42270973324775696, "learning_rate": 3.430755482529896e-06, "loss": 0.3164, "step": 3876 }, { "epoch": 1.9247062717193448, "grad_norm": 0.4020833671092987, "learning_rate": 3.428012635196184e-06, "loss": 0.3267, "step": 3877 }, { "epoch": 1.9252027138838326, "grad_norm": 0.37187427282333374, "learning_rate": 3.425270312706326e-06, "loss": 0.2987, "step": 3878 }, { "epoch": 1.9256991560483203, "grad_norm": 0.43169957399368286, "learning_rate": 3.4225285159759137e-06, "loss": 0.3863, "step": 3879 }, { "epoch": 1.926195598212808, "grad_norm": 0.41039857268333435, "learning_rate": 3.419787245920357e-06, "loss": 0.4177, "step": 3880 }, { "epoch": 1.926692040377296, "grad_norm": 0.40728822350502014, "learning_rate": 3.4170465034548883e-06, "loss": 0.3831, "step": 3881 }, { "epoch": 1.927188482541784, "grad_norm": 0.42378154397010803, "learning_rate": 3.4143062894945727e-06, "loss": 0.3881, "step": 3882 }, { "epoch": 1.9276849247062717, "grad_norm": 0.36120498180389404, "learning_rate": 3.41156660495429e-06, "loss": 0.2806, "step": 3883 }, { "epoch": 1.9281813668707595, "grad_norm": 0.4416836202144623, "learning_rate": 3.4088274507487455e-06, "loss": 0.3533, "step": 3884 }, { "epoch": 1.9286778090352474, "grad_norm": 0.3939952850341797, "learning_rate": 3.4060888277924697e-06, "loss": 0.3273, "step": 3885 }, { "epoch": 1.9291742511997352, "grad_norm": 0.40063440799713135, "learning_rate": 3.4033507369998143e-06, "loss": 0.3727, "step": 3886 }, { "epoch": 1.9296706933642231, "grad_norm": 0.3502577543258667, "learning_rate": 3.400613179284954e-06, "loss": 0.3107, "step": 3887 }, { "epoch": 1.9301671355287109, "grad_norm": 0.38468533754348755, "learning_rate": 3.3978761555618845e-06, "loss": 0.3363, "step": 3888 }, { "epoch": 1.9306635776931986, "grad_norm": 0.4006808400154114, "learning_rate": 3.3951396667444213e-06, "loss": 0.377, "step": 3889 }, { "epoch": 1.9311600198576866, "grad_norm": 0.42839112877845764, "learning_rate": 3.3924037137462074e-06, "loss": 0.3481, "step": 3890 }, { "epoch": 1.9316564620221746, "grad_norm": 0.42079728841781616, "learning_rate": 3.389668297480702e-06, "loss": 0.3396, "step": 3891 }, { "epoch": 1.9321529041866623, "grad_norm": 0.4040222764015198, "learning_rate": 3.3869334188611848e-06, "loss": 0.324, "step": 3892 }, { "epoch": 1.93264934635115, "grad_norm": 0.40352529287338257, "learning_rate": 3.384199078800756e-06, "loss": 0.324, "step": 3893 }, { "epoch": 1.9331457885156378, "grad_norm": 0.43565070629119873, "learning_rate": 3.381465278212343e-06, "loss": 0.3929, "step": 3894 }, { "epoch": 1.9336422306801258, "grad_norm": 0.41519781947135925, "learning_rate": 3.3787320180086836e-06, "loss": 0.385, "step": 3895 }, { "epoch": 1.9341386728446137, "grad_norm": 0.40091145038604736, "learning_rate": 3.375999299102338e-06, "loss": 0.3706, "step": 3896 }, { "epoch": 1.9346351150091015, "grad_norm": 0.37497830390930176, "learning_rate": 3.373267122405691e-06, "loss": 0.3349, "step": 3897 }, { "epoch": 1.9351315571735892, "grad_norm": 0.411657452583313, "learning_rate": 3.3705354888309395e-06, "loss": 0.3338, "step": 3898 }, { "epoch": 1.9356279993380772, "grad_norm": 0.4051017463207245, "learning_rate": 3.3678043992901e-06, "loss": 0.3103, "step": 3899 }, { "epoch": 1.936124441502565, "grad_norm": 0.40559566020965576, "learning_rate": 3.3650738546950117e-06, "loss": 0.3733, "step": 3900 }, { "epoch": 1.9366208836670529, "grad_norm": 0.4873960614204407, "learning_rate": 3.3623438559573284e-06, "loss": 0.3809, "step": 3901 }, { "epoch": 1.9371173258315406, "grad_norm": 0.4049523174762726, "learning_rate": 3.3596144039885237e-06, "loss": 0.402, "step": 3902 }, { "epoch": 1.9376137679960284, "grad_norm": 0.46489906311035156, "learning_rate": 3.3568854996998864e-06, "loss": 0.3964, "step": 3903 }, { "epoch": 1.9381102101605163, "grad_norm": 0.42164668440818787, "learning_rate": 3.354157144002521e-06, "loss": 0.3086, "step": 3904 }, { "epoch": 1.9386066523250043, "grad_norm": 0.44436556100845337, "learning_rate": 3.351429337807356e-06, "loss": 0.3722, "step": 3905 }, { "epoch": 1.939103094489492, "grad_norm": 0.3642694652080536, "learning_rate": 3.3487020820251293e-06, "loss": 0.3242, "step": 3906 }, { "epoch": 1.9395995366539798, "grad_norm": 0.3795354664325714, "learning_rate": 3.3459753775663963e-06, "loss": 0.347, "step": 3907 }, { "epoch": 1.9400959788184675, "grad_norm": 0.39360329508781433, "learning_rate": 3.343249225341531e-06, "loss": 0.3703, "step": 3908 }, { "epoch": 1.9405924209829555, "grad_norm": 0.38334187865257263, "learning_rate": 3.3405236262607214e-06, "loss": 0.3399, "step": 3909 }, { "epoch": 1.9410888631474434, "grad_norm": 0.3832605481147766, "learning_rate": 3.337798581233972e-06, "loss": 0.3013, "step": 3910 }, { "epoch": 1.9415853053119312, "grad_norm": 0.39012446999549866, "learning_rate": 3.3350740911710987e-06, "loss": 0.3704, "step": 3911 }, { "epoch": 1.942081747476419, "grad_norm": 0.42451390624046326, "learning_rate": 3.3323501569817375e-06, "loss": 0.364, "step": 3912 }, { "epoch": 1.9425781896409067, "grad_norm": 0.44637414813041687, "learning_rate": 3.3296267795753345e-06, "loss": 0.3389, "step": 3913 }, { "epoch": 1.9430746318053946, "grad_norm": 0.4305492341518402, "learning_rate": 3.3269039598611525e-06, "loss": 0.3623, "step": 3914 }, { "epoch": 1.9435710739698826, "grad_norm": 0.4097904860973358, "learning_rate": 3.324181698748263e-06, "loss": 0.4047, "step": 3915 }, { "epoch": 1.9440675161343703, "grad_norm": 0.36908411979675293, "learning_rate": 3.3214599971455596e-06, "loss": 0.334, "step": 3916 }, { "epoch": 1.944563958298858, "grad_norm": 0.38479065895080566, "learning_rate": 3.3187388559617438e-06, "loss": 0.3662, "step": 3917 }, { "epoch": 1.945060400463346, "grad_norm": 0.3929920792579651, "learning_rate": 3.3160182761053306e-06, "loss": 0.3454, "step": 3918 }, { "epoch": 1.945556842627834, "grad_norm": 0.4002162218093872, "learning_rate": 3.3132982584846442e-06, "loss": 0.3845, "step": 3919 }, { "epoch": 1.9460532847923218, "grad_norm": 0.37259817123413086, "learning_rate": 3.310578804007829e-06, "loss": 0.3255, "step": 3920 }, { "epoch": 1.9465497269568095, "grad_norm": 0.4816770553588867, "learning_rate": 3.307859913582836e-06, "loss": 0.3899, "step": 3921 }, { "epoch": 1.9470461691212972, "grad_norm": 0.4355013966560364, "learning_rate": 3.3051415881174263e-06, "loss": 0.3353, "step": 3922 }, { "epoch": 1.9475426112857852, "grad_norm": 0.39281031489372253, "learning_rate": 3.3024238285191774e-06, "loss": 0.3404, "step": 3923 }, { "epoch": 1.9480390534502732, "grad_norm": 0.4069069027900696, "learning_rate": 3.299706635695474e-06, "loss": 0.3218, "step": 3924 }, { "epoch": 1.948535495614761, "grad_norm": 0.39839112758636475, "learning_rate": 3.2969900105535148e-06, "loss": 0.3556, "step": 3925 }, { "epoch": 1.9490319377792487, "grad_norm": 0.43260854482650757, "learning_rate": 3.2942739540003034e-06, "loss": 0.3661, "step": 3926 }, { "epoch": 1.9495283799437364, "grad_norm": 0.35011082887649536, "learning_rate": 3.2915584669426624e-06, "loss": 0.2929, "step": 3927 }, { "epoch": 1.9500248221082244, "grad_norm": 0.3760402202606201, "learning_rate": 3.288843550287216e-06, "loss": 0.3426, "step": 3928 }, { "epoch": 1.9505212642727123, "grad_norm": 0.38517695665359497, "learning_rate": 3.2861292049404016e-06, "loss": 0.3444, "step": 3929 }, { "epoch": 1.9510177064372, "grad_norm": 0.4205247759819031, "learning_rate": 3.2834154318084632e-06, "loss": 0.3846, "step": 3930 }, { "epoch": 1.9515141486016878, "grad_norm": 0.39839819073677063, "learning_rate": 3.2807022317974594e-06, "loss": 0.3503, "step": 3931 }, { "epoch": 1.9520105907661758, "grad_norm": 0.41189882159233093, "learning_rate": 3.277989605813252e-06, "loss": 0.3682, "step": 3932 }, { "epoch": 1.9525070329306637, "grad_norm": 0.38919657468795776, "learning_rate": 3.2752775547615147e-06, "loss": 0.3707, "step": 3933 }, { "epoch": 1.9530034750951515, "grad_norm": 0.3922402858734131, "learning_rate": 3.2725660795477242e-06, "loss": 0.3289, "step": 3934 }, { "epoch": 1.9534999172596392, "grad_norm": 0.4002164304256439, "learning_rate": 3.269855181077173e-06, "loss": 0.3918, "step": 3935 }, { "epoch": 1.953996359424127, "grad_norm": 0.3936907649040222, "learning_rate": 3.2671448602549537e-06, "loss": 0.3396, "step": 3936 }, { "epoch": 1.954492801588615, "grad_norm": 0.38170573115348816, "learning_rate": 3.2644351179859678e-06, "loss": 0.3316, "step": 3937 }, { "epoch": 1.954989243753103, "grad_norm": 0.37998223304748535, "learning_rate": 3.2617259551749283e-06, "loss": 0.3642, "step": 3938 }, { "epoch": 1.9554856859175906, "grad_norm": 0.4012511074542999, "learning_rate": 3.2590173727263464e-06, "loss": 0.3384, "step": 3939 }, { "epoch": 1.9559821280820784, "grad_norm": 0.4174956977367401, "learning_rate": 3.256309371544548e-06, "loss": 0.3305, "step": 3940 }, { "epoch": 1.9564785702465661, "grad_norm": 0.4892074465751648, "learning_rate": 3.253601952533658e-06, "loss": 0.3779, "step": 3941 }, { "epoch": 1.956975012411054, "grad_norm": 0.40711334347724915, "learning_rate": 3.2508951165976132e-06, "loss": 0.3457, "step": 3942 }, { "epoch": 1.957471454575542, "grad_norm": 0.41643643379211426, "learning_rate": 3.2481888646401506e-06, "loss": 0.3957, "step": 3943 }, { "epoch": 1.9579678967400298, "grad_norm": 0.4640657603740692, "learning_rate": 3.2454831975648147e-06, "loss": 0.3486, "step": 3944 }, { "epoch": 1.9584643389045175, "grad_norm": 0.42296358942985535, "learning_rate": 3.2427781162749527e-06, "loss": 0.3831, "step": 3945 }, { "epoch": 1.9589607810690055, "grad_norm": 0.3856073319911957, "learning_rate": 3.2400736216737207e-06, "loss": 0.325, "step": 3946 }, { "epoch": 1.9594572232334933, "grad_norm": 0.42109113931655884, "learning_rate": 3.2373697146640727e-06, "loss": 0.3849, "step": 3947 }, { "epoch": 1.9599536653979812, "grad_norm": 0.39276689291000366, "learning_rate": 3.2346663961487722e-06, "loss": 0.3497, "step": 3948 }, { "epoch": 1.960450107562469, "grad_norm": 0.3894651532173157, "learning_rate": 3.2319636670303815e-06, "loss": 0.3354, "step": 3949 }, { "epoch": 1.9609465497269567, "grad_norm": 0.4167749285697937, "learning_rate": 3.2292615282112715e-06, "loss": 0.315, "step": 3950 }, { "epoch": 1.9614429918914447, "grad_norm": 0.44029101729393005, "learning_rate": 3.226559980593612e-06, "loss": 0.3436, "step": 3951 }, { "epoch": 1.9619394340559326, "grad_norm": 0.4369032382965088, "learning_rate": 3.2238590250793734e-06, "loss": 0.3406, "step": 3952 }, { "epoch": 1.9624358762204204, "grad_norm": 0.4617750644683838, "learning_rate": 3.2211586625703343e-06, "loss": 0.3962, "step": 3953 }, { "epoch": 1.9629323183849081, "grad_norm": 0.3919609785079956, "learning_rate": 3.2184588939680727e-06, "loss": 0.3527, "step": 3954 }, { "epoch": 1.9634287605493959, "grad_norm": 0.35976162552833557, "learning_rate": 3.2157597201739655e-06, "loss": 0.3469, "step": 3955 }, { "epoch": 1.9639252027138838, "grad_norm": 0.4460522532463074, "learning_rate": 3.2130611420891943e-06, "loss": 0.3663, "step": 3956 }, { "epoch": 1.9644216448783718, "grad_norm": 0.40671733021736145, "learning_rate": 3.210363160614742e-06, "loss": 0.3216, "step": 3957 }, { "epoch": 1.9649180870428595, "grad_norm": 0.4419689178466797, "learning_rate": 3.207665776651392e-06, "loss": 0.3935, "step": 3958 }, { "epoch": 1.9654145292073473, "grad_norm": 0.3876538872718811, "learning_rate": 3.2049689910997255e-06, "loss": 0.3344, "step": 3959 }, { "epoch": 1.9659109713718352, "grad_norm": 0.41868317127227783, "learning_rate": 3.202272804860125e-06, "loss": 0.3931, "step": 3960 }, { "epoch": 1.966407413536323, "grad_norm": 0.4637366533279419, "learning_rate": 3.1995772188327778e-06, "loss": 0.4181, "step": 3961 }, { "epoch": 1.966903855700811, "grad_norm": 0.38122686743736267, "learning_rate": 3.196882233917663e-06, "loss": 0.3305, "step": 3962 }, { "epoch": 1.9674002978652987, "grad_norm": 0.39212846755981445, "learning_rate": 3.194187851014565e-06, "loss": 0.3836, "step": 3963 }, { "epoch": 1.9678967400297864, "grad_norm": 0.42307907342910767, "learning_rate": 3.1914940710230622e-06, "loss": 0.2932, "step": 3964 }, { "epoch": 1.9683931821942744, "grad_norm": 0.3844515383243561, "learning_rate": 3.18880089484254e-06, "loss": 0.3375, "step": 3965 }, { "epoch": 1.9688896243587624, "grad_norm": 0.4281887710094452, "learning_rate": 3.186108323372172e-06, "loss": 0.3072, "step": 3966 }, { "epoch": 1.96938606652325, "grad_norm": 0.4158499538898468, "learning_rate": 3.1834163575109343e-06, "loss": 0.375, "step": 3967 }, { "epoch": 1.9698825086877378, "grad_norm": 0.40034419298171997, "learning_rate": 3.180724998157605e-06, "loss": 0.3497, "step": 3968 }, { "epoch": 1.9703789508522256, "grad_norm": 0.3791455030441284, "learning_rate": 3.1780342462107535e-06, "loss": 0.3647, "step": 3969 }, { "epoch": 1.9708753930167136, "grad_norm": 0.41300252079963684, "learning_rate": 3.1753441025687483e-06, "loss": 0.3742, "step": 3970 }, { "epoch": 1.9713718351812015, "grad_norm": 0.3861382007598877, "learning_rate": 3.172654568129755e-06, "loss": 0.3695, "step": 3971 }, { "epoch": 1.9718682773456893, "grad_norm": 0.3798954486846924, "learning_rate": 3.169965643791737e-06, "loss": 0.3697, "step": 3972 }, { "epoch": 1.972364719510177, "grad_norm": 0.3850860893726349, "learning_rate": 3.1672773304524552e-06, "loss": 0.396, "step": 3973 }, { "epoch": 1.9728611616746647, "grad_norm": 0.38988062739372253, "learning_rate": 3.1645896290094615e-06, "loss": 0.3362, "step": 3974 }, { "epoch": 1.9733576038391527, "grad_norm": 0.4416916072368622, "learning_rate": 3.1619025403601043e-06, "loss": 0.3227, "step": 3975 }, { "epoch": 1.9738540460036407, "grad_norm": 0.4062724709510803, "learning_rate": 3.1592160654015346e-06, "loss": 0.376, "step": 3976 }, { "epoch": 1.9743504881681284, "grad_norm": 0.3942384719848633, "learning_rate": 3.1565302050306914e-06, "loss": 0.3558, "step": 3977 }, { "epoch": 1.9748469303326162, "grad_norm": 0.43936800956726074, "learning_rate": 3.1538449601443067e-06, "loss": 0.3708, "step": 3978 }, { "epoch": 1.9753433724971041, "grad_norm": 0.4392222762107849, "learning_rate": 3.151160331638917e-06, "loss": 0.3907, "step": 3979 }, { "epoch": 1.975839814661592, "grad_norm": 0.39584529399871826, "learning_rate": 3.1484763204108433e-06, "loss": 0.358, "step": 3980 }, { "epoch": 1.9763362568260798, "grad_norm": 0.4120037853717804, "learning_rate": 3.1457929273562048e-06, "loss": 0.3714, "step": 3981 }, { "epoch": 1.9768326989905676, "grad_norm": 0.40204623341560364, "learning_rate": 3.143110153370912e-06, "loss": 0.3331, "step": 3982 }, { "epoch": 1.9773291411550553, "grad_norm": 0.4043726921081543, "learning_rate": 3.1404279993506726e-06, "loss": 0.3738, "step": 3983 }, { "epoch": 1.9778255833195433, "grad_norm": 0.3989870548248291, "learning_rate": 3.137746466190985e-06, "loss": 0.3213, "step": 3984 }, { "epoch": 1.9783220254840312, "grad_norm": 0.4170354902744293, "learning_rate": 3.1350655547871384e-06, "loss": 0.3405, "step": 3985 }, { "epoch": 1.978818467648519, "grad_norm": 0.42606809735298157, "learning_rate": 3.1323852660342146e-06, "loss": 0.3622, "step": 3986 }, { "epoch": 1.9793149098130067, "grad_norm": 0.3937358558177948, "learning_rate": 3.1297056008270932e-06, "loss": 0.3646, "step": 3987 }, { "epoch": 1.9798113519774945, "grad_norm": 0.3914463520050049, "learning_rate": 3.127026560060441e-06, "loss": 0.3435, "step": 3988 }, { "epoch": 1.9803077941419824, "grad_norm": 0.43863385915756226, "learning_rate": 3.124348144628715e-06, "loss": 0.3585, "step": 3989 }, { "epoch": 1.9808042363064704, "grad_norm": 0.4299965500831604, "learning_rate": 3.121670355426165e-06, "loss": 0.3354, "step": 3990 }, { "epoch": 1.9813006784709581, "grad_norm": 0.40906208753585815, "learning_rate": 3.1189931933468345e-06, "loss": 0.3731, "step": 3991 }, { "epoch": 1.9817971206354459, "grad_norm": 0.4158685803413391, "learning_rate": 3.116316659284554e-06, "loss": 0.3639, "step": 3992 }, { "epoch": 1.9822935627999339, "grad_norm": 0.3414784371852875, "learning_rate": 3.1136407541329435e-06, "loss": 0.3008, "step": 3993 }, { "epoch": 1.9827900049644216, "grad_norm": 0.38488662242889404, "learning_rate": 3.1109654787854184e-06, "loss": 0.3583, "step": 3994 }, { "epoch": 1.9832864471289096, "grad_norm": 0.43601417541503906, "learning_rate": 3.108290834135178e-06, "loss": 0.391, "step": 3995 }, { "epoch": 1.9837828892933973, "grad_norm": 0.37449970841407776, "learning_rate": 3.105616821075216e-06, "loss": 0.3293, "step": 3996 }, { "epoch": 1.984279331457885, "grad_norm": 0.3931335210800171, "learning_rate": 3.102943440498308e-06, "loss": 0.3486, "step": 3997 }, { "epoch": 1.984775773622373, "grad_norm": 0.4197522699832916, "learning_rate": 3.1002706932970283e-06, "loss": 0.3213, "step": 3998 }, { "epoch": 1.985272215786861, "grad_norm": 0.43141594529151917, "learning_rate": 3.097598580363732e-06, "loss": 0.4011, "step": 3999 }, { "epoch": 1.9857686579513487, "grad_norm": 0.3761480748653412, "learning_rate": 3.094927102590566e-06, "loss": 0.296, "step": 4000 }, { "epoch": 1.9862651001158365, "grad_norm": 0.405719518661499, "learning_rate": 3.0922562608694604e-06, "loss": 0.3466, "step": 4001 }, { "epoch": 1.9867615422803242, "grad_norm": 0.38427984714508057, "learning_rate": 3.089586056092143e-06, "loss": 0.3716, "step": 4002 }, { "epoch": 1.9872579844448122, "grad_norm": 0.43692547082901, "learning_rate": 3.086916489150118e-06, "loss": 0.363, "step": 4003 }, { "epoch": 1.9877544266093001, "grad_norm": 0.3952825367450714, "learning_rate": 3.0842475609346833e-06, "loss": 0.3231, "step": 4004 }, { "epoch": 1.9882508687737879, "grad_norm": 0.40707799792289734, "learning_rate": 3.081579272336919e-06, "loss": 0.3499, "step": 4005 }, { "epoch": 1.9887473109382756, "grad_norm": 0.3863087296485901, "learning_rate": 3.0789116242476967e-06, "loss": 0.3403, "step": 4006 }, { "epoch": 1.9892437531027636, "grad_norm": 0.4136399030685425, "learning_rate": 3.076244617557672e-06, "loss": 0.3591, "step": 4007 }, { "epoch": 1.9897401952672513, "grad_norm": 0.3987202048301697, "learning_rate": 3.073578253157282e-06, "loss": 0.303, "step": 4008 }, { "epoch": 1.9902366374317393, "grad_norm": 0.4455214738845825, "learning_rate": 3.070912531936759e-06, "loss": 0.3755, "step": 4009 }, { "epoch": 1.990733079596227, "grad_norm": 0.398686021566391, "learning_rate": 3.06824745478611e-06, "loss": 0.3216, "step": 4010 }, { "epoch": 1.9912295217607148, "grad_norm": 0.39765769243240356, "learning_rate": 3.0655830225951355e-06, "loss": 0.3608, "step": 4011 }, { "epoch": 1.9917259639252027, "grad_norm": 0.4045025408267975, "learning_rate": 3.062919236253412e-06, "loss": 0.333, "step": 4012 }, { "epoch": 1.9922224060896907, "grad_norm": 0.4186612367630005, "learning_rate": 3.0602560966503114e-06, "loss": 0.362, "step": 4013 }, { "epoch": 1.9927188482541784, "grad_norm": 0.42304474115371704, "learning_rate": 3.057593604674981e-06, "loss": 0.3748, "step": 4014 }, { "epoch": 1.9932152904186662, "grad_norm": 0.3453375995159149, "learning_rate": 3.0549317612163543e-06, "loss": 0.3232, "step": 4015 }, { "epoch": 1.993711732583154, "grad_norm": 0.41707611083984375, "learning_rate": 3.052270567163146e-06, "loss": 0.3979, "step": 4016 }, { "epoch": 1.994208174747642, "grad_norm": 0.3583800792694092, "learning_rate": 3.0496100234038615e-06, "loss": 0.3307, "step": 4017 }, { "epoch": 1.9947046169121299, "grad_norm": 0.38004258275032043, "learning_rate": 3.0469501308267803e-06, "loss": 0.3438, "step": 4018 }, { "epoch": 1.9952010590766176, "grad_norm": 0.45921000838279724, "learning_rate": 3.0442908903199692e-06, "loss": 0.391, "step": 4019 }, { "epoch": 1.9956975012411053, "grad_norm": 0.403114914894104, "learning_rate": 3.0416323027712767e-06, "loss": 0.3549, "step": 4020 }, { "epoch": 1.996193943405593, "grad_norm": 0.36191946268081665, "learning_rate": 3.0389743690683337e-06, "loss": 0.324, "step": 4021 }, { "epoch": 1.996690385570081, "grad_norm": 0.40099042654037476, "learning_rate": 3.036317090098552e-06, "loss": 0.3616, "step": 4022 }, { "epoch": 1.997186827734569, "grad_norm": 0.4161485731601715, "learning_rate": 3.033660466749121e-06, "loss": 0.406, "step": 4023 }, { "epoch": 1.9976832698990568, "grad_norm": 0.3807937800884247, "learning_rate": 3.0310044999070204e-06, "loss": 0.361, "step": 4024 }, { "epoch": 1.9981797120635445, "grad_norm": 0.44692498445510864, "learning_rate": 3.0283491904590027e-06, "loss": 0.3423, "step": 4025 }, { "epoch": 1.9986761542280325, "grad_norm": 0.4558561146259308, "learning_rate": 3.0256945392916033e-06, "loss": 0.3928, "step": 4026 }, { "epoch": 1.9991725963925204, "grad_norm": 0.38137996196746826, "learning_rate": 3.0230405472911374e-06, "loss": 0.3483, "step": 4027 }, { "epoch": 1.9996690385570082, "grad_norm": 0.39637166261672974, "learning_rate": 3.020387215343704e-06, "loss": 0.3975, "step": 4028 }, { "epoch": 2.000165480721496, "grad_norm": 0.8123154640197754, "learning_rate": 3.017734544335176e-06, "loss": 0.5543, "step": 4029 }, { "epoch": 2.0006619228859837, "grad_norm": 0.3893912732601166, "learning_rate": 3.0150825351512094e-06, "loss": 0.3091, "step": 4030 }, { "epoch": 2.0011583650504714, "grad_norm": 0.41670021414756775, "learning_rate": 3.0124311886772352e-06, "loss": 0.3569, "step": 4031 }, { "epoch": 2.0016548072149596, "grad_norm": 0.39405080676078796, "learning_rate": 3.009780505798469e-06, "loss": 0.3375, "step": 4032 }, { "epoch": 2.0021512493794473, "grad_norm": 0.36431875824928284, "learning_rate": 3.007130487399901e-06, "loss": 0.3064, "step": 4033 }, { "epoch": 2.002647691543935, "grad_norm": 0.42945313453674316, "learning_rate": 3.0044811343662996e-06, "loss": 0.3622, "step": 4034 }, { "epoch": 2.003144133708423, "grad_norm": 0.37509989738464355, "learning_rate": 3.0018324475822113e-06, "loss": 0.2826, "step": 4035 }, { "epoch": 2.003640575872911, "grad_norm": 0.4301036596298218, "learning_rate": 2.9991844279319636e-06, "loss": 0.3471, "step": 4036 }, { "epoch": 2.0041370180373987, "grad_norm": 0.403870165348053, "learning_rate": 2.996537076299656e-06, "loss": 0.305, "step": 4037 }, { "epoch": 2.0046334602018865, "grad_norm": 0.4338100254535675, "learning_rate": 2.9938903935691655e-06, "loss": 0.3058, "step": 4038 }, { "epoch": 2.0051299023663742, "grad_norm": 0.3590986728668213, "learning_rate": 2.991244380624152e-06, "loss": 0.271, "step": 4039 }, { "epoch": 2.005626344530862, "grad_norm": 0.39247313141822815, "learning_rate": 2.9885990383480447e-06, "loss": 0.3384, "step": 4040 }, { "epoch": 2.00612278669535, "grad_norm": 0.40834447741508484, "learning_rate": 2.98595436762405e-06, "loss": 0.3133, "step": 4041 }, { "epoch": 2.006619228859838, "grad_norm": 0.3954438865184784, "learning_rate": 2.9833103693351533e-06, "loss": 0.338, "step": 4042 }, { "epoch": 2.0071156710243256, "grad_norm": 0.3686100244522095, "learning_rate": 2.980667044364114e-06, "loss": 0.2959, "step": 4043 }, { "epoch": 2.0076121131888134, "grad_norm": 0.39007633924484253, "learning_rate": 2.9780243935934673e-06, "loss": 0.2985, "step": 4044 }, { "epoch": 2.008108555353301, "grad_norm": 0.3809436857700348, "learning_rate": 2.9753824179055214e-06, "loss": 0.2793, "step": 4045 }, { "epoch": 2.0086049975177893, "grad_norm": 0.418361097574234, "learning_rate": 2.972741118182358e-06, "loss": 0.3422, "step": 4046 }, { "epoch": 2.009101439682277, "grad_norm": 0.3313376307487488, "learning_rate": 2.970100495305839e-06, "loss": 0.2656, "step": 4047 }, { "epoch": 2.009597881846765, "grad_norm": 0.426261842250824, "learning_rate": 2.9674605501575954e-06, "loss": 0.3813, "step": 4048 }, { "epoch": 2.0100943240112525, "grad_norm": 0.3818516731262207, "learning_rate": 2.9648212836190305e-06, "loss": 0.3377, "step": 4049 }, { "epoch": 2.0105907661757407, "grad_norm": 0.3553279638290405, "learning_rate": 2.9621826965713285e-06, "loss": 0.3179, "step": 4050 }, { "epoch": 2.0110872083402285, "grad_norm": 0.37580522894859314, "learning_rate": 2.959544789895438e-06, "loss": 0.3095, "step": 4051 }, { "epoch": 2.011583650504716, "grad_norm": 0.3935818672180176, "learning_rate": 2.956907564472086e-06, "loss": 0.3309, "step": 4052 }, { "epoch": 2.012080092669204, "grad_norm": 0.40476587414741516, "learning_rate": 2.9542710211817687e-06, "loss": 0.2998, "step": 4053 }, { "epoch": 2.0125765348336917, "grad_norm": 0.41856539249420166, "learning_rate": 2.95163516090476e-06, "loss": 0.3091, "step": 4054 }, { "epoch": 2.01307297699818, "grad_norm": 0.38133180141448975, "learning_rate": 2.948999984521099e-06, "loss": 0.3249, "step": 4055 }, { "epoch": 2.0135694191626676, "grad_norm": 0.37295112013816833, "learning_rate": 2.946365492910599e-06, "loss": 0.3684, "step": 4056 }, { "epoch": 2.0140658613271554, "grad_norm": 0.3952355682849884, "learning_rate": 2.9437316869528467e-06, "loss": 0.3518, "step": 4057 }, { "epoch": 2.014562303491643, "grad_norm": 0.43002423644065857, "learning_rate": 2.9410985675271968e-06, "loss": 0.3435, "step": 4058 }, { "epoch": 2.015058745656131, "grad_norm": 0.39944031834602356, "learning_rate": 2.9384661355127798e-06, "loss": 0.3102, "step": 4059 }, { "epoch": 2.015555187820619, "grad_norm": 0.39288151264190674, "learning_rate": 2.935834391788488e-06, "loss": 0.3623, "step": 4060 }, { "epoch": 2.016051629985107, "grad_norm": 0.4458581507205963, "learning_rate": 2.9332033372329936e-06, "loss": 0.3117, "step": 4061 }, { "epoch": 2.0165480721495945, "grad_norm": 0.3989618420600891, "learning_rate": 2.930572972724733e-06, "loss": 0.2791, "step": 4062 }, { "epoch": 2.0170445143140823, "grad_norm": 0.3555598258972168, "learning_rate": 2.927943299141912e-06, "loss": 0.2985, "step": 4063 }, { "epoch": 2.0175409564785705, "grad_norm": 0.4347423315048218, "learning_rate": 2.9253143173625076e-06, "loss": 0.3596, "step": 4064 }, { "epoch": 2.018037398643058, "grad_norm": 0.38379302620887756, "learning_rate": 2.9226860282642668e-06, "loss": 0.3171, "step": 4065 }, { "epoch": 2.018533840807546, "grad_norm": 0.3882234990596771, "learning_rate": 2.9200584327247017e-06, "loss": 0.2718, "step": 4066 }, { "epoch": 2.0190302829720337, "grad_norm": 0.39285722374916077, "learning_rate": 2.9174315316210987e-06, "loss": 0.3474, "step": 4067 }, { "epoch": 2.0195267251365214, "grad_norm": 0.40096315741539, "learning_rate": 2.914805325830502e-06, "loss": 0.3287, "step": 4068 }, { "epoch": 2.0200231673010096, "grad_norm": 0.4464411437511444, "learning_rate": 2.912179816229739e-06, "loss": 0.3503, "step": 4069 }, { "epoch": 2.0205196094654974, "grad_norm": 0.4241136610507965, "learning_rate": 2.909555003695389e-06, "loss": 0.3157, "step": 4070 }, { "epoch": 2.021016051629985, "grad_norm": 0.4439597427845001, "learning_rate": 2.9069308891038083e-06, "loss": 0.3609, "step": 4071 }, { "epoch": 2.021512493794473, "grad_norm": 0.3561362028121948, "learning_rate": 2.9043074733311172e-06, "loss": 0.3006, "step": 4072 }, { "epoch": 2.0220089359589606, "grad_norm": 0.40374240279197693, "learning_rate": 2.901684757253203e-06, "loss": 0.347, "step": 4073 }, { "epoch": 2.0225053781234488, "grad_norm": 0.39873042702674866, "learning_rate": 2.8990627417457216e-06, "loss": 0.3152, "step": 4074 }, { "epoch": 2.0230018202879365, "grad_norm": 0.3798239529132843, "learning_rate": 2.8964414276840858e-06, "loss": 0.3122, "step": 4075 }, { "epoch": 2.0234982624524243, "grad_norm": 0.3764905035495758, "learning_rate": 2.8938208159434905e-06, "loss": 0.2756, "step": 4076 }, { "epoch": 2.023994704616912, "grad_norm": 0.43671533465385437, "learning_rate": 2.8912009073988796e-06, "loss": 0.3065, "step": 4077 }, { "epoch": 2.0244911467814, "grad_norm": 0.35872572660446167, "learning_rate": 2.888581702924972e-06, "loss": 0.2825, "step": 4078 }, { "epoch": 2.024987588945888, "grad_norm": 0.4201982021331787, "learning_rate": 2.885963203396248e-06, "loss": 0.415, "step": 4079 }, { "epoch": 2.0254840311103757, "grad_norm": 0.3700574040412903, "learning_rate": 2.8833454096869546e-06, "loss": 0.2899, "step": 4080 }, { "epoch": 2.0259804732748634, "grad_norm": 0.39341723918914795, "learning_rate": 2.8807283226711036e-06, "loss": 0.282, "step": 4081 }, { "epoch": 2.026476915439351, "grad_norm": 0.4042288362979889, "learning_rate": 2.8781119432224646e-06, "loss": 0.3034, "step": 4082 }, { "epoch": 2.0269733576038393, "grad_norm": 0.38180267810821533, "learning_rate": 2.875496272214578e-06, "loss": 0.3433, "step": 4083 }, { "epoch": 2.027469799768327, "grad_norm": 0.3935442268848419, "learning_rate": 2.8728813105207455e-06, "loss": 0.331, "step": 4084 }, { "epoch": 2.027966241932815, "grad_norm": 0.3838929235935211, "learning_rate": 2.8702670590140314e-06, "loss": 0.3347, "step": 4085 }, { "epoch": 2.0284626840973026, "grad_norm": 0.3696202337741852, "learning_rate": 2.867653518567265e-06, "loss": 0.3442, "step": 4086 }, { "epoch": 2.0289591262617903, "grad_norm": 0.3905583918094635, "learning_rate": 2.8650406900530316e-06, "loss": 0.3289, "step": 4087 }, { "epoch": 2.0294555684262785, "grad_norm": 0.38171032071113586, "learning_rate": 2.8624285743436904e-06, "loss": 0.2716, "step": 4088 }, { "epoch": 2.0299520105907662, "grad_norm": 0.39834973216056824, "learning_rate": 2.85981717231135e-06, "loss": 0.2897, "step": 4089 }, { "epoch": 2.030448452755254, "grad_norm": 0.448601096868515, "learning_rate": 2.857206484827889e-06, "loss": 0.3268, "step": 4090 }, { "epoch": 2.0309448949197417, "grad_norm": 0.4197012484073639, "learning_rate": 2.8545965127649455e-06, "loss": 0.3324, "step": 4091 }, { "epoch": 2.0314413370842295, "grad_norm": 0.38127654790878296, "learning_rate": 2.851987256993919e-06, "loss": 0.2744, "step": 4092 }, { "epoch": 2.0319377792487177, "grad_norm": 0.4119124710559845, "learning_rate": 2.8493787183859657e-06, "loss": 0.3083, "step": 4093 }, { "epoch": 2.0324342214132054, "grad_norm": 0.34926024079322815, "learning_rate": 2.8467708978120075e-06, "loss": 0.2422, "step": 4094 }, { "epoch": 2.032930663577693, "grad_norm": 0.4355899393558502, "learning_rate": 2.844163796142725e-06, "loss": 0.3625, "step": 4095 }, { "epoch": 2.033427105742181, "grad_norm": 0.42749282717704773, "learning_rate": 2.8415574142485588e-06, "loss": 0.3305, "step": 4096 }, { "epoch": 2.033923547906669, "grad_norm": 0.38192492723464966, "learning_rate": 2.83895175299971e-06, "loss": 0.3013, "step": 4097 }, { "epoch": 2.034419990071157, "grad_norm": 0.4715120792388916, "learning_rate": 2.836346813266134e-06, "loss": 0.3432, "step": 4098 }, { "epoch": 2.0349164322356446, "grad_norm": 0.4091477692127228, "learning_rate": 2.8337425959175558e-06, "loss": 0.2945, "step": 4099 }, { "epoch": 2.0354128744001323, "grad_norm": 0.36912375688552856, "learning_rate": 2.831139101823447e-06, "loss": 0.2917, "step": 4100 }, { "epoch": 2.03590931656462, "grad_norm": 0.39638760685920715, "learning_rate": 2.8285363318530455e-06, "loss": 0.3171, "step": 4101 }, { "epoch": 2.0364057587291082, "grad_norm": 0.336398720741272, "learning_rate": 2.825934286875346e-06, "loss": 0.2847, "step": 4102 }, { "epoch": 2.036902200893596, "grad_norm": 0.41449782252311707, "learning_rate": 2.8233329677591003e-06, "loss": 0.281, "step": 4103 }, { "epoch": 2.0373986430580837, "grad_norm": 0.3996535539627075, "learning_rate": 2.8207323753728205e-06, "loss": 0.3376, "step": 4104 }, { "epoch": 2.0378950852225715, "grad_norm": 0.41411516070365906, "learning_rate": 2.8181325105847667e-06, "loss": 0.3752, "step": 4105 }, { "epoch": 2.038391527387059, "grad_norm": 0.38060086965560913, "learning_rate": 2.815533374262972e-06, "loss": 0.2967, "step": 4106 }, { "epoch": 2.0388879695515474, "grad_norm": 0.40554380416870117, "learning_rate": 2.8129349672752117e-06, "loss": 0.3465, "step": 4107 }, { "epoch": 2.039384411716035, "grad_norm": 0.38354426622390747, "learning_rate": 2.8103372904890234e-06, "loss": 0.2898, "step": 4108 }, { "epoch": 2.039880853880523, "grad_norm": 0.4228661060333252, "learning_rate": 2.8077403447717034e-06, "loss": 0.3544, "step": 4109 }, { "epoch": 2.0403772960450106, "grad_norm": 0.3990709185600281, "learning_rate": 2.8051441309902995e-06, "loss": 0.2897, "step": 4110 }, { "epoch": 2.040873738209499, "grad_norm": 0.4231674075126648, "learning_rate": 2.802548650011619e-06, "loss": 0.3807, "step": 4111 }, { "epoch": 2.0413701803739865, "grad_norm": 0.35870760679244995, "learning_rate": 2.7999539027022193e-06, "loss": 0.2796, "step": 4112 }, { "epoch": 2.0418666225384743, "grad_norm": 0.35774070024490356, "learning_rate": 2.7973598899284173e-06, "loss": 0.3037, "step": 4113 }, { "epoch": 2.042363064702962, "grad_norm": 0.4015906751155853, "learning_rate": 2.7947666125562833e-06, "loss": 0.3687, "step": 4114 }, { "epoch": 2.0428595068674498, "grad_norm": 0.3869587182998657, "learning_rate": 2.7921740714516454e-06, "loss": 0.3585, "step": 4115 }, { "epoch": 2.043355949031938, "grad_norm": 0.35875818133354187, "learning_rate": 2.789582267480075e-06, "loss": 0.276, "step": 4116 }, { "epoch": 2.0438523911964257, "grad_norm": 0.39068442583084106, "learning_rate": 2.7869912015069136e-06, "loss": 0.3399, "step": 4117 }, { "epoch": 2.0443488333609134, "grad_norm": 0.3674030900001526, "learning_rate": 2.784400874397242e-06, "loss": 0.2876, "step": 4118 }, { "epoch": 2.044845275525401, "grad_norm": 0.36925143003463745, "learning_rate": 2.781811287015902e-06, "loss": 0.3148, "step": 4119 }, { "epoch": 2.045341717689889, "grad_norm": 0.39867064356803894, "learning_rate": 2.779222440227486e-06, "loss": 0.3425, "step": 4120 }, { "epoch": 2.045838159854377, "grad_norm": 0.3808092772960663, "learning_rate": 2.7766343348963392e-06, "loss": 0.302, "step": 4121 }, { "epoch": 2.046334602018865, "grad_norm": 0.4140518009662628, "learning_rate": 2.7740469718865626e-06, "loss": 0.3188, "step": 4122 }, { "epoch": 2.0468310441833526, "grad_norm": 0.35807928442955017, "learning_rate": 2.7714603520620026e-06, "loss": 0.3076, "step": 4123 }, { "epoch": 2.0473274863478403, "grad_norm": 0.35689985752105713, "learning_rate": 2.7688744762862624e-06, "loss": 0.3021, "step": 4124 }, { "epoch": 2.0478239285123285, "grad_norm": 0.39503514766693115, "learning_rate": 2.7662893454226956e-06, "loss": 0.3523, "step": 4125 }, { "epoch": 2.0483203706768163, "grad_norm": 0.36989453434944153, "learning_rate": 2.763704960334408e-06, "loss": 0.386, "step": 4126 }, { "epoch": 2.048816812841304, "grad_norm": 0.3753148317337036, "learning_rate": 2.761121321884257e-06, "loss": 0.3311, "step": 4127 }, { "epoch": 2.0493132550057918, "grad_norm": 0.3489398658275604, "learning_rate": 2.758538430934843e-06, "loss": 0.3256, "step": 4128 }, { "epoch": 2.0498096971702795, "grad_norm": 0.3809763789176941, "learning_rate": 2.7559562883485314e-06, "loss": 0.318, "step": 4129 }, { "epoch": 2.0503061393347677, "grad_norm": 0.38867175579071045, "learning_rate": 2.7533748949874227e-06, "loss": 0.3591, "step": 4130 }, { "epoch": 2.0508025814992554, "grad_norm": 0.37578701972961426, "learning_rate": 2.750794251713378e-06, "loss": 0.3793, "step": 4131 }, { "epoch": 2.051299023663743, "grad_norm": 0.38437527418136597, "learning_rate": 2.7482143593880015e-06, "loss": 0.3031, "step": 4132 }, { "epoch": 2.051795465828231, "grad_norm": 0.4540836215019226, "learning_rate": 2.745635218872651e-06, "loss": 0.3354, "step": 4133 }, { "epoch": 2.0522919079927187, "grad_norm": 0.38449007272720337, "learning_rate": 2.743056831028432e-06, "loss": 0.3249, "step": 4134 }, { "epoch": 2.052788350157207, "grad_norm": 0.33838334679603577, "learning_rate": 2.7404791967161937e-06, "loss": 0.327, "step": 4135 }, { "epoch": 2.0532847923216946, "grad_norm": 0.39462533593177795, "learning_rate": 2.7379023167965447e-06, "loss": 0.3729, "step": 4136 }, { "epoch": 2.0537812344861823, "grad_norm": 0.3970167338848114, "learning_rate": 2.7353261921298303e-06, "loss": 0.3598, "step": 4137 }, { "epoch": 2.05427767665067, "grad_norm": 0.3506917953491211, "learning_rate": 2.7327508235761513e-06, "loss": 0.3108, "step": 4138 }, { "epoch": 2.054774118815158, "grad_norm": 0.3707887530326843, "learning_rate": 2.730176211995348e-06, "loss": 0.312, "step": 4139 }, { "epoch": 2.055270560979646, "grad_norm": 0.39996784925460815, "learning_rate": 2.7276023582470213e-06, "loss": 0.2961, "step": 4140 }, { "epoch": 2.0557670031441337, "grad_norm": 0.41488009691238403, "learning_rate": 2.725029263190504e-06, "loss": 0.3397, "step": 4141 }, { "epoch": 2.0562634453086215, "grad_norm": 0.3878713548183441, "learning_rate": 2.7224569276848866e-06, "loss": 0.3513, "step": 4142 }, { "epoch": 2.0567598874731092, "grad_norm": 0.36647340655326843, "learning_rate": 2.7198853525890003e-06, "loss": 0.279, "step": 4143 }, { "epoch": 2.0572563296375974, "grad_norm": 0.402516633272171, "learning_rate": 2.717314538761425e-06, "loss": 0.3239, "step": 4144 }, { "epoch": 2.057752771802085, "grad_norm": 0.3855515122413635, "learning_rate": 2.7147444870604868e-06, "loss": 0.3062, "step": 4145 }, { "epoch": 2.058249213966573, "grad_norm": 0.42379269003868103, "learning_rate": 2.712175198344251e-06, "loss": 0.3428, "step": 4146 }, { "epoch": 2.0587456561310606, "grad_norm": 0.37171483039855957, "learning_rate": 2.7096066734705406e-06, "loss": 0.3, "step": 4147 }, { "epoch": 2.0592420982955484, "grad_norm": 0.40884721279144287, "learning_rate": 2.70703891329691e-06, "loss": 0.3037, "step": 4148 }, { "epoch": 2.0597385404600366, "grad_norm": 0.39710214734077454, "learning_rate": 2.7044719186806677e-06, "loss": 0.3983, "step": 4149 }, { "epoch": 2.0602349826245243, "grad_norm": 0.37944769859313965, "learning_rate": 2.7019056904788625e-06, "loss": 0.3435, "step": 4150 }, { "epoch": 2.060731424789012, "grad_norm": 0.39688727259635925, "learning_rate": 2.6993402295482885e-06, "loss": 0.3466, "step": 4151 }, { "epoch": 2.0612278669535, "grad_norm": 0.34694573283195496, "learning_rate": 2.6967755367454855e-06, "loss": 0.2771, "step": 4152 }, { "epoch": 2.0617243091179875, "grad_norm": 0.4175563156604767, "learning_rate": 2.694211612926731e-06, "loss": 0.3817, "step": 4153 }, { "epoch": 2.0622207512824757, "grad_norm": 0.43229445815086365, "learning_rate": 2.6916484589480505e-06, "loss": 0.3417, "step": 4154 }, { "epoch": 2.0627171934469635, "grad_norm": 0.3856489360332489, "learning_rate": 2.6890860756652125e-06, "loss": 0.3248, "step": 4155 }, { "epoch": 2.063213635611451, "grad_norm": 0.3763786554336548, "learning_rate": 2.6865244639337263e-06, "loss": 0.3076, "step": 4156 }, { "epoch": 2.063710077775939, "grad_norm": 0.3667292594909668, "learning_rate": 2.6839636246088446e-06, "loss": 0.3064, "step": 4157 }, { "epoch": 2.064206519940427, "grad_norm": 0.40125030279159546, "learning_rate": 2.6814035585455628e-06, "loss": 0.3561, "step": 4158 }, { "epoch": 2.064702962104915, "grad_norm": 0.3106812834739685, "learning_rate": 2.6788442665986184e-06, "loss": 0.323, "step": 4159 }, { "epoch": 2.0651994042694026, "grad_norm": 0.39913827180862427, "learning_rate": 2.6762857496224858e-06, "loss": 0.348, "step": 4160 }, { "epoch": 2.0656958464338904, "grad_norm": 0.4072754681110382, "learning_rate": 2.673728008471387e-06, "loss": 0.301, "step": 4161 }, { "epoch": 2.066192288598378, "grad_norm": 0.38477832078933716, "learning_rate": 2.6711710439992812e-06, "loss": 0.3521, "step": 4162 }, { "epoch": 2.0666887307628663, "grad_norm": 0.3794364333152771, "learning_rate": 2.668614857059872e-06, "loss": 0.339, "step": 4163 }, { "epoch": 2.067185172927354, "grad_norm": 0.3603569269180298, "learning_rate": 2.666059448506596e-06, "loss": 0.3163, "step": 4164 }, { "epoch": 2.067681615091842, "grad_norm": 0.4256459176540375, "learning_rate": 2.6635048191926375e-06, "loss": 0.3782, "step": 4165 }, { "epoch": 2.0681780572563295, "grad_norm": 0.3822643756866455, "learning_rate": 2.6609509699709174e-06, "loss": 0.3469, "step": 4166 }, { "epoch": 2.0686744994208173, "grad_norm": 0.3744523823261261, "learning_rate": 2.6583979016940962e-06, "loss": 0.3171, "step": 4167 }, { "epoch": 2.0691709415853055, "grad_norm": 0.37978148460388184, "learning_rate": 2.655845615214577e-06, "loss": 0.2778, "step": 4168 }, { "epoch": 2.069667383749793, "grad_norm": 0.36180558800697327, "learning_rate": 2.6532941113844924e-06, "loss": 0.3115, "step": 4169 }, { "epoch": 2.070163825914281, "grad_norm": 0.36970797181129456, "learning_rate": 2.650743391055728e-06, "loss": 0.2982, "step": 4170 }, { "epoch": 2.0706602680787687, "grad_norm": 0.3633882403373718, "learning_rate": 2.648193455079894e-06, "loss": 0.2911, "step": 4171 }, { "epoch": 2.071156710243257, "grad_norm": 0.37609559297561646, "learning_rate": 2.6456443043083457e-06, "loss": 0.3402, "step": 4172 }, { "epoch": 2.0716531524077446, "grad_norm": 0.38880422711372375, "learning_rate": 2.643095939592177e-06, "loss": 0.3361, "step": 4173 }, { "epoch": 2.0721495945722324, "grad_norm": 0.3907678723335266, "learning_rate": 2.640548361782218e-06, "loss": 0.2973, "step": 4174 }, { "epoch": 2.07264603673672, "grad_norm": 0.3697074055671692, "learning_rate": 2.6380015717290356e-06, "loss": 0.3044, "step": 4175 }, { "epoch": 2.073142478901208, "grad_norm": 0.3689570128917694, "learning_rate": 2.6354555702829293e-06, "loss": 0.2919, "step": 4176 }, { "epoch": 2.073638921065696, "grad_norm": 0.4203183352947235, "learning_rate": 2.6329103582939474e-06, "loss": 0.3482, "step": 4177 }, { "epoch": 2.0741353632301838, "grad_norm": 0.43632450699806213, "learning_rate": 2.6303659366118605e-06, "loss": 0.3247, "step": 4178 }, { "epoch": 2.0746318053946715, "grad_norm": 0.37739911675453186, "learning_rate": 2.6278223060861846e-06, "loss": 0.2886, "step": 4179 }, { "epoch": 2.0751282475591593, "grad_norm": 0.38748687505722046, "learning_rate": 2.6252794675661685e-06, "loss": 0.3394, "step": 4180 }, { "epoch": 2.075624689723647, "grad_norm": 0.3619794547557831, "learning_rate": 2.6227374219007963e-06, "loss": 0.2972, "step": 4181 }, { "epoch": 2.076121131888135, "grad_norm": 0.36380404233932495, "learning_rate": 2.620196169938791e-06, "loss": 0.2819, "step": 4182 }, { "epoch": 2.076617574052623, "grad_norm": 0.39376917481422424, "learning_rate": 2.617655712528603e-06, "loss": 0.349, "step": 4183 }, { "epoch": 2.0771140162171107, "grad_norm": 0.36169150471687317, "learning_rate": 2.615116050518424e-06, "loss": 0.357, "step": 4184 }, { "epoch": 2.0776104583815984, "grad_norm": 0.3659641444683075, "learning_rate": 2.6125771847561785e-06, "loss": 0.3655, "step": 4185 }, { "epoch": 2.0781069005460866, "grad_norm": 0.34896206855773926, "learning_rate": 2.610039116089526e-06, "loss": 0.2511, "step": 4186 }, { "epoch": 2.0786033427105743, "grad_norm": 0.3854008615016937, "learning_rate": 2.607501845365853e-06, "loss": 0.3474, "step": 4187 }, { "epoch": 2.079099784875062, "grad_norm": 0.3521692156791687, "learning_rate": 2.604965373432294e-06, "loss": 0.2417, "step": 4188 }, { "epoch": 2.07959622703955, "grad_norm": 0.3671572804450989, "learning_rate": 2.602429701135701e-06, "loss": 0.3465, "step": 4189 }, { "epoch": 2.0800926692040376, "grad_norm": 0.3349255323410034, "learning_rate": 2.5998948293226684e-06, "loss": 0.2829, "step": 4190 }, { "epoch": 2.0805891113685258, "grad_norm": 0.3789498209953308, "learning_rate": 2.597360758839521e-06, "loss": 0.3043, "step": 4191 }, { "epoch": 2.0810855535330135, "grad_norm": 0.37159284949302673, "learning_rate": 2.5948274905323163e-06, "loss": 0.301, "step": 4192 }, { "epoch": 2.0815819956975012, "grad_norm": 0.4230900704860687, "learning_rate": 2.5922950252468455e-06, "loss": 0.3246, "step": 4193 }, { "epoch": 2.082078437861989, "grad_norm": 0.4156512916088104, "learning_rate": 2.5897633638286256e-06, "loss": 0.2593, "step": 4194 }, { "epoch": 2.0825748800264767, "grad_norm": 0.4003695547580719, "learning_rate": 2.587232507122912e-06, "loss": 0.3538, "step": 4195 }, { "epoch": 2.083071322190965, "grad_norm": 0.3598071336746216, "learning_rate": 2.584702455974689e-06, "loss": 0.2995, "step": 4196 }, { "epoch": 2.0835677643554527, "grad_norm": 0.43762966990470886, "learning_rate": 2.5821732112286726e-06, "loss": 0.3826, "step": 4197 }, { "epoch": 2.0840642065199404, "grad_norm": 0.3461560308933258, "learning_rate": 2.579644773729307e-06, "loss": 0.3016, "step": 4198 }, { "epoch": 2.084560648684428, "grad_norm": 0.41977348923683167, "learning_rate": 2.5771171443207703e-06, "loss": 0.3545, "step": 4199 }, { "epoch": 2.085057090848916, "grad_norm": 0.3661072850227356, "learning_rate": 2.574590323846971e-06, "loss": 0.3072, "step": 4200 }, { "epoch": 2.085553533013404, "grad_norm": 0.42792728543281555, "learning_rate": 2.572064313151541e-06, "loss": 0.3614, "step": 4201 }, { "epoch": 2.086049975177892, "grad_norm": 0.3721398413181305, "learning_rate": 2.5695391130778504e-06, "loss": 0.3246, "step": 4202 }, { "epoch": 2.0865464173423796, "grad_norm": 0.3764643967151642, "learning_rate": 2.5670147244689926e-06, "loss": 0.2755, "step": 4203 }, { "epoch": 2.0870428595068673, "grad_norm": 0.42509889602661133, "learning_rate": 2.5644911481677937e-06, "loss": 0.3491, "step": 4204 }, { "epoch": 2.0875393016713555, "grad_norm": 0.37285542488098145, "learning_rate": 2.5619683850168087e-06, "loss": 0.3033, "step": 4205 }, { "epoch": 2.0880357438358432, "grad_norm": 0.3742159903049469, "learning_rate": 2.5594464358583137e-06, "loss": 0.389, "step": 4206 }, { "epoch": 2.088532186000331, "grad_norm": 0.3724098801612854, "learning_rate": 2.5569253015343277e-06, "loss": 0.3427, "step": 4207 }, { "epoch": 2.0890286281648187, "grad_norm": 0.34126392006874084, "learning_rate": 2.5544049828865823e-06, "loss": 0.2827, "step": 4208 }, { "epoch": 2.0895250703293065, "grad_norm": 0.40689510107040405, "learning_rate": 2.5518854807565473e-06, "loss": 0.3217, "step": 4209 }, { "epoch": 2.0900215124937946, "grad_norm": 0.3883250653743744, "learning_rate": 2.5493667959854106e-06, "loss": 0.2571, "step": 4210 }, { "epoch": 2.0905179546582824, "grad_norm": 0.4239693284034729, "learning_rate": 2.5468489294141003e-06, "loss": 0.3878, "step": 4211 }, { "epoch": 2.09101439682277, "grad_norm": 0.3787074089050293, "learning_rate": 2.5443318818832574e-06, "loss": 0.3148, "step": 4212 }, { "epoch": 2.091510838987258, "grad_norm": 0.3740960955619812, "learning_rate": 2.5418156542332557e-06, "loss": 0.3075, "step": 4213 }, { "epoch": 2.0920072811517456, "grad_norm": 0.3787432909011841, "learning_rate": 2.539300247304202e-06, "loss": 0.3078, "step": 4214 }, { "epoch": 2.092503723316234, "grad_norm": 0.3930642306804657, "learning_rate": 2.536785661935914e-06, "loss": 0.3331, "step": 4215 }, { "epoch": 2.0930001654807215, "grad_norm": 0.3762491047382355, "learning_rate": 2.53427189896795e-06, "loss": 0.3014, "step": 4216 }, { "epoch": 2.0934966076452093, "grad_norm": 0.42268607020378113, "learning_rate": 2.5317589592395802e-06, "loss": 0.2894, "step": 4217 }, { "epoch": 2.093993049809697, "grad_norm": 0.410751610994339, "learning_rate": 2.5292468435898145e-06, "loss": 0.3356, "step": 4218 }, { "epoch": 2.094489491974185, "grad_norm": 0.39330166578292847, "learning_rate": 2.5267355528573745e-06, "loss": 0.307, "step": 4219 }, { "epoch": 2.094985934138673, "grad_norm": 0.34386685490608215, "learning_rate": 2.524225087880714e-06, "loss": 0.3133, "step": 4220 }, { "epoch": 2.0954823763031607, "grad_norm": 0.4159983992576599, "learning_rate": 2.5217154494980087e-06, "loss": 0.3025, "step": 4221 }, { "epoch": 2.0959788184676484, "grad_norm": 0.42000776529312134, "learning_rate": 2.5192066385471592e-06, "loss": 0.347, "step": 4222 }, { "epoch": 2.096475260632136, "grad_norm": 0.3879395127296448, "learning_rate": 2.5166986558657904e-06, "loss": 0.3344, "step": 4223 }, { "epoch": 2.0969717027966244, "grad_norm": 0.3786351680755615, "learning_rate": 2.5141915022912454e-06, "loss": 0.3422, "step": 4224 }, { "epoch": 2.097468144961112, "grad_norm": 0.3624129891395569, "learning_rate": 2.5116851786605983e-06, "loss": 0.3079, "step": 4225 }, { "epoch": 2.0979645871256, "grad_norm": 0.39836910367012024, "learning_rate": 2.509179685810641e-06, "loss": 0.3169, "step": 4226 }, { "epoch": 2.0984610292900876, "grad_norm": 0.416708379983902, "learning_rate": 2.5066750245778905e-06, "loss": 0.3281, "step": 4227 }, { "epoch": 2.0989574714545753, "grad_norm": 0.4394407570362091, "learning_rate": 2.504171195798584e-06, "loss": 0.3419, "step": 4228 }, { "epoch": 2.0994539136190635, "grad_norm": 0.381070077419281, "learning_rate": 2.5016682003086812e-06, "loss": 0.3417, "step": 4229 }, { "epoch": 2.0999503557835513, "grad_norm": 0.3640787601470947, "learning_rate": 2.4991660389438687e-06, "loss": 0.271, "step": 4230 }, { "epoch": 2.100446797948039, "grad_norm": 0.3705645501613617, "learning_rate": 2.496664712539545e-06, "loss": 0.305, "step": 4231 }, { "epoch": 2.1009432401125268, "grad_norm": 0.4247264564037323, "learning_rate": 2.494164221930836e-06, "loss": 0.3139, "step": 4232 }, { "epoch": 2.101439682277015, "grad_norm": 0.3900303244590759, "learning_rate": 2.491664567952589e-06, "loss": 0.3666, "step": 4233 }, { "epoch": 2.1019361244415027, "grad_norm": 0.3855172097682953, "learning_rate": 2.489165751439372e-06, "loss": 0.3266, "step": 4234 }, { "epoch": 2.1024325666059904, "grad_norm": 0.3885287642478943, "learning_rate": 2.486667773225468e-06, "loss": 0.3078, "step": 4235 }, { "epoch": 2.102929008770478, "grad_norm": 0.38915085792541504, "learning_rate": 2.484170634144884e-06, "loss": 0.3454, "step": 4236 }, { "epoch": 2.103425450934966, "grad_norm": 0.3675372302532196, "learning_rate": 2.481674335031352e-06, "loss": 0.3117, "step": 4237 }, { "epoch": 2.103921893099454, "grad_norm": 0.3836170732975006, "learning_rate": 2.4791788767183144e-06, "loss": 0.3334, "step": 4238 }, { "epoch": 2.104418335263942, "grad_norm": 0.40515580773353577, "learning_rate": 2.476684260038937e-06, "loss": 0.375, "step": 4239 }, { "epoch": 2.1049147774284296, "grad_norm": 0.3585866689682007, "learning_rate": 2.474190485826106e-06, "loss": 0.3172, "step": 4240 }, { "epoch": 2.1054112195929173, "grad_norm": 0.39876607060432434, "learning_rate": 2.471697554912425e-06, "loss": 0.394, "step": 4241 }, { "epoch": 2.105907661757405, "grad_norm": 0.3918905258178711, "learning_rate": 2.4692054681302135e-06, "loss": 0.3507, "step": 4242 }, { "epoch": 2.1064041039218933, "grad_norm": 0.36577948927879333, "learning_rate": 2.466714226311513e-06, "loss": 0.3168, "step": 4243 }, { "epoch": 2.106900546086381, "grad_norm": 0.38744622468948364, "learning_rate": 2.4642238302880817e-06, "loss": 0.3502, "step": 4244 }, { "epoch": 2.1073969882508687, "grad_norm": 0.38511180877685547, "learning_rate": 2.461734280891394e-06, "loss": 0.2995, "step": 4245 }, { "epoch": 2.1078934304153565, "grad_norm": 0.420195072889328, "learning_rate": 2.4592455789526466e-06, "loss": 0.3754, "step": 4246 }, { "epoch": 2.1083898725798447, "grad_norm": 0.36475226283073425, "learning_rate": 2.4567577253027425e-06, "loss": 0.3097, "step": 4247 }, { "epoch": 2.1088863147443324, "grad_norm": 0.38184839487075806, "learning_rate": 2.4542707207723158e-06, "loss": 0.3002, "step": 4248 }, { "epoch": 2.10938275690882, "grad_norm": 0.40155771374702454, "learning_rate": 2.451784566191705e-06, "loss": 0.3009, "step": 4249 }, { "epoch": 2.109879199073308, "grad_norm": 0.43423011898994446, "learning_rate": 2.4492992623909706e-06, "loss": 0.33, "step": 4250 }, { "epoch": 2.1103756412377956, "grad_norm": 0.3954043388366699, "learning_rate": 2.4468148101998877e-06, "loss": 0.3414, "step": 4251 }, { "epoch": 2.110872083402284, "grad_norm": 0.39938825368881226, "learning_rate": 2.4443312104479487e-06, "loss": 0.3315, "step": 4252 }, { "epoch": 2.1113685255667716, "grad_norm": 0.43579673767089844, "learning_rate": 2.441848463964361e-06, "loss": 0.3315, "step": 4253 }, { "epoch": 2.1118649677312593, "grad_norm": 0.3889438211917877, "learning_rate": 2.4393665715780405e-06, "loss": 0.314, "step": 4254 }, { "epoch": 2.112361409895747, "grad_norm": 0.3800019323825836, "learning_rate": 2.436885534117632e-06, "loss": 0.3374, "step": 4255 }, { "epoch": 2.112857852060235, "grad_norm": 0.3459899425506592, "learning_rate": 2.4344053524114796e-06, "loss": 0.2924, "step": 4256 }, { "epoch": 2.113354294224723, "grad_norm": 0.40946128964424133, "learning_rate": 2.4319260272876533e-06, "loss": 0.4116, "step": 4257 }, { "epoch": 2.1138507363892107, "grad_norm": 0.38698601722717285, "learning_rate": 2.429447559573926e-06, "loss": 0.3073, "step": 4258 }, { "epoch": 2.1143471785536985, "grad_norm": 0.401935338973999, "learning_rate": 2.4269699500977987e-06, "loss": 0.3554, "step": 4259 }, { "epoch": 2.114843620718186, "grad_norm": 0.39371925592422485, "learning_rate": 2.424493199686472e-06, "loss": 0.3471, "step": 4260 }, { "epoch": 2.115340062882674, "grad_norm": 0.38042500615119934, "learning_rate": 2.4220173091668675e-06, "loss": 0.3168, "step": 4261 }, { "epoch": 2.115836505047162, "grad_norm": 0.3870638310909271, "learning_rate": 2.419542279365618e-06, "loss": 0.322, "step": 4262 }, { "epoch": 2.11633294721165, "grad_norm": 0.4166378974914551, "learning_rate": 2.4170681111090684e-06, "loss": 0.3378, "step": 4263 }, { "epoch": 2.1168293893761376, "grad_norm": 0.39915022253990173, "learning_rate": 2.414594805223278e-06, "loss": 0.3255, "step": 4264 }, { "epoch": 2.1173258315406254, "grad_norm": 0.3714321553707123, "learning_rate": 2.4121223625340134e-06, "loss": 0.3075, "step": 4265 }, { "epoch": 2.1178222737051136, "grad_norm": 0.3772881031036377, "learning_rate": 2.4096507838667564e-06, "loss": 0.2825, "step": 4266 }, { "epoch": 2.1183187158696013, "grad_norm": 0.365519255399704, "learning_rate": 2.407180070046702e-06, "loss": 0.2938, "step": 4267 }, { "epoch": 2.118815158034089, "grad_norm": 0.37315911054611206, "learning_rate": 2.404710221898752e-06, "loss": 0.3149, "step": 4268 }, { "epoch": 2.119311600198577, "grad_norm": 0.33934488892555237, "learning_rate": 2.4022412402475235e-06, "loss": 0.2647, "step": 4269 }, { "epoch": 2.1198080423630645, "grad_norm": 0.4067927896976471, "learning_rate": 2.3997731259173423e-06, "loss": 0.3446, "step": 4270 }, { "epoch": 2.1203044845275527, "grad_norm": 0.3753102719783783, "learning_rate": 2.3973058797322453e-06, "loss": 0.3524, "step": 4271 }, { "epoch": 2.1208009266920405, "grad_norm": 0.37209352850914, "learning_rate": 2.394839502515976e-06, "loss": 0.308, "step": 4272 }, { "epoch": 2.121297368856528, "grad_norm": 0.4106026291847229, "learning_rate": 2.3923739950919924e-06, "loss": 0.2938, "step": 4273 }, { "epoch": 2.121793811021016, "grad_norm": 0.38218072056770325, "learning_rate": 2.3899093582834605e-06, "loss": 0.3518, "step": 4274 }, { "epoch": 2.1222902531855037, "grad_norm": 0.41019052267074585, "learning_rate": 2.3874455929132557e-06, "loss": 0.335, "step": 4275 }, { "epoch": 2.122786695349992, "grad_norm": 0.40509238839149475, "learning_rate": 2.384982699803964e-06, "loss": 0.2995, "step": 4276 }, { "epoch": 2.1232831375144796, "grad_norm": 0.3641020357608795, "learning_rate": 2.382520679777873e-06, "loss": 0.2698, "step": 4277 }, { "epoch": 2.1237795796789674, "grad_norm": 0.39907771348953247, "learning_rate": 2.380059533656991e-06, "loss": 0.3331, "step": 4278 }, { "epoch": 2.124276021843455, "grad_norm": 0.4022120237350464, "learning_rate": 2.377599262263023e-06, "loss": 0.3587, "step": 4279 }, { "epoch": 2.1247724640079433, "grad_norm": 0.35258907079696655, "learning_rate": 2.3751398664173906e-06, "loss": 0.2582, "step": 4280 }, { "epoch": 2.125268906172431, "grad_norm": 0.40298402309417725, "learning_rate": 2.372681346941213e-06, "loss": 0.3669, "step": 4281 }, { "epoch": 2.1257653483369188, "grad_norm": 0.4208427369594574, "learning_rate": 2.370223704655331e-06, "loss": 0.35, "step": 4282 }, { "epoch": 2.1262617905014065, "grad_norm": 0.3932285010814667, "learning_rate": 2.3677669403802788e-06, "loss": 0.299, "step": 4283 }, { "epoch": 2.1267582326658943, "grad_norm": 0.41222190856933594, "learning_rate": 2.3653110549363036e-06, "loss": 0.3694, "step": 4284 }, { "epoch": 2.1272546748303824, "grad_norm": 0.3590465188026428, "learning_rate": 2.3628560491433637e-06, "loss": 0.2954, "step": 4285 }, { "epoch": 2.12775111699487, "grad_norm": 0.36048445105552673, "learning_rate": 2.3604019238211135e-06, "loss": 0.2962, "step": 4286 }, { "epoch": 2.128247559159358, "grad_norm": 0.4097200036048889, "learning_rate": 2.3579486797889222e-06, "loss": 0.3256, "step": 4287 }, { "epoch": 2.1287440013238457, "grad_norm": 0.4225088655948639, "learning_rate": 2.3554963178658564e-06, "loss": 0.3803, "step": 4288 }, { "epoch": 2.1292404434883334, "grad_norm": 0.3741515278816223, "learning_rate": 2.3530448388707e-06, "loss": 0.3729, "step": 4289 }, { "epoch": 2.1297368856528216, "grad_norm": 0.3746855854988098, "learning_rate": 2.3505942436219297e-06, "loss": 0.3167, "step": 4290 }, { "epoch": 2.1302333278173093, "grad_norm": 0.4066586494445801, "learning_rate": 2.348144532937735e-06, "loss": 0.3379, "step": 4291 }, { "epoch": 2.130729769981797, "grad_norm": 0.3897533416748047, "learning_rate": 2.345695707636007e-06, "loss": 0.3575, "step": 4292 }, { "epoch": 2.131226212146285, "grad_norm": 0.40129736065864563, "learning_rate": 2.3432477685343426e-06, "loss": 0.3383, "step": 4293 }, { "epoch": 2.1317226543107726, "grad_norm": 0.38598716259002686, "learning_rate": 2.3408007164500427e-06, "loss": 0.328, "step": 4294 }, { "epoch": 2.1322190964752608, "grad_norm": 0.3417589068412781, "learning_rate": 2.338354552200108e-06, "loss": 0.2463, "step": 4295 }, { "epoch": 2.1327155386397485, "grad_norm": 0.417341411113739, "learning_rate": 2.3359092766012517e-06, "loss": 0.3521, "step": 4296 }, { "epoch": 2.1332119808042362, "grad_norm": 0.3814786970615387, "learning_rate": 2.33346489046988e-06, "loss": 0.3206, "step": 4297 }, { "epoch": 2.133708422968724, "grad_norm": 0.3996693193912506, "learning_rate": 2.3310213946221094e-06, "loss": 0.3394, "step": 4298 }, { "epoch": 2.134204865133212, "grad_norm": 0.35766059160232544, "learning_rate": 2.3285787898737565e-06, "loss": 0.2824, "step": 4299 }, { "epoch": 2.1347013072977, "grad_norm": 0.3814798593521118, "learning_rate": 2.32613707704034e-06, "loss": 0.3357, "step": 4300 }, { "epoch": 2.1351977494621877, "grad_norm": 0.37001746892929077, "learning_rate": 2.3236962569370843e-06, "loss": 0.3342, "step": 4301 }, { "epoch": 2.1356941916266754, "grad_norm": 0.40707501769065857, "learning_rate": 2.3212563303789082e-06, "loss": 0.3487, "step": 4302 }, { "epoch": 2.136190633791163, "grad_norm": 0.3807818591594696, "learning_rate": 2.318817298180439e-06, "loss": 0.292, "step": 4303 }, { "epoch": 2.1366870759556513, "grad_norm": 0.40636658668518066, "learning_rate": 2.3163791611560036e-06, "loss": 0.3324, "step": 4304 }, { "epoch": 2.137183518120139, "grad_norm": 0.41596221923828125, "learning_rate": 2.3139419201196316e-06, "loss": 0.3054, "step": 4305 }, { "epoch": 2.137679960284627, "grad_norm": 0.39315691590309143, "learning_rate": 2.3115055758850476e-06, "loss": 0.2837, "step": 4306 }, { "epoch": 2.1381764024491146, "grad_norm": 0.3908999562263489, "learning_rate": 2.3090701292656808e-06, "loss": 0.3515, "step": 4307 }, { "epoch": 2.1386728446136027, "grad_norm": 0.35885950922966003, "learning_rate": 2.306635581074666e-06, "loss": 0.3275, "step": 4308 }, { "epoch": 2.1391692867780905, "grad_norm": 0.3721747398376465, "learning_rate": 2.304201932124827e-06, "loss": 0.3297, "step": 4309 }, { "epoch": 2.1396657289425782, "grad_norm": 0.3360307216644287, "learning_rate": 2.3017691832286953e-06, "loss": 0.2903, "step": 4310 }, { "epoch": 2.140162171107066, "grad_norm": 0.36935240030288696, "learning_rate": 2.2993373351984994e-06, "loss": 0.333, "step": 4311 }, { "epoch": 2.1406586132715537, "grad_norm": 0.3886999189853668, "learning_rate": 2.2969063888461697e-06, "loss": 0.3305, "step": 4312 }, { "epoch": 2.141155055436042, "grad_norm": 0.4137096107006073, "learning_rate": 2.294476344983328e-06, "loss": 0.3763, "step": 4313 }, { "epoch": 2.1416514976005296, "grad_norm": 0.3344143033027649, "learning_rate": 2.292047204421303e-06, "loss": 0.2926, "step": 4314 }, { "epoch": 2.1421479397650174, "grad_norm": 0.38026025891304016, "learning_rate": 2.2896189679711186e-06, "loss": 0.302, "step": 4315 }, { "epoch": 2.142644381929505, "grad_norm": 0.35069480538368225, "learning_rate": 2.2871916364434963e-06, "loss": 0.3292, "step": 4316 }, { "epoch": 2.143140824093993, "grad_norm": 0.437671035528183, "learning_rate": 2.284765210648859e-06, "loss": 0.3218, "step": 4317 }, { "epoch": 2.143637266258481, "grad_norm": 0.3844178020954132, "learning_rate": 2.282339691397318e-06, "loss": 0.3173, "step": 4318 }, { "epoch": 2.144133708422969, "grad_norm": 0.3652764856815338, "learning_rate": 2.279915079498696e-06, "loss": 0.318, "step": 4319 }, { "epoch": 2.1446301505874565, "grad_norm": 0.3918423652648926, "learning_rate": 2.277491375762499e-06, "loss": 0.3267, "step": 4320 }, { "epoch": 2.1451265927519443, "grad_norm": 0.3804995119571686, "learning_rate": 2.2750685809979378e-06, "loss": 0.2951, "step": 4321 }, { "epoch": 2.145623034916432, "grad_norm": 0.400028258562088, "learning_rate": 2.2726466960139176e-06, "loss": 0.3274, "step": 4322 }, { "epoch": 2.14611947708092, "grad_norm": 0.4025753140449524, "learning_rate": 2.270225721619041e-06, "loss": 0.3397, "step": 4323 }, { "epoch": 2.146615919245408, "grad_norm": 0.4333488345146179, "learning_rate": 2.2678056586216062e-06, "loss": 0.4088, "step": 4324 }, { "epoch": 2.1471123614098957, "grad_norm": 0.34473007917404175, "learning_rate": 2.2653865078296017e-06, "loss": 0.3115, "step": 4325 }, { "epoch": 2.1476088035743834, "grad_norm": 0.36701127886772156, "learning_rate": 2.2629682700507225e-06, "loss": 0.2995, "step": 4326 }, { "epoch": 2.1481052457388716, "grad_norm": 0.45168963074684143, "learning_rate": 2.2605509460923488e-06, "loss": 0.3062, "step": 4327 }, { "epoch": 2.1486016879033594, "grad_norm": 0.42057809233665466, "learning_rate": 2.258134536761561e-06, "loss": 0.3113, "step": 4328 }, { "epoch": 2.149098130067847, "grad_norm": 0.38402843475341797, "learning_rate": 2.2557190428651282e-06, "loss": 0.3133, "step": 4329 }, { "epoch": 2.149594572232335, "grad_norm": 0.3627549111843109, "learning_rate": 2.253304465209524e-06, "loss": 0.2802, "step": 4330 }, { "epoch": 2.1500910143968226, "grad_norm": 0.36417022347450256, "learning_rate": 2.250890804600909e-06, "loss": 0.3176, "step": 4331 }, { "epoch": 2.150587456561311, "grad_norm": 0.39130139350891113, "learning_rate": 2.2484780618451357e-06, "loss": 0.3023, "step": 4332 }, { "epoch": 2.1510838987257985, "grad_norm": 0.35462719202041626, "learning_rate": 2.2460662377477554e-06, "loss": 0.2861, "step": 4333 }, { "epoch": 2.1515803408902863, "grad_norm": 0.3611910939216614, "learning_rate": 2.243655333114011e-06, "loss": 0.3553, "step": 4334 }, { "epoch": 2.152076783054774, "grad_norm": 0.3948679566383362, "learning_rate": 2.2412453487488394e-06, "loss": 0.3695, "step": 4335 }, { "epoch": 2.1525732252192618, "grad_norm": 0.3762432336807251, "learning_rate": 2.2388362854568628e-06, "loss": 0.303, "step": 4336 }, { "epoch": 2.15306966738375, "grad_norm": 0.40793943405151367, "learning_rate": 2.236428144042411e-06, "loss": 0.3267, "step": 4337 }, { "epoch": 2.1535661095482377, "grad_norm": 0.33980828523635864, "learning_rate": 2.234020925309489e-06, "loss": 0.2893, "step": 4338 }, { "epoch": 2.1540625517127254, "grad_norm": 0.43088969588279724, "learning_rate": 2.2316146300618057e-06, "loss": 0.3218, "step": 4339 }, { "epoch": 2.154558993877213, "grad_norm": 0.3634878993034363, "learning_rate": 2.2292092591027565e-06, "loss": 0.2961, "step": 4340 }, { "epoch": 2.1550554360417014, "grad_norm": 0.4088626801967621, "learning_rate": 2.2268048132354303e-06, "loss": 0.3674, "step": 4341 }, { "epoch": 2.155551878206189, "grad_norm": 0.3625139594078064, "learning_rate": 2.224401293262607e-06, "loss": 0.2626, "step": 4342 }, { "epoch": 2.156048320370677, "grad_norm": 0.3986189365386963, "learning_rate": 2.2219986999867537e-06, "loss": 0.3355, "step": 4343 }, { "epoch": 2.1565447625351646, "grad_norm": 0.4214107096195221, "learning_rate": 2.2195970342100328e-06, "loss": 0.3737, "step": 4344 }, { "epoch": 2.1570412046996523, "grad_norm": 0.31594741344451904, "learning_rate": 2.217196296734294e-06, "loss": 0.2875, "step": 4345 }, { "epoch": 2.1575376468641405, "grad_norm": 0.3633493185043335, "learning_rate": 2.21479648836108e-06, "loss": 0.3287, "step": 4346 }, { "epoch": 2.1580340890286283, "grad_norm": 0.36367055773735046, "learning_rate": 2.212397609891623e-06, "loss": 0.3119, "step": 4347 }, { "epoch": 2.158530531193116, "grad_norm": 0.4172969162464142, "learning_rate": 2.209999662126837e-06, "loss": 0.3003, "step": 4348 }, { "epoch": 2.1590269733576037, "grad_norm": 0.3666423559188843, "learning_rate": 2.20760264586734e-06, "loss": 0.3308, "step": 4349 }, { "epoch": 2.1595234155220915, "grad_norm": 0.4091246426105499, "learning_rate": 2.2052065619134243e-06, "loss": 0.3165, "step": 4350 }, { "epoch": 2.1600198576865797, "grad_norm": 0.3762466311454773, "learning_rate": 2.2028114110650796e-06, "loss": 0.3521, "step": 4351 }, { "epoch": 2.1605162998510674, "grad_norm": 0.3844658136367798, "learning_rate": 2.200417194121981e-06, "loss": 0.3439, "step": 4352 }, { "epoch": 2.161012742015555, "grad_norm": 0.36619052290916443, "learning_rate": 2.198023911883495e-06, "loss": 0.3188, "step": 4353 }, { "epoch": 2.161509184180043, "grad_norm": 0.3685336410999298, "learning_rate": 2.1956315651486694e-06, "loss": 0.3268, "step": 4354 }, { "epoch": 2.1620056263445306, "grad_norm": 0.3505057990550995, "learning_rate": 2.1932401547162436e-06, "loss": 0.2972, "step": 4355 }, { "epoch": 2.162502068509019, "grad_norm": 0.41180238127708435, "learning_rate": 2.1908496813846503e-06, "loss": 0.3383, "step": 4356 }, { "epoch": 2.1629985106735066, "grad_norm": 0.3739542067050934, "learning_rate": 2.188460145951998e-06, "loss": 0.336, "step": 4357 }, { "epoch": 2.1634949528379943, "grad_norm": 0.37422841787338257, "learning_rate": 2.1860715492160922e-06, "loss": 0.3683, "step": 4358 }, { "epoch": 2.163991395002482, "grad_norm": 0.3636173605918884, "learning_rate": 2.1836838919744136e-06, "loss": 0.2915, "step": 4359 }, { "epoch": 2.1644878371669702, "grad_norm": 0.3872678875923157, "learning_rate": 2.1812971750241436e-06, "loss": 0.3692, "step": 4360 }, { "epoch": 2.164984279331458, "grad_norm": 0.3736310601234436, "learning_rate": 2.178911399162137e-06, "loss": 0.32, "step": 4361 }, { "epoch": 2.1654807214959457, "grad_norm": 0.3740313947200775, "learning_rate": 2.1765265651849415e-06, "loss": 0.2889, "step": 4362 }, { "epoch": 2.1659771636604335, "grad_norm": 0.3950338065624237, "learning_rate": 2.1741426738887885e-06, "loss": 0.3144, "step": 4363 }, { "epoch": 2.166473605824921, "grad_norm": 0.4562913775444031, "learning_rate": 2.1717597260695934e-06, "loss": 0.3394, "step": 4364 }, { "epoch": 2.1669700479894094, "grad_norm": 0.3386235237121582, "learning_rate": 2.1693777225229605e-06, "loss": 0.2919, "step": 4365 }, { "epoch": 2.167466490153897, "grad_norm": 0.4190637171268463, "learning_rate": 2.16699666404417e-06, "loss": 0.3659, "step": 4366 }, { "epoch": 2.167962932318385, "grad_norm": 0.35985010862350464, "learning_rate": 2.1646165514282014e-06, "loss": 0.2659, "step": 4367 }, { "epoch": 2.1684593744828726, "grad_norm": 0.3711123466491699, "learning_rate": 2.162237385469702e-06, "loss": 0.2965, "step": 4368 }, { "epoch": 2.168955816647361, "grad_norm": 0.397649347782135, "learning_rate": 2.1598591669630135e-06, "loss": 0.3728, "step": 4369 }, { "epoch": 2.1694522588118486, "grad_norm": 0.44611990451812744, "learning_rate": 2.1574818967021595e-06, "loss": 0.3627, "step": 4370 }, { "epoch": 2.1699487009763363, "grad_norm": 0.38603487610816956, "learning_rate": 2.1551055754808436e-06, "loss": 0.311, "step": 4371 }, { "epoch": 2.170445143140824, "grad_norm": 0.41378310322761536, "learning_rate": 2.1527302040924588e-06, "loss": 0.2907, "step": 4372 }, { "epoch": 2.170941585305312, "grad_norm": 0.3911939561367035, "learning_rate": 2.1503557833300714e-06, "loss": 0.3374, "step": 4373 }, { "epoch": 2.1714380274698, "grad_norm": 0.3836006820201874, "learning_rate": 2.14798231398644e-06, "loss": 0.2886, "step": 4374 }, { "epoch": 2.1719344696342877, "grad_norm": 0.32744458317756653, "learning_rate": 2.1456097968539996e-06, "loss": 0.2708, "step": 4375 }, { "epoch": 2.1724309117987755, "grad_norm": 0.4781424403190613, "learning_rate": 2.1432382327248724e-06, "loss": 0.3705, "step": 4376 }, { "epoch": 2.172927353963263, "grad_norm": 0.38338544964790344, "learning_rate": 2.140867622390853e-06, "loss": 0.2906, "step": 4377 }, { "epoch": 2.173423796127751, "grad_norm": 0.3872446119785309, "learning_rate": 2.1384979666434295e-06, "loss": 0.3297, "step": 4378 }, { "epoch": 2.173920238292239, "grad_norm": 0.3785199820995331, "learning_rate": 2.1361292662737655e-06, "loss": 0.3415, "step": 4379 }, { "epoch": 2.174416680456727, "grad_norm": 0.3735784888267517, "learning_rate": 2.1337615220727015e-06, "loss": 0.3402, "step": 4380 }, { "epoch": 2.1749131226212146, "grad_norm": 0.37964075803756714, "learning_rate": 2.1313947348307655e-06, "loss": 0.3085, "step": 4381 }, { "epoch": 2.1754095647857024, "grad_norm": 0.41695740818977356, "learning_rate": 2.1290289053381635e-06, "loss": 0.3387, "step": 4382 }, { "epoch": 2.17590600695019, "grad_norm": 0.40332484245300293, "learning_rate": 2.1266640343847826e-06, "loss": 0.3533, "step": 4383 }, { "epoch": 2.1764024491146783, "grad_norm": 0.3457837700843811, "learning_rate": 2.124300122760186e-06, "loss": 0.2593, "step": 4384 }, { "epoch": 2.176898891279166, "grad_norm": 0.3641296625137329, "learning_rate": 2.1219371712536214e-06, "loss": 0.3133, "step": 4385 }, { "epoch": 2.1773953334436538, "grad_norm": 0.4109293520450592, "learning_rate": 2.119575180654014e-06, "loss": 0.3575, "step": 4386 }, { "epoch": 2.1778917756081415, "grad_norm": 0.35243141651153564, "learning_rate": 2.1172141517499676e-06, "loss": 0.2877, "step": 4387 }, { "epoch": 2.1783882177726293, "grad_norm": 0.37990742921829224, "learning_rate": 2.114854085329769e-06, "loss": 0.312, "step": 4388 }, { "epoch": 2.1788846599371174, "grad_norm": 0.36988094449043274, "learning_rate": 2.112494982181373e-06, "loss": 0.3446, "step": 4389 }, { "epoch": 2.179381102101605, "grad_norm": 0.3817630410194397, "learning_rate": 2.110136843092428e-06, "loss": 0.3249, "step": 4390 }, { "epoch": 2.179877544266093, "grad_norm": 0.37766802310943604, "learning_rate": 2.1077796688502478e-06, "loss": 0.2906, "step": 4391 }, { "epoch": 2.1803739864305807, "grad_norm": 0.36043429374694824, "learning_rate": 2.1054234602418294e-06, "loss": 0.3248, "step": 4392 }, { "epoch": 2.180870428595069, "grad_norm": 0.39740249514579773, "learning_rate": 2.1030682180538475e-06, "loss": 0.3586, "step": 4393 }, { "epoch": 2.1813668707595566, "grad_norm": 0.3384614884853363, "learning_rate": 2.100713943072653e-06, "loss": 0.3307, "step": 4394 }, { "epoch": 2.1818633129240443, "grad_norm": 0.35791757702827454, "learning_rate": 2.0983606360842773e-06, "loss": 0.3197, "step": 4395 }, { "epoch": 2.182359755088532, "grad_norm": 0.4310721158981323, "learning_rate": 2.096008297874419e-06, "loss": 0.3487, "step": 4396 }, { "epoch": 2.18285619725302, "grad_norm": 0.3586428761482239, "learning_rate": 2.0936569292284675e-06, "loss": 0.2607, "step": 4397 }, { "epoch": 2.183352639417508, "grad_norm": 0.391233891248703, "learning_rate": 2.091306530931475e-06, "loss": 0.2814, "step": 4398 }, { "epoch": 2.1838490815819958, "grad_norm": 0.36719128489494324, "learning_rate": 2.0889571037681807e-06, "loss": 0.3059, "step": 4399 }, { "epoch": 2.1843455237464835, "grad_norm": 0.39082464575767517, "learning_rate": 2.0866086485229875e-06, "loss": 0.306, "step": 4400 }, { "epoch": 2.1848419659109712, "grad_norm": 0.4146367907524109, "learning_rate": 2.0842611659799868e-06, "loss": 0.3291, "step": 4401 }, { "epoch": 2.1853384080754594, "grad_norm": 0.381578654050827, "learning_rate": 2.081914656922939e-06, "loss": 0.3031, "step": 4402 }, { "epoch": 2.185834850239947, "grad_norm": 0.36025017499923706, "learning_rate": 2.0795691221352766e-06, "loss": 0.3261, "step": 4403 }, { "epoch": 2.186331292404435, "grad_norm": 0.3764011561870575, "learning_rate": 2.0772245624001114e-06, "loss": 0.3462, "step": 4404 }, { "epoch": 2.1868277345689227, "grad_norm": 0.34424421191215515, "learning_rate": 2.0748809785002285e-06, "loss": 0.2933, "step": 4405 }, { "epoch": 2.1873241767334104, "grad_norm": 0.385343998670578, "learning_rate": 2.072538371218088e-06, "loss": 0.3271, "step": 4406 }, { "epoch": 2.1878206188978986, "grad_norm": 0.36023083329200745, "learning_rate": 2.0701967413358177e-06, "loss": 0.3417, "step": 4407 }, { "epoch": 2.1883170610623863, "grad_norm": 0.3792129158973694, "learning_rate": 2.067856089635231e-06, "loss": 0.3443, "step": 4408 }, { "epoch": 2.188813503226874, "grad_norm": 0.38941213488578796, "learning_rate": 2.065516416897804e-06, "loss": 0.3584, "step": 4409 }, { "epoch": 2.189309945391362, "grad_norm": 0.40294623374938965, "learning_rate": 2.06317772390469e-06, "loss": 0.3292, "step": 4410 }, { "epoch": 2.1898063875558496, "grad_norm": 0.3661515712738037, "learning_rate": 2.060840011436715e-06, "loss": 0.2816, "step": 4411 }, { "epoch": 2.1903028297203377, "grad_norm": 0.3763701021671295, "learning_rate": 2.058503280274379e-06, "loss": 0.3068, "step": 4412 }, { "epoch": 2.1907992718848255, "grad_norm": 0.38357558846473694, "learning_rate": 2.0561675311978533e-06, "loss": 0.3143, "step": 4413 }, { "epoch": 2.1912957140493132, "grad_norm": 0.38946765661239624, "learning_rate": 2.0538327649869793e-06, "loss": 0.3344, "step": 4414 }, { "epoch": 2.191792156213801, "grad_norm": 0.3934243619441986, "learning_rate": 2.0514989824212723e-06, "loss": 0.3313, "step": 4415 }, { "epoch": 2.1922885983782887, "grad_norm": 0.357016921043396, "learning_rate": 2.049166184279919e-06, "loss": 0.3321, "step": 4416 }, { "epoch": 2.192785040542777, "grad_norm": 0.3959461450576782, "learning_rate": 2.0468343713417773e-06, "loss": 0.3404, "step": 4417 }, { "epoch": 2.1932814827072646, "grad_norm": 0.37850990891456604, "learning_rate": 2.0445035443853765e-06, "loss": 0.3672, "step": 4418 }, { "epoch": 2.1937779248717524, "grad_norm": 0.3532082736492157, "learning_rate": 2.0421737041889167e-06, "loss": 0.3126, "step": 4419 }, { "epoch": 2.19427436703624, "grad_norm": 0.3470383882522583, "learning_rate": 2.0398448515302694e-06, "loss": 0.2464, "step": 4420 }, { "epoch": 2.1947708092007283, "grad_norm": 0.4007759094238281, "learning_rate": 2.0375169871869722e-06, "loss": 0.3491, "step": 4421 }, { "epoch": 2.195267251365216, "grad_norm": 0.39371463656425476, "learning_rate": 2.0351901119362368e-06, "loss": 0.3652, "step": 4422 }, { "epoch": 2.195763693529704, "grad_norm": 0.35789990425109863, "learning_rate": 2.0328642265549435e-06, "loss": 0.3116, "step": 4423 }, { "epoch": 2.1962601356941915, "grad_norm": 0.38524532318115234, "learning_rate": 2.0305393318196432e-06, "loss": 0.3574, "step": 4424 }, { "epoch": 2.1967565778586793, "grad_norm": 0.3712342083454132, "learning_rate": 2.0282154285065566e-06, "loss": 0.3199, "step": 4425 }, { "epoch": 2.1972530200231675, "grad_norm": 0.3695160746574402, "learning_rate": 2.0258925173915658e-06, "loss": 0.3059, "step": 4426 }, { "epoch": 2.197749462187655, "grad_norm": 0.4097203016281128, "learning_rate": 2.0235705992502353e-06, "loss": 0.3931, "step": 4427 }, { "epoch": 2.198245904352143, "grad_norm": 0.3635811507701874, "learning_rate": 2.021249674857785e-06, "loss": 0.2929, "step": 4428 }, { "epoch": 2.1987423465166307, "grad_norm": 0.39623647928237915, "learning_rate": 2.0189297449891123e-06, "loss": 0.3264, "step": 4429 }, { "epoch": 2.199238788681119, "grad_norm": 0.3845948874950409, "learning_rate": 2.016610810418773e-06, "loss": 0.3465, "step": 4430 }, { "epoch": 2.1997352308456066, "grad_norm": 0.34361034631729126, "learning_rate": 2.0142928719210035e-06, "loss": 0.2847, "step": 4431 }, { "epoch": 2.2002316730100944, "grad_norm": 0.36588239669799805, "learning_rate": 2.011975930269696e-06, "loss": 0.3133, "step": 4432 }, { "epoch": 2.200728115174582, "grad_norm": 0.3939180374145508, "learning_rate": 2.0096599862384147e-06, "loss": 0.3115, "step": 4433 }, { "epoch": 2.20122455733907, "grad_norm": 0.3739233911037445, "learning_rate": 2.0073450406003907e-06, "loss": 0.3437, "step": 4434 }, { "epoch": 2.201720999503558, "grad_norm": 0.3557473421096802, "learning_rate": 2.0050310941285226e-06, "loss": 0.3412, "step": 4435 }, { "epoch": 2.202217441668046, "grad_norm": 0.38472115993499756, "learning_rate": 2.002718147595375e-06, "loss": 0.3117, "step": 4436 }, { "epoch": 2.2027138838325335, "grad_norm": 0.3739203214645386, "learning_rate": 2.0004062017731724e-06, "loss": 0.3217, "step": 4437 }, { "epoch": 2.2032103259970213, "grad_norm": 0.3705288767814636, "learning_rate": 1.9980952574338185e-06, "loss": 0.3215, "step": 4438 }, { "epoch": 2.203706768161509, "grad_norm": 0.345936119556427, "learning_rate": 1.9957853153488694e-06, "loss": 0.3145, "step": 4439 }, { "epoch": 2.204203210325997, "grad_norm": 0.3763235807418823, "learning_rate": 1.9934763762895526e-06, "loss": 0.3142, "step": 4440 }, { "epoch": 2.204699652490485, "grad_norm": 0.34680598974227905, "learning_rate": 1.991168441026762e-06, "loss": 0.2689, "step": 4441 }, { "epoch": 2.2051960946549727, "grad_norm": 0.35914185643196106, "learning_rate": 1.9888615103310527e-06, "loss": 0.3555, "step": 4442 }, { "epoch": 2.2056925368194604, "grad_norm": 0.369113564491272, "learning_rate": 1.9865555849726488e-06, "loss": 0.3464, "step": 4443 }, { "epoch": 2.206188978983948, "grad_norm": 0.4121437072753906, "learning_rate": 1.9842506657214327e-06, "loss": 0.3734, "step": 4444 }, { "epoch": 2.2066854211484364, "grad_norm": 0.3922717571258545, "learning_rate": 1.9819467533469554e-06, "loss": 0.291, "step": 4445 }, { "epoch": 2.207181863312924, "grad_norm": 0.36978310346603394, "learning_rate": 1.979643848618431e-06, "loss": 0.347, "step": 4446 }, { "epoch": 2.207678305477412, "grad_norm": 0.40362751483917236, "learning_rate": 1.977341952304739e-06, "loss": 0.3781, "step": 4447 }, { "epoch": 2.2081747476418996, "grad_norm": 0.3851894736289978, "learning_rate": 1.9750410651744138e-06, "loss": 0.2757, "step": 4448 }, { "epoch": 2.2086711898063873, "grad_norm": 0.3742678761482239, "learning_rate": 1.9727411879956654e-06, "loss": 0.3312, "step": 4449 }, { "epoch": 2.2091676319708755, "grad_norm": 0.3758915066719055, "learning_rate": 1.9704423215363594e-06, "loss": 0.3275, "step": 4450 }, { "epoch": 2.2096640741353633, "grad_norm": 0.3799983859062195, "learning_rate": 1.968144466564022e-06, "loss": 0.3331, "step": 4451 }, { "epoch": 2.210160516299851, "grad_norm": 0.3617784380912781, "learning_rate": 1.9658476238458458e-06, "loss": 0.3107, "step": 4452 }, { "epoch": 2.2106569584643387, "grad_norm": 0.3653107285499573, "learning_rate": 1.9635517941486843e-06, "loss": 0.3273, "step": 4453 }, { "epoch": 2.211153400628827, "grad_norm": 0.37012144923210144, "learning_rate": 1.961256978239054e-06, "loss": 0.3071, "step": 4454 }, { "epoch": 2.2116498427933147, "grad_norm": 0.43612274527549744, "learning_rate": 1.9589631768831293e-06, "loss": 0.3786, "step": 4455 }, { "epoch": 2.2121462849578024, "grad_norm": 0.3284803628921509, "learning_rate": 1.956670390846748e-06, "loss": 0.2881, "step": 4456 }, { "epoch": 2.21264272712229, "grad_norm": 0.3737391233444214, "learning_rate": 1.9543786208954106e-06, "loss": 0.3338, "step": 4457 }, { "epoch": 2.213139169286778, "grad_norm": 0.39369767904281616, "learning_rate": 1.952087867794277e-06, "loss": 0.3132, "step": 4458 }, { "epoch": 2.213635611451266, "grad_norm": 0.37491488456726074, "learning_rate": 1.949798132308167e-06, "loss": 0.2886, "step": 4459 }, { "epoch": 2.214132053615754, "grad_norm": 0.36666765809059143, "learning_rate": 1.947509415201558e-06, "loss": 0.3272, "step": 4460 }, { "epoch": 2.2146284957802416, "grad_norm": 0.37733033299446106, "learning_rate": 1.945221717238597e-06, "loss": 0.3536, "step": 4461 }, { "epoch": 2.2151249379447293, "grad_norm": 0.35611051321029663, "learning_rate": 1.942935039183078e-06, "loss": 0.3362, "step": 4462 }, { "epoch": 2.2156213801092175, "grad_norm": 0.36913546919822693, "learning_rate": 1.9406493817984632e-06, "loss": 0.3105, "step": 4463 }, { "epoch": 2.2161178222737052, "grad_norm": 0.3739514946937561, "learning_rate": 1.9383647458478718e-06, "loss": 0.3031, "step": 4464 }, { "epoch": 2.216614264438193, "grad_norm": 0.38946470618247986, "learning_rate": 1.9360811320940805e-06, "loss": 0.3468, "step": 4465 }, { "epoch": 2.2171107066026807, "grad_norm": 0.4119459390640259, "learning_rate": 1.933798541299528e-06, "loss": 0.3905, "step": 4466 }, { "epoch": 2.2176071487671685, "grad_norm": 0.37756961584091187, "learning_rate": 1.9315169742263048e-06, "loss": 0.3003, "step": 4467 }, { "epoch": 2.2181035909316567, "grad_norm": 0.39656639099121094, "learning_rate": 1.9292364316361707e-06, "loss": 0.2719, "step": 4468 }, { "epoch": 2.2186000330961444, "grad_norm": 0.42108115553855896, "learning_rate": 1.9269569142905316e-06, "loss": 0.3589, "step": 4469 }, { "epoch": 2.219096475260632, "grad_norm": 0.3882592022418976, "learning_rate": 1.9246784229504593e-06, "loss": 0.3337, "step": 4470 }, { "epoch": 2.21959291742512, "grad_norm": 0.3791952431201935, "learning_rate": 1.9224009583766763e-06, "loss": 0.3251, "step": 4471 }, { "epoch": 2.2200893595896076, "grad_norm": 0.4087238013744354, "learning_rate": 1.92012452132957e-06, "loss": 0.3453, "step": 4472 }, { "epoch": 2.220585801754096, "grad_norm": 0.37038710713386536, "learning_rate": 1.917849112569181e-06, "loss": 0.2943, "step": 4473 }, { "epoch": 2.2210822439185836, "grad_norm": 0.40230879187583923, "learning_rate": 1.9155747328552027e-06, "loss": 0.3787, "step": 4474 }, { "epoch": 2.2215786860830713, "grad_norm": 0.373832643032074, "learning_rate": 1.913301382946994e-06, "loss": 0.3305, "step": 4475 }, { "epoch": 2.222075128247559, "grad_norm": 0.3712207078933716, "learning_rate": 1.91102906360356e-06, "loss": 0.3094, "step": 4476 }, { "epoch": 2.222571570412047, "grad_norm": 0.3865548074245453, "learning_rate": 1.9087577755835694e-06, "loss": 0.3482, "step": 4477 }, { "epoch": 2.223068012576535, "grad_norm": 0.3849119544029236, "learning_rate": 1.9064875196453392e-06, "loss": 0.3651, "step": 4478 }, { "epoch": 2.2235644547410227, "grad_norm": 0.42979681491851807, "learning_rate": 1.9042182965468525e-06, "loss": 0.3426, "step": 4479 }, { "epoch": 2.2240608969055105, "grad_norm": 0.39447444677352905, "learning_rate": 1.9019501070457363e-06, "loss": 0.3542, "step": 4480 }, { "epoch": 2.224557339069998, "grad_norm": 0.36101892590522766, "learning_rate": 1.8996829518992793e-06, "loss": 0.3242, "step": 4481 }, { "epoch": 2.2250537812344864, "grad_norm": 0.3933153450489044, "learning_rate": 1.8974168318644221e-06, "loss": 0.2879, "step": 4482 }, { "epoch": 2.225550223398974, "grad_norm": 0.3756812810897827, "learning_rate": 1.8951517476977615e-06, "loss": 0.3415, "step": 4483 }, { "epoch": 2.226046665563462, "grad_norm": 0.38400548696517944, "learning_rate": 1.892887700155549e-06, "loss": 0.3428, "step": 4484 }, { "epoch": 2.2265431077279496, "grad_norm": 0.39556455612182617, "learning_rate": 1.8906246899936853e-06, "loss": 0.2805, "step": 4485 }, { "epoch": 2.2270395498924374, "grad_norm": 0.3771318793296814, "learning_rate": 1.8883627179677287e-06, "loss": 0.3058, "step": 4486 }, { "epoch": 2.2275359920569255, "grad_norm": 0.4342540204524994, "learning_rate": 1.8861017848328917e-06, "loss": 0.3348, "step": 4487 }, { "epoch": 2.2280324342214133, "grad_norm": 0.36953070759773254, "learning_rate": 1.8838418913440376e-06, "loss": 0.3156, "step": 4488 }, { "epoch": 2.228528876385901, "grad_norm": 0.41329893469810486, "learning_rate": 1.8815830382556832e-06, "loss": 0.378, "step": 4489 }, { "epoch": 2.2290253185503888, "grad_norm": 0.39915451407432556, "learning_rate": 1.8793252263219985e-06, "loss": 0.3247, "step": 4490 }, { "epoch": 2.2295217607148765, "grad_norm": 0.37665072083473206, "learning_rate": 1.8770684562968079e-06, "loss": 0.3007, "step": 4491 }, { "epoch": 2.2300182028793647, "grad_norm": 0.39502575993537903, "learning_rate": 1.8748127289335805e-06, "loss": 0.313, "step": 4492 }, { "epoch": 2.2305146450438524, "grad_norm": 0.3596193194389343, "learning_rate": 1.8725580449854453e-06, "loss": 0.3139, "step": 4493 }, { "epoch": 2.23101108720834, "grad_norm": 0.36289265751838684, "learning_rate": 1.87030440520518e-06, "loss": 0.3433, "step": 4494 }, { "epoch": 2.231507529372828, "grad_norm": 0.3807174861431122, "learning_rate": 1.8680518103452134e-06, "loss": 0.2943, "step": 4495 }, { "epoch": 2.232003971537316, "grad_norm": 0.38839998841285706, "learning_rate": 1.865800261157627e-06, "loss": 0.3534, "step": 4496 }, { "epoch": 2.232500413701804, "grad_norm": 0.329519122838974, "learning_rate": 1.863549758394147e-06, "loss": 0.2624, "step": 4497 }, { "epoch": 2.2329968558662916, "grad_norm": 0.36832624673843384, "learning_rate": 1.8613003028061627e-06, "loss": 0.2727, "step": 4498 }, { "epoch": 2.2334932980307793, "grad_norm": 0.3937130272388458, "learning_rate": 1.8590518951447001e-06, "loss": 0.3795, "step": 4499 }, { "epoch": 2.233989740195267, "grad_norm": 0.37507885694503784, "learning_rate": 1.8568045361604453e-06, "loss": 0.2732, "step": 4500 }, { "epoch": 2.2344861823597553, "grad_norm": 0.3633444309234619, "learning_rate": 1.8545582266037254e-06, "loss": 0.3666, "step": 4501 }, { "epoch": 2.234982624524243, "grad_norm": 0.34879353642463684, "learning_rate": 1.8523129672245283e-06, "loss": 0.3412, "step": 4502 }, { "epoch": 2.2354790666887308, "grad_norm": 0.3816930651664734, "learning_rate": 1.8500687587724803e-06, "loss": 0.3415, "step": 4503 }, { "epoch": 2.2359755088532185, "grad_norm": 0.3784838020801544, "learning_rate": 1.8478256019968637e-06, "loss": 0.2987, "step": 4504 }, { "epoch": 2.2364719510177062, "grad_norm": 0.3758913278579712, "learning_rate": 1.8455834976466069e-06, "loss": 0.3187, "step": 4505 }, { "epoch": 2.2369683931821944, "grad_norm": 0.3597194254398346, "learning_rate": 1.8433424464702882e-06, "loss": 0.313, "step": 4506 }, { "epoch": 2.237464835346682, "grad_norm": 0.36894872784614563, "learning_rate": 1.841102449216135e-06, "loss": 0.3209, "step": 4507 }, { "epoch": 2.23796127751117, "grad_norm": 0.3546072840690613, "learning_rate": 1.8388635066320164e-06, "loss": 0.3386, "step": 4508 }, { "epoch": 2.2384577196756577, "grad_norm": 0.3630739450454712, "learning_rate": 1.8366256194654613e-06, "loss": 0.3244, "step": 4509 }, { "epoch": 2.2389541618401454, "grad_norm": 0.3808775842189789, "learning_rate": 1.8343887884636353e-06, "loss": 0.3282, "step": 4510 }, { "epoch": 2.2394506040046336, "grad_norm": 0.3566681742668152, "learning_rate": 1.8321530143733552e-06, "loss": 0.3146, "step": 4511 }, { "epoch": 2.2399470461691213, "grad_norm": 0.38915231823921204, "learning_rate": 1.8299182979410867e-06, "loss": 0.359, "step": 4512 }, { "epoch": 2.240443488333609, "grad_norm": 0.3679570257663727, "learning_rate": 1.8276846399129405e-06, "loss": 0.3048, "step": 4513 }, { "epoch": 2.240939930498097, "grad_norm": 0.3711584210395813, "learning_rate": 1.825452041034676e-06, "loss": 0.3041, "step": 4514 }, { "epoch": 2.241436372662585, "grad_norm": 0.3646060526371002, "learning_rate": 1.8232205020516925e-06, "loss": 0.3243, "step": 4515 }, { "epoch": 2.2419328148270727, "grad_norm": 0.4278898537158966, "learning_rate": 1.8209900237090461e-06, "loss": 0.3309, "step": 4516 }, { "epoch": 2.2424292569915605, "grad_norm": 0.3707757294178009, "learning_rate": 1.8187606067514284e-06, "loss": 0.2742, "step": 4517 }, { "epoch": 2.2429256991560482, "grad_norm": 0.40065881609916687, "learning_rate": 1.8165322519231832e-06, "loss": 0.3401, "step": 4518 }, { "epoch": 2.243422141320536, "grad_norm": 0.4020393192768097, "learning_rate": 1.8143049599682972e-06, "loss": 0.3576, "step": 4519 }, { "epoch": 2.243918583485024, "grad_norm": 0.3835991621017456, "learning_rate": 1.8120787316304028e-06, "loss": 0.3047, "step": 4520 }, { "epoch": 2.244415025649512, "grad_norm": 0.4017285704612732, "learning_rate": 1.8098535676527785e-06, "loss": 0.3122, "step": 4521 }, { "epoch": 2.2449114678139996, "grad_norm": 0.4190540015697479, "learning_rate": 1.8076294687783424e-06, "loss": 0.3531, "step": 4522 }, { "epoch": 2.2454079099784874, "grad_norm": 0.379131942987442, "learning_rate": 1.8054064357496636e-06, "loss": 0.2904, "step": 4523 }, { "epoch": 2.2459043521429756, "grad_norm": 0.3977206349372864, "learning_rate": 1.8031844693089513e-06, "loss": 0.3098, "step": 4524 }, { "epoch": 2.2464007943074633, "grad_norm": 0.3845995366573334, "learning_rate": 1.8009635701980615e-06, "loss": 0.2936, "step": 4525 }, { "epoch": 2.246897236471951, "grad_norm": 0.39432066679000854, "learning_rate": 1.7987437391584894e-06, "loss": 0.3787, "step": 4526 }, { "epoch": 2.247393678636439, "grad_norm": 0.3611743748188019, "learning_rate": 1.7965249769313776e-06, "loss": 0.3067, "step": 4527 }, { "epoch": 2.2478901208009265, "grad_norm": 0.35411736369132996, "learning_rate": 1.79430728425751e-06, "loss": 0.3288, "step": 4528 }, { "epoch": 2.2483865629654147, "grad_norm": 0.3683781623840332, "learning_rate": 1.7920906618773142e-06, "loss": 0.3334, "step": 4529 }, { "epoch": 2.2488830051299025, "grad_norm": 0.3656153082847595, "learning_rate": 1.7898751105308605e-06, "loss": 0.3311, "step": 4530 }, { "epoch": 2.24937944729439, "grad_norm": 0.37445440888404846, "learning_rate": 1.7876606309578608e-06, "loss": 0.3327, "step": 4531 }, { "epoch": 2.249875889458878, "grad_norm": 0.3813492953777313, "learning_rate": 1.7854472238976717e-06, "loss": 0.3103, "step": 4532 }, { "epoch": 2.2503723316233657, "grad_norm": 0.33977389335632324, "learning_rate": 1.7832348900892864e-06, "loss": 0.3045, "step": 4533 }, { "epoch": 2.250868773787854, "grad_norm": 0.3862466812133789, "learning_rate": 1.781023630271344e-06, "loss": 0.3493, "step": 4534 }, { "epoch": 2.2513652159523416, "grad_norm": 0.36358505487442017, "learning_rate": 1.7788134451821248e-06, "loss": 0.2896, "step": 4535 }, { "epoch": 2.2518616581168294, "grad_norm": 0.3775787949562073, "learning_rate": 1.7766043355595498e-06, "loss": 0.3243, "step": 4536 }, { "epoch": 2.252358100281317, "grad_norm": 0.3375479578971863, "learning_rate": 1.774396302141181e-06, "loss": 0.2922, "step": 4537 }, { "epoch": 2.252854542445805, "grad_norm": 0.40333878993988037, "learning_rate": 1.7721893456642165e-06, "loss": 0.3423, "step": 4538 }, { "epoch": 2.253350984610293, "grad_norm": 0.3716624975204468, "learning_rate": 1.7699834668655065e-06, "loss": 0.3101, "step": 4539 }, { "epoch": 2.253847426774781, "grad_norm": 0.40209704637527466, "learning_rate": 1.7677786664815278e-06, "loss": 0.3095, "step": 4540 }, { "epoch": 2.2543438689392685, "grad_norm": 0.4359849989414215, "learning_rate": 1.7655749452484067e-06, "loss": 0.3179, "step": 4541 }, { "epoch": 2.2548403111037563, "grad_norm": 0.3930285573005676, "learning_rate": 1.7633723039019018e-06, "loss": 0.313, "step": 4542 }, { "epoch": 2.255336753268244, "grad_norm": 0.37740358710289, "learning_rate": 1.7611707431774193e-06, "loss": 0.3403, "step": 4543 }, { "epoch": 2.255833195432732, "grad_norm": 0.35412538051605225, "learning_rate": 1.758970263810001e-06, "loss": 0.3235, "step": 4544 }, { "epoch": 2.25632963759722, "grad_norm": 0.3654618263244629, "learning_rate": 1.756770866534322e-06, "loss": 0.2972, "step": 4545 }, { "epoch": 2.2568260797617077, "grad_norm": 0.3643755316734314, "learning_rate": 1.7545725520847078e-06, "loss": 0.3486, "step": 4546 }, { "epoch": 2.2573225219261954, "grad_norm": 0.3693639636039734, "learning_rate": 1.7523753211951112e-06, "loss": 0.3533, "step": 4547 }, { "epoch": 2.2578189640906836, "grad_norm": 0.36478060483932495, "learning_rate": 1.7501791745991308e-06, "loss": 0.2792, "step": 4548 }, { "epoch": 2.2583154062551714, "grad_norm": 0.36018627882003784, "learning_rate": 1.7479841130299957e-06, "loss": 0.3351, "step": 4549 }, { "epoch": 2.258811848419659, "grad_norm": 0.38838934898376465, "learning_rate": 1.7457901372205832e-06, "loss": 0.3356, "step": 4550 }, { "epoch": 2.259308290584147, "grad_norm": 0.4122173488140106, "learning_rate": 1.7435972479033981e-06, "loss": 0.2819, "step": 4551 }, { "epoch": 2.259804732748635, "grad_norm": 0.3860279619693756, "learning_rate": 1.7414054458105878e-06, "loss": 0.3443, "step": 4552 }, { "epoch": 2.2603011749131228, "grad_norm": 0.4010348320007324, "learning_rate": 1.7392147316739356e-06, "loss": 0.3123, "step": 4553 }, { "epoch": 2.2607976170776105, "grad_norm": 0.38257449865341187, "learning_rate": 1.7370251062248606e-06, "loss": 0.2862, "step": 4554 }, { "epoch": 2.2612940592420983, "grad_norm": 0.42631059885025024, "learning_rate": 1.734836570194422e-06, "loss": 0.2855, "step": 4555 }, { "epoch": 2.261790501406586, "grad_norm": 0.40041401982307434, "learning_rate": 1.732649124313307e-06, "loss": 0.333, "step": 4556 }, { "epoch": 2.262286943571074, "grad_norm": 0.36304813623428345, "learning_rate": 1.7304627693118508e-06, "loss": 0.3368, "step": 4557 }, { "epoch": 2.262783385735562, "grad_norm": 0.3737685978412628, "learning_rate": 1.7282775059200136e-06, "loss": 0.3102, "step": 4558 }, { "epoch": 2.2632798279000497, "grad_norm": 0.36121636629104614, "learning_rate": 1.7260933348673963e-06, "loss": 0.2931, "step": 4559 }, { "epoch": 2.2637762700645374, "grad_norm": 0.3690708577632904, "learning_rate": 1.723910256883235e-06, "loss": 0.3342, "step": 4560 }, { "epoch": 2.264272712229025, "grad_norm": 0.3881196677684784, "learning_rate": 1.7217282726963996e-06, "loss": 0.3018, "step": 4561 }, { "epoch": 2.2647691543935133, "grad_norm": 0.37154266238212585, "learning_rate": 1.7195473830353971e-06, "loss": 0.2759, "step": 4562 }, { "epoch": 2.265265596558001, "grad_norm": 0.36944615840911865, "learning_rate": 1.7173675886283642e-06, "loss": 0.3051, "step": 4563 }, { "epoch": 2.265762038722489, "grad_norm": 0.3855958878993988, "learning_rate": 1.7151888902030762e-06, "loss": 0.3356, "step": 4564 }, { "epoch": 2.2662584808869766, "grad_norm": 0.34437721967697144, "learning_rate": 1.7130112884869415e-06, "loss": 0.3197, "step": 4565 }, { "epoch": 2.2667549230514643, "grad_norm": 0.39067572355270386, "learning_rate": 1.7108347842070023e-06, "loss": 0.3583, "step": 4566 }, { "epoch": 2.2672513652159525, "grad_norm": 0.3764127790927887, "learning_rate": 1.7086593780899353e-06, "loss": 0.339, "step": 4567 }, { "epoch": 2.2677478073804402, "grad_norm": 0.37287092208862305, "learning_rate": 1.7064850708620457e-06, "loss": 0.3124, "step": 4568 }, { "epoch": 2.268244249544928, "grad_norm": 0.37805843353271484, "learning_rate": 1.704311863249281e-06, "loss": 0.3193, "step": 4569 }, { "epoch": 2.2687406917094157, "grad_norm": 0.43678903579711914, "learning_rate": 1.7021397559772118e-06, "loss": 0.3664, "step": 4570 }, { "epoch": 2.2692371338739035, "grad_norm": 0.38231155276298523, "learning_rate": 1.6999687497710472e-06, "loss": 0.3181, "step": 4571 }, { "epoch": 2.2697335760383917, "grad_norm": 0.3633175790309906, "learning_rate": 1.697798845355627e-06, "loss": 0.3227, "step": 4572 }, { "epoch": 2.2702300182028794, "grad_norm": 0.3557702898979187, "learning_rate": 1.6956300434554256e-06, "loss": 0.3191, "step": 4573 }, { "epoch": 2.270726460367367, "grad_norm": 0.39662379026412964, "learning_rate": 1.6934623447945431e-06, "loss": 0.3357, "step": 4574 }, { "epoch": 2.271222902531855, "grad_norm": 0.3696313500404358, "learning_rate": 1.6912957500967164e-06, "loss": 0.2962, "step": 4575 }, { "epoch": 2.271719344696343, "grad_norm": 0.3824140429496765, "learning_rate": 1.6891302600853137e-06, "loss": 0.3024, "step": 4576 }, { "epoch": 2.272215786860831, "grad_norm": 0.40406402945518494, "learning_rate": 1.6869658754833323e-06, "loss": 0.3261, "step": 4577 }, { "epoch": 2.2727122290253186, "grad_norm": 0.40183964371681213, "learning_rate": 1.684802597013404e-06, "loss": 0.2954, "step": 4578 }, { "epoch": 2.2732086711898063, "grad_norm": 0.3798043429851532, "learning_rate": 1.682640425397783e-06, "loss": 0.2888, "step": 4579 }, { "epoch": 2.273705113354294, "grad_norm": 0.3748592138290405, "learning_rate": 1.6804793613583663e-06, "loss": 0.295, "step": 4580 }, { "epoch": 2.2742015555187822, "grad_norm": 0.3514014184474945, "learning_rate": 1.6783194056166697e-06, "loss": 0.2891, "step": 4581 }, { "epoch": 2.27469799768327, "grad_norm": 0.3997117877006531, "learning_rate": 1.676160558893845e-06, "loss": 0.333, "step": 4582 }, { "epoch": 2.2751944398477577, "grad_norm": 0.38313761353492737, "learning_rate": 1.674002821910673e-06, "loss": 0.382, "step": 4583 }, { "epoch": 2.2756908820122455, "grad_norm": 0.3560938835144043, "learning_rate": 1.671846195387563e-06, "loss": 0.3368, "step": 4584 }, { "epoch": 2.2761873241767336, "grad_norm": 0.4004148542881012, "learning_rate": 1.6696906800445562e-06, "loss": 0.3127, "step": 4585 }, { "epoch": 2.2766837663412214, "grad_norm": 0.37361767888069153, "learning_rate": 1.6675362766013148e-06, "loss": 0.3068, "step": 4586 }, { "epoch": 2.277180208505709, "grad_norm": 0.3807847499847412, "learning_rate": 1.6653829857771432e-06, "loss": 0.3205, "step": 4587 }, { "epoch": 2.277676650670197, "grad_norm": 0.3824775218963623, "learning_rate": 1.6632308082909604e-06, "loss": 0.3419, "step": 4588 }, { "epoch": 2.2781730928346846, "grad_norm": 0.40183040499687195, "learning_rate": 1.6610797448613225e-06, "loss": 0.3215, "step": 4589 }, { "epoch": 2.278669534999173, "grad_norm": 0.35569503903388977, "learning_rate": 1.6589297962064111e-06, "loss": 0.3126, "step": 4590 }, { "epoch": 2.2791659771636605, "grad_norm": 0.3896946609020233, "learning_rate": 1.6567809630440356e-06, "loss": 0.3026, "step": 4591 }, { "epoch": 2.2796624193281483, "grad_norm": 0.3283378779888153, "learning_rate": 1.6546332460916347e-06, "loss": 0.2977, "step": 4592 }, { "epoch": 2.280158861492636, "grad_norm": 0.395824134349823, "learning_rate": 1.6524866460662686e-06, "loss": 0.3475, "step": 4593 }, { "epoch": 2.2806553036571238, "grad_norm": 0.36908653378486633, "learning_rate": 1.6503411636846318e-06, "loss": 0.3375, "step": 4594 }, { "epoch": 2.281151745821612, "grad_norm": 0.3430072069168091, "learning_rate": 1.648196799663041e-06, "loss": 0.2872, "step": 4595 }, { "epoch": 2.2816481879860997, "grad_norm": 0.42828378081321716, "learning_rate": 1.646053554717444e-06, "loss": 0.3903, "step": 4596 }, { "epoch": 2.2821446301505874, "grad_norm": 0.3761465549468994, "learning_rate": 1.6439114295634068e-06, "loss": 0.3029, "step": 4597 }, { "epoch": 2.282641072315075, "grad_norm": 0.3719542324542999, "learning_rate": 1.6417704249161326e-06, "loss": 0.3052, "step": 4598 }, { "epoch": 2.283137514479563, "grad_norm": 0.39381998777389526, "learning_rate": 1.63963054149044e-06, "loss": 0.3756, "step": 4599 }, { "epoch": 2.283633956644051, "grad_norm": 0.3659912645816803, "learning_rate": 1.6374917800007806e-06, "loss": 0.302, "step": 4600 }, { "epoch": 2.284130398808539, "grad_norm": 0.38699862360954285, "learning_rate": 1.6353541411612272e-06, "loss": 0.3655, "step": 4601 }, { "epoch": 2.2846268409730266, "grad_norm": 0.38798072934150696, "learning_rate": 1.6332176256854809e-06, "loss": 0.3111, "step": 4602 }, { "epoch": 2.2851232831375143, "grad_norm": 0.3865199387073517, "learning_rate": 1.6310822342868664e-06, "loss": 0.3801, "step": 4603 }, { "epoch": 2.285619725302002, "grad_norm": 0.3897475004196167, "learning_rate": 1.6289479676783305e-06, "loss": 0.2534, "step": 4604 }, { "epoch": 2.2861161674664903, "grad_norm": 0.39021745324134827, "learning_rate": 1.6268148265724476e-06, "loss": 0.3548, "step": 4605 }, { "epoch": 2.286612609630978, "grad_norm": 0.3579948842525482, "learning_rate": 1.624682811681416e-06, "loss": 0.3221, "step": 4606 }, { "epoch": 2.2871090517954658, "grad_norm": 0.37429097294807434, "learning_rate": 1.6225519237170578e-06, "loss": 0.3091, "step": 4607 }, { "epoch": 2.2876054939599535, "grad_norm": 0.38635534048080444, "learning_rate": 1.6204221633908202e-06, "loss": 0.3402, "step": 4608 }, { "epoch": 2.2881019361244417, "grad_norm": 0.3849923014640808, "learning_rate": 1.6182935314137665e-06, "loss": 0.3302, "step": 4609 }, { "epoch": 2.2885983782889294, "grad_norm": 0.35279861092567444, "learning_rate": 1.6161660284965969e-06, "loss": 0.2634, "step": 4610 }, { "epoch": 2.289094820453417, "grad_norm": 0.41092368960380554, "learning_rate": 1.6140396553496208e-06, "loss": 0.33, "step": 4611 }, { "epoch": 2.289591262617905, "grad_norm": 0.401408314704895, "learning_rate": 1.6119144126827784e-06, "loss": 0.3255, "step": 4612 }, { "epoch": 2.290087704782393, "grad_norm": 0.36607658863067627, "learning_rate": 1.609790301205631e-06, "loss": 0.3061, "step": 4613 }, { "epoch": 2.290584146946881, "grad_norm": 0.3877456784248352, "learning_rate": 1.607667321627361e-06, "loss": 0.3521, "step": 4614 }, { "epoch": 2.2910805891113686, "grad_norm": 0.4000256061553955, "learning_rate": 1.605545474656775e-06, "loss": 0.2982, "step": 4615 }, { "epoch": 2.2915770312758563, "grad_norm": 0.4172811508178711, "learning_rate": 1.6034247610022962e-06, "loss": 0.315, "step": 4616 }, { "epoch": 2.292073473440344, "grad_norm": 0.36698800325393677, "learning_rate": 1.6013051813719788e-06, "loss": 0.2845, "step": 4617 }, { "epoch": 2.2925699156048323, "grad_norm": 0.4370010495185852, "learning_rate": 1.5991867364734887e-06, "loss": 0.3261, "step": 4618 }, { "epoch": 2.29306635776932, "grad_norm": 0.40807628631591797, "learning_rate": 1.5970694270141197e-06, "loss": 0.359, "step": 4619 }, { "epoch": 2.2935627999338077, "grad_norm": 0.33634838461875916, "learning_rate": 1.5949532537007795e-06, "loss": 0.2462, "step": 4620 }, { "epoch": 2.2940592420982955, "grad_norm": 0.39455553889274597, "learning_rate": 1.5928382172400064e-06, "loss": 0.2934, "step": 4621 }, { "epoch": 2.2945556842627832, "grad_norm": 0.3820115625858307, "learning_rate": 1.59072431833795e-06, "loss": 0.3095, "step": 4622 }, { "epoch": 2.2950521264272714, "grad_norm": 0.37093228101730347, "learning_rate": 1.5886115577003847e-06, "loss": 0.307, "step": 4623 }, { "epoch": 2.295548568591759, "grad_norm": 0.3746495544910431, "learning_rate": 1.5864999360327039e-06, "loss": 0.3189, "step": 4624 }, { "epoch": 2.296045010756247, "grad_norm": 0.38483625650405884, "learning_rate": 1.5843894540399201e-06, "loss": 0.3308, "step": 4625 }, { "epoch": 2.2965414529207346, "grad_norm": 0.37250590324401855, "learning_rate": 1.582280112426669e-06, "loss": 0.2987, "step": 4626 }, { "epoch": 2.2970378950852224, "grad_norm": 0.327551931142807, "learning_rate": 1.580171911897196e-06, "loss": 0.2479, "step": 4627 }, { "epoch": 2.2975343372497106, "grad_norm": 0.38597583770751953, "learning_rate": 1.5780648531553794e-06, "loss": 0.3782, "step": 4628 }, { "epoch": 2.2980307794141983, "grad_norm": 0.3786565661430359, "learning_rate": 1.5759589369047035e-06, "loss": 0.2993, "step": 4629 }, { "epoch": 2.298527221578686, "grad_norm": 0.3854597210884094, "learning_rate": 1.573854163848278e-06, "loss": 0.2931, "step": 4630 }, { "epoch": 2.299023663743174, "grad_norm": 0.36628249287605286, "learning_rate": 1.5717505346888301e-06, "loss": 0.2937, "step": 4631 }, { "epoch": 2.2995201059076615, "grad_norm": 0.39791688323020935, "learning_rate": 1.5696480501287037e-06, "loss": 0.3547, "step": 4632 }, { "epoch": 2.3000165480721497, "grad_norm": 0.37919357419013977, "learning_rate": 1.567546710869864e-06, "loss": 0.3026, "step": 4633 }, { "epoch": 2.3005129902366375, "grad_norm": 0.3924312889575958, "learning_rate": 1.565446517613886e-06, "loss": 0.3433, "step": 4634 }, { "epoch": 2.301009432401125, "grad_norm": 0.37157127261161804, "learning_rate": 1.56334747106197e-06, "loss": 0.286, "step": 4635 }, { "epoch": 2.301505874565613, "grad_norm": 0.3924254775047302, "learning_rate": 1.5612495719149306e-06, "loss": 0.3091, "step": 4636 }, { "epoch": 2.3020023167301007, "grad_norm": 0.4113422930240631, "learning_rate": 1.5591528208731993e-06, "loss": 0.3144, "step": 4637 }, { "epoch": 2.302498758894589, "grad_norm": 0.41773155331611633, "learning_rate": 1.5570572186368255e-06, "loss": 0.354, "step": 4638 }, { "epoch": 2.3029952010590766, "grad_norm": 0.33300524950027466, "learning_rate": 1.554962765905469e-06, "loss": 0.2753, "step": 4639 }, { "epoch": 2.3034916432235644, "grad_norm": 0.40244680643081665, "learning_rate": 1.5528694633784175e-06, "loss": 0.3427, "step": 4640 }, { "epoch": 2.303988085388052, "grad_norm": 0.3691997528076172, "learning_rate": 1.5507773117545628e-06, "loss": 0.2945, "step": 4641 }, { "epoch": 2.3044845275525403, "grad_norm": 0.39324426651000977, "learning_rate": 1.5486863117324185e-06, "loss": 0.3357, "step": 4642 }, { "epoch": 2.304980969717028, "grad_norm": 0.3891235589981079, "learning_rate": 1.5465964640101134e-06, "loss": 0.2943, "step": 4643 }, { "epoch": 2.305477411881516, "grad_norm": 0.38480496406555176, "learning_rate": 1.5445077692853926e-06, "loss": 0.3067, "step": 4644 }, { "epoch": 2.3059738540460035, "grad_norm": 0.40626150369644165, "learning_rate": 1.5424202282556106e-06, "loss": 0.3788, "step": 4645 }, { "epoch": 2.3064702962104917, "grad_norm": 0.37544694542884827, "learning_rate": 1.5403338416177428e-06, "loss": 0.3037, "step": 4646 }, { "epoch": 2.3069667383749795, "grad_norm": 0.38288116455078125, "learning_rate": 1.5382486100683768e-06, "loss": 0.3312, "step": 4647 }, { "epoch": 2.307463180539467, "grad_norm": 0.379447340965271, "learning_rate": 1.5361645343037146e-06, "loss": 0.2794, "step": 4648 }, { "epoch": 2.307959622703955, "grad_norm": 0.3683731257915497, "learning_rate": 1.5340816150195743e-06, "loss": 0.3156, "step": 4649 }, { "epoch": 2.3084560648684427, "grad_norm": 0.3879847228527069, "learning_rate": 1.5319998529113812e-06, "loss": 0.3499, "step": 4650 }, { "epoch": 2.308952507032931, "grad_norm": 0.3750465214252472, "learning_rate": 1.5299192486741848e-06, "loss": 0.2845, "step": 4651 }, { "epoch": 2.3094489491974186, "grad_norm": 0.36692455410957336, "learning_rate": 1.5278398030026386e-06, "loss": 0.3677, "step": 4652 }, { "epoch": 2.3099453913619064, "grad_norm": 0.39363357424736023, "learning_rate": 1.5257615165910139e-06, "loss": 0.379, "step": 4653 }, { "epoch": 2.310441833526394, "grad_norm": 0.3670899271965027, "learning_rate": 1.5236843901331943e-06, "loss": 0.3918, "step": 4654 }, { "epoch": 2.310938275690882, "grad_norm": 0.36957424879074097, "learning_rate": 1.521608424322676e-06, "loss": 0.3124, "step": 4655 }, { "epoch": 2.31143471785537, "grad_norm": 0.37222227454185486, "learning_rate": 1.519533619852569e-06, "loss": 0.3485, "step": 4656 }, { "epoch": 2.3119311600198578, "grad_norm": 0.3030472993850708, "learning_rate": 1.517459977415589e-06, "loss": 0.2449, "step": 4657 }, { "epoch": 2.3124276021843455, "grad_norm": 0.3844222128391266, "learning_rate": 1.5153874977040756e-06, "loss": 0.3725, "step": 4658 }, { "epoch": 2.3129240443488333, "grad_norm": 0.40479373931884766, "learning_rate": 1.5133161814099683e-06, "loss": 0.3314, "step": 4659 }, { "epoch": 2.313420486513321, "grad_norm": 0.3636350631713867, "learning_rate": 1.511246029224826e-06, "loss": 0.3485, "step": 4660 }, { "epoch": 2.313916928677809, "grad_norm": 0.35788172483444214, "learning_rate": 1.5091770418398149e-06, "loss": 0.3446, "step": 4661 }, { "epoch": 2.314413370842297, "grad_norm": 0.33917003870010376, "learning_rate": 1.5071092199457144e-06, "loss": 0.3397, "step": 4662 }, { "epoch": 2.3149098130067847, "grad_norm": 0.4352368712425232, "learning_rate": 1.5050425642329152e-06, "loss": 0.3775, "step": 4663 }, { "epoch": 2.3154062551712724, "grad_norm": 0.37823620438575745, "learning_rate": 1.5029770753914148e-06, "loss": 0.3269, "step": 4664 }, { "epoch": 2.31590269733576, "grad_norm": 0.3884040117263794, "learning_rate": 1.5009127541108247e-06, "loss": 0.3496, "step": 4665 }, { "epoch": 2.3163991395002483, "grad_norm": 0.3377026915550232, "learning_rate": 1.4988496010803667e-06, "loss": 0.2622, "step": 4666 }, { "epoch": 2.316895581664736, "grad_norm": 0.402101069688797, "learning_rate": 1.4967876169888724e-06, "loss": 0.3688, "step": 4667 }, { "epoch": 2.317392023829224, "grad_norm": 0.42966365814208984, "learning_rate": 1.4947268025247774e-06, "loss": 0.3037, "step": 4668 }, { "epoch": 2.3178884659937116, "grad_norm": 0.3539503812789917, "learning_rate": 1.4926671583761381e-06, "loss": 0.3395, "step": 4669 }, { "epoch": 2.3183849081581998, "grad_norm": 0.40029460191726685, "learning_rate": 1.490608685230609e-06, "loss": 0.4056, "step": 4670 }, { "epoch": 2.3188813503226875, "grad_norm": 0.3704099953174591, "learning_rate": 1.48855138377546e-06, "loss": 0.296, "step": 4671 }, { "epoch": 2.3193777924871752, "grad_norm": 0.3624061048030853, "learning_rate": 1.486495254697568e-06, "loss": 0.2705, "step": 4672 }, { "epoch": 2.319874234651663, "grad_norm": 0.37189167737960815, "learning_rate": 1.4844402986834188e-06, "loss": 0.3094, "step": 4673 }, { "epoch": 2.320370676816151, "grad_norm": 0.41039183735847473, "learning_rate": 1.4823865164191077e-06, "loss": 0.3843, "step": 4674 }, { "epoch": 2.320867118980639, "grad_norm": 0.3521421551704407, "learning_rate": 1.480333908590334e-06, "loss": 0.341, "step": 4675 }, { "epoch": 2.3213635611451267, "grad_norm": 0.35351455211639404, "learning_rate": 1.4782824758824088e-06, "loss": 0.319, "step": 4676 }, { "epoch": 2.3218600033096144, "grad_norm": 0.3876136541366577, "learning_rate": 1.4762322189802502e-06, "loss": 0.2811, "step": 4677 }, { "epoch": 2.322356445474102, "grad_norm": 0.4309132397174835, "learning_rate": 1.4741831385683824e-06, "loss": 0.383, "step": 4678 }, { "epoch": 2.3228528876385903, "grad_norm": 0.35601478815078735, "learning_rate": 1.4721352353309403e-06, "loss": 0.2912, "step": 4679 }, { "epoch": 2.323349329803078, "grad_norm": 0.3590575158596039, "learning_rate": 1.4700885099516577e-06, "loss": 0.3339, "step": 4680 }, { "epoch": 2.323845771967566, "grad_norm": 0.38592657446861267, "learning_rate": 1.468042963113887e-06, "loss": 0.3449, "step": 4681 }, { "epoch": 2.3243422141320536, "grad_norm": 0.3892170786857605, "learning_rate": 1.4659985955005767e-06, "loss": 0.3722, "step": 4682 }, { "epoch": 2.3248386562965413, "grad_norm": 0.3274874687194824, "learning_rate": 1.4639554077942859e-06, "loss": 0.3007, "step": 4683 }, { "epoch": 2.3253350984610295, "grad_norm": 0.34816136956214905, "learning_rate": 1.4619134006771802e-06, "loss": 0.2801, "step": 4684 }, { "epoch": 2.3258315406255172, "grad_norm": 0.4003821015357971, "learning_rate": 1.4598725748310304e-06, "loss": 0.3587, "step": 4685 }, { "epoch": 2.326327982790005, "grad_norm": 0.356230765581131, "learning_rate": 1.4578329309372136e-06, "loss": 0.3054, "step": 4686 }, { "epoch": 2.3268244249544927, "grad_norm": 0.40430891513824463, "learning_rate": 1.4557944696767078e-06, "loss": 0.338, "step": 4687 }, { "epoch": 2.3273208671189805, "grad_norm": 0.3862181305885315, "learning_rate": 1.4537571917301051e-06, "loss": 0.3235, "step": 4688 }, { "epoch": 2.3278173092834686, "grad_norm": 0.3928804397583008, "learning_rate": 1.4517210977775936e-06, "loss": 0.2944, "step": 4689 }, { "epoch": 2.3283137514479564, "grad_norm": 0.37476226687431335, "learning_rate": 1.4496861884989716e-06, "loss": 0.2979, "step": 4690 }, { "epoch": 2.328810193612444, "grad_norm": 0.39518383145332336, "learning_rate": 1.4476524645736362e-06, "loss": 0.3176, "step": 4691 }, { "epoch": 2.329306635776932, "grad_norm": 0.3587619364261627, "learning_rate": 1.4456199266805986e-06, "loss": 0.2917, "step": 4692 }, { "epoch": 2.3298030779414196, "grad_norm": 0.3932196795940399, "learning_rate": 1.443588575498463e-06, "loss": 0.3652, "step": 4693 }, { "epoch": 2.330299520105908, "grad_norm": 0.36912989616394043, "learning_rate": 1.4415584117054443e-06, "loss": 0.3438, "step": 4694 }, { "epoch": 2.3307959622703955, "grad_norm": 0.3568630814552307, "learning_rate": 1.4395294359793589e-06, "loss": 0.2956, "step": 4695 }, { "epoch": 2.3312924044348833, "grad_norm": 0.3729340732097626, "learning_rate": 1.4375016489976268e-06, "loss": 0.3209, "step": 4696 }, { "epoch": 2.331788846599371, "grad_norm": 0.37228158116340637, "learning_rate": 1.4354750514372717e-06, "loss": 0.3158, "step": 4697 }, { "epoch": 2.3322852887638588, "grad_norm": 0.35584092140197754, "learning_rate": 1.4334496439749157e-06, "loss": 0.3067, "step": 4698 }, { "epoch": 2.332781730928347, "grad_norm": 0.34605395793914795, "learning_rate": 1.4314254272867933e-06, "loss": 0.3027, "step": 4699 }, { "epoch": 2.3332781730928347, "grad_norm": 0.3907915949821472, "learning_rate": 1.4294024020487307e-06, "loss": 0.3511, "step": 4700 }, { "epoch": 2.3337746152573224, "grad_norm": 0.35558202862739563, "learning_rate": 1.4273805689361625e-06, "loss": 0.3211, "step": 4701 }, { "epoch": 2.33427105742181, "grad_norm": 0.35294678807258606, "learning_rate": 1.4253599286241242e-06, "loss": 0.2613, "step": 4702 }, { "epoch": 2.3347674995862984, "grad_norm": 0.3597235083580017, "learning_rate": 1.423340481787252e-06, "loss": 0.2935, "step": 4703 }, { "epoch": 2.335263941750786, "grad_norm": 0.40116769075393677, "learning_rate": 1.4213222290997863e-06, "loss": 0.316, "step": 4704 }, { "epoch": 2.335760383915274, "grad_norm": 0.38143932819366455, "learning_rate": 1.4193051712355638e-06, "loss": 0.2799, "step": 4705 }, { "epoch": 2.3362568260797616, "grad_norm": 0.37911996245384216, "learning_rate": 1.4172893088680268e-06, "loss": 0.3066, "step": 4706 }, { "epoch": 2.33675326824425, "grad_norm": 0.3579947054386139, "learning_rate": 1.4152746426702169e-06, "loss": 0.3271, "step": 4707 }, { "epoch": 2.3372497104087375, "grad_norm": 0.36654987931251526, "learning_rate": 1.4132611733147767e-06, "loss": 0.2987, "step": 4708 }, { "epoch": 2.3377461525732253, "grad_norm": 0.449788361787796, "learning_rate": 1.4112489014739477e-06, "loss": 0.3242, "step": 4709 }, { "epoch": 2.338242594737713, "grad_norm": 0.35902366042137146, "learning_rate": 1.4092378278195746e-06, "loss": 0.3187, "step": 4710 }, { "epoch": 2.3387390369022008, "grad_norm": 0.36776411533355713, "learning_rate": 1.4072279530231004e-06, "loss": 0.3574, "step": 4711 }, { "epoch": 2.339235479066689, "grad_norm": 0.3752245008945465, "learning_rate": 1.4052192777555645e-06, "loss": 0.2733, "step": 4712 }, { "epoch": 2.3397319212311767, "grad_norm": 0.36330246925354004, "learning_rate": 1.4032118026876118e-06, "loss": 0.3421, "step": 4713 }, { "epoch": 2.3402283633956644, "grad_norm": 0.3533554971218109, "learning_rate": 1.4012055284894827e-06, "loss": 0.3166, "step": 4714 }, { "epoch": 2.340724805560152, "grad_norm": 0.376986563205719, "learning_rate": 1.399200455831019e-06, "loss": 0.3688, "step": 4715 }, { "epoch": 2.34122124772464, "grad_norm": 0.34277936816215515, "learning_rate": 1.3971965853816577e-06, "loss": 0.2988, "step": 4716 }, { "epoch": 2.341717689889128, "grad_norm": 0.3787349760532379, "learning_rate": 1.3951939178104374e-06, "loss": 0.2989, "step": 4717 }, { "epoch": 2.342214132053616, "grad_norm": 0.4006495773792267, "learning_rate": 1.3931924537859948e-06, "loss": 0.3337, "step": 4718 }, { "epoch": 2.3427105742181036, "grad_norm": 0.3457585871219635, "learning_rate": 1.3911921939765643e-06, "loss": 0.2811, "step": 4719 }, { "epoch": 2.3432070163825913, "grad_norm": 0.36001020669937134, "learning_rate": 1.3891931390499802e-06, "loss": 0.3375, "step": 4720 }, { "epoch": 2.343703458547079, "grad_norm": 0.36946725845336914, "learning_rate": 1.3871952896736673e-06, "loss": 0.3071, "step": 4721 }, { "epoch": 2.3441999007115673, "grad_norm": 0.3893817663192749, "learning_rate": 1.38519864651466e-06, "loss": 0.3204, "step": 4722 }, { "epoch": 2.344696342876055, "grad_norm": 0.38774359226226807, "learning_rate": 1.3832032102395775e-06, "loss": 0.3002, "step": 4723 }, { "epoch": 2.3451927850405427, "grad_norm": 0.40890952944755554, "learning_rate": 1.3812089815146446e-06, "loss": 0.297, "step": 4724 }, { "epoch": 2.3456892272050305, "grad_norm": 0.40188655257225037, "learning_rate": 1.3792159610056794e-06, "loss": 0.2982, "step": 4725 }, { "epoch": 2.3461856693695182, "grad_norm": 0.3650643229484558, "learning_rate": 1.3772241493780975e-06, "loss": 0.3182, "step": 4726 }, { "epoch": 2.3466821115340064, "grad_norm": 0.34483543038368225, "learning_rate": 1.3752335472969113e-06, "loss": 0.3295, "step": 4727 }, { "epoch": 2.347178553698494, "grad_norm": 0.3932405710220337, "learning_rate": 1.3732441554267257e-06, "loss": 0.338, "step": 4728 }, { "epoch": 2.347674995862982, "grad_norm": 0.3508591651916504, "learning_rate": 1.37125597443175e-06, "loss": 0.3207, "step": 4729 }, { "epoch": 2.3481714380274696, "grad_norm": 0.38963592052459717, "learning_rate": 1.3692690049757783e-06, "loss": 0.3385, "step": 4730 }, { "epoch": 2.348667880191958, "grad_norm": 0.3903179168701172, "learning_rate": 1.3672832477222086e-06, "loss": 0.3381, "step": 4731 }, { "epoch": 2.3491643223564456, "grad_norm": 0.402605801820755, "learning_rate": 1.365298703334031e-06, "loss": 0.3173, "step": 4732 }, { "epoch": 2.3496607645209333, "grad_norm": 0.3969194293022156, "learning_rate": 1.3633153724738302e-06, "loss": 0.3412, "step": 4733 }, { "epoch": 2.350157206685421, "grad_norm": 0.32371124625205994, "learning_rate": 1.3613332558037883e-06, "loss": 0.2927, "step": 4734 }, { "epoch": 2.350653648849909, "grad_norm": 0.3445553481578827, "learning_rate": 1.3593523539856763e-06, "loss": 0.2891, "step": 4735 }, { "epoch": 2.351150091014397, "grad_norm": 0.3858391344547272, "learning_rate": 1.3573726676808686e-06, "loss": 0.3713, "step": 4736 }, { "epoch": 2.3516465331788847, "grad_norm": 0.37051576375961304, "learning_rate": 1.3553941975503243e-06, "loss": 0.3055, "step": 4737 }, { "epoch": 2.3521429753433725, "grad_norm": 0.3893047571182251, "learning_rate": 1.3534169442546046e-06, "loss": 0.3127, "step": 4738 }, { "epoch": 2.35263941750786, "grad_norm": 0.3567773103713989, "learning_rate": 1.3514409084538555e-06, "loss": 0.2921, "step": 4739 }, { "epoch": 2.3531358596723484, "grad_norm": 0.37547218799591064, "learning_rate": 1.3494660908078272e-06, "loss": 0.3577, "step": 4740 }, { "epoch": 2.353632301836836, "grad_norm": 0.403574675321579, "learning_rate": 1.3474924919758542e-06, "loss": 0.3416, "step": 4741 }, { "epoch": 2.354128744001324, "grad_norm": 0.35271507501602173, "learning_rate": 1.3455201126168682e-06, "loss": 0.2799, "step": 4742 }, { "epoch": 2.3546251861658116, "grad_norm": 0.4195518493652344, "learning_rate": 1.3435489533893937e-06, "loss": 0.3591, "step": 4743 }, { "epoch": 2.3551216283302994, "grad_norm": 0.38940751552581787, "learning_rate": 1.3415790149515461e-06, "loss": 0.3427, "step": 4744 }, { "epoch": 2.3556180704947876, "grad_norm": 0.3814955949783325, "learning_rate": 1.3396102979610377e-06, "loss": 0.3293, "step": 4745 }, { "epoch": 2.3561145126592753, "grad_norm": 0.36404258012771606, "learning_rate": 1.3376428030751643e-06, "loss": 0.3074, "step": 4746 }, { "epoch": 2.356610954823763, "grad_norm": 0.35026341676712036, "learning_rate": 1.3356765309508224e-06, "loss": 0.3041, "step": 4747 }, { "epoch": 2.357107396988251, "grad_norm": 0.40994319319725037, "learning_rate": 1.3337114822444958e-06, "loss": 0.3549, "step": 4748 }, { "epoch": 2.3576038391527385, "grad_norm": 0.38837194442749023, "learning_rate": 1.3317476576122607e-06, "loss": 0.3112, "step": 4749 }, { "epoch": 2.3581002813172267, "grad_norm": 0.3405431807041168, "learning_rate": 1.3297850577097853e-06, "loss": 0.33, "step": 4750 }, { "epoch": 2.3585967234817145, "grad_norm": 0.3666921854019165, "learning_rate": 1.3278236831923286e-06, "loss": 0.3045, "step": 4751 }, { "epoch": 2.359093165646202, "grad_norm": 0.35716336965560913, "learning_rate": 1.3258635347147407e-06, "loss": 0.2964, "step": 4752 }, { "epoch": 2.35958960781069, "grad_norm": 0.34546783566474915, "learning_rate": 1.3239046129314603e-06, "loss": 0.2888, "step": 4753 }, { "epoch": 2.3600860499751777, "grad_norm": 0.3822200894355774, "learning_rate": 1.3219469184965184e-06, "loss": 0.3375, "step": 4754 }, { "epoch": 2.360582492139666, "grad_norm": 0.38017261028289795, "learning_rate": 1.3199904520635365e-06, "loss": 0.3631, "step": 4755 }, { "epoch": 2.3610789343041536, "grad_norm": 0.35253551602363586, "learning_rate": 1.3180352142857256e-06, "loss": 0.3896, "step": 4756 }, { "epoch": 2.3615753764686414, "grad_norm": 0.32132795453071594, "learning_rate": 1.3160812058158883e-06, "loss": 0.2889, "step": 4757 }, { "epoch": 2.362071818633129, "grad_norm": 0.3557528257369995, "learning_rate": 1.3141284273064099e-06, "loss": 0.32, "step": 4758 }, { "epoch": 2.362568260797617, "grad_norm": 0.3575631380081177, "learning_rate": 1.3121768794092753e-06, "loss": 0.3107, "step": 4759 }, { "epoch": 2.363064702962105, "grad_norm": 0.3483123779296875, "learning_rate": 1.3102265627760507e-06, "loss": 0.3274, "step": 4760 }, { "epoch": 2.3635611451265928, "grad_norm": 0.35388025641441345, "learning_rate": 1.3082774780578954e-06, "loss": 0.3554, "step": 4761 }, { "epoch": 2.3640575872910805, "grad_norm": 0.3720189332962036, "learning_rate": 1.306329625905552e-06, "loss": 0.3244, "step": 4762 }, { "epoch": 2.3645540294555683, "grad_norm": 0.35278797149658203, "learning_rate": 1.3043830069693607e-06, "loss": 0.3732, "step": 4763 }, { "epoch": 2.3650504716200564, "grad_norm": 0.3338548243045807, "learning_rate": 1.3024376218992407e-06, "loss": 0.3204, "step": 4764 }, { "epoch": 2.365546913784544, "grad_norm": 0.3876834511756897, "learning_rate": 1.3004934713447047e-06, "loss": 0.3065, "step": 4765 }, { "epoch": 2.366043355949032, "grad_norm": 0.3748672902584076, "learning_rate": 1.2985505559548516e-06, "loss": 0.2946, "step": 4766 }, { "epoch": 2.3665397981135197, "grad_norm": 0.37627112865448, "learning_rate": 1.296608876378368e-06, "loss": 0.3461, "step": 4767 }, { "epoch": 2.367036240278008, "grad_norm": 0.34662142395973206, "learning_rate": 1.2946684332635295e-06, "loss": 0.2793, "step": 4768 }, { "epoch": 2.3675326824424956, "grad_norm": 0.3642960786819458, "learning_rate": 1.2927292272581925e-06, "loss": 0.2995, "step": 4769 }, { "epoch": 2.3680291246069833, "grad_norm": 0.37463822960853577, "learning_rate": 1.290791259009812e-06, "loss": 0.3279, "step": 4770 }, { "epoch": 2.368525566771471, "grad_norm": 0.3694632947444916, "learning_rate": 1.2888545291654175e-06, "loss": 0.2933, "step": 4771 }, { "epoch": 2.369022008935959, "grad_norm": 0.3848544657230377, "learning_rate": 1.2869190383716323e-06, "loss": 0.3133, "step": 4772 }, { "epoch": 2.369518451100447, "grad_norm": 0.4312441647052765, "learning_rate": 1.2849847872746646e-06, "loss": 0.3496, "step": 4773 }, { "epoch": 2.3700148932649348, "grad_norm": 0.3334580063819885, "learning_rate": 1.2830517765203082e-06, "loss": 0.3079, "step": 4774 }, { "epoch": 2.3705113354294225, "grad_norm": 0.36529484391212463, "learning_rate": 1.281120006753943e-06, "loss": 0.3286, "step": 4775 }, { "epoch": 2.3710077775939102, "grad_norm": 0.37822067737579346, "learning_rate": 1.2791894786205322e-06, "loss": 0.343, "step": 4776 }, { "epoch": 2.371504219758398, "grad_norm": 0.3508495092391968, "learning_rate": 1.2772601927646305e-06, "loss": 0.3134, "step": 4777 }, { "epoch": 2.372000661922886, "grad_norm": 0.38182997703552246, "learning_rate": 1.2753321498303711e-06, "loss": 0.3183, "step": 4778 }, { "epoch": 2.372497104087374, "grad_norm": 0.3886706233024597, "learning_rate": 1.2734053504614757e-06, "loss": 0.3402, "step": 4779 }, { "epoch": 2.3729935462518617, "grad_norm": 0.43070119619369507, "learning_rate": 1.271479795301251e-06, "loss": 0.3171, "step": 4780 }, { "epoch": 2.3734899884163494, "grad_norm": 0.3908765912055969, "learning_rate": 1.2695554849925862e-06, "loss": 0.2878, "step": 4781 }, { "epoch": 2.373986430580837, "grad_norm": 0.40345242619514465, "learning_rate": 1.2676324201779593e-06, "loss": 0.3622, "step": 4782 }, { "epoch": 2.3744828727453253, "grad_norm": 0.36055290699005127, "learning_rate": 1.265710601499426e-06, "loss": 0.3073, "step": 4783 }, { "epoch": 2.374979314909813, "grad_norm": 0.38239410519599915, "learning_rate": 1.2637900295986293e-06, "loss": 0.3601, "step": 4784 }, { "epoch": 2.375475757074301, "grad_norm": 0.3686445653438568, "learning_rate": 1.2618707051167983e-06, "loss": 0.2948, "step": 4785 }, { "epoch": 2.3759721992387886, "grad_norm": 0.3782065808773041, "learning_rate": 1.2599526286947427e-06, "loss": 0.3229, "step": 4786 }, { "epoch": 2.3764686414032763, "grad_norm": 0.36274364590644836, "learning_rate": 1.258035800972855e-06, "loss": 0.2735, "step": 4787 }, { "epoch": 2.3769650835677645, "grad_norm": 0.38038212060928345, "learning_rate": 1.2561202225911117e-06, "loss": 0.2942, "step": 4788 }, { "epoch": 2.3774615257322522, "grad_norm": 0.3601911664009094, "learning_rate": 1.2542058941890734e-06, "loss": 0.3012, "step": 4789 }, { "epoch": 2.37795796789674, "grad_norm": 0.34610098600387573, "learning_rate": 1.2522928164058817e-06, "loss": 0.3322, "step": 4790 }, { "epoch": 2.3784544100612277, "grad_norm": 0.3693186938762665, "learning_rate": 1.2503809898802615e-06, "loss": 0.3433, "step": 4791 }, { "epoch": 2.378950852225716, "grad_norm": 0.3232116997241974, "learning_rate": 1.2484704152505205e-06, "loss": 0.3208, "step": 4792 }, { "epoch": 2.3794472943902036, "grad_norm": 0.3636696934700012, "learning_rate": 1.246561093154548e-06, "loss": 0.2918, "step": 4793 }, { "epoch": 2.3799437365546914, "grad_norm": 0.3768170475959778, "learning_rate": 1.2446530242298117e-06, "loss": 0.2973, "step": 4794 }, { "epoch": 2.380440178719179, "grad_norm": 0.3694896697998047, "learning_rate": 1.2427462091133662e-06, "loss": 0.3128, "step": 4795 }, { "epoch": 2.380936620883667, "grad_norm": 0.38295978307724, "learning_rate": 1.2408406484418455e-06, "loss": 0.3139, "step": 4796 }, { "epoch": 2.381433063048155, "grad_norm": 0.40064623951911926, "learning_rate": 1.2389363428514634e-06, "loss": 0.3569, "step": 4797 }, { "epoch": 2.381929505212643, "grad_norm": 0.3479635417461395, "learning_rate": 1.2370332929780182e-06, "loss": 0.3001, "step": 4798 }, { "epoch": 2.3824259473771305, "grad_norm": 0.35368430614471436, "learning_rate": 1.235131499456882e-06, "loss": 0.3491, "step": 4799 }, { "epoch": 2.3829223895416183, "grad_norm": 0.35645923018455505, "learning_rate": 1.233230962923017e-06, "loss": 0.2832, "step": 4800 }, { "epoch": 2.3834188317061065, "grad_norm": 0.3895478844642639, "learning_rate": 1.2313316840109573e-06, "loss": 0.3512, "step": 4801 }, { "epoch": 2.383915273870594, "grad_norm": 0.3863394558429718, "learning_rate": 1.2294336633548215e-06, "loss": 0.3159, "step": 4802 }, { "epoch": 2.384411716035082, "grad_norm": 0.33949097990989685, "learning_rate": 1.227536901588307e-06, "loss": 0.266, "step": 4803 }, { "epoch": 2.3849081581995697, "grad_norm": 0.3906005918979645, "learning_rate": 1.2256413993446915e-06, "loss": 0.3614, "step": 4804 }, { "epoch": 2.3854046003640574, "grad_norm": 0.36051446199417114, "learning_rate": 1.2237471572568328e-06, "loss": 0.3207, "step": 4805 }, { "epoch": 2.3859010425285456, "grad_norm": 0.3728381395339966, "learning_rate": 1.2218541759571623e-06, "loss": 0.38, "step": 4806 }, { "epoch": 2.3863974846930334, "grad_norm": 0.3529369831085205, "learning_rate": 1.2199624560777006e-06, "loss": 0.2698, "step": 4807 }, { "epoch": 2.386893926857521, "grad_norm": 0.37177029252052307, "learning_rate": 1.2180719982500383e-06, "loss": 0.3076, "step": 4808 }, { "epoch": 2.387390369022009, "grad_norm": 0.4176143407821655, "learning_rate": 1.2161828031053502e-06, "loss": 0.3543, "step": 4809 }, { "epoch": 2.3878868111864966, "grad_norm": 0.3902484178543091, "learning_rate": 1.2142948712743824e-06, "loss": 0.3078, "step": 4810 }, { "epoch": 2.388383253350985, "grad_norm": 0.38761869072914124, "learning_rate": 1.2124082033874706e-06, "loss": 0.3321, "step": 4811 }, { "epoch": 2.3888796955154725, "grad_norm": 0.36848369240760803, "learning_rate": 1.2105228000745173e-06, "loss": 0.2787, "step": 4812 }, { "epoch": 2.3893761376799603, "grad_norm": 0.37329697608947754, "learning_rate": 1.208638661965008e-06, "loss": 0.3706, "step": 4813 }, { "epoch": 2.389872579844448, "grad_norm": 0.32481861114501953, "learning_rate": 1.2067557896880066e-06, "loss": 0.318, "step": 4814 }, { "epoch": 2.3903690220089358, "grad_norm": 0.40956631302833557, "learning_rate": 1.2048741838721523e-06, "loss": 0.361, "step": 4815 }, { "epoch": 2.390865464173424, "grad_norm": 0.37010565400123596, "learning_rate": 1.2029938451456636e-06, "loss": 0.2644, "step": 4816 }, { "epoch": 2.3913619063379117, "grad_norm": 0.3940662145614624, "learning_rate": 1.20111477413633e-06, "loss": 0.3495, "step": 4817 }, { "epoch": 2.3918583485023994, "grad_norm": 0.3497008681297302, "learning_rate": 1.1992369714715285e-06, "loss": 0.3359, "step": 4818 }, { "epoch": 2.392354790666887, "grad_norm": 0.33426064252853394, "learning_rate": 1.1973604377782017e-06, "loss": 0.2879, "step": 4819 }, { "epoch": 2.392851232831375, "grad_norm": 0.38664159178733826, "learning_rate": 1.195485173682875e-06, "loss": 0.342, "step": 4820 }, { "epoch": 2.393347674995863, "grad_norm": 0.37632134556770325, "learning_rate": 1.1936111798116474e-06, "loss": 0.3364, "step": 4821 }, { "epoch": 2.393844117160351, "grad_norm": 0.3817102611064911, "learning_rate": 1.1917384567901946e-06, "loss": 0.3424, "step": 4822 }, { "epoch": 2.3943405593248386, "grad_norm": 0.3272261619567871, "learning_rate": 1.1898670052437705e-06, "loss": 0.2529, "step": 4823 }, { "epoch": 2.3948370014893263, "grad_norm": 0.36889126896858215, "learning_rate": 1.1879968257971979e-06, "loss": 0.3342, "step": 4824 }, { "epoch": 2.3953334436538145, "grad_norm": 0.3702617287635803, "learning_rate": 1.1861279190748804e-06, "loss": 0.3318, "step": 4825 }, { "epoch": 2.3958298858183023, "grad_norm": 0.3620351254940033, "learning_rate": 1.1842602857007957e-06, "loss": 0.2701, "step": 4826 }, { "epoch": 2.39632632798279, "grad_norm": 0.3807470202445984, "learning_rate": 1.1823939262984958e-06, "loss": 0.3146, "step": 4827 }, { "epoch": 2.3968227701472777, "grad_norm": 0.4059567153453827, "learning_rate": 1.180528841491108e-06, "loss": 0.367, "step": 4828 }, { "epoch": 2.397319212311766, "grad_norm": 0.3558509051799774, "learning_rate": 1.1786650319013298e-06, "loss": 0.2725, "step": 4829 }, { "epoch": 2.3978156544762537, "grad_norm": 0.36634621024131775, "learning_rate": 1.1768024981514426e-06, "loss": 0.3272, "step": 4830 }, { "epoch": 2.3983120966407414, "grad_norm": 0.41922762989997864, "learning_rate": 1.174941240863291e-06, "loss": 0.3223, "step": 4831 }, { "epoch": 2.398808538805229, "grad_norm": 0.3503614366054535, "learning_rate": 1.1730812606582996e-06, "loss": 0.3078, "step": 4832 }, { "epoch": 2.399304980969717, "grad_norm": 0.3798806667327881, "learning_rate": 1.1712225581574655e-06, "loss": 0.2592, "step": 4833 }, { "epoch": 2.399801423134205, "grad_norm": 0.4015193581581116, "learning_rate": 1.16936513398136e-06, "loss": 0.3391, "step": 4834 }, { "epoch": 2.400297865298693, "grad_norm": 0.36048969626426697, "learning_rate": 1.167508988750124e-06, "loss": 0.3, "step": 4835 }, { "epoch": 2.4007943074631806, "grad_norm": 0.39935892820358276, "learning_rate": 1.1656541230834756e-06, "loss": 0.2965, "step": 4836 }, { "epoch": 2.4012907496276683, "grad_norm": 0.41642579436302185, "learning_rate": 1.1638005376007034e-06, "loss": 0.3708, "step": 4837 }, { "epoch": 2.401787191792156, "grad_norm": 0.35434773564338684, "learning_rate": 1.1619482329206694e-06, "loss": 0.3186, "step": 4838 }, { "epoch": 2.4022836339566442, "grad_norm": 0.3652162551879883, "learning_rate": 1.1600972096618102e-06, "loss": 0.2856, "step": 4839 }, { "epoch": 2.402780076121132, "grad_norm": 0.3620016276836395, "learning_rate": 1.1582474684421262e-06, "loss": 0.3484, "step": 4840 }, { "epoch": 2.4032765182856197, "grad_norm": 0.40832263231277466, "learning_rate": 1.1563990098792028e-06, "loss": 0.3852, "step": 4841 }, { "epoch": 2.4037729604501075, "grad_norm": 0.3637259900569916, "learning_rate": 1.1545518345901851e-06, "loss": 0.315, "step": 4842 }, { "epoch": 2.404269402614595, "grad_norm": 0.33356529474258423, "learning_rate": 1.1527059431917965e-06, "loss": 0.2926, "step": 4843 }, { "epoch": 2.4047658447790834, "grad_norm": 0.3932321071624756, "learning_rate": 1.1508613363003295e-06, "loss": 0.32, "step": 4844 }, { "epoch": 2.405262286943571, "grad_norm": 0.33266064524650574, "learning_rate": 1.1490180145316487e-06, "loss": 0.3222, "step": 4845 }, { "epoch": 2.405758729108059, "grad_norm": 0.3744467496871948, "learning_rate": 1.1471759785011903e-06, "loss": 0.3155, "step": 4846 }, { "epoch": 2.4062551712725466, "grad_norm": 0.36419859528541565, "learning_rate": 1.1453352288239561e-06, "loss": 0.2723, "step": 4847 }, { "epoch": 2.4067516134370344, "grad_norm": 0.40455347299575806, "learning_rate": 1.143495766114528e-06, "loss": 0.3339, "step": 4848 }, { "epoch": 2.4072480556015226, "grad_norm": 0.40158236026763916, "learning_rate": 1.141657590987048e-06, "loss": 0.2881, "step": 4849 }, { "epoch": 2.4077444977660103, "grad_norm": 0.3645077645778656, "learning_rate": 1.1398207040552344e-06, "loss": 0.3147, "step": 4850 }, { "epoch": 2.408240939930498, "grad_norm": 0.36283624172210693, "learning_rate": 1.1379851059323739e-06, "loss": 0.3052, "step": 4851 }, { "epoch": 2.408737382094986, "grad_norm": 0.3555285930633545, "learning_rate": 1.1361507972313223e-06, "loss": 0.3013, "step": 4852 }, { "epoch": 2.4092338242594735, "grad_norm": 0.35972246527671814, "learning_rate": 1.1343177785645083e-06, "loss": 0.3379, "step": 4853 }, { "epoch": 2.4097302664239617, "grad_norm": 0.3518638610839844, "learning_rate": 1.1324860505439222e-06, "loss": 0.2938, "step": 4854 }, { "epoch": 2.4102267085884495, "grad_norm": 0.3975954055786133, "learning_rate": 1.1306556137811309e-06, "loss": 0.3222, "step": 4855 }, { "epoch": 2.410723150752937, "grad_norm": 0.4054337739944458, "learning_rate": 1.1288264688872674e-06, "loss": 0.3464, "step": 4856 }, { "epoch": 2.411219592917425, "grad_norm": 0.34953296184539795, "learning_rate": 1.1269986164730351e-06, "loss": 0.3155, "step": 4857 }, { "epoch": 2.411716035081913, "grad_norm": 0.3673235774040222, "learning_rate": 1.1251720571487002e-06, "loss": 0.309, "step": 4858 }, { "epoch": 2.412212477246401, "grad_norm": 0.3571861982345581, "learning_rate": 1.1233467915241037e-06, "loss": 0.2903, "step": 4859 }, { "epoch": 2.4127089194108886, "grad_norm": 0.3884055018424988, "learning_rate": 1.121522820208652e-06, "loss": 0.3293, "step": 4860 }, { "epoch": 2.4132053615753764, "grad_norm": 0.36768898367881775, "learning_rate": 1.1197001438113198e-06, "loss": 0.3018, "step": 4861 }, { "epoch": 2.4137018037398645, "grad_norm": 0.4006853699684143, "learning_rate": 1.1178787629406485e-06, "loss": 0.3554, "step": 4862 }, { "epoch": 2.4141982459043523, "grad_norm": 0.36171412467956543, "learning_rate": 1.1160586782047478e-06, "loss": 0.3232, "step": 4863 }, { "epoch": 2.41469468806884, "grad_norm": 0.3915865123271942, "learning_rate": 1.1142398902112967e-06, "loss": 0.3174, "step": 4864 }, { "epoch": 2.4151911302333278, "grad_norm": 0.34501609206199646, "learning_rate": 1.1124223995675353e-06, "loss": 0.2795, "step": 4865 }, { "epoch": 2.4156875723978155, "grad_norm": 0.3673859238624573, "learning_rate": 1.1106062068802765e-06, "loss": 0.3373, "step": 4866 }, { "epoch": 2.4161840145623037, "grad_norm": 0.395815372467041, "learning_rate": 1.1087913127558974e-06, "loss": 0.3438, "step": 4867 }, { "epoch": 2.4166804567267914, "grad_norm": 0.3550095856189728, "learning_rate": 1.1069777178003416e-06, "loss": 0.3023, "step": 4868 }, { "epoch": 2.417176898891279, "grad_norm": 0.3655254542827606, "learning_rate": 1.1051654226191205e-06, "loss": 0.3205, "step": 4869 }, { "epoch": 2.417673341055767, "grad_norm": 0.31735748052597046, "learning_rate": 1.103354427817307e-06, "loss": 0.286, "step": 4870 }, { "epoch": 2.4181697832202547, "grad_norm": 0.42261725664138794, "learning_rate": 1.1015447339995473e-06, "loss": 0.316, "step": 4871 }, { "epoch": 2.418666225384743, "grad_norm": 0.39652326703071594, "learning_rate": 1.099736341770045e-06, "loss": 0.2942, "step": 4872 }, { "epoch": 2.4191626675492306, "grad_norm": 0.38889238238334656, "learning_rate": 1.0979292517325757e-06, "loss": 0.3607, "step": 4873 }, { "epoch": 2.4196591097137183, "grad_norm": 0.35646045207977295, "learning_rate": 1.0961234644904767e-06, "loss": 0.2877, "step": 4874 }, { "epoch": 2.420155551878206, "grad_norm": 0.42795076966285706, "learning_rate": 1.0943189806466515e-06, "loss": 0.3585, "step": 4875 }, { "epoch": 2.420651994042694, "grad_norm": 0.38696572184562683, "learning_rate": 1.0925158008035692e-06, "loss": 0.3653, "step": 4876 }, { "epoch": 2.421148436207182, "grad_norm": 0.3482215106487274, "learning_rate": 1.0907139255632587e-06, "loss": 0.2954, "step": 4877 }, { "epoch": 2.4216448783716698, "grad_norm": 0.3918222188949585, "learning_rate": 1.0889133555273228e-06, "loss": 0.3056, "step": 4878 }, { "epoch": 2.4221413205361575, "grad_norm": 0.39330923557281494, "learning_rate": 1.0871140912969186e-06, "loss": 0.3221, "step": 4879 }, { "epoch": 2.4226377627006452, "grad_norm": 0.3798932433128357, "learning_rate": 1.0853161334727746e-06, "loss": 0.3332, "step": 4880 }, { "epoch": 2.423134204865133, "grad_norm": 0.371855765581131, "learning_rate": 1.0835194826551754e-06, "loss": 0.3273, "step": 4881 }, { "epoch": 2.423630647029621, "grad_norm": 0.3582161068916321, "learning_rate": 1.08172413944398e-06, "loss": 0.2886, "step": 4882 }, { "epoch": 2.424127089194109, "grad_norm": 0.44224780797958374, "learning_rate": 1.0799301044385996e-06, "loss": 0.3525, "step": 4883 }, { "epoch": 2.4246235313585967, "grad_norm": 0.3955089747905731, "learning_rate": 1.0781373782380162e-06, "loss": 0.3284, "step": 4884 }, { "epoch": 2.4251199735230844, "grad_norm": 0.35203635692596436, "learning_rate": 1.0763459614407717e-06, "loss": 0.3005, "step": 4885 }, { "epoch": 2.4256164156875726, "grad_norm": 0.37943533062934875, "learning_rate": 1.074555854644972e-06, "loss": 0.3532, "step": 4886 }, { "epoch": 2.4261128578520603, "grad_norm": 0.373176634311676, "learning_rate": 1.0727670584482857e-06, "loss": 0.3006, "step": 4887 }, { "epoch": 2.426609300016548, "grad_norm": 0.3710530400276184, "learning_rate": 1.0709795734479395e-06, "loss": 0.3508, "step": 4888 }, { "epoch": 2.427105742181036, "grad_norm": 0.39023658633232117, "learning_rate": 1.0691934002407323e-06, "loss": 0.3357, "step": 4889 }, { "epoch": 2.427602184345524, "grad_norm": 0.41992250084877014, "learning_rate": 1.0674085394230132e-06, "loss": 0.3541, "step": 4890 }, { "epoch": 2.4280986265100117, "grad_norm": 0.39056849479675293, "learning_rate": 1.0656249915907012e-06, "loss": 0.2779, "step": 4891 }, { "epoch": 2.4285950686744995, "grad_norm": 0.36982792615890503, "learning_rate": 1.0638427573392745e-06, "loss": 0.2848, "step": 4892 }, { "epoch": 2.4290915108389872, "grad_norm": 0.35525795817375183, "learning_rate": 1.062061837263772e-06, "loss": 0.3086, "step": 4893 }, { "epoch": 2.429587953003475, "grad_norm": 0.40155133605003357, "learning_rate": 1.0602822319587958e-06, "loss": 0.3299, "step": 4894 }, { "epoch": 2.430084395167963, "grad_norm": 0.36616480350494385, "learning_rate": 1.0585039420185056e-06, "loss": 0.3189, "step": 4895 }, { "epoch": 2.430580837332451, "grad_norm": 0.35829028487205505, "learning_rate": 1.0567269680366255e-06, "loss": 0.3193, "step": 4896 }, { "epoch": 2.4310772794969386, "grad_norm": 0.36900657415390015, "learning_rate": 1.0549513106064386e-06, "loss": 0.3429, "step": 4897 }, { "epoch": 2.4315737216614264, "grad_norm": 0.38831430673599243, "learning_rate": 1.0531769703207883e-06, "loss": 0.2989, "step": 4898 }, { "epoch": 2.432070163825914, "grad_norm": 0.37943339347839355, "learning_rate": 1.0514039477720805e-06, "loss": 0.3077, "step": 4899 }, { "epoch": 2.4325666059904023, "grad_norm": 0.350911945104599, "learning_rate": 1.0496322435522748e-06, "loss": 0.2985, "step": 4900 }, { "epoch": 2.43306304815489, "grad_norm": 0.3390367925167084, "learning_rate": 1.0478618582529004e-06, "loss": 0.3053, "step": 4901 }, { "epoch": 2.433559490319378, "grad_norm": 0.35207903385162354, "learning_rate": 1.0460927924650371e-06, "loss": 0.3569, "step": 4902 }, { "epoch": 2.4340559324838655, "grad_norm": 0.32454177737236023, "learning_rate": 1.0443250467793297e-06, "loss": 0.2836, "step": 4903 }, { "epoch": 2.4345523746483533, "grad_norm": 0.36561810970306396, "learning_rate": 1.0425586217859796e-06, "loss": 0.3995, "step": 4904 }, { "epoch": 2.4350488168128415, "grad_norm": 0.36017856001853943, "learning_rate": 1.0407935180747496e-06, "loss": 0.2955, "step": 4905 }, { "epoch": 2.435545258977329, "grad_norm": 0.3769116997718811, "learning_rate": 1.0390297362349572e-06, "loss": 0.2867, "step": 4906 }, { "epoch": 2.436041701141817, "grad_norm": 0.4163971245288849, "learning_rate": 1.0372672768554813e-06, "loss": 0.3654, "step": 4907 }, { "epoch": 2.4365381433063047, "grad_norm": 0.38897040486335754, "learning_rate": 1.0355061405247635e-06, "loss": 0.335, "step": 4908 }, { "epoch": 2.4370345854707924, "grad_norm": 0.363273024559021, "learning_rate": 1.0337463278307953e-06, "loss": 0.2823, "step": 4909 }, { "epoch": 2.4375310276352806, "grad_norm": 0.39653441309928894, "learning_rate": 1.0319878393611321e-06, "loss": 0.3591, "step": 4910 }, { "epoch": 2.4380274697997684, "grad_norm": 0.36654266715049744, "learning_rate": 1.0302306757028824e-06, "loss": 0.2715, "step": 4911 }, { "epoch": 2.438523911964256, "grad_norm": 0.35857993364334106, "learning_rate": 1.0284748374427207e-06, "loss": 0.3319, "step": 4912 }, { "epoch": 2.439020354128744, "grad_norm": 0.32874801754951477, "learning_rate": 1.0267203251668689e-06, "loss": 0.283, "step": 4913 }, { "epoch": 2.4395167962932316, "grad_norm": 0.3953009843826294, "learning_rate": 1.0249671394611134e-06, "loss": 0.34, "step": 4914 }, { "epoch": 2.44001323845772, "grad_norm": 0.36690452694892883, "learning_rate": 1.0232152809107937e-06, "loss": 0.3374, "step": 4915 }, { "epoch": 2.4405096806222075, "grad_norm": 0.3770197033882141, "learning_rate": 1.0214647501008095e-06, "loss": 0.3212, "step": 4916 }, { "epoch": 2.4410061227866953, "grad_norm": 0.3589628040790558, "learning_rate": 1.0197155476156156e-06, "loss": 0.2906, "step": 4917 }, { "epoch": 2.441502564951183, "grad_norm": 0.35869213938713074, "learning_rate": 1.0179676740392196e-06, "loss": 0.2986, "step": 4918 }, { "epoch": 2.441999007115671, "grad_norm": 0.41929689049720764, "learning_rate": 1.0162211299551944e-06, "loss": 0.328, "step": 4919 }, { "epoch": 2.442495449280159, "grad_norm": 0.3616867661476135, "learning_rate": 1.0144759159466594e-06, "loss": 0.272, "step": 4920 }, { "epoch": 2.4429918914446467, "grad_norm": 0.41798946261405945, "learning_rate": 1.0127320325962953e-06, "loss": 0.3787, "step": 4921 }, { "epoch": 2.4434883336091344, "grad_norm": 0.40139102935791016, "learning_rate": 1.0109894804863378e-06, "loss": 0.3324, "step": 4922 }, { "epoch": 2.4439847757736226, "grad_norm": 0.35927248001098633, "learning_rate": 1.0092482601985775e-06, "loss": 0.2733, "step": 4923 }, { "epoch": 2.4444812179381104, "grad_norm": 0.3808128237724304, "learning_rate": 1.0075083723143614e-06, "loss": 0.3174, "step": 4924 }, { "epoch": 2.444977660102598, "grad_norm": 0.3996964395046234, "learning_rate": 1.005769817414589e-06, "loss": 0.3316, "step": 4925 }, { "epoch": 2.445474102267086, "grad_norm": 0.3385835289955139, "learning_rate": 1.0040325960797176e-06, "loss": 0.2461, "step": 4926 }, { "epoch": 2.4459705444315736, "grad_norm": 0.4081283509731293, "learning_rate": 1.0022967088897573e-06, "loss": 0.3372, "step": 4927 }, { "epoch": 2.4464669865960618, "grad_norm": 0.36840635538101196, "learning_rate": 1.0005621564242762e-06, "loss": 0.282, "step": 4928 }, { "epoch": 2.4469634287605495, "grad_norm": 0.3651689887046814, "learning_rate": 9.988289392623895e-07, "loss": 0.2973, "step": 4929 }, { "epoch": 2.4474598709250373, "grad_norm": 0.40120941400527954, "learning_rate": 9.970970579827771e-07, "loss": 0.3554, "step": 4930 }, { "epoch": 2.447956313089525, "grad_norm": 0.3595849871635437, "learning_rate": 9.953665131636624e-07, "loss": 0.3325, "step": 4931 }, { "epoch": 2.4484527552540127, "grad_norm": 0.3555055856704712, "learning_rate": 9.936373053828297e-07, "loss": 0.3275, "step": 4932 }, { "epoch": 2.448949197418501, "grad_norm": 0.3678198754787445, "learning_rate": 9.919094352176134e-07, "loss": 0.285, "step": 4933 }, { "epoch": 2.4494456395829887, "grad_norm": 0.3855838179588318, "learning_rate": 9.901829032449028e-07, "loss": 0.2977, "step": 4934 }, { "epoch": 2.4499420817474764, "grad_norm": 0.39239153265953064, "learning_rate": 9.884577100411413e-07, "loss": 0.3746, "step": 4935 }, { "epoch": 2.450438523911964, "grad_norm": 0.34572210907936096, "learning_rate": 9.867338561823215e-07, "loss": 0.3015, "step": 4936 }, { "epoch": 2.450934966076452, "grad_norm": 0.37663114070892334, "learning_rate": 9.850113422439927e-07, "loss": 0.282, "step": 4937 }, { "epoch": 2.45143140824094, "grad_norm": 0.3374653160572052, "learning_rate": 9.832901688012554e-07, "loss": 0.2615, "step": 4938 }, { "epoch": 2.451927850405428, "grad_norm": 0.3543810248374939, "learning_rate": 9.815703364287622e-07, "loss": 0.3462, "step": 4939 }, { "epoch": 2.4524242925699156, "grad_norm": 0.3365572988986969, "learning_rate": 9.798518457007206e-07, "loss": 0.3077, "step": 4940 }, { "epoch": 2.4529207347344033, "grad_norm": 0.37499451637268066, "learning_rate": 9.781346971908833e-07, "loss": 0.3514, "step": 4941 }, { "epoch": 2.453417176898891, "grad_norm": 0.35972365736961365, "learning_rate": 9.764188914725647e-07, "loss": 0.3021, "step": 4942 }, { "epoch": 2.4539136190633792, "grad_norm": 0.4245351254940033, "learning_rate": 9.747044291186226e-07, "loss": 0.3848, "step": 4943 }, { "epoch": 2.454410061227867, "grad_norm": 0.3425408899784088, "learning_rate": 9.7299131070147e-07, "loss": 0.2857, "step": 4944 }, { "epoch": 2.4549065033923547, "grad_norm": 0.3507632315158844, "learning_rate": 9.712795367930706e-07, "loss": 0.352, "step": 4945 }, { "epoch": 2.4554029455568425, "grad_norm": 0.34913402795791626, "learning_rate": 9.695691079649394e-07, "loss": 0.2797, "step": 4946 }, { "epoch": 2.4558993877213307, "grad_norm": 0.370000958442688, "learning_rate": 9.678600247881431e-07, "loss": 0.3438, "step": 4947 }, { "epoch": 2.4563958298858184, "grad_norm": 0.34706395864486694, "learning_rate": 9.661522878332947e-07, "loss": 0.3356, "step": 4948 }, { "epoch": 2.456892272050306, "grad_norm": 0.3424101173877716, "learning_rate": 9.64445897670566e-07, "loss": 0.2994, "step": 4949 }, { "epoch": 2.457388714214794, "grad_norm": 0.35378164052963257, "learning_rate": 9.627408548696704e-07, "loss": 0.3258, "step": 4950 }, { "epoch": 2.4578851563792816, "grad_norm": 0.38849571347236633, "learning_rate": 9.61037159999878e-07, "loss": 0.3799, "step": 4951 }, { "epoch": 2.45838159854377, "grad_norm": 0.32079556584358215, "learning_rate": 9.593348136300028e-07, "loss": 0.2857, "step": 4952 }, { "epoch": 2.4588780407082576, "grad_norm": 0.3282308578491211, "learning_rate": 9.57633816328416e-07, "loss": 0.3334, "step": 4953 }, { "epoch": 2.4593744828727453, "grad_norm": 0.391666442155838, "learning_rate": 9.559341686630319e-07, "loss": 0.3998, "step": 4954 }, { "epoch": 2.459870925037233, "grad_norm": 0.3573076128959656, "learning_rate": 9.542358712013155e-07, "loss": 0.3087, "step": 4955 }, { "epoch": 2.4603673672017212, "grad_norm": 0.3880205452442169, "learning_rate": 9.525389245102867e-07, "loss": 0.3467, "step": 4956 }, { "epoch": 2.460863809366209, "grad_norm": 0.3435259461402893, "learning_rate": 9.508433291565061e-07, "loss": 0.3049, "step": 4957 }, { "epoch": 2.4613602515306967, "grad_norm": 0.36182671785354614, "learning_rate": 9.491490857060887e-07, "loss": 0.3248, "step": 4958 }, { "epoch": 2.4618566936951845, "grad_norm": 0.32613635063171387, "learning_rate": 9.474561947246935e-07, "loss": 0.2916, "step": 4959 }, { "epoch": 2.462353135859672, "grad_norm": 0.3661681115627289, "learning_rate": 9.457646567775347e-07, "loss": 0.3306, "step": 4960 }, { "epoch": 2.4628495780241604, "grad_norm": 0.36027055978775024, "learning_rate": 9.440744724293682e-07, "loss": 0.2983, "step": 4961 }, { "epoch": 2.463346020188648, "grad_norm": 0.37559568881988525, "learning_rate": 9.423856422445015e-07, "loss": 0.3209, "step": 4962 }, { "epoch": 2.463842462353136, "grad_norm": 0.39938655495643616, "learning_rate": 9.406981667867888e-07, "loss": 0.3396, "step": 4963 }, { "epoch": 2.4643389045176236, "grad_norm": 0.38844868540763855, "learning_rate": 9.390120466196323e-07, "loss": 0.2908, "step": 4964 }, { "epoch": 2.4648353466821113, "grad_norm": 0.33408236503601074, "learning_rate": 9.373272823059836e-07, "loss": 0.2849, "step": 4965 }, { "epoch": 2.4653317888465995, "grad_norm": 0.3636452555656433, "learning_rate": 9.356438744083368e-07, "loss": 0.3181, "step": 4966 }, { "epoch": 2.4658282310110873, "grad_norm": 0.3753187954425812, "learning_rate": 9.339618234887371e-07, "loss": 0.3951, "step": 4967 }, { "epoch": 2.466324673175575, "grad_norm": 0.3637653589248657, "learning_rate": 9.322811301087753e-07, "loss": 0.3502, "step": 4968 }, { "epoch": 2.4668211153400628, "grad_norm": 0.3571566939353943, "learning_rate": 9.306017948295903e-07, "loss": 0.2988, "step": 4969 }, { "epoch": 2.4673175575045505, "grad_norm": 0.3515010476112366, "learning_rate": 9.289238182118654e-07, "loss": 0.2853, "step": 4970 }, { "epoch": 2.4678139996690387, "grad_norm": 0.42710453271865845, "learning_rate": 9.272472008158323e-07, "loss": 0.3535, "step": 4971 }, { "epoch": 2.4683104418335264, "grad_norm": 0.35189056396484375, "learning_rate": 9.255719432012683e-07, "loss": 0.2997, "step": 4972 }, { "epoch": 2.468806883998014, "grad_norm": 0.37915539741516113, "learning_rate": 9.238980459274949e-07, "loss": 0.3168, "step": 4973 }, { "epoch": 2.469303326162502, "grad_norm": 0.3731113076210022, "learning_rate": 9.222255095533816e-07, "loss": 0.3128, "step": 4974 }, { "epoch": 2.4697997683269897, "grad_norm": 0.37466666102409363, "learning_rate": 9.20554334637343e-07, "loss": 0.3285, "step": 4975 }, { "epoch": 2.470296210491478, "grad_norm": 0.3562244474887848, "learning_rate": 9.188845217373399e-07, "loss": 0.3082, "step": 4976 }, { "epoch": 2.4707926526559656, "grad_norm": 0.38875409960746765, "learning_rate": 9.172160714108752e-07, "loss": 0.3224, "step": 4977 }, { "epoch": 2.4712890948204533, "grad_norm": 0.360946387052536, "learning_rate": 9.15548984214999e-07, "loss": 0.2613, "step": 4978 }, { "epoch": 2.471785536984941, "grad_norm": 0.3735833764076233, "learning_rate": 9.138832607063103e-07, "loss": 0.3643, "step": 4979 }, { "epoch": 2.4722819791494293, "grad_norm": 0.3605547845363617, "learning_rate": 9.122189014409449e-07, "loss": 0.3175, "step": 4980 }, { "epoch": 2.472778421313917, "grad_norm": 0.4002368748188019, "learning_rate": 9.1055590697459e-07, "loss": 0.3472, "step": 4981 }, { "epoch": 2.4732748634784048, "grad_norm": 0.37638556957244873, "learning_rate": 9.088942778624704e-07, "loss": 0.3177, "step": 4982 }, { "epoch": 2.4737713056428925, "grad_norm": 0.3686974048614502, "learning_rate": 9.072340146593639e-07, "loss": 0.3455, "step": 4983 }, { "epoch": 2.4742677478073807, "grad_norm": 0.34909188747406006, "learning_rate": 9.055751179195832e-07, "loss": 0.308, "step": 4984 }, { "epoch": 2.4747641899718684, "grad_norm": 0.35106196999549866, "learning_rate": 9.039175881969903e-07, "loss": 0.3245, "step": 4985 }, { "epoch": 2.475260632136356, "grad_norm": 0.32402607798576355, "learning_rate": 9.022614260449897e-07, "loss": 0.2974, "step": 4986 }, { "epoch": 2.475757074300844, "grad_norm": 0.3803155720233917, "learning_rate": 9.006066320165285e-07, "loss": 0.4011, "step": 4987 }, { "epoch": 2.4762535164653316, "grad_norm": 0.38279569149017334, "learning_rate": 8.989532066640988e-07, "loss": 0.3321, "step": 4988 }, { "epoch": 2.47674995862982, "grad_norm": 0.3522196412086487, "learning_rate": 8.973011505397306e-07, "loss": 0.323, "step": 4989 }, { "epoch": 2.4772464007943076, "grad_norm": 0.34878629446029663, "learning_rate": 8.956504641950053e-07, "loss": 0.3211, "step": 4990 }, { "epoch": 2.4777428429587953, "grad_norm": 0.38175857067108154, "learning_rate": 8.940011481810384e-07, "loss": 0.3021, "step": 4991 }, { "epoch": 2.478239285123283, "grad_norm": 0.3951548933982849, "learning_rate": 8.923532030484938e-07, "loss": 0.3306, "step": 4992 }, { "epoch": 2.478735727287771, "grad_norm": 0.38084328174591064, "learning_rate": 8.907066293475752e-07, "loss": 0.3866, "step": 4993 }, { "epoch": 2.479232169452259, "grad_norm": 0.3514200448989868, "learning_rate": 8.890614276280285e-07, "loss": 0.3237, "step": 4994 }, { "epoch": 2.4797286116167467, "grad_norm": 0.3602486550807953, "learning_rate": 8.874175984391431e-07, "loss": 0.3141, "step": 4995 }, { "epoch": 2.4802250537812345, "grad_norm": 0.36460044980049133, "learning_rate": 8.857751423297456e-07, "loss": 0.3678, "step": 4996 }, { "epoch": 2.480721495945722, "grad_norm": 0.374772310256958, "learning_rate": 8.841340598482117e-07, "loss": 0.3202, "step": 4997 }, { "epoch": 2.48121793811021, "grad_norm": 0.34427645802497864, "learning_rate": 8.824943515424511e-07, "loss": 0.311, "step": 4998 }, { "epoch": 2.481714380274698, "grad_norm": 0.3498952090740204, "learning_rate": 8.808560179599201e-07, "loss": 0.2813, "step": 4999 }, { "epoch": 2.482210822439186, "grad_norm": 0.3985099792480469, "learning_rate": 8.792190596476102e-07, "loss": 0.317, "step": 5000 }, { "epoch": 2.4827072646036736, "grad_norm": 0.3862132132053375, "learning_rate": 8.775834771520608e-07, "loss": 0.3343, "step": 5001 }, { "epoch": 2.4832037067681614, "grad_norm": 0.3497495651245117, "learning_rate": 8.75949271019349e-07, "loss": 0.2948, "step": 5002 }, { "epoch": 2.483700148932649, "grad_norm": 0.3841489851474762, "learning_rate": 8.743164417950883e-07, "loss": 0.3117, "step": 5003 }, { "epoch": 2.4841965910971373, "grad_norm": 0.36954450607299805, "learning_rate": 8.726849900244383e-07, "loss": 0.3438, "step": 5004 }, { "epoch": 2.484693033261625, "grad_norm": 0.35791128873825073, "learning_rate": 8.710549162520954e-07, "loss": 0.3308, "step": 5005 }, { "epoch": 2.485189475426113, "grad_norm": 0.3780181407928467, "learning_rate": 8.694262210222992e-07, "loss": 0.3541, "step": 5006 }, { "epoch": 2.4856859175906005, "grad_norm": 0.3612971603870392, "learning_rate": 8.677989048788238e-07, "loss": 0.2953, "step": 5007 }, { "epoch": 2.4861823597550887, "grad_norm": 0.3759172558784485, "learning_rate": 8.661729683649867e-07, "loss": 0.3133, "step": 5008 }, { "epoch": 2.4866788019195765, "grad_norm": 0.36395272612571716, "learning_rate": 8.645484120236442e-07, "loss": 0.296, "step": 5009 }, { "epoch": 2.487175244084064, "grad_norm": 0.39730900526046753, "learning_rate": 8.629252363971918e-07, "loss": 0.3548, "step": 5010 }, { "epoch": 2.487671686248552, "grad_norm": 0.37979304790496826, "learning_rate": 8.613034420275634e-07, "loss": 0.2605, "step": 5011 }, { "epoch": 2.4881681284130397, "grad_norm": 0.41009318828582764, "learning_rate": 8.596830294562325e-07, "loss": 0.3291, "step": 5012 }, { "epoch": 2.488664570577528, "grad_norm": 0.40881767868995667, "learning_rate": 8.580639992242113e-07, "loss": 0.3377, "step": 5013 }, { "epoch": 2.4891610127420156, "grad_norm": 0.3857589066028595, "learning_rate": 8.564463518720483e-07, "loss": 0.3351, "step": 5014 }, { "epoch": 2.4896574549065034, "grad_norm": 0.381450891494751, "learning_rate": 8.548300879398324e-07, "loss": 0.3091, "step": 5015 }, { "epoch": 2.490153897070991, "grad_norm": 0.39773693680763245, "learning_rate": 8.532152079671913e-07, "loss": 0.318, "step": 5016 }, { "epoch": 2.4906503392354793, "grad_norm": 0.37043464183807373, "learning_rate": 8.516017124932885e-07, "loss": 0.3138, "step": 5017 }, { "epoch": 2.491146781399967, "grad_norm": 0.37554505467414856, "learning_rate": 8.499896020568276e-07, "loss": 0.3616, "step": 5018 }, { "epoch": 2.491643223564455, "grad_norm": 0.3250468075275421, "learning_rate": 8.483788771960455e-07, "loss": 0.3048, "step": 5019 }, { "epoch": 2.4921396657289425, "grad_norm": 0.39842724800109863, "learning_rate": 8.46769538448724e-07, "loss": 0.343, "step": 5020 }, { "epoch": 2.4926361078934303, "grad_norm": 0.3733188509941101, "learning_rate": 8.451615863521734e-07, "loss": 0.3044, "step": 5021 }, { "epoch": 2.4931325500579185, "grad_norm": 0.3852645754814148, "learning_rate": 8.435550214432486e-07, "loss": 0.2801, "step": 5022 }, { "epoch": 2.493628992222406, "grad_norm": 0.34160223603248596, "learning_rate": 8.419498442583335e-07, "loss": 0.3096, "step": 5023 }, { "epoch": 2.494125434386894, "grad_norm": 0.40560564398765564, "learning_rate": 8.403460553333586e-07, "loss": 0.3363, "step": 5024 }, { "epoch": 2.4946218765513817, "grad_norm": 0.36869266629219055, "learning_rate": 8.387436552037814e-07, "loss": 0.3556, "step": 5025 }, { "epoch": 2.4951183187158694, "grad_norm": 0.3564021587371826, "learning_rate": 8.371426444045994e-07, "loss": 0.3178, "step": 5026 }, { "epoch": 2.4956147608803576, "grad_norm": 0.389458030462265, "learning_rate": 8.35543023470351e-07, "loss": 0.3281, "step": 5027 }, { "epoch": 2.4961112030448454, "grad_norm": 0.3745863437652588, "learning_rate": 8.339447929351025e-07, "loss": 0.3192, "step": 5028 }, { "epoch": 2.496607645209333, "grad_norm": 0.3737655282020569, "learning_rate": 8.323479533324613e-07, "loss": 0.2836, "step": 5029 }, { "epoch": 2.497104087373821, "grad_norm": 0.4126444458961487, "learning_rate": 8.307525051955656e-07, "loss": 0.3328, "step": 5030 }, { "epoch": 2.4976005295383086, "grad_norm": 0.3524698317050934, "learning_rate": 8.29158449057097e-07, "loss": 0.2961, "step": 5031 }, { "epoch": 2.4980969717027968, "grad_norm": 0.3837282657623291, "learning_rate": 8.275657854492636e-07, "loss": 0.3489, "step": 5032 }, { "epoch": 2.4985934138672845, "grad_norm": 0.3552197217941284, "learning_rate": 8.259745149038145e-07, "loss": 0.3382, "step": 5033 }, { "epoch": 2.4990898560317722, "grad_norm": 0.3492162525653839, "learning_rate": 8.243846379520309e-07, "loss": 0.3582, "step": 5034 }, { "epoch": 2.49958629819626, "grad_norm": 0.34845200181007385, "learning_rate": 8.227961551247298e-07, "loss": 0.3139, "step": 5035 }, { "epoch": 2.5000827403607477, "grad_norm": 0.4165628254413605, "learning_rate": 8.212090669522632e-07, "loss": 0.3408, "step": 5036 }, { "epoch": 2.500579182525236, "grad_norm": 0.35848692059516907, "learning_rate": 8.196233739645154e-07, "loss": 0.2714, "step": 5037 }, { "epoch": 2.5010756246897237, "grad_norm": 0.3795055150985718, "learning_rate": 8.180390766909063e-07, "loss": 0.315, "step": 5038 }, { "epoch": 2.5015720668542114, "grad_norm": 0.3737562596797943, "learning_rate": 8.164561756603901e-07, "loss": 0.3841, "step": 5039 }, { "epoch": 2.5020685090186996, "grad_norm": 0.3618084490299225, "learning_rate": 8.148746714014544e-07, "loss": 0.2914, "step": 5040 }, { "epoch": 2.502564951183187, "grad_norm": 0.34749874472618103, "learning_rate": 8.132945644421203e-07, "loss": 0.3205, "step": 5041 }, { "epoch": 2.503061393347675, "grad_norm": 0.36527344584465027, "learning_rate": 8.11715855309943e-07, "loss": 0.3459, "step": 5042 }, { "epoch": 2.503557835512163, "grad_norm": 0.33353346586227417, "learning_rate": 8.10138544532012e-07, "loss": 0.3399, "step": 5043 }, { "epoch": 2.5040542776766506, "grad_norm": 0.35476112365722656, "learning_rate": 8.08562632634945e-07, "loss": 0.3524, "step": 5044 }, { "epoch": 2.5045507198411388, "grad_norm": 0.37219905853271484, "learning_rate": 8.069881201448987e-07, "loss": 0.3583, "step": 5045 }, { "epoch": 2.5050471620056265, "grad_norm": 0.3199833035469055, "learning_rate": 8.054150075875589e-07, "loss": 0.3199, "step": 5046 }, { "epoch": 2.5055436041701142, "grad_norm": 0.36646196246147156, "learning_rate": 8.038432954881464e-07, "loss": 0.3535, "step": 5047 }, { "epoch": 2.506040046334602, "grad_norm": 0.3828696608543396, "learning_rate": 8.022729843714116e-07, "loss": 0.3006, "step": 5048 }, { "epoch": 2.5065364884990897, "grad_norm": 0.3440951108932495, "learning_rate": 8.007040747616379e-07, "loss": 0.314, "step": 5049 }, { "epoch": 2.507032930663578, "grad_norm": 0.3521939218044281, "learning_rate": 7.991365671826462e-07, "loss": 0.3318, "step": 5050 }, { "epoch": 2.5075293728280657, "grad_norm": 0.34519824385643005, "learning_rate": 7.975704621577796e-07, "loss": 0.288, "step": 5051 }, { "epoch": 2.5080258149925534, "grad_norm": 0.3873867988586426, "learning_rate": 7.960057602099203e-07, "loss": 0.3441, "step": 5052 }, { "epoch": 2.508522257157041, "grad_norm": 0.38266536593437195, "learning_rate": 7.944424618614794e-07, "loss": 0.2958, "step": 5053 }, { "epoch": 2.509018699321529, "grad_norm": 0.38417568802833557, "learning_rate": 7.928805676344009e-07, "loss": 0.3342, "step": 5054 }, { "epoch": 2.509515141486017, "grad_norm": 0.3395209014415741, "learning_rate": 7.913200780501568e-07, "loss": 0.3124, "step": 5055 }, { "epoch": 2.510011583650505, "grad_norm": 0.34438130259513855, "learning_rate": 7.897609936297529e-07, "loss": 0.2699, "step": 5056 }, { "epoch": 2.5105080258149925, "grad_norm": 0.404945969581604, "learning_rate": 7.882033148937252e-07, "loss": 0.3137, "step": 5057 }, { "epoch": 2.5110044679794803, "grad_norm": 0.35015127062797546, "learning_rate": 7.866470423621402e-07, "loss": 0.3502, "step": 5058 }, { "epoch": 2.511500910143968, "grad_norm": 0.38860464096069336, "learning_rate": 7.850921765545966e-07, "loss": 0.2866, "step": 5059 }, { "epoch": 2.511997352308456, "grad_norm": 0.33647799491882324, "learning_rate": 7.835387179902182e-07, "loss": 0.2948, "step": 5060 }, { "epoch": 2.512493794472944, "grad_norm": 0.39762628078460693, "learning_rate": 7.819866671876669e-07, "loss": 0.2875, "step": 5061 }, { "epoch": 2.5129902366374317, "grad_norm": 0.37268927693367004, "learning_rate": 7.804360246651271e-07, "loss": 0.3563, "step": 5062 }, { "epoch": 2.5134866788019194, "grad_norm": 0.33728262782096863, "learning_rate": 7.788867909403169e-07, "loss": 0.3669, "step": 5063 }, { "epoch": 2.513983120966407, "grad_norm": 0.33777859807014465, "learning_rate": 7.773389665304842e-07, "loss": 0.2964, "step": 5064 }, { "epoch": 2.5144795631308954, "grad_norm": 0.3993433117866516, "learning_rate": 7.757925519524045e-07, "loss": 0.3053, "step": 5065 }, { "epoch": 2.514976005295383, "grad_norm": 0.34740978479385376, "learning_rate": 7.742475477223859e-07, "loss": 0.3004, "step": 5066 }, { "epoch": 2.515472447459871, "grad_norm": 0.39242833852767944, "learning_rate": 7.727039543562586e-07, "loss": 0.3417, "step": 5067 }, { "epoch": 2.5159688896243586, "grad_norm": 0.35634931921958923, "learning_rate": 7.711617723693921e-07, "loss": 0.3018, "step": 5068 }, { "epoch": 2.5164653317888463, "grad_norm": 0.39301085472106934, "learning_rate": 7.696210022766753e-07, "loss": 0.3283, "step": 5069 }, { "epoch": 2.5169617739533345, "grad_norm": 0.35361579060554504, "learning_rate": 7.680816445925315e-07, "loss": 0.3231, "step": 5070 }, { "epoch": 2.5174582161178223, "grad_norm": 0.36504557728767395, "learning_rate": 7.665436998309067e-07, "loss": 0.3266, "step": 5071 }, { "epoch": 2.51795465828231, "grad_norm": 0.34920212626457214, "learning_rate": 7.650071685052835e-07, "loss": 0.305, "step": 5072 }, { "epoch": 2.518451100446798, "grad_norm": 0.36054930090904236, "learning_rate": 7.634720511286664e-07, "loss": 0.3194, "step": 5073 }, { "epoch": 2.518947542611286, "grad_norm": 0.3539895713329315, "learning_rate": 7.619383482135884e-07, "loss": 0.3346, "step": 5074 }, { "epoch": 2.5194439847757737, "grad_norm": 0.370725154876709, "learning_rate": 7.604060602721114e-07, "loss": 0.3159, "step": 5075 }, { "epoch": 2.5199404269402614, "grad_norm": 0.36398717761039734, "learning_rate": 7.588751878158251e-07, "loss": 0.328, "step": 5076 }, { "epoch": 2.520436869104749, "grad_norm": 0.34845250844955444, "learning_rate": 7.57345731355848e-07, "loss": 0.327, "step": 5077 }, { "epoch": 2.5209333112692374, "grad_norm": 0.4031185805797577, "learning_rate": 7.558176914028203e-07, "loss": 0.3992, "step": 5078 }, { "epoch": 2.521429753433725, "grad_norm": 0.34258243441581726, "learning_rate": 7.542910684669153e-07, "loss": 0.2716, "step": 5079 }, { "epoch": 2.521926195598213, "grad_norm": 0.390032023191452, "learning_rate": 7.527658630578305e-07, "loss": 0.3334, "step": 5080 }, { "epoch": 2.5224226377627006, "grad_norm": 0.34914350509643555, "learning_rate": 7.51242075684791e-07, "loss": 0.293, "step": 5081 }, { "epoch": 2.5229190799271883, "grad_norm": 0.33246511220932007, "learning_rate": 7.49719706856547e-07, "loss": 0.284, "step": 5082 }, { "epoch": 2.5234155220916765, "grad_norm": 0.35236212611198425, "learning_rate": 7.48198757081377e-07, "loss": 0.3509, "step": 5083 }, { "epoch": 2.5239119642561643, "grad_norm": 0.38592952489852905, "learning_rate": 7.466792268670853e-07, "loss": 0.3619, "step": 5084 }, { "epoch": 2.524408406420652, "grad_norm": 0.35901010036468506, "learning_rate": 7.451611167209999e-07, "loss": 0.2321, "step": 5085 }, { "epoch": 2.5249048485851397, "grad_norm": 0.38185033202171326, "learning_rate": 7.436444271499776e-07, "loss": 0.335, "step": 5086 }, { "epoch": 2.5254012907496275, "grad_norm": 0.3802032768726349, "learning_rate": 7.421291586604001e-07, "loss": 0.311, "step": 5087 }, { "epoch": 2.5258977329141157, "grad_norm": 0.4138059616088867, "learning_rate": 7.406153117581733e-07, "loss": 0.3263, "step": 5088 }, { "epoch": 2.5263941750786034, "grad_norm": 0.3553308844566345, "learning_rate": 7.391028869487316e-07, "loss": 0.3436, "step": 5089 }, { "epoch": 2.526890617243091, "grad_norm": 0.30839821696281433, "learning_rate": 7.375918847370294e-07, "loss": 0.2917, "step": 5090 }, { "epoch": 2.527387059407579, "grad_norm": 0.36017608642578125, "learning_rate": 7.360823056275528e-07, "loss": 0.3502, "step": 5091 }, { "epoch": 2.5278835015720666, "grad_norm": 0.38861504197120667, "learning_rate": 7.345741501243065e-07, "loss": 0.3263, "step": 5092 }, { "epoch": 2.528379943736555, "grad_norm": 0.3921509385108948, "learning_rate": 7.330674187308234e-07, "loss": 0.3173, "step": 5093 }, { "epoch": 2.5288763859010426, "grad_norm": 0.3595849871635437, "learning_rate": 7.315621119501609e-07, "loss": 0.3351, "step": 5094 }, { "epoch": 2.5293728280655303, "grad_norm": 0.3178766965866089, "learning_rate": 7.300582302848991e-07, "loss": 0.2795, "step": 5095 }, { "epoch": 2.529869270230018, "grad_norm": 0.3937375843524933, "learning_rate": 7.285557742371446e-07, "loss": 0.3781, "step": 5096 }, { "epoch": 2.530365712394506, "grad_norm": 0.35853317379951477, "learning_rate": 7.270547443085241e-07, "loss": 0.3105, "step": 5097 }, { "epoch": 2.530862154558994, "grad_norm": 0.3827025890350342, "learning_rate": 7.255551410001938e-07, "loss": 0.356, "step": 5098 }, { "epoch": 2.5313585967234817, "grad_norm": 0.34105923771858215, "learning_rate": 7.240569648128282e-07, "loss": 0.3257, "step": 5099 }, { "epoch": 2.5318550388879695, "grad_norm": 0.35779592394828796, "learning_rate": 7.225602162466294e-07, "loss": 0.2932, "step": 5100 }, { "epoch": 2.5323514810524577, "grad_norm": 0.35753658413887024, "learning_rate": 7.210648958013177e-07, "loss": 0.3144, "step": 5101 }, { "epoch": 2.532847923216945, "grad_norm": 0.3937910795211792, "learning_rate": 7.195710039761444e-07, "loss": 0.3664, "step": 5102 }, { "epoch": 2.533344365381433, "grad_norm": 0.349884957075119, "learning_rate": 7.180785412698765e-07, "loss": 0.3018, "step": 5103 }, { "epoch": 2.533840807545921, "grad_norm": 0.38304048776626587, "learning_rate": 7.165875081808072e-07, "loss": 0.3442, "step": 5104 }, { "epoch": 2.5343372497104086, "grad_norm": 0.3425768315792084, "learning_rate": 7.150979052067524e-07, "loss": 0.3274, "step": 5105 }, { "epoch": 2.534833691874897, "grad_norm": 0.3281242549419403, "learning_rate": 7.136097328450497e-07, "loss": 0.3294, "step": 5106 }, { "epoch": 2.5353301340393846, "grad_norm": 0.3511752486228943, "learning_rate": 7.12122991592561e-07, "loss": 0.3154, "step": 5107 }, { "epoch": 2.5358265762038723, "grad_norm": 0.34962698817253113, "learning_rate": 7.106376819456651e-07, "loss": 0.3006, "step": 5108 }, { "epoch": 2.53632301836836, "grad_norm": 0.35038796067237854, "learning_rate": 7.091538044002705e-07, "loss": 0.3186, "step": 5109 }, { "epoch": 2.536819460532848, "grad_norm": 0.3535234332084656, "learning_rate": 7.076713594518014e-07, "loss": 0.3319, "step": 5110 }, { "epoch": 2.537315902697336, "grad_norm": 0.3533516526222229, "learning_rate": 7.061903475952059e-07, "loss": 0.2915, "step": 5111 }, { "epoch": 2.5378123448618237, "grad_norm": 0.3735027313232422, "learning_rate": 7.047107693249544e-07, "loss": 0.2859, "step": 5112 }, { "epoch": 2.5383087870263115, "grad_norm": 0.4123956859111786, "learning_rate": 7.032326251350375e-07, "loss": 0.3315, "step": 5113 }, { "epoch": 2.538805229190799, "grad_norm": 0.3520928621292114, "learning_rate": 7.017559155189679e-07, "loss": 0.3455, "step": 5114 }, { "epoch": 2.539301671355287, "grad_norm": 0.370400607585907, "learning_rate": 7.002806409697776e-07, "loss": 0.3205, "step": 5115 }, { "epoch": 2.539798113519775, "grad_norm": 0.3681352138519287, "learning_rate": 6.988068019800214e-07, "loss": 0.2843, "step": 5116 }, { "epoch": 2.540294555684263, "grad_norm": 0.3412403464317322, "learning_rate": 6.973343990417746e-07, "loss": 0.2697, "step": 5117 }, { "epoch": 2.5407909978487506, "grad_norm": 0.40855643153190613, "learning_rate": 6.958634326466313e-07, "loss": 0.3955, "step": 5118 }, { "epoch": 2.5412874400132384, "grad_norm": 0.3142530024051666, "learning_rate": 6.943939032857094e-07, "loss": 0.2792, "step": 5119 }, { "epoch": 2.541783882177726, "grad_norm": 0.3401819169521332, "learning_rate": 6.929258114496407e-07, "loss": 0.3176, "step": 5120 }, { "epoch": 2.5422803243422143, "grad_norm": 0.3781348764896393, "learning_rate": 6.914591576285862e-07, "loss": 0.3189, "step": 5121 }, { "epoch": 2.542776766506702, "grad_norm": 0.38191506266593933, "learning_rate": 6.899939423122181e-07, "loss": 0.3814, "step": 5122 }, { "epoch": 2.5432732086711898, "grad_norm": 0.3429218828678131, "learning_rate": 6.885301659897336e-07, "loss": 0.3083, "step": 5123 }, { "epoch": 2.5437696508356775, "grad_norm": 0.351910263299942, "learning_rate": 6.870678291498467e-07, "loss": 0.3089, "step": 5124 }, { "epoch": 2.5442660930001653, "grad_norm": 0.3692029118537903, "learning_rate": 6.856069322807946e-07, "loss": 0.2971, "step": 5125 }, { "epoch": 2.5447625351646534, "grad_norm": 0.422872930765152, "learning_rate": 6.841474758703276e-07, "loss": 0.3763, "step": 5126 }, { "epoch": 2.545258977329141, "grad_norm": 0.3625296652317047, "learning_rate": 6.826894604057199e-07, "loss": 0.3519, "step": 5127 }, { "epoch": 2.545755419493629, "grad_norm": 0.34095361828804016, "learning_rate": 6.812328863737632e-07, "loss": 0.2388, "step": 5128 }, { "epoch": 2.5462518616581167, "grad_norm": 0.38202276825904846, "learning_rate": 6.797777542607686e-07, "loss": 0.3291, "step": 5129 }, { "epoch": 2.5467483038226044, "grad_norm": 0.3644881844520569, "learning_rate": 6.783240645525657e-07, "loss": 0.3553, "step": 5130 }, { "epoch": 2.5472447459870926, "grad_norm": 0.34740766882896423, "learning_rate": 6.768718177344985e-07, "loss": 0.3371, "step": 5131 }, { "epoch": 2.5477411881515803, "grad_norm": 0.3442871570587158, "learning_rate": 6.75421014291438e-07, "loss": 0.2755, "step": 5132 }, { "epoch": 2.548237630316068, "grad_norm": 0.37016645073890686, "learning_rate": 6.739716547077635e-07, "loss": 0.3142, "step": 5133 }, { "epoch": 2.5487340724805563, "grad_norm": 0.3424086272716522, "learning_rate": 6.72523739467379e-07, "loss": 0.3462, "step": 5134 }, { "epoch": 2.5492305146450436, "grad_norm": 0.3238237500190735, "learning_rate": 6.710772690537037e-07, "loss": 0.3585, "step": 5135 }, { "epoch": 2.5497269568095318, "grad_norm": 0.38326480984687805, "learning_rate": 6.696322439496744e-07, "loss": 0.3435, "step": 5136 }, { "epoch": 2.5502233989740195, "grad_norm": 0.3612705171108246, "learning_rate": 6.681886646377473e-07, "loss": 0.3113, "step": 5137 }, { "epoch": 2.5507198411385072, "grad_norm": 0.35266581177711487, "learning_rate": 6.667465315998906e-07, "loss": 0.3061, "step": 5138 }, { "epoch": 2.5512162833029954, "grad_norm": 0.3440828323364258, "learning_rate": 6.653058453175981e-07, "loss": 0.3174, "step": 5139 }, { "epoch": 2.551712725467483, "grad_norm": 0.37144991755485535, "learning_rate": 6.638666062718718e-07, "loss": 0.3175, "step": 5140 }, { "epoch": 2.552209167631971, "grad_norm": 0.3989211618900299, "learning_rate": 6.624288149432378e-07, "loss": 0.2928, "step": 5141 }, { "epoch": 2.5527056097964587, "grad_norm": 0.34402963519096375, "learning_rate": 6.609924718117311e-07, "loss": 0.3109, "step": 5142 }, { "epoch": 2.5532020519609464, "grad_norm": 0.3755660355091095, "learning_rate": 6.595575773569118e-07, "loss": 0.3216, "step": 5143 }, { "epoch": 2.5536984941254346, "grad_norm": 0.35327452421188354, "learning_rate": 6.581241320578519e-07, "loss": 0.3156, "step": 5144 }, { "epoch": 2.5541949362899223, "grad_norm": 0.3563039302825928, "learning_rate": 6.566921363931373e-07, "loss": 0.3564, "step": 5145 }, { "epoch": 2.55469137845441, "grad_norm": 0.38222044706344604, "learning_rate": 6.552615908408739e-07, "loss": 0.3275, "step": 5146 }, { "epoch": 2.555187820618898, "grad_norm": 0.33603087067604065, "learning_rate": 6.538324958786818e-07, "loss": 0.2896, "step": 5147 }, { "epoch": 2.5556842627833856, "grad_norm": 0.3425375521183014, "learning_rate": 6.524048519836984e-07, "loss": 0.3127, "step": 5148 }, { "epoch": 2.5561807049478737, "grad_norm": 0.3951747715473175, "learning_rate": 6.509786596325718e-07, "loss": 0.3773, "step": 5149 }, { "epoch": 2.5566771471123615, "grad_norm": 0.3863663971424103, "learning_rate": 6.495539193014727e-07, "loss": 0.3261, "step": 5150 }, { "epoch": 2.5571735892768492, "grad_norm": 0.3563925623893738, "learning_rate": 6.481306314660801e-07, "loss": 0.2866, "step": 5151 }, { "epoch": 2.557670031441337, "grad_norm": 0.338419646024704, "learning_rate": 6.467087966015928e-07, "loss": 0.2593, "step": 5152 }, { "epoch": 2.5581664736058247, "grad_norm": 0.39857351779937744, "learning_rate": 6.452884151827222e-07, "loss": 0.3944, "step": 5153 }, { "epoch": 2.558662915770313, "grad_norm": 0.34414905309677124, "learning_rate": 6.438694876836954e-07, "loss": 0.308, "step": 5154 }, { "epoch": 2.5591593579348006, "grad_norm": 0.38558751344680786, "learning_rate": 6.424520145782542e-07, "loss": 0.375, "step": 5155 }, { "epoch": 2.5596558000992884, "grad_norm": 0.3416980504989624, "learning_rate": 6.410359963396534e-07, "loss": 0.3092, "step": 5156 }, { "epoch": 2.560152242263776, "grad_norm": 0.39351385831832886, "learning_rate": 6.396214334406631e-07, "loss": 0.3165, "step": 5157 }, { "epoch": 2.560648684428264, "grad_norm": 0.389646977186203, "learning_rate": 6.382083263535677e-07, "loss": 0.2842, "step": 5158 }, { "epoch": 2.561145126592752, "grad_norm": 0.43677622079849243, "learning_rate": 6.367966755501647e-07, "loss": 0.3356, "step": 5159 }, { "epoch": 2.56164156875724, "grad_norm": 0.3504006266593933, "learning_rate": 6.35386481501768e-07, "loss": 0.2995, "step": 5160 }, { "epoch": 2.5621380109217275, "grad_norm": 0.3781876266002655, "learning_rate": 6.339777446791994e-07, "loss": 0.3471, "step": 5161 }, { "epoch": 2.5626344530862153, "grad_norm": 0.3339698314666748, "learning_rate": 6.32570465552802e-07, "loss": 0.2766, "step": 5162 }, { "epoch": 2.563130895250703, "grad_norm": 0.34627029299736023, "learning_rate": 6.311646445924246e-07, "loss": 0.2644, "step": 5163 }, { "epoch": 2.563627337415191, "grad_norm": 0.3687548339366913, "learning_rate": 6.297602822674343e-07, "loss": 0.3854, "step": 5164 }, { "epoch": 2.564123779579679, "grad_norm": 0.3180035650730133, "learning_rate": 6.283573790467091e-07, "loss": 0.3123, "step": 5165 }, { "epoch": 2.5646202217441667, "grad_norm": 0.39052054286003113, "learning_rate": 6.269559353986404e-07, "loss": 0.3188, "step": 5166 }, { "epoch": 2.565116663908655, "grad_norm": 0.3560260534286499, "learning_rate": 6.255559517911336e-07, "loss": 0.3035, "step": 5167 }, { "epoch": 2.5656131060731426, "grad_norm": 0.3380223512649536, "learning_rate": 6.241574286916007e-07, "loss": 0.3507, "step": 5168 }, { "epoch": 2.5661095482376304, "grad_norm": 0.34854528307914734, "learning_rate": 6.227603665669762e-07, "loss": 0.2916, "step": 5169 }, { "epoch": 2.566605990402118, "grad_norm": 0.38770565390586853, "learning_rate": 6.21364765883698e-07, "loss": 0.2942, "step": 5170 }, { "epoch": 2.567102432566606, "grad_norm": 0.3917643427848816, "learning_rate": 6.199706271077199e-07, "loss": 0.362, "step": 5171 }, { "epoch": 2.567598874731094, "grad_norm": 0.3400404453277588, "learning_rate": 6.185779507045053e-07, "loss": 0.3193, "step": 5172 }, { "epoch": 2.568095316895582, "grad_norm": 0.3533954620361328, "learning_rate": 6.171867371390345e-07, "loss": 0.3128, "step": 5173 }, { "epoch": 2.5685917590600695, "grad_norm": 0.3030509948730469, "learning_rate": 6.157969868757923e-07, "loss": 0.288, "step": 5174 }, { "epoch": 2.5690882012245573, "grad_norm": 0.36697614192962646, "learning_rate": 6.144087003787807e-07, "loss": 0.3301, "step": 5175 }, { "epoch": 2.569584643389045, "grad_norm": 0.3635845482349396, "learning_rate": 6.130218781115105e-07, "loss": 0.2734, "step": 5176 }, { "epoch": 2.570081085553533, "grad_norm": 0.35073012113571167, "learning_rate": 6.116365205370034e-07, "loss": 0.3126, "step": 5177 }, { "epoch": 2.570577527718021, "grad_norm": 0.38821840286254883, "learning_rate": 6.102526281177939e-07, "loss": 0.3635, "step": 5178 }, { "epoch": 2.5710739698825087, "grad_norm": 0.36949220299720764, "learning_rate": 6.088702013159231e-07, "loss": 0.2946, "step": 5179 }, { "epoch": 2.5715704120469964, "grad_norm": 0.3796999156475067, "learning_rate": 6.0748924059295e-07, "loss": 0.3603, "step": 5180 }, { "epoch": 2.572066854211484, "grad_norm": 0.3822932541370392, "learning_rate": 6.061097464099363e-07, "loss": 0.3142, "step": 5181 }, { "epoch": 2.5725632963759724, "grad_norm": 0.38771894574165344, "learning_rate": 6.047317192274593e-07, "loss": 0.3281, "step": 5182 }, { "epoch": 2.57305973854046, "grad_norm": 0.3513425290584564, "learning_rate": 6.033551595056048e-07, "loss": 0.2822, "step": 5183 }, { "epoch": 2.573556180704948, "grad_norm": 0.3780568838119507, "learning_rate": 6.019800677039677e-07, "loss": 0.3437, "step": 5184 }, { "epoch": 2.5740526228694356, "grad_norm": 0.38160765171051025, "learning_rate": 6.006064442816556e-07, "loss": 0.3286, "step": 5185 }, { "epoch": 2.5745490650339233, "grad_norm": 0.36004647612571716, "learning_rate": 5.99234289697282e-07, "loss": 0.3153, "step": 5186 }, { "epoch": 2.5750455071984115, "grad_norm": 0.35676923394203186, "learning_rate": 5.978636044089731e-07, "loss": 0.3542, "step": 5187 }, { "epoch": 2.5755419493628993, "grad_norm": 0.3381097614765167, "learning_rate": 5.96494388874363e-07, "loss": 0.3536, "step": 5188 }, { "epoch": 2.576038391527387, "grad_norm": 0.33127471804618835, "learning_rate": 5.951266435505959e-07, "loss": 0.2782, "step": 5189 }, { "epoch": 2.5765348336918747, "grad_norm": 0.37239012122154236, "learning_rate": 5.937603688943244e-07, "loss": 0.2968, "step": 5190 }, { "epoch": 2.5770312758563625, "grad_norm": 0.4115380644798279, "learning_rate": 5.923955653617109e-07, "loss": 0.3723, "step": 5191 }, { "epoch": 2.5775277180208507, "grad_norm": 0.36004260182380676, "learning_rate": 5.910322334084273e-07, "loss": 0.3306, "step": 5192 }, { "epoch": 2.5780241601853384, "grad_norm": 0.3725794851779938, "learning_rate": 5.896703734896508e-07, "loss": 0.3873, "step": 5193 }, { "epoch": 2.578520602349826, "grad_norm": 0.36580100655555725, "learning_rate": 5.883099860600699e-07, "loss": 0.3017, "step": 5194 }, { "epoch": 2.5790170445143143, "grad_norm": 0.367533415555954, "learning_rate": 5.869510715738824e-07, "loss": 0.3332, "step": 5195 }, { "epoch": 2.5795134866788016, "grad_norm": 0.34801265597343445, "learning_rate": 5.855936304847926e-07, "loss": 0.305, "step": 5196 }, { "epoch": 2.58000992884329, "grad_norm": 0.3347369432449341, "learning_rate": 5.842376632460117e-07, "loss": 0.349, "step": 5197 }, { "epoch": 2.5805063710077776, "grad_norm": 0.34398216009140015, "learning_rate": 5.828831703102616e-07, "loss": 0.3105, "step": 5198 }, { "epoch": 2.5810028131722653, "grad_norm": 0.357808917760849, "learning_rate": 5.815301521297701e-07, "loss": 0.3191, "step": 5199 }, { "epoch": 2.5814992553367535, "grad_norm": 0.3365672528743744, "learning_rate": 5.801786091562733e-07, "loss": 0.3217, "step": 5200 }, { "epoch": 2.5819956975012412, "grad_norm": 0.3622618019580841, "learning_rate": 5.788285418410161e-07, "loss": 0.3585, "step": 5201 }, { "epoch": 2.582492139665729, "grad_norm": 0.35453876852989197, "learning_rate": 5.774799506347461e-07, "loss": 0.3114, "step": 5202 }, { "epoch": 2.5829885818302167, "grad_norm": 0.3499961793422699, "learning_rate": 5.76132835987725e-07, "loss": 0.2661, "step": 5203 }, { "epoch": 2.5834850239947045, "grad_norm": 0.38206174969673157, "learning_rate": 5.747871983497144e-07, "loss": 0.3673, "step": 5204 }, { "epoch": 2.5839814661591927, "grad_norm": 0.30962634086608887, "learning_rate": 5.734430381699884e-07, "loss": 0.2739, "step": 5205 }, { "epoch": 2.5844779083236804, "grad_norm": 0.37846940755844116, "learning_rate": 5.721003558973243e-07, "loss": 0.3443, "step": 5206 }, { "epoch": 2.584974350488168, "grad_norm": 0.37615519762039185, "learning_rate": 5.707591519800082e-07, "loss": 0.3134, "step": 5207 }, { "epoch": 2.585470792652656, "grad_norm": 0.4200095236301422, "learning_rate": 5.694194268658315e-07, "loss": 0.3322, "step": 5208 }, { "epoch": 2.5859672348171436, "grad_norm": 0.34865084290504456, "learning_rate": 5.680811810020903e-07, "loss": 0.275, "step": 5209 }, { "epoch": 2.586463676981632, "grad_norm": 0.3808838427066803, "learning_rate": 5.667444148355916e-07, "loss": 0.3208, "step": 5210 }, { "epoch": 2.5869601191461196, "grad_norm": 0.37120330333709717, "learning_rate": 5.654091288126429e-07, "loss": 0.3102, "step": 5211 }, { "epoch": 2.5874565613106073, "grad_norm": 0.3715832829475403, "learning_rate": 5.640753233790602e-07, "loss": 0.3079, "step": 5212 }, { "epoch": 2.587953003475095, "grad_norm": 0.3366614580154419, "learning_rate": 5.627429989801653e-07, "loss": 0.3467, "step": 5213 }, { "epoch": 2.588449445639583, "grad_norm": 0.3329384922981262, "learning_rate": 5.614121560607849e-07, "loss": 0.2967, "step": 5214 }, { "epoch": 2.588945887804071, "grad_norm": 0.3795587420463562, "learning_rate": 5.600827950652532e-07, "loss": 0.312, "step": 5215 }, { "epoch": 2.5894423299685587, "grad_norm": 0.3773009777069092, "learning_rate": 5.58754916437404e-07, "loss": 0.3333, "step": 5216 }, { "epoch": 2.5899387721330465, "grad_norm": 0.35723599791526794, "learning_rate": 5.574285206205826e-07, "loss": 0.3031, "step": 5217 }, { "epoch": 2.590435214297534, "grad_norm": 0.3711585998535156, "learning_rate": 5.561036080576354e-07, "loss": 0.3338, "step": 5218 }, { "epoch": 2.590931656462022, "grad_norm": 0.33252042531967163, "learning_rate": 5.547801791909163e-07, "loss": 0.2425, "step": 5219 }, { "epoch": 2.59142809862651, "grad_norm": 0.3657166361808777, "learning_rate": 5.534582344622785e-07, "loss": 0.3426, "step": 5220 }, { "epoch": 2.591924540790998, "grad_norm": 0.35416942834854126, "learning_rate": 5.521377743130885e-07, "loss": 0.3039, "step": 5221 }, { "epoch": 2.5924209829554856, "grad_norm": 0.37243199348449707, "learning_rate": 5.508187991842085e-07, "loss": 0.3368, "step": 5222 }, { "epoch": 2.5929174251199734, "grad_norm": 0.3853953778743744, "learning_rate": 5.49501309516009e-07, "loss": 0.3375, "step": 5223 }, { "epoch": 2.593413867284461, "grad_norm": 0.35381466150283813, "learning_rate": 5.481853057483644e-07, "loss": 0.3164, "step": 5224 }, { "epoch": 2.5939103094489493, "grad_norm": 0.37849152088165283, "learning_rate": 5.468707883206525e-07, "loss": 0.3511, "step": 5225 }, { "epoch": 2.594406751613437, "grad_norm": 0.3383144736289978, "learning_rate": 5.455577576717563e-07, "loss": 0.2948, "step": 5226 }, { "epoch": 2.5949031937779248, "grad_norm": 0.365120530128479, "learning_rate": 5.442462142400589e-07, "loss": 0.3011, "step": 5227 }, { "epoch": 2.595399635942413, "grad_norm": 0.4073391854763031, "learning_rate": 5.429361584634496e-07, "loss": 0.3195, "step": 5228 }, { "epoch": 2.5958960781069007, "grad_norm": 0.34191828966140747, "learning_rate": 5.416275907793212e-07, "loss": 0.2766, "step": 5229 }, { "epoch": 2.5963925202713884, "grad_norm": 0.3868170380592346, "learning_rate": 5.40320511624568e-07, "loss": 0.3507, "step": 5230 }, { "epoch": 2.596888962435876, "grad_norm": 0.3445567488670349, "learning_rate": 5.390149214355884e-07, "loss": 0.3049, "step": 5231 }, { "epoch": 2.597385404600364, "grad_norm": 0.3687993288040161, "learning_rate": 5.37710820648284e-07, "loss": 0.2976, "step": 5232 }, { "epoch": 2.597881846764852, "grad_norm": 0.3819340765476227, "learning_rate": 5.364082096980589e-07, "loss": 0.38, "step": 5233 }, { "epoch": 2.59837828892934, "grad_norm": 0.3302994966506958, "learning_rate": 5.351070890198184e-07, "loss": 0.3317, "step": 5234 }, { "epoch": 2.5988747310938276, "grad_norm": 0.3195309638977051, "learning_rate": 5.338074590479714e-07, "loss": 0.3316, "step": 5235 }, { "epoch": 2.5993711732583153, "grad_norm": 0.3511285185813904, "learning_rate": 5.3250932021643e-07, "loss": 0.3398, "step": 5236 }, { "epoch": 2.599867615422803, "grad_norm": 0.35196638107299805, "learning_rate": 5.312126729586065e-07, "loss": 0.3251, "step": 5237 }, { "epoch": 2.6003640575872913, "grad_norm": 0.36715200543403625, "learning_rate": 5.299175177074173e-07, "loss": 0.301, "step": 5238 }, { "epoch": 2.600860499751779, "grad_norm": 0.3914068341255188, "learning_rate": 5.286238548952771e-07, "loss": 0.3777, "step": 5239 }, { "epoch": 2.6013569419162668, "grad_norm": 0.34873607754707336, "learning_rate": 5.273316849541088e-07, "loss": 0.2616, "step": 5240 }, { "epoch": 2.6018533840807545, "grad_norm": 0.40723690390586853, "learning_rate": 5.260410083153289e-07, "loss": 0.3574, "step": 5241 }, { "epoch": 2.6023498262452422, "grad_norm": 0.3481428921222687, "learning_rate": 5.247518254098627e-07, "loss": 0.2756, "step": 5242 }, { "epoch": 2.6028462684097304, "grad_norm": 0.35669708251953125, "learning_rate": 5.234641366681287e-07, "loss": 0.2964, "step": 5243 }, { "epoch": 2.603342710574218, "grad_norm": 0.35225579142570496, "learning_rate": 5.221779425200563e-07, "loss": 0.3422, "step": 5244 }, { "epoch": 2.603839152738706, "grad_norm": 0.37730666995048523, "learning_rate": 5.208932433950675e-07, "loss": 0.3308, "step": 5245 }, { "epoch": 2.6043355949031937, "grad_norm": 0.388715922832489, "learning_rate": 5.196100397220893e-07, "loss": 0.3254, "step": 5246 }, { "epoch": 2.6048320370676814, "grad_norm": 0.3539288341999054, "learning_rate": 5.183283319295485e-07, "loss": 0.306, "step": 5247 }, { "epoch": 2.6053284792321696, "grad_norm": 0.32393884658813477, "learning_rate": 5.170481204453725e-07, "loss": 0.2657, "step": 5248 }, { "epoch": 2.6058249213966573, "grad_norm": 0.3946548402309418, "learning_rate": 5.157694056969903e-07, "loss": 0.4152, "step": 5249 }, { "epoch": 2.606321363561145, "grad_norm": 0.3501023054122925, "learning_rate": 5.144921881113269e-07, "loss": 0.3021, "step": 5250 }, { "epoch": 2.606817805725633, "grad_norm": 0.4025951027870178, "learning_rate": 5.132164681148144e-07, "loss": 0.3038, "step": 5251 }, { "epoch": 2.6073142478901206, "grad_norm": 0.3413032293319702, "learning_rate": 5.119422461333784e-07, "loss": 0.2646, "step": 5252 }, { "epoch": 2.6078106900546087, "grad_norm": 0.39782318472862244, "learning_rate": 5.10669522592448e-07, "loss": 0.3699, "step": 5253 }, { "epoch": 2.6083071322190965, "grad_norm": 0.3585265576839447, "learning_rate": 5.093982979169503e-07, "loss": 0.3417, "step": 5254 }, { "epoch": 2.6088035743835842, "grad_norm": 0.332003116607666, "learning_rate": 5.081285725313134e-07, "loss": 0.3118, "step": 5255 }, { "epoch": 2.6093000165480724, "grad_norm": 0.3669528663158417, "learning_rate": 5.068603468594646e-07, "loss": 0.3194, "step": 5256 }, { "epoch": 2.6097964587125597, "grad_norm": 0.325395792722702, "learning_rate": 5.055936213248286e-07, "loss": 0.2793, "step": 5257 }, { "epoch": 2.610292900877048, "grad_norm": 0.39176297187805176, "learning_rate": 5.043283963503309e-07, "loss": 0.3516, "step": 5258 }, { "epoch": 2.6107893430415356, "grad_norm": 0.3587704300880432, "learning_rate": 5.030646723583959e-07, "loss": 0.3127, "step": 5259 }, { "epoch": 2.6112857852060234, "grad_norm": 0.3749661147594452, "learning_rate": 5.018024497709473e-07, "loss": 0.3079, "step": 5260 }, { "epoch": 2.6117822273705116, "grad_norm": 0.3668554723262787, "learning_rate": 5.005417290094061e-07, "loss": 0.3295, "step": 5261 }, { "epoch": 2.6122786695349993, "grad_norm": 0.36176797747612, "learning_rate": 4.992825104946936e-07, "loss": 0.3366, "step": 5262 }, { "epoch": 2.612775111699487, "grad_norm": 0.306135892868042, "learning_rate": 4.980247946472289e-07, "loss": 0.2903, "step": 5263 }, { "epoch": 2.613271553863975, "grad_norm": 0.35739514231681824, "learning_rate": 4.967685818869273e-07, "loss": 0.3298, "step": 5264 }, { "epoch": 2.6137679960284625, "grad_norm": 0.3606718182563782, "learning_rate": 4.955138726332054e-07, "loss": 0.2992, "step": 5265 }, { "epoch": 2.6142644381929507, "grad_norm": 0.39530235528945923, "learning_rate": 4.94260667304976e-07, "loss": 0.3263, "step": 5266 }, { "epoch": 2.6147608803574385, "grad_norm": 0.3836574852466583, "learning_rate": 4.930089663206516e-07, "loss": 0.3196, "step": 5267 }, { "epoch": 2.615257322521926, "grad_norm": 0.379388689994812, "learning_rate": 4.917587700981391e-07, "loss": 0.3192, "step": 5268 }, { "epoch": 2.615753764686414, "grad_norm": 0.3507450222969055, "learning_rate": 4.905100790548462e-07, "loss": 0.3033, "step": 5269 }, { "epoch": 2.6162502068509017, "grad_norm": 0.34045442938804626, "learning_rate": 4.892628936076766e-07, "loss": 0.265, "step": 5270 }, { "epoch": 2.61674664901539, "grad_norm": 0.3708344101905823, "learning_rate": 4.880172141730316e-07, "loss": 0.3668, "step": 5271 }, { "epoch": 2.6172430911798776, "grad_norm": 0.3243940770626068, "learning_rate": 4.867730411668103e-07, "loss": 0.3168, "step": 5272 }, { "epoch": 2.6177395333443654, "grad_norm": 0.3400413990020752, "learning_rate": 4.855303750044077e-07, "loss": 0.2748, "step": 5273 }, { "epoch": 2.618235975508853, "grad_norm": 0.3353235721588135, "learning_rate": 4.842892161007173e-07, "loss": 0.279, "step": 5274 }, { "epoch": 2.618732417673341, "grad_norm": 0.4397566318511963, "learning_rate": 4.830495648701266e-07, "loss": 0.3406, "step": 5275 }, { "epoch": 2.619228859837829, "grad_norm": 0.36563053727149963, "learning_rate": 4.818114217265219e-07, "loss": 0.2909, "step": 5276 }, { "epoch": 2.619725302002317, "grad_norm": 0.34459394216537476, "learning_rate": 4.805747870832867e-07, "loss": 0.2588, "step": 5277 }, { "epoch": 2.6202217441668045, "grad_norm": 0.37950316071510315, "learning_rate": 4.79339661353298e-07, "loss": 0.2771, "step": 5278 }, { "epoch": 2.6207181863312923, "grad_norm": 0.4108743369579315, "learning_rate": 4.781060449489333e-07, "loss": 0.2878, "step": 5279 }, { "epoch": 2.62121462849578, "grad_norm": 0.3344789147377014, "learning_rate": 4.768739382820597e-07, "loss": 0.294, "step": 5280 }, { "epoch": 2.621711070660268, "grad_norm": 0.32346826791763306, "learning_rate": 4.7564334176404827e-07, "loss": 0.2856, "step": 5281 }, { "epoch": 2.622207512824756, "grad_norm": 0.3556283414363861, "learning_rate": 4.7441425580575904e-07, "loss": 0.3187, "step": 5282 }, { "epoch": 2.6227039549892437, "grad_norm": 0.3789583742618561, "learning_rate": 4.7318668081755116e-07, "loss": 0.325, "step": 5283 }, { "epoch": 2.6232003971537314, "grad_norm": 0.3714824318885803, "learning_rate": 4.7196061720927835e-07, "loss": 0.3051, "step": 5284 }, { "epoch": 2.623696839318219, "grad_norm": 0.38112112879753113, "learning_rate": 4.707360653902904e-07, "loss": 0.3153, "step": 5285 }, { "epoch": 2.6241932814827074, "grad_norm": 0.3941490948200226, "learning_rate": 4.695130257694325e-07, "loss": 0.3464, "step": 5286 }, { "epoch": 2.624689723647195, "grad_norm": 0.3682798743247986, "learning_rate": 4.682914987550413e-07, "loss": 0.3053, "step": 5287 }, { "epoch": 2.625186165811683, "grad_norm": 0.38943928480148315, "learning_rate": 4.6707148475495623e-07, "loss": 0.3253, "step": 5288 }, { "epoch": 2.625682607976171, "grad_norm": 0.3739486038684845, "learning_rate": 4.6585298417650306e-07, "loss": 0.3489, "step": 5289 }, { "epoch": 2.6261790501406588, "grad_norm": 0.327495813369751, "learning_rate": 4.6463599742650745e-07, "loss": 0.2719, "step": 5290 }, { "epoch": 2.6266754923051465, "grad_norm": 0.3586711287498474, "learning_rate": 4.6342052491128664e-07, "loss": 0.3039, "step": 5291 }, { "epoch": 2.6271719344696343, "grad_norm": 0.3502478003501892, "learning_rate": 4.622065670366571e-07, "loss": 0.2785, "step": 5292 }, { "epoch": 2.627668376634122, "grad_norm": 0.4363355040550232, "learning_rate": 4.6099412420792354e-07, "loss": 0.3017, "step": 5293 }, { "epoch": 2.62816481879861, "grad_norm": 0.40127134323120117, "learning_rate": 4.5978319682988826e-07, "loss": 0.347, "step": 5294 }, { "epoch": 2.628661260963098, "grad_norm": 0.32723960280418396, "learning_rate": 4.5857378530684724e-07, "loss": 0.27, "step": 5295 }, { "epoch": 2.6291577031275857, "grad_norm": 0.3832227885723114, "learning_rate": 4.573658900425909e-07, "loss": 0.3675, "step": 5296 }, { "epoch": 2.6296541452920734, "grad_norm": 0.393821656703949, "learning_rate": 4.561595114404022e-07, "loss": 0.3817, "step": 5297 }, { "epoch": 2.630150587456561, "grad_norm": 0.3293651044368744, "learning_rate": 4.5495464990305715e-07, "loss": 0.2738, "step": 5298 }, { "epoch": 2.6306470296210493, "grad_norm": 0.3609994649887085, "learning_rate": 4.537513058328269e-07, "loss": 0.2959, "step": 5299 }, { "epoch": 2.631143471785537, "grad_norm": 0.3432057499885559, "learning_rate": 4.5254947963147553e-07, "loss": 0.2774, "step": 5300 }, { "epoch": 2.631639913950025, "grad_norm": 0.3551687002182007, "learning_rate": 4.513491717002599e-07, "loss": 0.3543, "step": 5301 }, { "epoch": 2.6321363561145126, "grad_norm": 0.38670632243156433, "learning_rate": 4.501503824399306e-07, "loss": 0.3059, "step": 5302 }, { "epoch": 2.6326327982790003, "grad_norm": 0.36155280470848083, "learning_rate": 4.4895311225073014e-07, "loss": 0.2895, "step": 5303 }, { "epoch": 2.6331292404434885, "grad_norm": 0.38612571358680725, "learning_rate": 4.4775736153239657e-07, "loss": 0.3318, "step": 5304 }, { "epoch": 2.6336256826079762, "grad_norm": 0.3679911494255066, "learning_rate": 4.465631306841556e-07, "loss": 0.3012, "step": 5305 }, { "epoch": 2.634122124772464, "grad_norm": 0.36985456943511963, "learning_rate": 4.453704201047293e-07, "loss": 0.2894, "step": 5306 }, { "epoch": 2.6346185669369517, "grad_norm": 0.3651743531227112, "learning_rate": 4.44179230192332e-07, "loss": 0.3442, "step": 5307 }, { "epoch": 2.6351150091014395, "grad_norm": 0.3702086806297302, "learning_rate": 4.429895613446694e-07, "loss": 0.3061, "step": 5308 }, { "epoch": 2.6356114512659277, "grad_norm": 0.3784499764442444, "learning_rate": 4.41801413958941e-07, "loss": 0.3147, "step": 5309 }, { "epoch": 2.6361078934304154, "grad_norm": 0.33839309215545654, "learning_rate": 4.4061478843183294e-07, "loss": 0.284, "step": 5310 }, { "epoch": 2.636604335594903, "grad_norm": 0.38807517290115356, "learning_rate": 4.39429685159532e-07, "loss": 0.3118, "step": 5311 }, { "epoch": 2.637100777759391, "grad_norm": 0.3397007882595062, "learning_rate": 4.38246104537709e-07, "loss": 0.3222, "step": 5312 }, { "epoch": 2.6375972199238786, "grad_norm": 0.40057021379470825, "learning_rate": 4.3706404696153003e-07, "loss": 0.3233, "step": 5313 }, { "epoch": 2.638093662088367, "grad_norm": 0.37951624393463135, "learning_rate": 4.3588351282565166e-07, "loss": 0.3451, "step": 5314 }, { "epoch": 2.6385901042528546, "grad_norm": 0.3327949345111847, "learning_rate": 4.3470450252422416e-07, "loss": 0.3497, "step": 5315 }, { "epoch": 2.6390865464173423, "grad_norm": 0.3882279694080353, "learning_rate": 4.335270164508837e-07, "loss": 0.3459, "step": 5316 }, { "epoch": 2.6395829885818305, "grad_norm": 0.37690427899360657, "learning_rate": 4.3235105499876306e-07, "loss": 0.3216, "step": 5317 }, { "epoch": 2.640079430746318, "grad_norm": 0.3608526885509491, "learning_rate": 4.311766185604832e-07, "loss": 0.2965, "step": 5318 }, { "epoch": 2.640575872910806, "grad_norm": 0.34828901290893555, "learning_rate": 4.3000370752815655e-07, "loss": 0.3147, "step": 5319 }, { "epoch": 2.6410723150752937, "grad_norm": 0.36781591176986694, "learning_rate": 4.2883232229338766e-07, "loss": 0.3231, "step": 5320 }, { "epoch": 2.6415687572397815, "grad_norm": 0.3695848882198334, "learning_rate": 4.27662463247267e-07, "loss": 0.3117, "step": 5321 }, { "epoch": 2.6420651994042696, "grad_norm": 0.3826364278793335, "learning_rate": 4.2649413078038215e-07, "loss": 0.283, "step": 5322 }, { "epoch": 2.6425616415687574, "grad_norm": 0.3937075138092041, "learning_rate": 4.2532732528280497e-07, "loss": 0.3553, "step": 5323 }, { "epoch": 2.643058083733245, "grad_norm": 0.3882780075073242, "learning_rate": 4.241620471441016e-07, "loss": 0.3387, "step": 5324 }, { "epoch": 2.643554525897733, "grad_norm": 0.34274569153785706, "learning_rate": 4.2299829675332636e-07, "loss": 0.3029, "step": 5325 }, { "epoch": 2.6440509680622206, "grad_norm": 0.3417709469795227, "learning_rate": 4.2183607449902355e-07, "loss": 0.3004, "step": 5326 }, { "epoch": 2.644547410226709, "grad_norm": 0.3683611750602722, "learning_rate": 4.2067538076922874e-07, "loss": 0.3152, "step": 5327 }, { "epoch": 2.6450438523911965, "grad_norm": 0.3746475577354431, "learning_rate": 4.195162159514632e-07, "loss": 0.3419, "step": 5328 }, { "epoch": 2.6455402945556843, "grad_norm": 0.33905521035194397, "learning_rate": 4.1835858043274445e-07, "loss": 0.2845, "step": 5329 }, { "epoch": 2.646036736720172, "grad_norm": 0.3424559235572815, "learning_rate": 4.172024745995729e-07, "loss": 0.3283, "step": 5330 }, { "epoch": 2.6465331788846598, "grad_norm": 0.3525919020175934, "learning_rate": 4.160478988379413e-07, "loss": 0.3117, "step": 5331 }, { "epoch": 2.647029621049148, "grad_norm": 0.37998369336128235, "learning_rate": 4.148948535333319e-07, "loss": 0.3368, "step": 5332 }, { "epoch": 2.6475260632136357, "grad_norm": 0.35180917382240295, "learning_rate": 4.1374333907071406e-07, "loss": 0.3287, "step": 5333 }, { "epoch": 2.6480225053781234, "grad_norm": 0.3788672983646393, "learning_rate": 4.1259335583454854e-07, "loss": 0.3401, "step": 5334 }, { "epoch": 2.648518947542611, "grad_norm": 0.3341592252254486, "learning_rate": 4.114449042087826e-07, "loss": 0.3318, "step": 5335 }, { "epoch": 2.649015389707099, "grad_norm": 0.37086421251296997, "learning_rate": 4.102979845768523e-07, "loss": 0.3478, "step": 5336 }, { "epoch": 2.649511831871587, "grad_norm": 0.3708161413669586, "learning_rate": 4.0915259732168425e-07, "loss": 0.3106, "step": 5337 }, { "epoch": 2.650008274036075, "grad_norm": 0.3906204402446747, "learning_rate": 4.080087428256924e-07, "loss": 0.286, "step": 5338 }, { "epoch": 2.6505047162005626, "grad_norm": 0.40393128991127014, "learning_rate": 4.068664214707768e-07, "loss": 0.3684, "step": 5339 }, { "epoch": 2.6510011583650503, "grad_norm": 0.3610922694206238, "learning_rate": 4.0572563363832864e-07, "loss": 0.2843, "step": 5340 }, { "epoch": 2.651497600529538, "grad_norm": 0.3618597984313965, "learning_rate": 4.0458637970922645e-07, "loss": 0.3275, "step": 5341 }, { "epoch": 2.6519940426940263, "grad_norm": 0.37113627791404724, "learning_rate": 4.034486600638349e-07, "loss": 0.3192, "step": 5342 }, { "epoch": 2.652490484858514, "grad_norm": 0.41602370142936707, "learning_rate": 4.02312475082009e-07, "loss": 0.2897, "step": 5343 }, { "epoch": 2.6529869270230018, "grad_norm": 0.37386074662208557, "learning_rate": 4.011778251430892e-07, "loss": 0.2914, "step": 5344 }, { "epoch": 2.6534833691874895, "grad_norm": 0.3599815368652344, "learning_rate": 4.000447106259059e-07, "loss": 0.2885, "step": 5345 }, { "epoch": 2.6539798113519772, "grad_norm": 0.3485335409641266, "learning_rate": 3.9891313190877243e-07, "loss": 0.292, "step": 5346 }, { "epoch": 2.6544762535164654, "grad_norm": 0.3583439588546753, "learning_rate": 3.977830893694934e-07, "loss": 0.2936, "step": 5347 }, { "epoch": 2.654972695680953, "grad_norm": 0.3682025074958801, "learning_rate": 3.9665458338536023e-07, "loss": 0.319, "step": 5348 }, { "epoch": 2.655469137845441, "grad_norm": 0.3755035102367401, "learning_rate": 3.9552761433314936e-07, "loss": 0.3137, "step": 5349 }, { "epoch": 2.655965580009929, "grad_norm": 0.363351434469223, "learning_rate": 3.944021825891259e-07, "loss": 0.3585, "step": 5350 }, { "epoch": 2.6564620221744164, "grad_norm": 0.3532787561416626, "learning_rate": 3.932782885290393e-07, "loss": 0.2884, "step": 5351 }, { "epoch": 2.6569584643389046, "grad_norm": 0.35376372933387756, "learning_rate": 3.921559325281299e-07, "loss": 0.304, "step": 5352 }, { "epoch": 2.6574549065033923, "grad_norm": 0.38779416680336, "learning_rate": 3.9103511496111965e-07, "loss": 0.3707, "step": 5353 }, { "epoch": 2.65795134866788, "grad_norm": 0.3947324752807617, "learning_rate": 3.899158362022193e-07, "loss": 0.3341, "step": 5354 }, { "epoch": 2.6584477908323683, "grad_norm": 0.3805803060531616, "learning_rate": 3.887980966251265e-07, "loss": 0.3363, "step": 5355 }, { "epoch": 2.658944232996856, "grad_norm": 0.34579312801361084, "learning_rate": 3.876818966030238e-07, "loss": 0.3506, "step": 5356 }, { "epoch": 2.6594406751613437, "grad_norm": 0.3725391626358032, "learning_rate": 3.865672365085804e-07, "loss": 0.3381, "step": 5357 }, { "epoch": 2.6599371173258315, "grad_norm": 0.3249645531177521, "learning_rate": 3.8545411671394914e-07, "loss": 0.3082, "step": 5358 }, { "epoch": 2.6604335594903192, "grad_norm": 0.39680102467536926, "learning_rate": 3.843425375907739e-07, "loss": 0.3227, "step": 5359 }, { "epoch": 2.6609300016548074, "grad_norm": 0.38804811239242554, "learning_rate": 3.832324995101777e-07, "loss": 0.3552, "step": 5360 }, { "epoch": 2.661426443819295, "grad_norm": 0.3166487514972687, "learning_rate": 3.8212400284277364e-07, "loss": 0.2726, "step": 5361 }, { "epoch": 2.661922885983783, "grad_norm": 0.3393140137195587, "learning_rate": 3.810170479586567e-07, "loss": 0.3514, "step": 5362 }, { "epoch": 2.6624193281482706, "grad_norm": 0.399476557970047, "learning_rate": 3.799116352274124e-07, "loss": 0.3679, "step": 5363 }, { "epoch": 2.6629157703127584, "grad_norm": 0.3576590418815613, "learning_rate": 3.788077650181049e-07, "loss": 0.2715, "step": 5364 }, { "epoch": 2.6634122124772466, "grad_norm": 0.3604359030723572, "learning_rate": 3.7770543769928724e-07, "loss": 0.3439, "step": 5365 }, { "epoch": 2.6639086546417343, "grad_norm": 0.35125046968460083, "learning_rate": 3.766046536389978e-07, "loss": 0.3141, "step": 5366 }, { "epoch": 2.664405096806222, "grad_norm": 0.3636030852794647, "learning_rate": 3.7550541320475697e-07, "loss": 0.2938, "step": 5367 }, { "epoch": 2.66490153897071, "grad_norm": 0.3642999529838562, "learning_rate": 3.744077167635729e-07, "loss": 0.353, "step": 5368 }, { "epoch": 2.6653979811351975, "grad_norm": 0.37099161744117737, "learning_rate": 3.7331156468193353e-07, "loss": 0.3361, "step": 5369 }, { "epoch": 2.6658944232996857, "grad_norm": 0.3687564432621002, "learning_rate": 3.722169573258183e-07, "loss": 0.318, "step": 5370 }, { "epoch": 2.6663908654641735, "grad_norm": 0.33547115325927734, "learning_rate": 3.7112389506068435e-07, "loss": 0.2691, "step": 5371 }, { "epoch": 2.666887307628661, "grad_norm": 0.4068831503391266, "learning_rate": 3.7003237825147533e-07, "loss": 0.346, "step": 5372 }, { "epoch": 2.667383749793149, "grad_norm": 0.3757997155189514, "learning_rate": 3.689424072626202e-07, "loss": 0.3795, "step": 5373 }, { "epoch": 2.6678801919576367, "grad_norm": 0.32390081882476807, "learning_rate": 3.678539824580296e-07, "loss": 0.2937, "step": 5374 }, { "epoch": 2.668376634122125, "grad_norm": 0.3462146520614624, "learning_rate": 3.6676710420110063e-07, "loss": 0.2989, "step": 5375 }, { "epoch": 2.6688730762866126, "grad_norm": 0.3391876518726349, "learning_rate": 3.656817728547107e-07, "loss": 0.2801, "step": 5376 }, { "epoch": 2.6693695184511004, "grad_norm": 0.3558039367198944, "learning_rate": 3.6459798878122233e-07, "loss": 0.2687, "step": 5377 }, { "epoch": 2.669865960615588, "grad_norm": 0.376061350107193, "learning_rate": 3.635157523424826e-07, "loss": 0.338, "step": 5378 }, { "epoch": 2.670362402780076, "grad_norm": 0.35860422253608704, "learning_rate": 3.624350638998209e-07, "loss": 0.3044, "step": 5379 }, { "epoch": 2.670858844944564, "grad_norm": 0.3861212134361267, "learning_rate": 3.613559238140496e-07, "loss": 0.2817, "step": 5380 }, { "epoch": 2.671355287109052, "grad_norm": 0.36854085326194763, "learning_rate": 3.6027833244546286e-07, "loss": 0.3756, "step": 5381 }, { "epoch": 2.6718517292735395, "grad_norm": 0.3771139085292816, "learning_rate": 3.5920229015384165e-07, "loss": 0.3453, "step": 5382 }, { "epoch": 2.6723481714380277, "grad_norm": 0.36074909567832947, "learning_rate": 3.581277972984448e-07, "loss": 0.3283, "step": 5383 }, { "epoch": 2.6728446136025155, "grad_norm": 0.3687164783477783, "learning_rate": 3.5705485423801755e-07, "loss": 0.3149, "step": 5384 }, { "epoch": 2.673341055767003, "grad_norm": 0.35717400908470154, "learning_rate": 3.559834613307861e-07, "loss": 0.2913, "step": 5385 }, { "epoch": 2.673837497931491, "grad_norm": 0.415973037481308, "learning_rate": 3.549136189344604e-07, "loss": 0.4059, "step": 5386 }, { "epoch": 2.6743339400959787, "grad_norm": 0.3466489315032959, "learning_rate": 3.5384532740623033e-07, "loss": 0.2771, "step": 5387 }, { "epoch": 2.674830382260467, "grad_norm": 0.38568446040153503, "learning_rate": 3.5277858710277e-07, "loss": 0.3394, "step": 5388 }, { "epoch": 2.6753268244249546, "grad_norm": 0.36276617646217346, "learning_rate": 3.5171339838023453e-07, "loss": 0.3242, "step": 5389 }, { "epoch": 2.6758232665894424, "grad_norm": 0.37076902389526367, "learning_rate": 3.5064976159426224e-07, "loss": 0.2997, "step": 5390 }, { "epoch": 2.67631970875393, "grad_norm": 0.3696087896823883, "learning_rate": 3.495876770999729e-07, "loss": 0.2883, "step": 5391 }, { "epoch": 2.676816150918418, "grad_norm": 0.40009593963623047, "learning_rate": 3.4852714525196507e-07, "loss": 0.3248, "step": 5392 }, { "epoch": 2.677312593082906, "grad_norm": 0.3446856737136841, "learning_rate": 3.4746816640432556e-07, "loss": 0.2957, "step": 5393 }, { "epoch": 2.6778090352473938, "grad_norm": 0.37108147144317627, "learning_rate": 3.4641074091061545e-07, "loss": 0.3598, "step": 5394 }, { "epoch": 2.6783054774118815, "grad_norm": 0.3493345081806183, "learning_rate": 3.4535486912388115e-07, "loss": 0.3314, "step": 5395 }, { "epoch": 2.6788019195763693, "grad_norm": 0.35717514157295227, "learning_rate": 3.443005513966502e-07, "loss": 0.3034, "step": 5396 }, { "epoch": 2.679298361740857, "grad_norm": 0.37950170040130615, "learning_rate": 3.4324778808092985e-07, "loss": 0.3673, "step": 5397 }, { "epoch": 2.679794803905345, "grad_norm": 0.3327786922454834, "learning_rate": 3.421965795282106e-07, "loss": 0.2711, "step": 5398 }, { "epoch": 2.680291246069833, "grad_norm": 0.3789215087890625, "learning_rate": 3.411469260894601e-07, "loss": 0.3182, "step": 5399 }, { "epoch": 2.6807876882343207, "grad_norm": 0.3491133749485016, "learning_rate": 3.400988281151313e-07, "loss": 0.3062, "step": 5400 }, { "epoch": 2.6812841303988084, "grad_norm": 0.40891385078430176, "learning_rate": 3.3905228595515425e-07, "loss": 0.3637, "step": 5401 }, { "epoch": 2.681780572563296, "grad_norm": 0.35858842730522156, "learning_rate": 3.3800729995894124e-07, "loss": 0.322, "step": 5402 }, { "epoch": 2.6822770147277843, "grad_norm": 0.39874395728111267, "learning_rate": 3.3696387047538525e-07, "loss": 0.3234, "step": 5403 }, { "epoch": 2.682773456892272, "grad_norm": 0.37982580065727234, "learning_rate": 3.359219978528583e-07, "loss": 0.3369, "step": 5404 }, { "epoch": 2.68326989905676, "grad_norm": 0.39947858452796936, "learning_rate": 3.348816824392143e-07, "loss": 0.297, "step": 5405 }, { "epoch": 2.6837663412212476, "grad_norm": 0.38339272141456604, "learning_rate": 3.338429245817848e-07, "loss": 0.3134, "step": 5406 }, { "epoch": 2.6842627833857353, "grad_norm": 0.3613201975822449, "learning_rate": 3.3280572462738415e-07, "loss": 0.298, "step": 5407 }, { "epoch": 2.6847592255502235, "grad_norm": 0.369171679019928, "learning_rate": 3.3177008292230415e-07, "loss": 0.2876, "step": 5408 }, { "epoch": 2.6852556677147112, "grad_norm": 0.4028882682323456, "learning_rate": 3.307359998123194e-07, "loss": 0.336, "step": 5409 }, { "epoch": 2.685752109879199, "grad_norm": 0.3597492575645447, "learning_rate": 3.297034756426787e-07, "loss": 0.3416, "step": 5410 }, { "epoch": 2.686248552043687, "grad_norm": 0.36568787693977356, "learning_rate": 3.286725107581179e-07, "loss": 0.3184, "step": 5411 }, { "epoch": 2.6867449942081745, "grad_norm": 0.3339191675186157, "learning_rate": 3.276431055028445e-07, "loss": 0.2757, "step": 5412 }, { "epoch": 2.6872414363726627, "grad_norm": 0.40691378712654114, "learning_rate": 3.2661526022055135e-07, "loss": 0.3588, "step": 5413 }, { "epoch": 2.6877378785371504, "grad_norm": 0.4063583314418793, "learning_rate": 3.255889752544067e-07, "loss": 0.2875, "step": 5414 }, { "epoch": 2.688234320701638, "grad_norm": 0.3904348611831665, "learning_rate": 3.2456425094706034e-07, "loss": 0.3246, "step": 5415 }, { "epoch": 2.6887307628661263, "grad_norm": 0.37671247124671936, "learning_rate": 3.2354108764063973e-07, "loss": 0.3149, "step": 5416 }, { "epoch": 2.689227205030614, "grad_norm": 0.39656588435173035, "learning_rate": 3.2251948567674993e-07, "loss": 0.3486, "step": 5417 }, { "epoch": 2.689723647195102, "grad_norm": 0.3783475458621979, "learning_rate": 3.214994453964776e-07, "loss": 0.2811, "step": 5418 }, { "epoch": 2.6902200893595896, "grad_norm": 0.36138689517974854, "learning_rate": 3.204809671403852e-07, "loss": 0.2725, "step": 5419 }, { "epoch": 2.6907165315240773, "grad_norm": 0.34332406520843506, "learning_rate": 3.194640512485159e-07, "loss": 0.3352, "step": 5420 }, { "epoch": 2.6912129736885655, "grad_norm": 0.3319385349750519, "learning_rate": 3.184486980603907e-07, "loss": 0.3125, "step": 5421 }, { "epoch": 2.6917094158530532, "grad_norm": 0.3706309199333191, "learning_rate": 3.1743490791500577e-07, "loss": 0.3704, "step": 5422 }, { "epoch": 2.692205858017541, "grad_norm": 0.34985607862472534, "learning_rate": 3.1642268115084196e-07, "loss": 0.3192, "step": 5423 }, { "epoch": 2.6927023001820287, "grad_norm": 0.3477611839771271, "learning_rate": 3.1541201810585175e-07, "loss": 0.2714, "step": 5424 }, { "epoch": 2.6931987423465165, "grad_norm": 0.3662846088409424, "learning_rate": 3.14402919117468e-07, "loss": 0.3314, "step": 5425 }, { "epoch": 2.6936951845110046, "grad_norm": 0.35130199790000916, "learning_rate": 3.133953845226029e-07, "loss": 0.2689, "step": 5426 }, { "epoch": 2.6941916266754924, "grad_norm": 0.39466533064842224, "learning_rate": 3.1238941465764337e-07, "loss": 0.3456, "step": 5427 }, { "epoch": 2.69468806883998, "grad_norm": 0.34714508056640625, "learning_rate": 3.1138500985845755e-07, "loss": 0.3228, "step": 5428 }, { "epoch": 2.695184511004468, "grad_norm": 0.3756100833415985, "learning_rate": 3.103821704603854e-07, "loss": 0.3101, "step": 5429 }, { "epoch": 2.6956809531689556, "grad_norm": 0.37751150131225586, "learning_rate": 3.093808967982515e-07, "loss": 0.3371, "step": 5430 }, { "epoch": 2.696177395333444, "grad_norm": 0.35002371668815613, "learning_rate": 3.08381189206351e-07, "loss": 0.2679, "step": 5431 }, { "epoch": 2.6966738374979315, "grad_norm": 0.36808088421821594, "learning_rate": 3.0738304801846144e-07, "loss": 0.3505, "step": 5432 }, { "epoch": 2.6971702796624193, "grad_norm": 0.42286354303359985, "learning_rate": 3.0638647356783236e-07, "loss": 0.406, "step": 5433 }, { "epoch": 2.697666721826907, "grad_norm": 0.3672720193862915, "learning_rate": 3.0539146618719596e-07, "loss": 0.3621, "step": 5434 }, { "epoch": 2.6981631639913948, "grad_norm": 0.3632740080356598, "learning_rate": 3.043980262087559e-07, "loss": 0.3043, "step": 5435 }, { "epoch": 2.698659606155883, "grad_norm": 0.3703152537345886, "learning_rate": 3.0340615396419524e-07, "loss": 0.3472, "step": 5436 }, { "epoch": 2.6991560483203707, "grad_norm": 0.32298970222473145, "learning_rate": 3.0241584978467354e-07, "loss": 0.3045, "step": 5437 }, { "epoch": 2.6996524904848584, "grad_norm": 0.3637727200984955, "learning_rate": 3.0142711400082626e-07, "loss": 0.356, "step": 5438 }, { "epoch": 2.700148932649346, "grad_norm": 0.3246665894985199, "learning_rate": 3.004399469427666e-07, "loss": 0.2609, "step": 5439 }, { "epoch": 2.700645374813834, "grad_norm": 0.34860455989837646, "learning_rate": 2.994543489400797e-07, "loss": 0.3408, "step": 5440 }, { "epoch": 2.701141816978322, "grad_norm": 0.3446810245513916, "learning_rate": 2.9847032032183366e-07, "loss": 0.31, "step": 5441 }, { "epoch": 2.70163825914281, "grad_norm": 0.35129186511039734, "learning_rate": 2.974878614165666e-07, "loss": 0.3315, "step": 5442 }, { "epoch": 2.7021347013072976, "grad_norm": 0.3194252550601959, "learning_rate": 2.965069725522951e-07, "loss": 0.3257, "step": 5443 }, { "epoch": 2.702631143471786, "grad_norm": 0.4112420082092285, "learning_rate": 2.955276540565122e-07, "loss": 0.3486, "step": 5444 }, { "epoch": 2.7031275856362735, "grad_norm": 0.3570241928100586, "learning_rate": 2.945499062561846e-07, "loss": 0.248, "step": 5445 }, { "epoch": 2.7036240278007613, "grad_norm": 0.3617933392524719, "learning_rate": 2.9357372947775684e-07, "loss": 0.3196, "step": 5446 }, { "epoch": 2.704120469965249, "grad_norm": 0.3426571488380432, "learning_rate": 2.925991240471471e-07, "loss": 0.2869, "step": 5447 }, { "epoch": 2.7046169121297368, "grad_norm": 0.3972055912017822, "learning_rate": 2.916260902897494e-07, "loss": 0.3474, "step": 5448 }, { "epoch": 2.705113354294225, "grad_norm": 0.38041871786117554, "learning_rate": 2.9065462853043345e-07, "loss": 0.3002, "step": 5449 }, { "epoch": 2.7056097964587127, "grad_norm": 0.3504231870174408, "learning_rate": 2.896847390935442e-07, "loss": 0.2717, "step": 5450 }, { "epoch": 2.7061062386232004, "grad_norm": 0.381043016910553, "learning_rate": 2.887164223029015e-07, "loss": 0.3071, "step": 5451 }, { "epoch": 2.706602680787688, "grad_norm": 0.3875272274017334, "learning_rate": 2.8774967848179956e-07, "loss": 0.3025, "step": 5452 }, { "epoch": 2.707099122952176, "grad_norm": 0.3807568848133087, "learning_rate": 2.8678450795300907e-07, "loss": 0.3413, "step": 5453 }, { "epoch": 2.707595565116664, "grad_norm": 0.3653014302253723, "learning_rate": 2.8582091103877274e-07, "loss": 0.3088, "step": 5454 }, { "epoch": 2.708092007281152, "grad_norm": 0.36699652671813965, "learning_rate": 2.848588880608094e-07, "loss": 0.3229, "step": 5455 }, { "epoch": 2.7085884494456396, "grad_norm": 0.32937297224998474, "learning_rate": 2.8389843934031327e-07, "loss": 0.2733, "step": 5456 }, { "epoch": 2.7090848916101273, "grad_norm": 0.359022855758667, "learning_rate": 2.8293956519795216e-07, "loss": 0.3359, "step": 5457 }, { "epoch": 2.709581333774615, "grad_norm": 0.3499830961227417, "learning_rate": 2.8198226595386736e-07, "loss": 0.3269, "step": 5458 }, { "epoch": 2.7100777759391033, "grad_norm": 0.3883202075958252, "learning_rate": 2.810265419276753e-07, "loss": 0.3679, "step": 5459 }, { "epoch": 2.710574218103591, "grad_norm": 0.3579160273075104, "learning_rate": 2.800723934384658e-07, "loss": 0.3237, "step": 5460 }, { "epoch": 2.7110706602680787, "grad_norm": 0.3533010184764862, "learning_rate": 2.79119820804804e-07, "loss": 0.3026, "step": 5461 }, { "epoch": 2.7115671024325665, "grad_norm": 0.38651368021965027, "learning_rate": 2.7816882434472836e-07, "loss": 0.324, "step": 5462 }, { "epoch": 2.7120635445970542, "grad_norm": 0.3550482392311096, "learning_rate": 2.772194043757481e-07, "loss": 0.3526, "step": 5463 }, { "epoch": 2.7125599867615424, "grad_norm": 0.38138294219970703, "learning_rate": 2.762715612148525e-07, "loss": 0.3005, "step": 5464 }, { "epoch": 2.71305642892603, "grad_norm": 0.39191189408302307, "learning_rate": 2.7532529517849795e-07, "loss": 0.3455, "step": 5465 }, { "epoch": 2.713552871090518, "grad_norm": 0.4112798869609833, "learning_rate": 2.7438060658261825e-07, "loss": 0.2974, "step": 5466 }, { "epoch": 2.7140493132550056, "grad_norm": 0.37638595700263977, "learning_rate": 2.7343749574261836e-07, "loss": 0.321, "step": 5467 }, { "epoch": 2.7145457554194934, "grad_norm": 0.3409843444824219, "learning_rate": 2.7249596297337755e-07, "loss": 0.2858, "step": 5468 }, { "epoch": 2.7150421975839816, "grad_norm": 0.3684963583946228, "learning_rate": 2.715560085892494e-07, "loss": 0.3521, "step": 5469 }, { "epoch": 2.7155386397484693, "grad_norm": 0.38103702664375305, "learning_rate": 2.7061763290405606e-07, "loss": 0.3068, "step": 5470 }, { "epoch": 2.716035081912957, "grad_norm": 0.36879217624664307, "learning_rate": 2.6968083623109984e-07, "loss": 0.2821, "step": 5471 }, { "epoch": 2.7165315240774452, "grad_norm": 0.37322527170181274, "learning_rate": 2.687456188831483e-07, "loss": 0.3576, "step": 5472 }, { "epoch": 2.7170279662419325, "grad_norm": 0.3957325518131256, "learning_rate": 2.678119811724461e-07, "loss": 0.3372, "step": 5473 }, { "epoch": 2.7175244084064207, "grad_norm": 0.3436259627342224, "learning_rate": 2.6687992341070944e-07, "loss": 0.2949, "step": 5474 }, { "epoch": 2.7180208505709085, "grad_norm": 0.3514017164707184, "learning_rate": 2.6594944590912774e-07, "loss": 0.315, "step": 5475 }, { "epoch": 2.718517292735396, "grad_norm": 0.336588591337204, "learning_rate": 2.650205489783625e-07, "loss": 0.3065, "step": 5476 }, { "epoch": 2.7190137348998844, "grad_norm": 0.42013922333717346, "learning_rate": 2.6409323292854563e-07, "loss": 0.415, "step": 5477 }, { "epoch": 2.719510177064372, "grad_norm": 0.3404102623462677, "learning_rate": 2.6316749806928277e-07, "loss": 0.2961, "step": 5478 }, { "epoch": 2.72000661922886, "grad_norm": 0.3582592010498047, "learning_rate": 2.6224334470965284e-07, "loss": 0.3105, "step": 5479 }, { "epoch": 2.7205030613933476, "grad_norm": 0.32610127329826355, "learning_rate": 2.613207731582057e-07, "loss": 0.2944, "step": 5480 }, { "epoch": 2.7209995035578354, "grad_norm": 0.37291812896728516, "learning_rate": 2.60399783722961e-07, "loss": 0.3183, "step": 5481 }, { "epoch": 2.7214959457223236, "grad_norm": 0.34560903906822205, "learning_rate": 2.594803767114146e-07, "loss": 0.3179, "step": 5482 }, { "epoch": 2.7219923878868113, "grad_norm": 0.3816729485988617, "learning_rate": 2.5856255243052964e-07, "loss": 0.3658, "step": 5483 }, { "epoch": 2.722488830051299, "grad_norm": 0.37025511264801025, "learning_rate": 2.5764631118674275e-07, "loss": 0.2728, "step": 5484 }, { "epoch": 2.722985272215787, "grad_norm": 0.3562249541282654, "learning_rate": 2.5673165328596315e-07, "loss": 0.2992, "step": 5485 }, { "epoch": 2.7234817143802745, "grad_norm": 0.4163917601108551, "learning_rate": 2.5581857903356935e-07, "loss": 0.3447, "step": 5486 }, { "epoch": 2.7239781565447627, "grad_norm": 0.34445446729660034, "learning_rate": 2.5490708873441295e-07, "loss": 0.3203, "step": 5487 }, { "epoch": 2.7244745987092505, "grad_norm": 0.35148724913597107, "learning_rate": 2.5399718269281505e-07, "loss": 0.3002, "step": 5488 }, { "epoch": 2.724971040873738, "grad_norm": 0.32508155703544617, "learning_rate": 2.5308886121256816e-07, "loss": 0.3167, "step": 5489 }, { "epoch": 2.725467483038226, "grad_norm": 0.35215550661087036, "learning_rate": 2.5218212459693636e-07, "loss": 0.3696, "step": 5490 }, { "epoch": 2.7259639252027137, "grad_norm": 0.3240830600261688, "learning_rate": 2.5127697314865475e-07, "loss": 0.2964, "step": 5491 }, { "epoch": 2.726460367367202, "grad_norm": 0.3392345607280731, "learning_rate": 2.5037340716992874e-07, "loss": 0.322, "step": 5492 }, { "epoch": 2.7269568095316896, "grad_norm": 0.34897372126579285, "learning_rate": 2.494714269624343e-07, "loss": 0.3487, "step": 5493 }, { "epoch": 2.7274532516961774, "grad_norm": 0.3216491937637329, "learning_rate": 2.485710328273194e-07, "loss": 0.3647, "step": 5494 }, { "epoch": 2.727949693860665, "grad_norm": 0.33504319190979004, "learning_rate": 2.4767222506519863e-07, "loss": 0.3368, "step": 5495 }, { "epoch": 2.728446136025153, "grad_norm": 0.344438761472702, "learning_rate": 2.467750039761613e-07, "loss": 0.3419, "step": 5496 }, { "epoch": 2.728942578189641, "grad_norm": 0.323212206363678, "learning_rate": 2.4587936985976445e-07, "loss": 0.2827, "step": 5497 }, { "epoch": 2.7294390203541288, "grad_norm": 0.3406539559364319, "learning_rate": 2.4498532301503563e-07, "loss": 0.2919, "step": 5498 }, { "epoch": 2.7299354625186165, "grad_norm": 0.34439751505851746, "learning_rate": 2.440928637404749e-07, "loss": 0.3145, "step": 5499 }, { "epoch": 2.7304319046831043, "grad_norm": 0.36487093567848206, "learning_rate": 2.4320199233404675e-07, "loss": 0.3246, "step": 5500 }, { "epoch": 2.730928346847592, "grad_norm": 0.39868372678756714, "learning_rate": 2.4231270909319203e-07, "loss": 0.3133, "step": 5501 }, { "epoch": 2.73142478901208, "grad_norm": 0.3582288324832916, "learning_rate": 2.4142501431481613e-07, "loss": 0.312, "step": 5502 }, { "epoch": 2.731921231176568, "grad_norm": 0.3698834776878357, "learning_rate": 2.4053890829529804e-07, "loss": 0.3143, "step": 5503 }, { "epoch": 2.7324176733410557, "grad_norm": 0.35591578483581543, "learning_rate": 2.396543913304822e-07, "loss": 0.308, "step": 5504 }, { "epoch": 2.732914115505544, "grad_norm": 0.3796239197254181, "learning_rate": 2.387714637156874e-07, "loss": 0.4027, "step": 5505 }, { "epoch": 2.7334105576700316, "grad_norm": 0.3365652561187744, "learning_rate": 2.3789012574569726e-07, "loss": 0.2126, "step": 5506 }, { "epoch": 2.7339069998345193, "grad_norm": 0.38672298192977905, "learning_rate": 2.3701037771476642e-07, "loss": 0.2981, "step": 5507 }, { "epoch": 2.734403441999007, "grad_norm": 0.35667097568511963, "learning_rate": 2.361322199166205e-07, "loss": 0.3537, "step": 5508 }, { "epoch": 2.734899884163495, "grad_norm": 0.33533406257629395, "learning_rate": 2.352556526444516e-07, "loss": 0.2921, "step": 5509 }, { "epoch": 2.735396326327983, "grad_norm": 0.39103734493255615, "learning_rate": 2.3438067619092176e-07, "loss": 0.3666, "step": 5510 }, { "epoch": 2.7358927684924708, "grad_norm": 0.33792644739151, "learning_rate": 2.335072908481606e-07, "loss": 0.2899, "step": 5511 }, { "epoch": 2.7363892106569585, "grad_norm": 0.39041781425476074, "learning_rate": 2.3263549690777044e-07, "loss": 0.3112, "step": 5512 }, { "epoch": 2.7368856528214462, "grad_norm": 0.41526079177856445, "learning_rate": 2.3176529466081733e-07, "loss": 0.3107, "step": 5513 }, { "epoch": 2.737382094985934, "grad_norm": 0.3880123198032379, "learning_rate": 2.3089668439783885e-07, "loss": 0.3153, "step": 5514 }, { "epoch": 2.737878537150422, "grad_norm": 0.3569371998310089, "learning_rate": 2.3002966640884084e-07, "loss": 0.3442, "step": 5515 }, { "epoch": 2.73837497931491, "grad_norm": 0.35254883766174316, "learning_rate": 2.2916424098329614e-07, "loss": 0.3089, "step": 5516 }, { "epoch": 2.7388714214793977, "grad_norm": 0.3467981517314911, "learning_rate": 2.2830040841014812e-07, "loss": 0.2957, "step": 5517 }, { "epoch": 2.7393678636438854, "grad_norm": 0.3440072536468506, "learning_rate": 2.2743816897780547e-07, "loss": 0.3254, "step": 5518 }, { "epoch": 2.739864305808373, "grad_norm": 0.3674947917461395, "learning_rate": 2.265775229741468e-07, "loss": 0.3012, "step": 5519 }, { "epoch": 2.7403607479728613, "grad_norm": 0.35208234190940857, "learning_rate": 2.2571847068651898e-07, "loss": 0.3383, "step": 5520 }, { "epoch": 2.740857190137349, "grad_norm": 0.3620874285697937, "learning_rate": 2.2486101240173585e-07, "loss": 0.2988, "step": 5521 }, { "epoch": 2.741353632301837, "grad_norm": 0.3813871443271637, "learning_rate": 2.2400514840608012e-07, "loss": 0.3037, "step": 5522 }, { "epoch": 2.7418500744663246, "grad_norm": 0.3773075342178345, "learning_rate": 2.231508789853004e-07, "loss": 0.3777, "step": 5523 }, { "epoch": 2.7423465166308123, "grad_norm": 0.3651295602321625, "learning_rate": 2.222982044246158e-07, "loss": 0.2987, "step": 5524 }, { "epoch": 2.7428429587953005, "grad_norm": 0.38501763343811035, "learning_rate": 2.2144712500870913e-07, "loss": 0.3352, "step": 5525 }, { "epoch": 2.7433394009597882, "grad_norm": 0.34957462549209595, "learning_rate": 2.2059764102173364e-07, "loss": 0.2657, "step": 5526 }, { "epoch": 2.743835843124276, "grad_norm": 0.3472597897052765, "learning_rate": 2.1974975274730857e-07, "loss": 0.2915, "step": 5527 }, { "epoch": 2.7443322852887637, "grad_norm": 0.3909132480621338, "learning_rate": 2.1890346046852197e-07, "loss": 0.322, "step": 5528 }, { "epoch": 2.7448287274532515, "grad_norm": 0.3507954478263855, "learning_rate": 2.1805876446792607e-07, "loss": 0.3129, "step": 5529 }, { "epoch": 2.7453251696177396, "grad_norm": 0.32910677790641785, "learning_rate": 2.1721566502754255e-07, "loss": 0.297, "step": 5530 }, { "epoch": 2.7458216117822274, "grad_norm": 0.3397102952003479, "learning_rate": 2.1637416242886012e-07, "loss": 0.2944, "step": 5531 }, { "epoch": 2.746318053946715, "grad_norm": 0.39910969138145447, "learning_rate": 2.1553425695283293e-07, "loss": 0.2789, "step": 5532 }, { "epoch": 2.7468144961112033, "grad_norm": 0.3416089415550232, "learning_rate": 2.1469594887988277e-07, "loss": 0.2887, "step": 5533 }, { "epoch": 2.7473109382756906, "grad_norm": 0.35970091819763184, "learning_rate": 2.1385923848989797e-07, "loss": 0.3162, "step": 5534 }, { "epoch": 2.747807380440179, "grad_norm": 0.389510840177536, "learning_rate": 2.13024126062234e-07, "loss": 0.3304, "step": 5535 }, { "epoch": 2.7483038226046665, "grad_norm": 0.32100820541381836, "learning_rate": 2.1219061187571056e-07, "loss": 0.2787, "step": 5536 }, { "epoch": 2.7488002647691543, "grad_norm": 0.39452576637268066, "learning_rate": 2.1135869620861671e-07, "loss": 0.3935, "step": 5537 }, { "epoch": 2.7492967069336425, "grad_norm": 0.3506876826286316, "learning_rate": 2.1052837933870583e-07, "loss": 0.317, "step": 5538 }, { "epoch": 2.74979314909813, "grad_norm": 0.35433879494667053, "learning_rate": 2.09699661543199e-07, "loss": 0.3412, "step": 5539 }, { "epoch": 2.750289591262618, "grad_norm": 0.35502493381500244, "learning_rate": 2.0887254309878202e-07, "loss": 0.3348, "step": 5540 }, { "epoch": 2.7507860334271057, "grad_norm": 0.3290640711784363, "learning_rate": 2.0804702428160629e-07, "loss": 0.288, "step": 5541 }, { "epoch": 2.7512824755915934, "grad_norm": 0.3456750512123108, "learning_rate": 2.072231053672924e-07, "loss": 0.3098, "step": 5542 }, { "epoch": 2.7517789177560816, "grad_norm": 0.35755228996276855, "learning_rate": 2.0640078663092256e-07, "loss": 0.3451, "step": 5543 }, { "epoch": 2.7522753599205694, "grad_norm": 0.3444680869579315, "learning_rate": 2.055800683470477e-07, "loss": 0.2872, "step": 5544 }, { "epoch": 2.752771802085057, "grad_norm": 0.375827819108963, "learning_rate": 2.0476095078968195e-07, "loss": 0.3621, "step": 5545 }, { "epoch": 2.753268244249545, "grad_norm": 0.3478359282016754, "learning_rate": 2.0394343423230824e-07, "loss": 0.299, "step": 5546 }, { "epoch": 2.7537646864140326, "grad_norm": 0.35102707147598267, "learning_rate": 2.0312751894787208e-07, "loss": 0.3635, "step": 5547 }, { "epoch": 2.754261128578521, "grad_norm": 0.3379800617694855, "learning_rate": 2.0231320520878507e-07, "loss": 0.304, "step": 5548 }, { "epoch": 2.7547575707430085, "grad_norm": 0.3499104976654053, "learning_rate": 2.0150049328692578e-07, "loss": 0.3555, "step": 5549 }, { "epoch": 2.7552540129074963, "grad_norm": 0.3428165912628174, "learning_rate": 2.0068938345363497e-07, "loss": 0.3402, "step": 5550 }, { "epoch": 2.755750455071984, "grad_norm": 0.37377050518989563, "learning_rate": 1.9987987597972212e-07, "loss": 0.3929, "step": 5551 }, { "epoch": 2.7562468972364718, "grad_norm": 0.36274513602256775, "learning_rate": 1.9907197113545716e-07, "loss": 0.302, "step": 5552 }, { "epoch": 2.75674333940096, "grad_norm": 0.3958832621574402, "learning_rate": 1.9826566919058043e-07, "loss": 0.2747, "step": 5553 }, { "epoch": 2.7572397815654477, "grad_norm": 0.37418925762176514, "learning_rate": 1.9746097041429212e-07, "loss": 0.3106, "step": 5554 }, { "epoch": 2.7577362237299354, "grad_norm": 0.3457849323749542, "learning_rate": 1.9665787507525958e-07, "loss": 0.2876, "step": 5555 }, { "epoch": 2.758232665894423, "grad_norm": 0.3066859245300293, "learning_rate": 1.958563834416155e-07, "loss": 0.2713, "step": 5556 }, { "epoch": 2.758729108058911, "grad_norm": 0.3573112487792969, "learning_rate": 1.9505649578095532e-07, "loss": 0.3116, "step": 5557 }, { "epoch": 2.759225550223399, "grad_norm": 0.35539254546165466, "learning_rate": 1.9425821236034094e-07, "loss": 0.2898, "step": 5558 }, { "epoch": 2.759721992387887, "grad_norm": 0.36096465587615967, "learning_rate": 1.9346153344629583e-07, "loss": 0.342, "step": 5559 }, { "epoch": 2.7602184345523746, "grad_norm": 0.34155455231666565, "learning_rate": 1.9266645930481053e-07, "loss": 0.3363, "step": 5560 }, { "epoch": 2.7607148767168623, "grad_norm": 0.3537258207798004, "learning_rate": 1.9187299020133775e-07, "loss": 0.3356, "step": 5561 }, { "epoch": 2.76121131888135, "grad_norm": 0.353324830532074, "learning_rate": 1.910811264007967e-07, "loss": 0.3178, "step": 5562 }, { "epoch": 2.7617077610458383, "grad_norm": 0.37241336703300476, "learning_rate": 1.9029086816756804e-07, "loss": 0.362, "step": 5563 }, { "epoch": 2.762204203210326, "grad_norm": 0.3317876160144806, "learning_rate": 1.8950221576549743e-07, "loss": 0.3232, "step": 5564 }, { "epoch": 2.7627006453748137, "grad_norm": 0.36465170979499817, "learning_rate": 1.887151694578959e-07, "loss": 0.3453, "step": 5565 }, { "epoch": 2.763197087539302, "grad_norm": 0.35255521535873413, "learning_rate": 1.8792972950753495e-07, "loss": 0.2893, "step": 5566 }, { "epoch": 2.7636935297037892, "grad_norm": 0.38375750184059143, "learning_rate": 1.8714589617665314e-07, "loss": 0.3067, "step": 5567 }, { "epoch": 2.7641899718682774, "grad_norm": 0.3361433744430542, "learning_rate": 1.8636366972694996e-07, "loss": 0.2909, "step": 5568 }, { "epoch": 2.764686414032765, "grad_norm": 0.36026254296302795, "learning_rate": 1.8558305041958992e-07, "loss": 0.2897, "step": 5569 }, { "epoch": 2.765182856197253, "grad_norm": 0.39513155817985535, "learning_rate": 1.8480403851520167e-07, "loss": 0.3408, "step": 5570 }, { "epoch": 2.765679298361741, "grad_norm": 0.3611394166946411, "learning_rate": 1.840266342738739e-07, "loss": 0.3065, "step": 5571 }, { "epoch": 2.766175740526229, "grad_norm": 0.3152003586292267, "learning_rate": 1.832508379551634e-07, "loss": 0.3161, "step": 5572 }, { "epoch": 2.7666721826907166, "grad_norm": 0.3445037603378296, "learning_rate": 1.8247664981808522e-07, "loss": 0.3309, "step": 5573 }, { "epoch": 2.7671686248552043, "grad_norm": 0.3714391589164734, "learning_rate": 1.8170407012112146e-07, "loss": 0.3504, "step": 5574 }, { "epoch": 2.767665067019692, "grad_norm": 0.3782818913459778, "learning_rate": 1.8093309912221302e-07, "loss": 0.3324, "step": 5575 }, { "epoch": 2.7681615091841802, "grad_norm": 0.3448541760444641, "learning_rate": 1.8016373707876956e-07, "loss": 0.2855, "step": 5576 }, { "epoch": 2.768657951348668, "grad_norm": 0.3321858048439026, "learning_rate": 1.7939598424765726e-07, "loss": 0.3104, "step": 5577 }, { "epoch": 2.7691543935131557, "grad_norm": 0.3805257976055145, "learning_rate": 1.7862984088520886e-07, "loss": 0.358, "step": 5578 }, { "epoch": 2.7696508356776435, "grad_norm": 0.33879172801971436, "learning_rate": 1.778653072472203e-07, "loss": 0.3232, "step": 5579 }, { "epoch": 2.770147277842131, "grad_norm": 0.3655674159526825, "learning_rate": 1.7710238358894683e-07, "loss": 0.3177, "step": 5580 }, { "epoch": 2.7706437200066194, "grad_norm": 0.333980530500412, "learning_rate": 1.763410701651086e-07, "loss": 0.3022, "step": 5581 }, { "epoch": 2.771140162171107, "grad_norm": 0.35272330045700073, "learning_rate": 1.7558136722988617e-07, "loss": 0.3175, "step": 5582 }, { "epoch": 2.771636604335595, "grad_norm": 0.35899367928504944, "learning_rate": 1.7482327503692552e-07, "loss": 0.321, "step": 5583 }, { "epoch": 2.7721330465000826, "grad_norm": 0.3562261164188385, "learning_rate": 1.7406679383933255e-07, "loss": 0.3003, "step": 5584 }, { "epoch": 2.7726294886645704, "grad_norm": 0.3836006820201874, "learning_rate": 1.7331192388967523e-07, "loss": 0.3218, "step": 5585 }, { "epoch": 2.7731259308290586, "grad_norm": 0.36694610118865967, "learning_rate": 1.7255866543998412e-07, "loss": 0.3223, "step": 5586 }, { "epoch": 2.7736223729935463, "grad_norm": 0.3378390967845917, "learning_rate": 1.7180701874175198e-07, "loss": 0.2959, "step": 5587 }, { "epoch": 2.774118815158034, "grad_norm": 0.36125943064689636, "learning_rate": 1.710569840459342e-07, "loss": 0.3242, "step": 5588 }, { "epoch": 2.774615257322522, "grad_norm": 0.3867473006248474, "learning_rate": 1.7030856160294485e-07, "loss": 0.32, "step": 5589 }, { "epoch": 2.7751116994870095, "grad_norm": 0.37524107098579407, "learning_rate": 1.695617516626641e-07, "loss": 0.3033, "step": 5590 }, { "epoch": 2.7756081416514977, "grad_norm": 0.3676213026046753, "learning_rate": 1.6881655447442968e-07, "loss": 0.3529, "step": 5591 }, { "epoch": 2.7761045838159855, "grad_norm": 0.34988269209861755, "learning_rate": 1.680729702870437e-07, "loss": 0.3482, "step": 5592 }, { "epoch": 2.776601025980473, "grad_norm": 0.3331615924835205, "learning_rate": 1.6733099934876873e-07, "loss": 0.286, "step": 5593 }, { "epoch": 2.777097468144961, "grad_norm": 0.38145846128463745, "learning_rate": 1.6659064190732764e-07, "loss": 0.3385, "step": 5594 }, { "epoch": 2.7775939103094487, "grad_norm": 0.3282145857810974, "learning_rate": 1.6585189820990776e-07, "loss": 0.3281, "step": 5595 }, { "epoch": 2.778090352473937, "grad_norm": 0.3336464762687683, "learning_rate": 1.6511476850315344e-07, "loss": 0.2927, "step": 5596 }, { "epoch": 2.7785867946384246, "grad_norm": 0.39038848876953125, "learning_rate": 1.643792530331728e-07, "loss": 0.3504, "step": 5597 }, { "epoch": 2.7790832368029124, "grad_norm": 0.34262049198150635, "learning_rate": 1.6364535204553444e-07, "loss": 0.2927, "step": 5598 }, { "epoch": 2.7795796789674005, "grad_norm": 0.35189738869667053, "learning_rate": 1.62913065785269e-07, "loss": 0.3078, "step": 5599 }, { "epoch": 2.7800761211318883, "grad_norm": 0.3645104467868805, "learning_rate": 1.621823944968659e-07, "loss": 0.3244, "step": 5600 }, { "epoch": 2.780572563296376, "grad_norm": 0.3996545374393463, "learning_rate": 1.6145333842427612e-07, "loss": 0.366, "step": 5601 }, { "epoch": 2.7810690054608638, "grad_norm": 0.3468364179134369, "learning_rate": 1.6072589781091274e-07, "loss": 0.2961, "step": 5602 }, { "epoch": 2.7815654476253515, "grad_norm": 0.34357285499572754, "learning_rate": 1.6000007289964815e-07, "loss": 0.3454, "step": 5603 }, { "epoch": 2.7820618897898397, "grad_norm": 0.3521554172039032, "learning_rate": 1.5927586393281458e-07, "loss": 0.2978, "step": 5604 }, { "epoch": 2.7825583319543274, "grad_norm": 0.36956799030303955, "learning_rate": 1.5855327115220698e-07, "loss": 0.3476, "step": 5605 }, { "epoch": 2.783054774118815, "grad_norm": 0.3767161965370178, "learning_rate": 1.57832294799079e-07, "loss": 0.3014, "step": 5606 }, { "epoch": 2.783551216283303, "grad_norm": 0.33672913908958435, "learning_rate": 1.5711293511414482e-07, "loss": 0.3095, "step": 5607 }, { "epoch": 2.7840476584477907, "grad_norm": 0.31011781096458435, "learning_rate": 1.5639519233757895e-07, "loss": 0.3069, "step": 5608 }, { "epoch": 2.784544100612279, "grad_norm": 0.35205399990081787, "learning_rate": 1.556790667090169e-07, "loss": 0.3225, "step": 5609 }, { "epoch": 2.7850405427767666, "grad_norm": 0.3396511375904083, "learning_rate": 1.5496455846755242e-07, "loss": 0.3434, "step": 5610 }, { "epoch": 2.7855369849412543, "grad_norm": 0.37803512811660767, "learning_rate": 1.542516678517425e-07, "loss": 0.3205, "step": 5611 }, { "epoch": 2.786033427105742, "grad_norm": 0.3586587607860565, "learning_rate": 1.5354039509959894e-07, "loss": 0.3564, "step": 5612 }, { "epoch": 2.78652986927023, "grad_norm": 0.37642422318458557, "learning_rate": 1.5283074044859904e-07, "loss": 0.269, "step": 5613 }, { "epoch": 2.787026311434718, "grad_norm": 0.35804808139801025, "learning_rate": 1.5212270413567544e-07, "loss": 0.3041, "step": 5614 }, { "epoch": 2.7875227535992058, "grad_norm": 0.36166661977767944, "learning_rate": 1.514162863972235e-07, "loss": 0.3291, "step": 5615 }, { "epoch": 2.7880191957636935, "grad_norm": 0.3471485674381256, "learning_rate": 1.5071148746909569e-07, "loss": 0.2865, "step": 5616 }, { "epoch": 2.7885156379281812, "grad_norm": 0.3892098367214203, "learning_rate": 1.5000830758660656e-07, "loss": 0.3288, "step": 5617 }, { "epoch": 2.789012080092669, "grad_norm": 0.35717275738716125, "learning_rate": 1.493067469845283e-07, "loss": 0.3636, "step": 5618 }, { "epoch": 2.789508522257157, "grad_norm": 0.34716343879699707, "learning_rate": 1.486068058970913e-07, "loss": 0.2946, "step": 5619 }, { "epoch": 2.790004964421645, "grad_norm": 0.36766043305397034, "learning_rate": 1.479084845579898e-07, "loss": 0.3385, "step": 5620 }, { "epoch": 2.7905014065861327, "grad_norm": 0.33706802129745483, "learning_rate": 1.4721178320037167e-07, "loss": 0.315, "step": 5621 }, { "epoch": 2.7909978487506204, "grad_norm": 0.3496515452861786, "learning_rate": 1.4651670205684863e-07, "loss": 0.3037, "step": 5622 }, { "epoch": 2.791494290915108, "grad_norm": 0.37200409173965454, "learning_rate": 1.4582324135948734e-07, "loss": 0.3638, "step": 5623 }, { "epoch": 2.7919907330795963, "grad_norm": 0.35504138469696045, "learning_rate": 1.4513140133981752e-07, "loss": 0.2963, "step": 5624 }, { "epoch": 2.792487175244084, "grad_norm": 0.36613425612449646, "learning_rate": 1.4444118222882387e-07, "loss": 0.2637, "step": 5625 }, { "epoch": 2.792983617408572, "grad_norm": 0.36539262533187866, "learning_rate": 1.4375258425695317e-07, "loss": 0.385, "step": 5626 }, { "epoch": 2.79348005957306, "grad_norm": 0.3566344380378723, "learning_rate": 1.4306560765410925e-07, "loss": 0.3105, "step": 5627 }, { "epoch": 2.7939765017375473, "grad_norm": 0.3694019913673401, "learning_rate": 1.4238025264965428e-07, "loss": 0.337, "step": 5628 }, { "epoch": 2.7944729439020355, "grad_norm": 0.3293834328651428, "learning_rate": 1.4169651947241069e-07, "loss": 0.2757, "step": 5629 }, { "epoch": 2.7949693860665232, "grad_norm": 0.37880340218544006, "learning_rate": 1.4101440835065705e-07, "loss": 0.3111, "step": 5630 }, { "epoch": 2.795465828231011, "grad_norm": 0.35935336351394653, "learning_rate": 1.4033391951213392e-07, "loss": 0.2947, "step": 5631 }, { "epoch": 2.795962270395499, "grad_norm": 0.3473678231239319, "learning_rate": 1.3965505318403572e-07, "loss": 0.3405, "step": 5632 }, { "epoch": 2.796458712559987, "grad_norm": 0.355953186750412, "learning_rate": 1.389778095930183e-07, "loss": 0.3606, "step": 5633 }, { "epoch": 2.7969551547244746, "grad_norm": 0.3257331848144531, "learning_rate": 1.3830218896519532e-07, "loss": 0.333, "step": 5634 }, { "epoch": 2.7974515968889624, "grad_norm": 0.3461960256099701, "learning_rate": 1.3762819152613793e-07, "loss": 0.3169, "step": 5635 }, { "epoch": 2.79794803905345, "grad_norm": 0.3538033664226532, "learning_rate": 1.3695581750087562e-07, "loss": 0.2749, "step": 5636 }, { "epoch": 2.7984444812179383, "grad_norm": 0.3713544011116028, "learning_rate": 1.3628506711389545e-07, "loss": 0.3364, "step": 5637 }, { "epoch": 2.798940923382426, "grad_norm": 0.3655187785625458, "learning_rate": 1.3561594058914218e-07, "loss": 0.3399, "step": 5638 }, { "epoch": 2.799437365546914, "grad_norm": 0.3412390351295471, "learning_rate": 1.3494843815002047e-07, "loss": 0.2827, "step": 5639 }, { "epoch": 2.7999338077114015, "grad_norm": 0.3738322854042053, "learning_rate": 1.3428256001939034e-07, "loss": 0.337, "step": 5640 }, { "epoch": 2.8004302498758893, "grad_norm": 0.37260428071022034, "learning_rate": 1.3361830641957118e-07, "loss": 0.3087, "step": 5641 }, { "epoch": 2.8009266920403775, "grad_norm": 0.3305455446243286, "learning_rate": 1.3295567757233729e-07, "loss": 0.2973, "step": 5642 }, { "epoch": 2.801423134204865, "grad_norm": 0.365952730178833, "learning_rate": 1.3229467369892446e-07, "loss": 0.3065, "step": 5643 }, { "epoch": 2.801919576369353, "grad_norm": 0.38318169116973877, "learning_rate": 1.3163529502002337e-07, "loss": 0.3107, "step": 5644 }, { "epoch": 2.8024160185338407, "grad_norm": 0.33342787623405457, "learning_rate": 1.3097754175578182e-07, "loss": 0.2742, "step": 5645 }, { "epoch": 2.8029124606983284, "grad_norm": 0.32618942856788635, "learning_rate": 1.303214141258069e-07, "loss": 0.2739, "step": 5646 }, { "epoch": 2.8034089028628166, "grad_norm": 0.36738041043281555, "learning_rate": 1.2966691234916119e-07, "loss": 0.3298, "step": 5647 }, { "epoch": 2.8039053450273044, "grad_norm": 0.32760313153266907, "learning_rate": 1.290140366443654e-07, "loss": 0.3095, "step": 5648 }, { "epoch": 2.804401787191792, "grad_norm": 0.35675495862960815, "learning_rate": 1.2836278722939576e-07, "loss": 0.3225, "step": 5649 }, { "epoch": 2.80489822935628, "grad_norm": 0.3292243182659149, "learning_rate": 1.2771316432168889e-07, "loss": 0.3042, "step": 5650 }, { "epoch": 2.8053946715207676, "grad_norm": 0.41632699966430664, "learning_rate": 1.270651681381341e-07, "loss": 0.3528, "step": 5651 }, { "epoch": 2.805891113685256, "grad_norm": 0.3381537199020386, "learning_rate": 1.2641879889508158e-07, "loss": 0.2607, "step": 5652 }, { "epoch": 2.8063875558497435, "grad_norm": 0.3508282005786896, "learning_rate": 1.2577405680833433e-07, "loss": 0.3347, "step": 5653 }, { "epoch": 2.8068839980142313, "grad_norm": 0.350970596075058, "learning_rate": 1.2513094209315625e-07, "loss": 0.2993, "step": 5654 }, { "epoch": 2.807380440178719, "grad_norm": 0.34049898386001587, "learning_rate": 1.24489454964265e-07, "loss": 0.3748, "step": 5655 }, { "epoch": 2.8078768823432068, "grad_norm": 0.31094199419021606, "learning_rate": 1.2384959563583542e-07, "loss": 0.3227, "step": 5656 }, { "epoch": 2.808373324507695, "grad_norm": 0.3512331247329712, "learning_rate": 1.2321136432149938e-07, "loss": 0.373, "step": 5657 }, { "epoch": 2.8088697666721827, "grad_norm": 0.3457282483577728, "learning_rate": 1.2257476123434474e-07, "loss": 0.3329, "step": 5658 }, { "epoch": 2.8093662088366704, "grad_norm": 0.38924264907836914, "learning_rate": 1.2193978658691708e-07, "loss": 0.2865, "step": 5659 }, { "epoch": 2.8098626510011586, "grad_norm": 0.3735146224498749, "learning_rate": 1.2130644059121565e-07, "loss": 0.2883, "step": 5660 }, { "epoch": 2.8103590931656464, "grad_norm": 0.37291404604911804, "learning_rate": 1.2067472345869858e-07, "loss": 0.3051, "step": 5661 }, { "epoch": 2.810855535330134, "grad_norm": 0.3663608133792877, "learning_rate": 1.2004463540027822e-07, "loss": 0.3472, "step": 5662 }, { "epoch": 2.811351977494622, "grad_norm": 0.35869044065475464, "learning_rate": 1.1941617662632466e-07, "loss": 0.333, "step": 5663 }, { "epoch": 2.8118484196591096, "grad_norm": 0.3652966022491455, "learning_rate": 1.1878934734666281e-07, "loss": 0.3516, "step": 5664 }, { "epoch": 2.8123448618235978, "grad_norm": 0.3586307466030121, "learning_rate": 1.1816414777057361e-07, "loss": 0.263, "step": 5665 }, { "epoch": 2.8128413039880855, "grad_norm": 0.3517773449420929, "learning_rate": 1.1754057810679509e-07, "loss": 0.3188, "step": 5666 }, { "epoch": 2.8133377461525733, "grad_norm": 0.4016648232936859, "learning_rate": 1.1691863856351904e-07, "loss": 0.322, "step": 5667 }, { "epoch": 2.813834188317061, "grad_norm": 0.3547186553478241, "learning_rate": 1.1629832934839491e-07, "loss": 0.2845, "step": 5668 }, { "epoch": 2.8143306304815487, "grad_norm": 0.3690248429775238, "learning_rate": 1.1567965066852704e-07, "loss": 0.3355, "step": 5669 }, { "epoch": 2.814827072646037, "grad_norm": 0.3867990970611572, "learning_rate": 1.1506260273047576e-07, "loss": 0.2757, "step": 5670 }, { "epoch": 2.8153235148105247, "grad_norm": 0.40250903367996216, "learning_rate": 1.1444718574025516e-07, "loss": 0.2908, "step": 5671 }, { "epoch": 2.8158199569750124, "grad_norm": 0.3619917333126068, "learning_rate": 1.1383339990333753e-07, "loss": 0.3331, "step": 5672 }, { "epoch": 2.8163163991395, "grad_norm": 0.37128931283950806, "learning_rate": 1.1322124542465008e-07, "loss": 0.3418, "step": 5673 }, { "epoch": 2.816812841303988, "grad_norm": 0.3267002999782562, "learning_rate": 1.1261072250857264e-07, "loss": 0.3038, "step": 5674 }, { "epoch": 2.817309283468476, "grad_norm": 0.3758770823478699, "learning_rate": 1.1200183135894327e-07, "loss": 0.3305, "step": 5675 }, { "epoch": 2.817805725632964, "grad_norm": 0.3429974615573883, "learning_rate": 1.113945721790538e-07, "loss": 0.3489, "step": 5676 }, { "epoch": 2.8183021677974516, "grad_norm": 0.3664774000644684, "learning_rate": 1.1078894517165206e-07, "loss": 0.2992, "step": 5677 }, { "epoch": 2.8187986099619393, "grad_norm": 0.34790852665901184, "learning_rate": 1.1018495053894018e-07, "loss": 0.3015, "step": 5678 }, { "epoch": 2.819295052126427, "grad_norm": 0.3505485951900482, "learning_rate": 1.095825884825752e-07, "loss": 0.3329, "step": 5679 }, { "epoch": 2.8197914942909152, "grad_norm": 0.3432074785232544, "learning_rate": 1.0898185920366954e-07, "loss": 0.3239, "step": 5680 }, { "epoch": 2.820287936455403, "grad_norm": 0.36307621002197266, "learning_rate": 1.0838276290279115e-07, "loss": 0.3577, "step": 5681 }, { "epoch": 2.8207843786198907, "grad_norm": 0.36189237236976624, "learning_rate": 1.0778529977996166e-07, "loss": 0.2655, "step": 5682 }, { "epoch": 2.8212808207843785, "grad_norm": 0.36046111583709717, "learning_rate": 1.0718947003465652e-07, "loss": 0.3153, "step": 5683 }, { "epoch": 2.821777262948866, "grad_norm": 0.3555960953235626, "learning_rate": 1.0659527386580882e-07, "loss": 0.3355, "step": 5684 }, { "epoch": 2.8222737051133544, "grad_norm": 0.4054773151874542, "learning_rate": 1.0600271147180374e-07, "loss": 0.3189, "step": 5685 }, { "epoch": 2.822770147277842, "grad_norm": 0.38118234276771545, "learning_rate": 1.0541178305048139e-07, "loss": 0.3346, "step": 5686 }, { "epoch": 2.82326658944233, "grad_norm": 0.3331247568130493, "learning_rate": 1.0482248879913725e-07, "loss": 0.2951, "step": 5687 }, { "epoch": 2.823763031606818, "grad_norm": 0.3501502573490143, "learning_rate": 1.0423482891452119e-07, "loss": 0.3047, "step": 5688 }, { "epoch": 2.8242594737713054, "grad_norm": 0.34358030557632446, "learning_rate": 1.0364880359283625e-07, "loss": 0.3365, "step": 5689 }, { "epoch": 2.8247559159357936, "grad_norm": 0.33545568585395813, "learning_rate": 1.0306441302973924e-07, "loss": 0.3472, "step": 5690 }, { "epoch": 2.8252523581002813, "grad_norm": 0.31655561923980713, "learning_rate": 1.024816574203441e-07, "loss": 0.2828, "step": 5691 }, { "epoch": 2.825748800264769, "grad_norm": 0.3575187623500824, "learning_rate": 1.0190053695921631e-07, "loss": 0.3461, "step": 5692 }, { "epoch": 2.8262452424292572, "grad_norm": 0.32378703355789185, "learning_rate": 1.0132105184037677e-07, "loss": 0.3152, "step": 5693 }, { "epoch": 2.826741684593745, "grad_norm": 0.34777501225471497, "learning_rate": 1.007432022572985e-07, "loss": 0.3252, "step": 5694 }, { "epoch": 2.8272381267582327, "grad_norm": 0.3315499424934387, "learning_rate": 1.001669884029105e-07, "loss": 0.3317, "step": 5695 }, { "epoch": 2.8277345689227205, "grad_norm": 0.3532181680202484, "learning_rate": 9.959241046959611e-08, "loss": 0.3109, "step": 5696 }, { "epoch": 2.828231011087208, "grad_norm": 0.3742104172706604, "learning_rate": 9.90194686491891e-08, "loss": 0.3483, "step": 5697 }, { "epoch": 2.8287274532516964, "grad_norm": 0.3319903016090393, "learning_rate": 9.84481631329809e-08, "loss": 0.2516, "step": 5698 }, { "epoch": 2.829223895416184, "grad_norm": 0.37332114577293396, "learning_rate": 9.787849411171391e-08, "loss": 0.3156, "step": 5699 }, { "epoch": 2.829720337580672, "grad_norm": 0.37766188383102417, "learning_rate": 9.731046177558545e-08, "loss": 0.3393, "step": 5700 }, { "epoch": 2.8302167797451596, "grad_norm": 0.37220054864883423, "learning_rate": 9.674406631424549e-08, "loss": 0.3699, "step": 5701 }, { "epoch": 2.8307132219096474, "grad_norm": 0.3547527492046356, "learning_rate": 9.617930791679997e-08, "loss": 0.3195, "step": 5702 }, { "epoch": 2.8312096640741355, "grad_norm": 0.4041077196598053, "learning_rate": 9.561618677180418e-08, "loss": 0.3094, "step": 5703 }, { "epoch": 2.8317061062386233, "grad_norm": 0.3485874831676483, "learning_rate": 9.505470306726994e-08, "loss": 0.3282, "step": 5704 }, { "epoch": 2.832202548403111, "grad_norm": 0.3322729766368866, "learning_rate": 9.449485699066174e-08, "loss": 0.3368, "step": 5705 }, { "epoch": 2.8326989905675988, "grad_norm": 0.3694564700126648, "learning_rate": 9.393664872889619e-08, "loss": 0.3087, "step": 5706 }, { "epoch": 2.8331954327320865, "grad_norm": 0.36892932653427124, "learning_rate": 9.338007846834474e-08, "loss": 0.3422, "step": 5707 }, { "epoch": 2.8336918748965747, "grad_norm": 0.3784196674823761, "learning_rate": 9.282514639482986e-08, "loss": 0.3269, "step": 5708 }, { "epoch": 2.8341883170610624, "grad_norm": 0.3772556185722351, "learning_rate": 9.227185269362893e-08, "loss": 0.3523, "step": 5709 }, { "epoch": 2.83468475922555, "grad_norm": 0.34522438049316406, "learning_rate": 9.172019754947192e-08, "loss": 0.2781, "step": 5710 }, { "epoch": 2.835181201390038, "grad_norm": 0.3519779145717621, "learning_rate": 9.117018114654153e-08, "loss": 0.299, "step": 5711 }, { "epoch": 2.8356776435545257, "grad_norm": 0.347801148891449, "learning_rate": 9.062180366847306e-08, "loss": 0.3288, "step": 5712 }, { "epoch": 2.836174085719014, "grad_norm": 0.3524707853794098, "learning_rate": 9.007506529835452e-08, "loss": 0.2986, "step": 5713 }, { "epoch": 2.8366705278835016, "grad_norm": 0.3777543008327484, "learning_rate": 8.952996621872767e-08, "loss": 0.3709, "step": 5714 }, { "epoch": 2.8371669700479893, "grad_norm": 0.3457048833370209, "learning_rate": 8.898650661158582e-08, "loss": 0.3048, "step": 5715 }, { "epoch": 2.837663412212477, "grad_norm": 0.31768521666526794, "learning_rate": 8.844468665837546e-08, "loss": 0.2805, "step": 5716 }, { "epoch": 2.838159854376965, "grad_norm": 0.37192782759666443, "learning_rate": 8.790450653999527e-08, "loss": 0.3376, "step": 5717 }, { "epoch": 2.838656296541453, "grad_norm": 0.37470152974128723, "learning_rate": 8.736596643679762e-08, "loss": 0.3733, "step": 5718 }, { "epoch": 2.8391527387059408, "grad_norm": 0.32468220591545105, "learning_rate": 8.682906652858536e-08, "loss": 0.2851, "step": 5719 }, { "epoch": 2.8396491808704285, "grad_norm": 0.35236820578575134, "learning_rate": 8.629380699461453e-08, "loss": 0.3273, "step": 5720 }, { "epoch": 2.8401456230349167, "grad_norm": 0.3730660080909729, "learning_rate": 8.576018801359553e-08, "loss": 0.3075, "step": 5721 }, { "epoch": 2.8406420651994044, "grad_norm": 0.4074752628803253, "learning_rate": 8.52282097636875e-08, "loss": 0.3064, "step": 5722 }, { "epoch": 2.841138507363892, "grad_norm": 0.3469920754432678, "learning_rate": 8.469787242250504e-08, "loss": 0.3096, "step": 5723 }, { "epoch": 2.84163494952838, "grad_norm": 0.33227282762527466, "learning_rate": 8.416917616711095e-08, "loss": 0.3075, "step": 5724 }, { "epoch": 2.8421313916928677, "grad_norm": 0.3695741593837738, "learning_rate": 8.364212117402515e-08, "loss": 0.3127, "step": 5725 }, { "epoch": 2.842627833857356, "grad_norm": 0.3390914499759674, "learning_rate": 8.311670761921576e-08, "loss": 0.3391, "step": 5726 }, { "epoch": 2.8431242760218436, "grad_norm": 0.3290618062019348, "learning_rate": 8.259293567810412e-08, "loss": 0.3031, "step": 5727 }, { "epoch": 2.8436207181863313, "grad_norm": 0.3649337589740753, "learning_rate": 8.207080552556313e-08, "loss": 0.3062, "step": 5728 }, { "epoch": 2.844117160350819, "grad_norm": 0.36846911907196045, "learning_rate": 8.155031733591889e-08, "loss": 0.2993, "step": 5729 }, { "epoch": 2.844613602515307, "grad_norm": 0.36980852484703064, "learning_rate": 8.103147128294742e-08, "loss": 0.3392, "step": 5730 }, { "epoch": 2.845110044679795, "grad_norm": 0.3261367976665497, "learning_rate": 8.051426753987734e-08, "loss": 0.2725, "step": 5731 }, { "epoch": 2.8456064868442827, "grad_norm": 0.375102162361145, "learning_rate": 7.999870627938944e-08, "loss": 0.3597, "step": 5732 }, { "epoch": 2.8461029290087705, "grad_norm": 0.34641826152801514, "learning_rate": 7.94847876736149e-08, "loss": 0.286, "step": 5733 }, { "epoch": 2.8465993711732582, "grad_norm": 0.3396618962287903, "learning_rate": 7.897251189413758e-08, "loss": 0.2987, "step": 5734 }, { "epoch": 2.847095813337746, "grad_norm": 0.34791237115859985, "learning_rate": 7.846187911199287e-08, "loss": 0.3525, "step": 5735 }, { "epoch": 2.847592255502234, "grad_norm": 0.3704623878002167, "learning_rate": 7.795288949766611e-08, "loss": 0.3401, "step": 5736 }, { "epoch": 2.848088697666722, "grad_norm": 0.39533329010009766, "learning_rate": 7.744554322109633e-08, "loss": 0.2864, "step": 5737 }, { "epoch": 2.8485851398312096, "grad_norm": 0.36108672618865967, "learning_rate": 7.693984045167192e-08, "loss": 0.3506, "step": 5738 }, { "epoch": 2.8490815819956974, "grad_norm": 0.34487250447273254, "learning_rate": 7.643578135823338e-08, "loss": 0.28, "step": 5739 }, { "epoch": 2.849578024160185, "grad_norm": 0.3330000638961792, "learning_rate": 7.593336610907221e-08, "loss": 0.275, "step": 5740 }, { "epoch": 2.8500744663246733, "grad_norm": 0.3661389946937561, "learning_rate": 7.543259487193144e-08, "loss": 0.3417, "step": 5741 }, { "epoch": 2.850570908489161, "grad_norm": 0.3399885892868042, "learning_rate": 7.493346781400457e-08, "loss": 0.2676, "step": 5742 }, { "epoch": 2.851067350653649, "grad_norm": 0.3617069125175476, "learning_rate": 7.443598510193716e-08, "loss": 0.3153, "step": 5743 }, { "epoch": 2.8515637928181365, "grad_norm": 0.3393939137458801, "learning_rate": 7.394014690182583e-08, "loss": 0.3399, "step": 5744 }, { "epoch": 2.8520602349826243, "grad_norm": 0.35405609011650085, "learning_rate": 7.344595337921534e-08, "loss": 0.3119, "step": 5745 }, { "epoch": 2.8525566771471125, "grad_norm": 0.3583833575248718, "learning_rate": 7.29534046991054e-08, "loss": 0.3254, "step": 5746 }, { "epoch": 2.8530531193116, "grad_norm": 0.3494022786617279, "learning_rate": 7.246250102594332e-08, "loss": 0.3278, "step": 5747 }, { "epoch": 2.853549561476088, "grad_norm": 0.35021522641181946, "learning_rate": 7.197324252362969e-08, "loss": 0.2976, "step": 5748 }, { "epoch": 2.854046003640576, "grad_norm": 0.33130666613578796, "learning_rate": 7.148562935551384e-08, "loss": 0.3179, "step": 5749 }, { "epoch": 2.8545424458050634, "grad_norm": 0.3875633180141449, "learning_rate": 7.099966168439665e-08, "loss": 0.3159, "step": 5750 }, { "epoch": 2.8550388879695516, "grad_norm": 0.3258378207683563, "learning_rate": 7.051533967252999e-08, "loss": 0.2981, "step": 5751 }, { "epoch": 2.8555353301340394, "grad_norm": 0.36108124256134033, "learning_rate": 7.003266348161508e-08, "loss": 0.3011, "step": 5752 }, { "epoch": 2.856031772298527, "grad_norm": 0.4046954810619354, "learning_rate": 6.955163327280467e-08, "loss": 0.3531, "step": 5753 }, { "epoch": 2.8565282144630153, "grad_norm": 0.3624899983406067, "learning_rate": 6.907224920670141e-08, "loss": 0.314, "step": 5754 }, { "epoch": 2.857024656627503, "grad_norm": 0.35781005024909973, "learning_rate": 6.859451144336005e-08, "loss": 0.3231, "step": 5755 }, { "epoch": 2.857521098791991, "grad_norm": 0.3502821624279022, "learning_rate": 6.811842014228243e-08, "loss": 0.3433, "step": 5756 }, { "epoch": 2.8580175409564785, "grad_norm": 0.32700619101524353, "learning_rate": 6.764397546242307e-08, "loss": 0.3012, "step": 5757 }, { "epoch": 2.8585139831209663, "grad_norm": 0.3322509527206421, "learning_rate": 6.717117756218639e-08, "loss": 0.3103, "step": 5758 }, { "epoch": 2.8590104252854545, "grad_norm": 0.34256765246391296, "learning_rate": 6.670002659942664e-08, "loss": 0.3404, "step": 5759 }, { "epoch": 2.859506867449942, "grad_norm": 0.35889875888824463, "learning_rate": 6.623052273144914e-08, "loss": 0.3598, "step": 5760 }, { "epoch": 2.86000330961443, "grad_norm": 0.3396240174770355, "learning_rate": 6.576266611500681e-08, "loss": 0.3563, "step": 5761 }, { "epoch": 2.8604997517789177, "grad_norm": 0.3895612359046936, "learning_rate": 6.529645690630526e-08, "loss": 0.3445, "step": 5762 }, { "epoch": 2.8609961939434054, "grad_norm": 0.37056204676628113, "learning_rate": 6.483189526099887e-08, "loss": 0.2493, "step": 5763 }, { "epoch": 2.8614926361078936, "grad_norm": 0.3664003908634186, "learning_rate": 6.436898133419301e-08, "loss": 0.3299, "step": 5764 }, { "epoch": 2.8619890782723814, "grad_norm": 0.33610913157463074, "learning_rate": 6.390771528044016e-08, "loss": 0.2938, "step": 5765 }, { "epoch": 2.862485520436869, "grad_norm": 0.39308667182922363, "learning_rate": 6.344809725374601e-08, "loss": 0.3644, "step": 5766 }, { "epoch": 2.862981962601357, "grad_norm": 0.4087662696838379, "learning_rate": 6.29901274075645e-08, "loss": 0.3587, "step": 5767 }, { "epoch": 2.8634784047658446, "grad_norm": 0.40346378087997437, "learning_rate": 6.253380589479829e-08, "loss": 0.2762, "step": 5768 }, { "epoch": 2.8639748469303328, "grad_norm": 0.338087797164917, "learning_rate": 6.207913286780221e-08, "loss": 0.2926, "step": 5769 }, { "epoch": 2.8644712890948205, "grad_norm": 0.3935655951499939, "learning_rate": 6.162610847837813e-08, "loss": 0.2992, "step": 5770 }, { "epoch": 2.8649677312593083, "grad_norm": 0.36382928490638733, "learning_rate": 6.117473287777897e-08, "loss": 0.3666, "step": 5771 }, { "epoch": 2.865464173423796, "grad_norm": 0.36296480894088745, "learning_rate": 6.072500621670585e-08, "loss": 0.2984, "step": 5772 }, { "epoch": 2.8659606155882837, "grad_norm": 0.3547854423522949, "learning_rate": 6.027692864531198e-08, "loss": 0.3846, "step": 5773 }, { "epoch": 2.866457057752772, "grad_norm": 0.3476998209953308, "learning_rate": 5.983050031319714e-08, "loss": 0.3201, "step": 5774 }, { "epoch": 2.8669534999172597, "grad_norm": 0.37317955493927, "learning_rate": 5.938572136941156e-08, "loss": 0.3347, "step": 5775 }, { "epoch": 2.8674499420817474, "grad_norm": 0.3448174297809601, "learning_rate": 5.8942591962455334e-08, "loss": 0.3067, "step": 5776 }, { "epoch": 2.867946384246235, "grad_norm": 0.3452087938785553, "learning_rate": 5.8501112240277325e-08, "loss": 0.2878, "step": 5777 }, { "epoch": 2.868442826410723, "grad_norm": 0.3677646219730377, "learning_rate": 5.806128235027575e-08, "loss": 0.3113, "step": 5778 }, { "epoch": 2.868939268575211, "grad_norm": 0.3315812051296234, "learning_rate": 5.762310243929703e-08, "loss": 0.3017, "step": 5779 }, { "epoch": 2.869435710739699, "grad_norm": 0.3149280250072479, "learning_rate": 5.718657265363858e-08, "loss": 0.3223, "step": 5780 }, { "epoch": 2.8699321529041866, "grad_norm": 0.3912266492843628, "learning_rate": 5.6751693139044385e-08, "loss": 0.3563, "step": 5781 }, { "epoch": 2.8704285950686748, "grad_norm": 0.3618857264518738, "learning_rate": 5.6318464040710505e-08, "loss": 0.2656, "step": 5782 }, { "epoch": 2.8709250372331625, "grad_norm": 0.37160027027130127, "learning_rate": 5.5886885503279584e-08, "loss": 0.2581, "step": 5783 }, { "epoch": 2.8714214793976502, "grad_norm": 0.34229597449302673, "learning_rate": 5.5456957670843584e-08, "loss": 0.3132, "step": 5784 }, { "epoch": 2.871917921562138, "grad_norm": 0.320896714925766, "learning_rate": 5.502868068694489e-08, "loss": 0.2665, "step": 5785 }, { "epoch": 2.8724143637266257, "grad_norm": 0.3858053684234619, "learning_rate": 5.460205469457247e-08, "loss": 0.3664, "step": 5786 }, { "epoch": 2.872910805891114, "grad_norm": 0.33330559730529785, "learning_rate": 5.417707983616571e-08, "loss": 0.2935, "step": 5787 }, { "epoch": 2.8734072480556017, "grad_norm": 0.35816100239753723, "learning_rate": 5.375375625361168e-08, "loss": 0.2594, "step": 5788 }, { "epoch": 2.8739036902200894, "grad_norm": 0.3892193138599396, "learning_rate": 5.3332084088247305e-08, "loss": 0.3531, "step": 5789 }, { "epoch": 2.874400132384577, "grad_norm": 0.37115687131881714, "learning_rate": 5.2912063480857204e-08, "loss": 0.344, "step": 5790 }, { "epoch": 2.874896574549065, "grad_norm": 0.3173314332962036, "learning_rate": 5.2493694571673635e-08, "loss": 0.3002, "step": 5791 }, { "epoch": 2.875393016713553, "grad_norm": 0.30451124906539917, "learning_rate": 5.207697750038099e-08, "loss": 0.3044, "step": 5792 }, { "epoch": 2.875889458878041, "grad_norm": 0.32372477650642395, "learning_rate": 5.166191240610741e-08, "loss": 0.3028, "step": 5793 }, { "epoch": 2.8763859010425286, "grad_norm": 0.3618679642677307, "learning_rate": 5.1248499427433704e-08, "loss": 0.3297, "step": 5794 }, { "epoch": 2.8768823432070163, "grad_norm": 0.34795650839805603, "learning_rate": 5.083673870238559e-08, "loss": 0.3124, "step": 5795 }, { "epoch": 2.877378785371504, "grad_norm": 0.3207903206348419, "learning_rate": 5.0426630368440314e-08, "loss": 0.3054, "step": 5796 }, { "epoch": 2.8778752275359922, "grad_norm": 0.3751979470252991, "learning_rate": 5.001817456252111e-08, "loss": 0.3298, "step": 5797 }, { "epoch": 2.87837166970048, "grad_norm": 0.38016974925994873, "learning_rate": 4.9611371421000034e-08, "loss": 0.3291, "step": 5798 }, { "epoch": 2.8788681118649677, "grad_norm": 0.3506561517715454, "learning_rate": 4.9206221079698414e-08, "loss": 0.3133, "step": 5799 }, { "epoch": 2.8793645540294555, "grad_norm": 0.3947136104106903, "learning_rate": 4.8802723673884164e-08, "loss": 0.2965, "step": 5800 }, { "epoch": 2.879860996193943, "grad_norm": 0.3792905807495117, "learning_rate": 4.8400879338274534e-08, "loss": 0.3178, "step": 5801 }, { "epoch": 2.8803574383584314, "grad_norm": 0.3595844507217407, "learning_rate": 4.800068820703385e-08, "loss": 0.3139, "step": 5802 }, { "epoch": 2.880853880522919, "grad_norm": 0.3277224600315094, "learning_rate": 4.760215041377636e-08, "loss": 0.2642, "step": 5803 }, { "epoch": 2.881350322687407, "grad_norm": 0.3778817653656006, "learning_rate": 4.7205266091561175e-08, "loss": 0.307, "step": 5804 }, { "epoch": 2.8818467648518946, "grad_norm": 0.34511640667915344, "learning_rate": 4.6810035372898964e-08, "loss": 0.3492, "step": 5805 }, { "epoch": 2.8823432070163824, "grad_norm": 0.35311445593833923, "learning_rate": 4.641645838974473e-08, "loss": 0.3288, "step": 5806 }, { "epoch": 2.8828396491808705, "grad_norm": 0.33497461676597595, "learning_rate": 4.602453527350503e-08, "loss": 0.2771, "step": 5807 }, { "epoch": 2.8833360913453583, "grad_norm": 0.41831547021865845, "learning_rate": 4.5634266155031304e-08, "loss": 0.3613, "step": 5808 }, { "epoch": 2.883832533509846, "grad_norm": 0.36901992559432983, "learning_rate": 4.524565116462321e-08, "loss": 0.3456, "step": 5809 }, { "epoch": 2.884328975674334, "grad_norm": 0.3354109525680542, "learning_rate": 4.4858690432030285e-08, "loss": 0.2511, "step": 5810 }, { "epoch": 2.8848254178388215, "grad_norm": 0.33005040884017944, "learning_rate": 4.447338408644697e-08, "loss": 0.3005, "step": 5811 }, { "epoch": 2.8853218600033097, "grad_norm": 0.36519744992256165, "learning_rate": 4.4089732256517026e-08, "loss": 0.3049, "step": 5812 }, { "epoch": 2.8858183021677974, "grad_norm": 0.37483295798301697, "learning_rate": 4.370773507033077e-08, "loss": 0.3204, "step": 5813 }, { "epoch": 2.886314744332285, "grad_norm": 0.34043118357658386, "learning_rate": 4.332739265542785e-08, "loss": 0.3215, "step": 5814 }, { "epoch": 2.8868111864967734, "grad_norm": 0.33026614785194397, "learning_rate": 4.294870513879335e-08, "loss": 0.3476, "step": 5815 }, { "epoch": 2.887307628661261, "grad_norm": 0.34400829672813416, "learning_rate": 4.257167264686113e-08, "loss": 0.3262, "step": 5816 }, { "epoch": 2.887804070825749, "grad_norm": 0.35698965191841125, "learning_rate": 4.219629530551217e-08, "loss": 0.3219, "step": 5817 }, { "epoch": 2.8883005129902366, "grad_norm": 0.360811710357666, "learning_rate": 4.1822573240073995e-08, "loss": 0.4015, "step": 5818 }, { "epoch": 2.8887969551547243, "grad_norm": 0.3106553554534912, "learning_rate": 4.145050657532346e-08, "loss": 0.2782, "step": 5819 }, { "epoch": 2.8892933973192125, "grad_norm": 0.3821720778942108, "learning_rate": 4.108009543548286e-08, "loss": 0.321, "step": 5820 }, { "epoch": 2.8897898394837003, "grad_norm": 0.36974847316741943, "learning_rate": 4.071133994422216e-08, "loss": 0.287, "step": 5821 }, { "epoch": 2.890286281648188, "grad_norm": 0.3853445053100586, "learning_rate": 4.034424022465899e-08, "loss": 0.3619, "step": 5822 }, { "epoch": 2.8907827238126758, "grad_norm": 0.35037174820899963, "learning_rate": 3.9978796399358086e-08, "loss": 0.2843, "step": 5823 }, { "epoch": 2.8912791659771635, "grad_norm": 0.3801828622817993, "learning_rate": 3.961500859033074e-08, "loss": 0.3695, "step": 5824 }, { "epoch": 2.8917756081416517, "grad_norm": 0.3212483823299408, "learning_rate": 3.925287691903701e-08, "loss": 0.2955, "step": 5825 }, { "epoch": 2.8922720503061394, "grad_norm": 0.35687515139579773, "learning_rate": 3.8892401506381846e-08, "loss": 0.3133, "step": 5826 }, { "epoch": 2.892768492470627, "grad_norm": 0.3646802604198456, "learning_rate": 3.8533582472717877e-08, "loss": 0.3556, "step": 5827 }, { "epoch": 2.893264934635115, "grad_norm": 0.3236078917980194, "learning_rate": 3.817641993784593e-08, "loss": 0.2947, "step": 5828 }, { "epoch": 2.8937613767996027, "grad_norm": 0.3322260081768036, "learning_rate": 3.782091402101229e-08, "loss": 0.3106, "step": 5829 }, { "epoch": 2.894257818964091, "grad_norm": 0.364132821559906, "learning_rate": 3.746706484091145e-08, "loss": 0.4181, "step": 5830 }, { "epoch": 2.8947542611285786, "grad_norm": 0.29108214378356934, "learning_rate": 3.711487251568335e-08, "loss": 0.2601, "step": 5831 }, { "epoch": 2.8952507032930663, "grad_norm": 0.3989344537258148, "learning_rate": 3.67643371629145e-08, "loss": 0.3713, "step": 5832 }, { "epoch": 2.895747145457554, "grad_norm": 0.3397443890571594, "learning_rate": 3.641545889964126e-08, "loss": 0.2609, "step": 5833 }, { "epoch": 2.896243587622042, "grad_norm": 0.3600442707538605, "learning_rate": 3.606823784234326e-08, "loss": 0.3303, "step": 5834 }, { "epoch": 2.89674002978653, "grad_norm": 0.3932231068611145, "learning_rate": 3.572267410694885e-08, "loss": 0.3266, "step": 5835 }, { "epoch": 2.8972364719510177, "grad_norm": 0.35228848457336426, "learning_rate": 3.5378767808831315e-08, "loss": 0.2702, "step": 5836 }, { "epoch": 2.8977329141155055, "grad_norm": 0.33322954177856445, "learning_rate": 3.503651906281269e-08, "loss": 0.2987, "step": 5837 }, { "epoch": 2.8982293562799932, "grad_norm": 0.3574203848838806, "learning_rate": 3.469592798316046e-08, "loss": 0.3159, "step": 5838 }, { "epoch": 2.898725798444481, "grad_norm": 0.3649877905845642, "learning_rate": 3.435699468358755e-08, "loss": 0.3598, "step": 5839 }, { "epoch": 2.899222240608969, "grad_norm": 0.3577049970626831, "learning_rate": 3.401971927725623e-08, "loss": 0.3458, "step": 5840 }, { "epoch": 2.899718682773457, "grad_norm": 0.32483839988708496, "learning_rate": 3.368410187677196e-08, "loss": 0.2583, "step": 5841 }, { "epoch": 2.9002151249379446, "grad_norm": 0.3708347678184509, "learning_rate": 3.3350142594190115e-08, "loss": 0.3396, "step": 5842 }, { "epoch": 2.900711567102433, "grad_norm": 0.3419421315193176, "learning_rate": 3.301784154100818e-08, "loss": 0.3003, "step": 5843 }, { "epoch": 2.90120800926692, "grad_norm": 0.329065203666687, "learning_rate": 3.268719882817517e-08, "loss": 0.2728, "step": 5844 }, { "epoch": 2.9017044514314083, "grad_norm": 0.372586727142334, "learning_rate": 3.235821456608168e-08, "loss": 0.3144, "step": 5845 }, { "epoch": 2.902200893595896, "grad_norm": 0.3494338095188141, "learning_rate": 3.203088886456762e-08, "loss": 0.3342, "step": 5846 }, { "epoch": 2.902697335760384, "grad_norm": 0.36120566725730896, "learning_rate": 3.17052218329178e-08, "loss": 0.3039, "step": 5847 }, { "epoch": 2.903193777924872, "grad_norm": 0.34465697407722473, "learning_rate": 3.138121357986357e-08, "loss": 0.2965, "step": 5848 }, { "epoch": 2.9036902200893597, "grad_norm": 0.3179689049720764, "learning_rate": 3.105886421358284e-08, "loss": 0.3003, "step": 5849 }, { "epoch": 2.9041866622538475, "grad_norm": 0.3665119707584381, "learning_rate": 3.073817384169841e-08, "loss": 0.3412, "step": 5850 }, { "epoch": 2.904683104418335, "grad_norm": 0.3368304669857025, "learning_rate": 3.041914257128131e-08, "loss": 0.2964, "step": 5851 }, { "epoch": 2.905179546582823, "grad_norm": 0.34681445360183716, "learning_rate": 3.010177050884633e-08, "loss": 0.3042, "step": 5852 }, { "epoch": 2.905675988747311, "grad_norm": 0.3612997829914093, "learning_rate": 2.9786057760355925e-08, "loss": 0.3149, "step": 5853 }, { "epoch": 2.906172430911799, "grad_norm": 0.3831785023212433, "learning_rate": 2.9472004431218004e-08, "loss": 0.2848, "step": 5854 }, { "epoch": 2.9066688730762866, "grad_norm": 0.3408016562461853, "learning_rate": 2.9159610626286472e-08, "loss": 0.3092, "step": 5855 }, { "epoch": 2.9071653152407744, "grad_norm": 0.343795508146286, "learning_rate": 2.8848876449860673e-08, "loss": 0.3037, "step": 5856 }, { "epoch": 2.907661757405262, "grad_norm": 0.3506411612033844, "learning_rate": 2.8539802005687068e-08, "loss": 0.2882, "step": 5857 }, { "epoch": 2.9081581995697503, "grad_norm": 0.3292355537414551, "learning_rate": 2.823238739695644e-08, "loss": 0.3194, "step": 5858 }, { "epoch": 2.908654641734238, "grad_norm": 0.35522130131721497, "learning_rate": 2.792663272630669e-08, "loss": 0.3458, "step": 5859 }, { "epoch": 2.909151083898726, "grad_norm": 0.35519471764564514, "learning_rate": 2.7622538095820606e-08, "loss": 0.2928, "step": 5860 }, { "epoch": 2.9096475260632135, "grad_norm": 0.3516862094402313, "learning_rate": 2.7320103607027527e-08, "loss": 0.3175, "step": 5861 }, { "epoch": 2.9101439682277013, "grad_norm": 0.3521372377872467, "learning_rate": 2.701932936090168e-08, "loss": 0.2954, "step": 5862 }, { "epoch": 2.9106404103921895, "grad_norm": 0.37828123569488525, "learning_rate": 2.672021545786385e-08, "loss": 0.353, "step": 5863 }, { "epoch": 2.911136852556677, "grad_norm": 0.38411465287208557, "learning_rate": 2.642276199777971e-08, "loss": 0.3031, "step": 5864 }, { "epoch": 2.911633294721165, "grad_norm": 0.3172350823879242, "learning_rate": 2.612696907996093e-08, "loss": 0.3085, "step": 5865 }, { "epoch": 2.9121297368856527, "grad_norm": 0.3402601182460785, "learning_rate": 2.583283680316462e-08, "loss": 0.3325, "step": 5866 }, { "epoch": 2.9126261790501404, "grad_norm": 0.31878378987312317, "learning_rate": 2.5540365265594446e-08, "loss": 0.2883, "step": 5867 }, { "epoch": 2.9131226212146286, "grad_norm": 0.3483652174472809, "learning_rate": 2.5249554564897305e-08, "loss": 0.3634, "step": 5868 }, { "epoch": 2.9136190633791164, "grad_norm": 0.386526495218277, "learning_rate": 2.496040479816775e-08, "loss": 0.3128, "step": 5869 }, { "epoch": 2.914115505543604, "grad_norm": 0.36119920015335083, "learning_rate": 2.467291606194522e-08, "loss": 0.2775, "step": 5870 }, { "epoch": 2.914611947708092, "grad_norm": 0.35195279121398926, "learning_rate": 2.4387088452214046e-08, "loss": 0.3085, "step": 5871 }, { "epoch": 2.9151083898725796, "grad_norm": 0.3637546896934509, "learning_rate": 2.4102922064404566e-08, "loss": 0.2948, "step": 5872 }, { "epoch": 2.9156048320370678, "grad_norm": 0.36215847730636597, "learning_rate": 2.3820416993391437e-08, "loss": 0.3304, "step": 5873 }, { "epoch": 2.9161012742015555, "grad_norm": 0.35122165083885193, "learning_rate": 2.3539573333496436e-08, "loss": 0.3386, "step": 5874 }, { "epoch": 2.9165977163660433, "grad_norm": 0.3310145437717438, "learning_rate": 2.326039117848511e-08, "loss": 0.2747, "step": 5875 }, { "epoch": 2.9170941585305314, "grad_norm": 0.36445772647857666, "learning_rate": 2.298287062156901e-08, "loss": 0.3694, "step": 5876 }, { "epoch": 2.917590600695019, "grad_norm": 0.3350781500339508, "learning_rate": 2.270701175540402e-08, "loss": 0.2782, "step": 5877 }, { "epoch": 2.918087042859507, "grad_norm": 0.36898162961006165, "learning_rate": 2.243281467209313e-08, "loss": 0.3165, "step": 5878 }, { "epoch": 2.9185834850239947, "grad_norm": 0.3589234948158264, "learning_rate": 2.2160279463182554e-08, "loss": 0.3403, "step": 5879 }, { "epoch": 2.9190799271884824, "grad_norm": 0.399433970451355, "learning_rate": 2.1889406219663955e-08, "loss": 0.3894, "step": 5880 }, { "epoch": 2.9195763693529706, "grad_norm": 0.3411198854446411, "learning_rate": 2.16201950319761e-08, "loss": 0.2702, "step": 5881 }, { "epoch": 2.9200728115174583, "grad_norm": 0.3366437554359436, "learning_rate": 2.135264598999931e-08, "loss": 0.3027, "step": 5882 }, { "epoch": 2.920569253681946, "grad_norm": 0.3676186203956604, "learning_rate": 2.1086759183062132e-08, "loss": 0.3141, "step": 5883 }, { "epoch": 2.921065695846434, "grad_norm": 0.379587322473526, "learning_rate": 2.0822534699936892e-08, "loss": 0.2936, "step": 5884 }, { "epoch": 2.9215621380109216, "grad_norm": 0.3672168254852295, "learning_rate": 2.0559972628840795e-08, "loss": 0.3493, "step": 5885 }, { "epoch": 2.9220585801754098, "grad_norm": 0.37231364846229553, "learning_rate": 2.0299073057435946e-08, "loss": 0.315, "step": 5886 }, { "epoch": 2.9225550223398975, "grad_norm": 0.35573089122772217, "learning_rate": 2.0039836072829888e-08, "loss": 0.3207, "step": 5887 }, { "epoch": 2.9230514645043852, "grad_norm": 0.3516179919242859, "learning_rate": 1.978226176157505e-08, "loss": 0.3345, "step": 5888 }, { "epoch": 2.923547906668873, "grad_norm": 0.3654276430606842, "learning_rate": 1.9526350209667645e-08, "loss": 0.2955, "step": 5889 }, { "epoch": 2.9240443488333607, "grad_norm": 0.33905574679374695, "learning_rate": 1.9272101502550432e-08, "loss": 0.313, "step": 5890 }, { "epoch": 2.924540790997849, "grad_norm": 0.35337910056114197, "learning_rate": 1.901951572510996e-08, "loss": 0.3752, "step": 5891 }, { "epoch": 2.9250372331623367, "grad_norm": 0.3030838370323181, "learning_rate": 1.8768592961677655e-08, "loss": 0.2955, "step": 5892 }, { "epoch": 2.9255336753268244, "grad_norm": 0.3620661497116089, "learning_rate": 1.8519333296029286e-08, "loss": 0.3398, "step": 5893 }, { "epoch": 2.926030117491312, "grad_norm": 0.3981822729110718, "learning_rate": 1.827173681138661e-08, "loss": 0.3272, "step": 5894 }, { "epoch": 2.9265265596558, "grad_norm": 0.35129284858703613, "learning_rate": 1.802580359041517e-08, "loss": 0.3006, "step": 5895 }, { "epoch": 2.927023001820288, "grad_norm": 0.3365897238254547, "learning_rate": 1.7781533715225952e-08, "loss": 0.349, "step": 5896 }, { "epoch": 2.927519443984776, "grad_norm": 0.34062182903289795, "learning_rate": 1.7538927267372606e-08, "loss": 0.278, "step": 5897 }, { "epoch": 2.9280158861492636, "grad_norm": 0.40191659331321716, "learning_rate": 1.7297984327856456e-08, "loss": 0.3441, "step": 5898 }, { "epoch": 2.9285123283137513, "grad_norm": 0.3492032587528229, "learning_rate": 1.7058704977120366e-08, "loss": 0.3174, "step": 5899 }, { "epoch": 2.929008770478239, "grad_norm": 0.3349549174308777, "learning_rate": 1.6821089295053773e-08, "loss": 0.3459, "step": 5900 }, { "epoch": 2.9295052126427272, "grad_norm": 0.3352149724960327, "learning_rate": 1.6585137360990434e-08, "loss": 0.3099, "step": 5901 }, { "epoch": 2.930001654807215, "grad_norm": 0.38698387145996094, "learning_rate": 1.6350849253708444e-08, "loss": 0.3518, "step": 5902 }, { "epoch": 2.9304980969717027, "grad_norm": 0.3494347929954529, "learning_rate": 1.6118225051429125e-08, "loss": 0.3053, "step": 5903 }, { "epoch": 2.930994539136191, "grad_norm": 0.3583677113056183, "learning_rate": 1.5887264831820348e-08, "loss": 0.3056, "step": 5904 }, { "epoch": 2.931490981300678, "grad_norm": 0.35734543204307556, "learning_rate": 1.5657968671993208e-08, "loss": 0.3173, "step": 5905 }, { "epoch": 2.9319874234651664, "grad_norm": 0.3638274371623993, "learning_rate": 1.543033664850313e-08, "loss": 0.3197, "step": 5906 }, { "epoch": 2.932483865629654, "grad_norm": 0.3469976484775543, "learning_rate": 1.5204368837350437e-08, "loss": 0.3228, "step": 5907 }, { "epoch": 2.932980307794142, "grad_norm": 0.3334726095199585, "learning_rate": 1.498006531398033e-08, "loss": 0.299, "step": 5908 }, { "epoch": 2.93347674995863, "grad_norm": 0.373489648103714, "learning_rate": 1.4757426153280685e-08, "loss": 0.3401, "step": 5909 }, { "epoch": 2.933973192123118, "grad_norm": 0.3958427906036377, "learning_rate": 1.4536451429585374e-08, "loss": 0.3271, "step": 5910 }, { "epoch": 2.9344696342876055, "grad_norm": 0.37905168533325195, "learning_rate": 1.4317141216671493e-08, "loss": 0.2715, "step": 5911 }, { "epoch": 2.9349660764520933, "grad_norm": 0.3371579945087433, "learning_rate": 1.409949558776047e-08, "loss": 0.2954, "step": 5912 }, { "epoch": 2.935462518616581, "grad_norm": 0.3661666214466095, "learning_rate": 1.3883514615519178e-08, "loss": 0.3476, "step": 5913 }, { "epoch": 2.935958960781069, "grad_norm": 0.33281195163726807, "learning_rate": 1.3669198372056602e-08, "loss": 0.2977, "step": 5914 }, { "epoch": 2.936455402945557, "grad_norm": 0.36843982338905334, "learning_rate": 1.3456546928928282e-08, "loss": 0.3607, "step": 5915 }, { "epoch": 2.9369518451100447, "grad_norm": 0.3634592592716217, "learning_rate": 1.324556035713187e-08, "loss": 0.3181, "step": 5916 }, { "epoch": 2.9374482872745324, "grad_norm": 0.3692750036716461, "learning_rate": 1.3036238727110462e-08, "loss": 0.3346, "step": 5917 }, { "epoch": 2.93794472943902, "grad_norm": 0.3571542501449585, "learning_rate": 1.2828582108750376e-08, "loss": 0.298, "step": 5918 }, { "epoch": 2.9384411716035084, "grad_norm": 0.33136531710624695, "learning_rate": 1.2622590571383376e-08, "loss": 0.3226, "step": 5919 }, { "epoch": 2.938937613767996, "grad_norm": 0.34673866629600525, "learning_rate": 1.241826418378389e-08, "loss": 0.3154, "step": 5920 }, { "epoch": 2.939434055932484, "grad_norm": 0.36765170097351074, "learning_rate": 1.2215603014170685e-08, "loss": 0.3066, "step": 5921 }, { "epoch": 2.9399304980969716, "grad_norm": 0.3733616769313812, "learning_rate": 1.2014607130207967e-08, "loss": 0.3401, "step": 5922 }, { "epoch": 2.9404269402614593, "grad_norm": 0.3295247554779053, "learning_rate": 1.1815276599001501e-08, "loss": 0.3164, "step": 5923 }, { "epoch": 2.9409233824259475, "grad_norm": 0.3620993494987488, "learning_rate": 1.1617611487103054e-08, "loss": 0.3187, "step": 5924 }, { "epoch": 2.9414198245904353, "grad_norm": 0.3794199228286743, "learning_rate": 1.1421611860507054e-08, "loss": 0.3568, "step": 5925 }, { "epoch": 2.941916266754923, "grad_norm": 0.33352896571159363, "learning_rate": 1.1227277784652823e-08, "loss": 0.2845, "step": 5926 }, { "epoch": 2.9424127089194108, "grad_norm": 0.35707220435142517, "learning_rate": 1.1034609324423463e-08, "loss": 0.3397, "step": 5927 }, { "epoch": 2.9429091510838985, "grad_norm": 0.31402015686035156, "learning_rate": 1.084360654414529e-08, "loss": 0.3084, "step": 5928 }, { "epoch": 2.9434055932483867, "grad_norm": 0.34989601373672485, "learning_rate": 1.0654269507589522e-08, "loss": 0.3419, "step": 5929 }, { "epoch": 2.9439020354128744, "grad_norm": 0.33612099289894104, "learning_rate": 1.0466598277970031e-08, "loss": 0.2763, "step": 5930 }, { "epoch": 2.944398477577362, "grad_norm": 0.35086530447006226, "learning_rate": 1.0280592917945032e-08, "loss": 0.3088, "step": 5931 }, { "epoch": 2.94489491974185, "grad_norm": 0.35800451040267944, "learning_rate": 1.009625348961707e-08, "loss": 0.325, "step": 5932 }, { "epoch": 2.9453913619063377, "grad_norm": 0.3705187141895294, "learning_rate": 9.913580054532468e-09, "loss": 0.344, "step": 5933 }, { "epoch": 2.945887804070826, "grad_norm": 0.3796229958534241, "learning_rate": 9.732572673680218e-09, "loss": 0.2668, "step": 5934 }, { "epoch": 2.9463842462353136, "grad_norm": 0.418099969625473, "learning_rate": 9.5532314074942e-09, "loss": 0.3184, "step": 5935 }, { "epoch": 2.9468806883998013, "grad_norm": 0.34530186653137207, "learning_rate": 9.375556315850964e-09, "loss": 0.2814, "step": 5936 }, { "epoch": 2.9473771305642895, "grad_norm": 0.3453540802001953, "learning_rate": 9.199547458071945e-09, "loss": 0.3582, "step": 5937 }, { "epoch": 2.9478735727287773, "grad_norm": 0.3815596401691437, "learning_rate": 9.025204892921801e-09, "loss": 0.3086, "step": 5938 }, { "epoch": 2.948370014893265, "grad_norm": 0.3581494390964508, "learning_rate": 8.852528678608418e-09, "loss": 0.3334, "step": 5939 }, { "epoch": 2.9488664570577527, "grad_norm": 0.3631856143474579, "learning_rate": 8.681518872784011e-09, "loss": 0.3391, "step": 5940 }, { "epoch": 2.9493628992222405, "grad_norm": 0.344501256942749, "learning_rate": 8.512175532543466e-09, "loss": 0.3311, "step": 5941 }, { "epoch": 2.9498593413867287, "grad_norm": 0.338371217250824, "learning_rate": 8.344498714427107e-09, "loss": 0.2825, "step": 5942 }, { "epoch": 2.9503557835512164, "grad_norm": 0.37534135580062866, "learning_rate": 8.178488474416269e-09, "loss": 0.3103, "step": 5943 }, { "epoch": 2.950852225715704, "grad_norm": 0.38861650228500366, "learning_rate": 8.014144867938279e-09, "loss": 0.359, "step": 5944 }, { "epoch": 2.951348667880192, "grad_norm": 0.3818235397338867, "learning_rate": 7.851467949862579e-09, "loss": 0.2913, "step": 5945 }, { "epoch": 2.9518451100446796, "grad_norm": 0.36097007989883423, "learning_rate": 7.690457774502947e-09, "loss": 0.3495, "step": 5946 }, { "epoch": 2.952341552209168, "grad_norm": 0.3605872690677643, "learning_rate": 7.531114395615823e-09, "loss": 0.2989, "step": 5947 }, { "epoch": 2.9528379943736556, "grad_norm": 0.38615620136260986, "learning_rate": 7.373437866401434e-09, "loss": 0.3769, "step": 5948 }, { "epoch": 2.9533344365381433, "grad_norm": 0.3670876920223236, "learning_rate": 7.2174282395043314e-09, "loss": 0.354, "step": 5949 }, { "epoch": 2.953830878702631, "grad_norm": 0.3298483192920685, "learning_rate": 7.06308556701174e-09, "loss": 0.2819, "step": 5950 }, { "epoch": 2.954327320867119, "grad_norm": 0.3362278938293457, "learning_rate": 6.910409900454107e-09, "loss": 0.2827, "step": 5951 }, { "epoch": 2.954823763031607, "grad_norm": 0.3675220310688019, "learning_rate": 6.759401290806211e-09, "loss": 0.3156, "step": 5952 }, { "epoch": 2.9553202051960947, "grad_norm": 0.3347279727458954, "learning_rate": 6.610059788485501e-09, "loss": 0.2861, "step": 5953 }, { "epoch": 2.9558166473605825, "grad_norm": 0.40086859464645386, "learning_rate": 6.462385443353203e-09, "loss": 0.3945, "step": 5954 }, { "epoch": 2.95631308952507, "grad_norm": 0.3633287250995636, "learning_rate": 6.316378304713211e-09, "loss": 0.3234, "step": 5955 }, { "epoch": 2.956809531689558, "grad_norm": 0.37670060992240906, "learning_rate": 6.172038421313753e-09, "loss": 0.314, "step": 5956 }, { "epoch": 2.957305973854046, "grad_norm": 0.36178672313690186, "learning_rate": 6.029365841345724e-09, "loss": 0.3188, "step": 5957 }, { "epoch": 2.957802416018534, "grad_norm": 0.33332544565200806, "learning_rate": 5.888360612444355e-09, "loss": 0.3261, "step": 5958 }, { "epoch": 2.9582988581830216, "grad_norm": 0.3821881115436554, "learning_rate": 5.749022781686431e-09, "loss": 0.3411, "step": 5959 }, { "epoch": 2.9587953003475094, "grad_norm": 0.37103134393692017, "learning_rate": 5.6113523955941825e-09, "loss": 0.3048, "step": 5960 }, { "epoch": 2.959291742511997, "grad_norm": 0.35088303685188293, "learning_rate": 5.475349500130844e-09, "loss": 0.2751, "step": 5961 }, { "epoch": 2.9597881846764853, "grad_norm": 0.35296428203582764, "learning_rate": 5.341014140705092e-09, "loss": 0.3309, "step": 5962 }, { "epoch": 2.960284626840973, "grad_norm": 0.36383727192878723, "learning_rate": 5.208346362167161e-09, "loss": 0.3392, "step": 5963 }, { "epoch": 2.960781069005461, "grad_norm": 0.3360246419906616, "learning_rate": 5.077346208811618e-09, "loss": 0.3139, "step": 5964 }, { "epoch": 2.961277511169949, "grad_norm": 0.34675028920173645, "learning_rate": 4.948013724375145e-09, "loss": 0.3299, "step": 5965 }, { "epoch": 2.9617739533344363, "grad_norm": 0.3670087456703186, "learning_rate": 4.820348952039311e-09, "loss": 0.2914, "step": 5966 }, { "epoch": 2.9622703954989245, "grad_norm": 0.4265909194946289, "learning_rate": 4.694351934427799e-09, "loss": 0.3495, "step": 5967 }, { "epoch": 2.962766837663412, "grad_norm": 0.3343380093574524, "learning_rate": 4.5700227136069585e-09, "loss": 0.2673, "step": 5968 }, { "epoch": 2.9632632798279, "grad_norm": 0.3497827649116516, "learning_rate": 4.447361331087474e-09, "loss": 0.3754, "step": 5969 }, { "epoch": 2.963759721992388, "grad_norm": 0.3363845944404602, "learning_rate": 4.326367827822142e-09, "loss": 0.2923, "step": 5970 }, { "epoch": 2.964256164156876, "grad_norm": 0.3298611044883728, "learning_rate": 4.207042244208092e-09, "loss": 0.327, "step": 5971 }, { "epoch": 2.9647526063213636, "grad_norm": 0.3245616853237152, "learning_rate": 4.0893846200840135e-09, "loss": 0.348, "step": 5972 }, { "epoch": 2.9652490484858514, "grad_norm": 0.37708842754364014, "learning_rate": 3.973394994733481e-09, "loss": 0.3283, "step": 5973 }, { "epoch": 2.965745490650339, "grad_norm": 0.38689985871315, "learning_rate": 3.85907340688163e-09, "loss": 0.351, "step": 5974 }, { "epoch": 2.9662419328148273, "grad_norm": 0.3158999979496002, "learning_rate": 3.746419894697928e-09, "loss": 0.2829, "step": 5975 }, { "epoch": 2.966738374979315, "grad_norm": 0.3266194462776184, "learning_rate": 3.635434495793955e-09, "loss": 0.3321, "step": 5976 }, { "epoch": 2.9672348171438028, "grad_norm": 0.35777971148490906, "learning_rate": 3.5261172472245143e-09, "loss": 0.3253, "step": 5977 }, { "epoch": 2.9677312593082905, "grad_norm": 0.3388652503490448, "learning_rate": 3.4184681854876335e-09, "loss": 0.2797, "step": 5978 }, { "epoch": 2.9682277014727783, "grad_norm": 0.3316115438938141, "learning_rate": 3.3124873465251172e-09, "loss": 0.2942, "step": 5979 }, { "epoch": 2.9687241436372664, "grad_norm": 0.3822654187679291, "learning_rate": 3.208174765720329e-09, "loss": 0.3381, "step": 5980 }, { "epoch": 2.969220585801754, "grad_norm": 0.4048785865306854, "learning_rate": 3.1055304779009645e-09, "loss": 0.3318, "step": 5981 }, { "epoch": 2.969717027966242, "grad_norm": 0.3820323646068573, "learning_rate": 3.004554517336833e-09, "loss": 0.3372, "step": 5982 }, { "epoch": 2.9702134701307297, "grad_norm": 0.34306347370147705, "learning_rate": 2.905246917740967e-09, "loss": 0.2826, "step": 5983 }, { "epoch": 2.9707099122952174, "grad_norm": 0.3657141625881195, "learning_rate": 2.8076077122696222e-09, "loss": 0.3645, "step": 5984 }, { "epoch": 2.9712063544597056, "grad_norm": 0.3417608439922333, "learning_rate": 2.711636933522277e-09, "loss": 0.3466, "step": 5985 }, { "epoch": 2.9717027966241933, "grad_norm": 0.34811320900917053, "learning_rate": 2.617334613540523e-09, "loss": 0.2532, "step": 5986 }, { "epoch": 2.972199238788681, "grad_norm": 0.3872687816619873, "learning_rate": 2.5247007838091753e-09, "loss": 0.294, "step": 5987 }, { "epoch": 2.972695680953169, "grad_norm": 0.3747205436229706, "learning_rate": 2.4337354752562714e-09, "loss": 0.3641, "step": 5988 }, { "epoch": 2.9731921231176566, "grad_norm": 0.31318798661231995, "learning_rate": 2.3444387182530726e-09, "loss": 0.289, "step": 5989 }, { "epoch": 2.9736885652821448, "grad_norm": 0.3632310628890991, "learning_rate": 2.256810542612953e-09, "loss": 0.3499, "step": 5990 }, { "epoch": 2.9741850074466325, "grad_norm": 0.37687060236930847, "learning_rate": 2.170850977592509e-09, "loss": 0.3279, "step": 5991 }, { "epoch": 2.9746814496111202, "grad_norm": 0.362166166305542, "learning_rate": 2.0865600518915618e-09, "loss": 0.3386, "step": 5992 }, { "epoch": 2.975177891775608, "grad_norm": 0.3283799886703491, "learning_rate": 2.0039377936525995e-09, "loss": 0.279, "step": 5993 }, { "epoch": 2.9756743339400957, "grad_norm": 0.3931915760040283, "learning_rate": 1.922984230460778e-09, "loss": 0.354, "step": 5994 }, { "epoch": 2.976170776104584, "grad_norm": 0.38223496079444885, "learning_rate": 1.8436993893444777e-09, "loss": 0.3549, "step": 5995 }, { "epoch": 2.9766672182690717, "grad_norm": 0.3679300546646118, "learning_rate": 1.7660832967741904e-09, "loss": 0.3225, "step": 5996 }, { "epoch": 2.9771636604335594, "grad_norm": 0.3750530481338501, "learning_rate": 1.6901359786641869e-09, "loss": 0.2973, "step": 5997 }, { "epoch": 2.9776601025980476, "grad_norm": 0.3771143853664398, "learning_rate": 1.615857460371406e-09, "loss": 0.3667, "step": 5998 }, { "epoch": 2.9781565447625353, "grad_norm": 0.3256872892379761, "learning_rate": 1.5432477666954548e-09, "loss": 0.2794, "step": 5999 }, { "epoch": 2.978652986927023, "grad_norm": 0.39347875118255615, "learning_rate": 1.4723069218780528e-09, "loss": 0.3292, "step": 6000 }, { "epoch": 2.979149429091511, "grad_norm": 0.30912527441978455, "learning_rate": 1.403034949605253e-09, "loss": 0.2834, "step": 6001 }, { "epoch": 2.9796458712559986, "grad_norm": 0.3810815215110779, "learning_rate": 1.3354318730052219e-09, "loss": 0.2897, "step": 6002 }, { "epoch": 2.9801423134204867, "grad_norm": 0.3949945271015167, "learning_rate": 1.2694977146476828e-09, "loss": 0.2859, "step": 6003 }, { "epoch": 2.9806387555849745, "grad_norm": 0.3654613196849823, "learning_rate": 1.2052324965466934e-09, "loss": 0.2957, "step": 6004 }, { "epoch": 2.9811351977494622, "grad_norm": 0.35050472617149353, "learning_rate": 1.1426362401595337e-09, "loss": 0.3449, "step": 6005 }, { "epoch": 2.98163163991395, "grad_norm": 0.32394182682037354, "learning_rate": 1.0817089663844872e-09, "loss": 0.316, "step": 6006 }, { "epoch": 2.9821280820784377, "grad_norm": 0.36590060591697693, "learning_rate": 1.0224506955636148e-09, "loss": 0.3398, "step": 6007 }, { "epoch": 2.982624524242926, "grad_norm": 0.3405512571334839, "learning_rate": 9.648614474816465e-10, "loss": 0.3372, "step": 6008 }, { "epoch": 2.9831209664074136, "grad_norm": 0.37151259183883667, "learning_rate": 9.089412413665344e-10, "loss": 0.307, "step": 6009 }, { "epoch": 2.9836174085719014, "grad_norm": 0.37492436170578003, "learning_rate": 8.54690095887789e-10, "loss": 0.3311, "step": 6010 }, { "epoch": 2.984113850736389, "grad_norm": 0.34284016489982605, "learning_rate": 8.021080291592542e-10, "loss": 0.2689, "step": 6011 }, { "epoch": 2.984610292900877, "grad_norm": 0.37257468700408936, "learning_rate": 7.511950587357764e-10, "loss": 0.3133, "step": 6012 }, { "epoch": 2.985106735065365, "grad_norm": 0.36359941959381104, "learning_rate": 7.019512016165353e-10, "loss": 0.3295, "step": 6013 }, { "epoch": 2.985603177229853, "grad_norm": 0.353798508644104, "learning_rate": 6.543764742422687e-10, "loss": 0.3226, "step": 6014 }, { "epoch": 2.9860996193943405, "grad_norm": 0.34513112902641296, "learning_rate": 6.084708924969373e-10, "loss": 0.283, "step": 6015 }, { "epoch": 2.9865960615588283, "grad_norm": 0.3707151412963867, "learning_rate": 5.642344717071702e-10, "loss": 0.3171, "step": 6016 }, { "epoch": 2.987092503723316, "grad_norm": 0.3194657266139984, "learning_rate": 5.21667226642264e-10, "loss": 0.3314, "step": 6017 }, { "epoch": 2.987588945887804, "grad_norm": 0.37061360478401184, "learning_rate": 4.807691715147389e-10, "loss": 0.3755, "step": 6018 }, { "epoch": 2.988085388052292, "grad_norm": 0.3427880108356476, "learning_rate": 4.4154031997867274e-10, "loss": 0.3167, "step": 6019 }, { "epoch": 2.9885818302167797, "grad_norm": 0.37667346000671387, "learning_rate": 4.039806851324768e-10, "loss": 0.2966, "step": 6020 }, { "epoch": 2.9890782723812674, "grad_norm": 0.36760374903678894, "learning_rate": 3.6809027951500987e-10, "loss": 0.2845, "step": 6021 }, { "epoch": 2.989574714545755, "grad_norm": 0.3790621757507324, "learning_rate": 3.338691151100193e-10, "loss": 0.3562, "step": 6022 }, { "epoch": 2.9900711567102434, "grad_norm": 0.3348396420478821, "learning_rate": 3.013172033422551e-10, "loss": 0.3338, "step": 6023 }, { "epoch": 2.990567598874731, "grad_norm": 0.3657049834728241, "learning_rate": 2.7043455508080075e-10, "loss": 0.3167, "step": 6024 }, { "epoch": 2.991064041039219, "grad_norm": 0.35203084349632263, "learning_rate": 2.412211806362974e-10, "loss": 0.321, "step": 6025 }, { "epoch": 2.991560483203707, "grad_norm": 0.3398763835430145, "learning_rate": 2.1367708976205436e-10, "loss": 0.2998, "step": 6026 }, { "epoch": 2.9920569253681943, "grad_norm": 0.34934762120246887, "learning_rate": 1.8780229165404894e-10, "loss": 0.3456, "step": 6027 }, { "epoch": 2.9925533675326825, "grad_norm": 0.37747177481651306, "learning_rate": 1.6359679495148162e-10, "loss": 0.3333, "step": 6028 }, { "epoch": 2.9930498096971703, "grad_norm": 0.35838139057159424, "learning_rate": 1.4106060773622088e-10, "loss": 0.2982, "step": 6029 }, { "epoch": 2.993546251861658, "grad_norm": 0.3449688255786896, "learning_rate": 1.2019373753224816e-10, "loss": 0.2781, "step": 6030 }, { "epoch": 2.994042694026146, "grad_norm": 0.38742536306381226, "learning_rate": 1.0099619130621296e-10, "loss": 0.2977, "step": 6031 }, { "epoch": 2.994539136190634, "grad_norm": 0.36422327160835266, "learning_rate": 8.346797546798791e-11, "loss": 0.3365, "step": 6032 }, { "epoch": 2.9950355783551217, "grad_norm": 0.3335469365119934, "learning_rate": 6.760909586900343e-11, "loss": 0.2497, "step": 6033 }, { "epoch": 2.9955320205196094, "grad_norm": 0.3498923182487488, "learning_rate": 5.3419557805578504e-11, "loss": 0.3485, "step": 6034 }, { "epoch": 2.996028462684097, "grad_norm": 0.3398236930370331, "learning_rate": 4.0899366013924524e-11, "loss": 0.3248, "step": 6035 }, { "epoch": 2.9965249048485854, "grad_norm": 0.390967458486557, "learning_rate": 3.00485246745863e-11, "loss": 0.32, "step": 6036 }, { "epoch": 2.997021347013073, "grad_norm": 0.3811165690422058, "learning_rate": 2.086703741022156e-11, "loss": 0.294, "step": 6037 }, { "epoch": 2.997517789177561, "grad_norm": 0.35245969891548157, "learning_rate": 1.3354907286711184e-11, "loss": 0.2951, "step": 6038 }, { "epoch": 2.9980142313420486, "grad_norm": 0.36663201451301575, "learning_rate": 7.512136812048987e-12, "loss": 0.3112, "step": 6039 }, { "epoch": 2.9985106735065363, "grad_norm": 0.32222142815589905, "learning_rate": 3.3387279363417123e-12, "loss": 0.2699, "step": 6040 }, { "epoch": 2.9990071156710245, "grad_norm": 0.36560073494911194, "learning_rate": 8.346820540294787e-13, "loss": 0.3185, "step": 6041 }, { "epoch": 2.9995035578355123, "grad_norm": 0.36803409457206726, "learning_rate": 0.0, "loss": 0.31, "step": 6042 }, { "epoch": 2.9995035578355123, "step": 6042, "total_flos": 5836783430926336.0, "train_loss": 0.37817217498396216, "train_runtime": 189002.7671, "train_samples_per_second": 3.069, "train_steps_per_second": 0.032 } ], "logging_steps": 1.0, "max_steps": 6042, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5836783430926336.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }