{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.998197873490719, "eval_steps": 500, "global_step": 9245, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005406379527842854, "grad_norm": 5.852370738983154, "learning_rate": 1.0810810810810811e-08, "loss": 0.8755, "step": 1 }, { "epoch": 0.001081275905568571, "grad_norm": 5.950657367706299, "learning_rate": 2.1621621621621623e-08, "loss": 0.8924, "step": 2 }, { "epoch": 0.0016219138583528565, "grad_norm": 6.0590081214904785, "learning_rate": 3.2432432432432436e-08, "loss": 0.8789, "step": 3 }, { "epoch": 0.002162551811137142, "grad_norm": 5.988236427307129, "learning_rate": 4.3243243243243246e-08, "loss": 0.8708, "step": 4 }, { "epoch": 0.0027031897639214274, "grad_norm": 5.799636363983154, "learning_rate": 5.405405405405406e-08, "loss": 0.8629, "step": 5 }, { "epoch": 0.003243827716705713, "grad_norm": 6.322631359100342, "learning_rate": 6.486486486486487e-08, "loss": 0.9277, "step": 6 }, { "epoch": 0.003784465669489998, "grad_norm": 6.104108810424805, "learning_rate": 7.567567567567568e-08, "loss": 0.9011, "step": 7 }, { "epoch": 0.004325103622274284, "grad_norm": 6.1530280113220215, "learning_rate": 8.648648648648649e-08, "loss": 0.89, "step": 8 }, { "epoch": 0.004865741575058569, "grad_norm": 5.7841596603393555, "learning_rate": 9.72972972972973e-08, "loss": 0.8426, "step": 9 }, { "epoch": 0.005406379527842855, "grad_norm": 5.69771671295166, "learning_rate": 1.0810810810810812e-07, "loss": 0.846, "step": 10 }, { "epoch": 0.00594701748062714, "grad_norm": 5.842521667480469, "learning_rate": 1.1891891891891893e-07, "loss": 0.8756, "step": 11 }, { "epoch": 0.006487655433411426, "grad_norm": 5.661792278289795, "learning_rate": 1.2972972972972974e-07, "loss": 0.8472, "step": 12 }, { "epoch": 0.0070282933861957105, "grad_norm": 5.664099216461182, "learning_rate": 1.4054054054054055e-07, "loss": 0.8736, "step": 13 }, { "epoch": 0.007568931338979996, "grad_norm": 5.9114603996276855, "learning_rate": 1.5135135135135135e-07, "loss": 0.8842, "step": 14 }, { "epoch": 0.008109569291764282, "grad_norm": 5.857370853424072, "learning_rate": 1.6216216216216218e-07, "loss": 0.8719, "step": 15 }, { "epoch": 0.008650207244548567, "grad_norm": 5.932351112365723, "learning_rate": 1.7297297297297298e-07, "loss": 0.9108, "step": 16 }, { "epoch": 0.009190845197332853, "grad_norm": 5.903899669647217, "learning_rate": 1.8378378378378379e-07, "loss": 0.8717, "step": 17 }, { "epoch": 0.009731483150117138, "grad_norm": 5.735729694366455, "learning_rate": 1.945945945945946e-07, "loss": 0.8519, "step": 18 }, { "epoch": 0.010272121102901424, "grad_norm": 5.9615159034729, "learning_rate": 2.0540540540540542e-07, "loss": 0.8696, "step": 19 }, { "epoch": 0.01081275905568571, "grad_norm": 5.628999710083008, "learning_rate": 2.1621621621621625e-07, "loss": 0.8514, "step": 20 }, { "epoch": 0.011353397008469995, "grad_norm": 5.775051593780518, "learning_rate": 2.2702702702702705e-07, "loss": 0.8545, "step": 21 }, { "epoch": 0.01189403496125428, "grad_norm": 5.561074733734131, "learning_rate": 2.3783783783783785e-07, "loss": 0.8694, "step": 22 }, { "epoch": 0.012434672914038566, "grad_norm": 5.445178985595703, "learning_rate": 2.486486486486487e-07, "loss": 0.8473, "step": 23 }, { "epoch": 0.012975310866822852, "grad_norm": 5.540316581726074, "learning_rate": 2.594594594594595e-07, "loss": 0.8695, "step": 24 }, { "epoch": 0.013515948819607137, "grad_norm": 5.309917449951172, "learning_rate": 2.702702702702703e-07, "loss": 0.8353, "step": 25 }, { "epoch": 0.014056586772391421, "grad_norm": 5.3465189933776855, "learning_rate": 2.810810810810811e-07, "loss": 0.8436, "step": 26 }, { "epoch": 0.014597224725175707, "grad_norm": 5.423165798187256, "learning_rate": 2.918918918918919e-07, "loss": 0.8549, "step": 27 }, { "epoch": 0.015137862677959992, "grad_norm": 5.301184177398682, "learning_rate": 3.027027027027027e-07, "loss": 0.8415, "step": 28 }, { "epoch": 0.015678500630744278, "grad_norm": 4.40065860748291, "learning_rate": 3.135135135135135e-07, "loss": 0.8191, "step": 29 }, { "epoch": 0.016219138583528563, "grad_norm": 4.320012092590332, "learning_rate": 3.2432432432432436e-07, "loss": 0.8082, "step": 30 }, { "epoch": 0.01675977653631285, "grad_norm": 4.359866619110107, "learning_rate": 3.3513513513513516e-07, "loss": 0.84, "step": 31 }, { "epoch": 0.017300414489097134, "grad_norm": 4.388699531555176, "learning_rate": 3.4594594594594597e-07, "loss": 0.8323, "step": 32 }, { "epoch": 0.01784105244188142, "grad_norm": 4.4560747146606445, "learning_rate": 3.567567567567568e-07, "loss": 0.8409, "step": 33 }, { "epoch": 0.018381690394665706, "grad_norm": 4.259395122528076, "learning_rate": 3.6756756756756757e-07, "loss": 0.7983, "step": 34 }, { "epoch": 0.01892232834744999, "grad_norm": 4.209655284881592, "learning_rate": 3.7837837837837843e-07, "loss": 0.8256, "step": 35 }, { "epoch": 0.019462966300234277, "grad_norm": 3.996368885040283, "learning_rate": 3.891891891891892e-07, "loss": 0.8241, "step": 36 }, { "epoch": 0.020003604253018562, "grad_norm": 4.126471042633057, "learning_rate": 4.0000000000000003e-07, "loss": 0.8436, "step": 37 }, { "epoch": 0.020544242205802848, "grad_norm": 3.9851861000061035, "learning_rate": 4.1081081081081084e-07, "loss": 0.8082, "step": 38 }, { "epoch": 0.021084880158587133, "grad_norm": 3.124469518661499, "learning_rate": 4.2162162162162164e-07, "loss": 0.8038, "step": 39 }, { "epoch": 0.02162551811137142, "grad_norm": 2.3070788383483887, "learning_rate": 4.324324324324325e-07, "loss": 0.7691, "step": 40 }, { "epoch": 0.022166156064155704, "grad_norm": 2.2585034370422363, "learning_rate": 4.4324324324324325e-07, "loss": 0.7469, "step": 41 }, { "epoch": 0.02270679401693999, "grad_norm": 2.3153300285339355, "learning_rate": 4.540540540540541e-07, "loss": 0.7967, "step": 42 }, { "epoch": 0.023247431969724276, "grad_norm": 2.2460641860961914, "learning_rate": 4.6486486486486485e-07, "loss": 0.7945, "step": 43 }, { "epoch": 0.02378806992250856, "grad_norm": 2.041891574859619, "learning_rate": 4.756756756756757e-07, "loss": 0.7405, "step": 44 }, { "epoch": 0.024328707875292847, "grad_norm": 2.197927474975586, "learning_rate": 4.864864864864865e-07, "loss": 0.7911, "step": 45 }, { "epoch": 0.024869345828077132, "grad_norm": 2.032799243927002, "learning_rate": 4.972972972972974e-07, "loss": 0.7952, "step": 46 }, { "epoch": 0.025409983780861418, "grad_norm": 1.8903582096099854, "learning_rate": 5.081081081081081e-07, "loss": 0.757, "step": 47 }, { "epoch": 0.025950621733645703, "grad_norm": 1.9750922918319702, "learning_rate": 5.18918918918919e-07, "loss": 0.754, "step": 48 }, { "epoch": 0.02649125968642999, "grad_norm": 1.8000760078430176, "learning_rate": 5.297297297297297e-07, "loss": 0.7691, "step": 49 }, { "epoch": 0.027031897639214274, "grad_norm": 1.7124290466308594, "learning_rate": 5.405405405405406e-07, "loss": 0.7252, "step": 50 }, { "epoch": 0.02757253559199856, "grad_norm": 1.6576565504074097, "learning_rate": 5.513513513513514e-07, "loss": 0.7495, "step": 51 }, { "epoch": 0.028113173544782842, "grad_norm": 1.3588616847991943, "learning_rate": 5.621621621621622e-07, "loss": 0.7624, "step": 52 }, { "epoch": 0.028653811497567128, "grad_norm": 1.3072082996368408, "learning_rate": 5.72972972972973e-07, "loss": 0.752, "step": 53 }, { "epoch": 0.029194449450351413, "grad_norm": 1.344785213470459, "learning_rate": 5.837837837837838e-07, "loss": 0.7367, "step": 54 }, { "epoch": 0.0297350874031357, "grad_norm": 1.5406584739685059, "learning_rate": 5.945945945945947e-07, "loss": 0.7839, "step": 55 }, { "epoch": 0.030275725355919984, "grad_norm": 1.5574135780334473, "learning_rate": 6.054054054054054e-07, "loss": 0.7108, "step": 56 }, { "epoch": 0.03081636330870427, "grad_norm": 1.608785629272461, "learning_rate": 6.162162162162163e-07, "loss": 0.7083, "step": 57 }, { "epoch": 0.031357001261488555, "grad_norm": 1.6619904041290283, "learning_rate": 6.27027027027027e-07, "loss": 0.7478, "step": 58 }, { "epoch": 0.03189763921427284, "grad_norm": 1.5372380018234253, "learning_rate": 6.378378378378379e-07, "loss": 0.719, "step": 59 }, { "epoch": 0.03243827716705713, "grad_norm": 1.4971483945846558, "learning_rate": 6.486486486486487e-07, "loss": 0.7139, "step": 60 }, { "epoch": 0.03297891511984141, "grad_norm": 1.4338963031768799, "learning_rate": 6.594594594594596e-07, "loss": 0.7308, "step": 61 }, { "epoch": 0.0335195530726257, "grad_norm": 1.2289550304412842, "learning_rate": 6.702702702702703e-07, "loss": 0.739, "step": 62 }, { "epoch": 0.03406019102540998, "grad_norm": 1.2169913053512573, "learning_rate": 6.810810810810811e-07, "loss": 0.7369, "step": 63 }, { "epoch": 0.03460082897819427, "grad_norm": 0.9731535911560059, "learning_rate": 6.918918918918919e-07, "loss": 0.6949, "step": 64 }, { "epoch": 0.035141466930978554, "grad_norm": 0.9446195960044861, "learning_rate": 7.027027027027028e-07, "loss": 0.7081, "step": 65 }, { "epoch": 0.03568210488376284, "grad_norm": 0.9269970059394836, "learning_rate": 7.135135135135136e-07, "loss": 0.7229, "step": 66 }, { "epoch": 0.036222742836547125, "grad_norm": 0.8842006325721741, "learning_rate": 7.243243243243243e-07, "loss": 0.696, "step": 67 }, { "epoch": 0.03676338078933141, "grad_norm": 0.8420222997665405, "learning_rate": 7.351351351351351e-07, "loss": 0.7491, "step": 68 }, { "epoch": 0.0373040187421157, "grad_norm": 0.8452447056770325, "learning_rate": 7.45945945945946e-07, "loss": 0.7079, "step": 69 }, { "epoch": 0.03784465669489998, "grad_norm": 0.8561710119247437, "learning_rate": 7.567567567567569e-07, "loss": 0.6656, "step": 70 }, { "epoch": 0.03838529464768427, "grad_norm": 0.8087544441223145, "learning_rate": 7.675675675675676e-07, "loss": 0.6841, "step": 71 }, { "epoch": 0.03892593260046855, "grad_norm": 0.8077256679534912, "learning_rate": 7.783783783783784e-07, "loss": 0.665, "step": 72 }, { "epoch": 0.03946657055325284, "grad_norm": 0.7797601819038391, "learning_rate": 7.891891891891892e-07, "loss": 0.7235, "step": 73 }, { "epoch": 0.040007208506037124, "grad_norm": 0.6821624040603638, "learning_rate": 8.000000000000001e-07, "loss": 0.7056, "step": 74 }, { "epoch": 0.04054784645882141, "grad_norm": 0.7008046507835388, "learning_rate": 8.108108108108109e-07, "loss": 0.6892, "step": 75 }, { "epoch": 0.041088484411605695, "grad_norm": 0.6429518461227417, "learning_rate": 8.216216216216217e-07, "loss": 0.6758, "step": 76 }, { "epoch": 0.04162912236438998, "grad_norm": 0.6321120858192444, "learning_rate": 8.324324324324324e-07, "loss": 0.6655, "step": 77 }, { "epoch": 0.04216976031717427, "grad_norm": 0.6515984535217285, "learning_rate": 8.432432432432433e-07, "loss": 0.6735, "step": 78 }, { "epoch": 0.04271039826995855, "grad_norm": 0.5830267667770386, "learning_rate": 8.540540540540541e-07, "loss": 0.6482, "step": 79 }, { "epoch": 0.04325103622274284, "grad_norm": 0.5883980393409729, "learning_rate": 8.64864864864865e-07, "loss": 0.6438, "step": 80 }, { "epoch": 0.04379167417552712, "grad_norm": 0.5688890814781189, "learning_rate": 8.756756756756756e-07, "loss": 0.6729, "step": 81 }, { "epoch": 0.04433231212831141, "grad_norm": 0.6022357940673828, "learning_rate": 8.864864864864865e-07, "loss": 0.6681, "step": 82 }, { "epoch": 0.044872950081095694, "grad_norm": 0.557185709476471, "learning_rate": 8.972972972972974e-07, "loss": 0.6756, "step": 83 }, { "epoch": 0.04541358803387998, "grad_norm": 0.5133994221687317, "learning_rate": 9.081081081081082e-07, "loss": 0.6096, "step": 84 }, { "epoch": 0.045954225986664266, "grad_norm": 0.5198540687561035, "learning_rate": 9.189189189189191e-07, "loss": 0.631, "step": 85 }, { "epoch": 0.04649486393944855, "grad_norm": 0.49539971351623535, "learning_rate": 9.297297297297297e-07, "loss": 0.6403, "step": 86 }, { "epoch": 0.04703550189223284, "grad_norm": 0.527891218662262, "learning_rate": 9.405405405405406e-07, "loss": 0.664, "step": 87 }, { "epoch": 0.04757613984501712, "grad_norm": 0.5219648480415344, "learning_rate": 9.513513513513514e-07, "loss": 0.63, "step": 88 }, { "epoch": 0.04811677779780141, "grad_norm": 0.5005459189414978, "learning_rate": 9.621621621621622e-07, "loss": 0.64, "step": 89 }, { "epoch": 0.04865741575058569, "grad_norm": 0.5416708588600159, "learning_rate": 9.72972972972973e-07, "loss": 0.6376, "step": 90 }, { "epoch": 0.04919805370336998, "grad_norm": 0.460190087556839, "learning_rate": 9.837837837837839e-07, "loss": 0.6326, "step": 91 }, { "epoch": 0.049738691656154264, "grad_norm": 0.518156886100769, "learning_rate": 9.945945945945947e-07, "loss": 0.6809, "step": 92 }, { "epoch": 0.05027932960893855, "grad_norm": 0.453222393989563, "learning_rate": 1.0054054054054054e-06, "loss": 0.6444, "step": 93 }, { "epoch": 0.050819967561722836, "grad_norm": 0.4345966875553131, "learning_rate": 1.0162162162162162e-06, "loss": 0.6285, "step": 94 }, { "epoch": 0.05136060551450712, "grad_norm": 0.46965357661247253, "learning_rate": 1.027027027027027e-06, "loss": 0.6763, "step": 95 }, { "epoch": 0.05190124346729141, "grad_norm": 0.43054792284965515, "learning_rate": 1.037837837837838e-06, "loss": 0.6359, "step": 96 }, { "epoch": 0.05244188142007569, "grad_norm": 0.4354804754257202, "learning_rate": 1.0486486486486488e-06, "loss": 0.623, "step": 97 }, { "epoch": 0.05298251937285998, "grad_norm": 0.4513593912124634, "learning_rate": 1.0594594594594595e-06, "loss": 0.6113, "step": 98 }, { "epoch": 0.05352315732564426, "grad_norm": 0.42707252502441406, "learning_rate": 1.0702702702702703e-06, "loss": 0.6193, "step": 99 }, { "epoch": 0.05406379527842855, "grad_norm": 0.41745811700820923, "learning_rate": 1.0810810810810812e-06, "loss": 0.6259, "step": 100 }, { "epoch": 0.054604433231212834, "grad_norm": 0.4500015676021576, "learning_rate": 1.091891891891892e-06, "loss": 0.6484, "step": 101 }, { "epoch": 0.05514507118399712, "grad_norm": 0.4127950966358185, "learning_rate": 1.1027027027027029e-06, "loss": 0.6298, "step": 102 }, { "epoch": 0.0556857091367814, "grad_norm": 0.48789313435554504, "learning_rate": 1.1135135135135135e-06, "loss": 0.6584, "step": 103 }, { "epoch": 0.056226347089565684, "grad_norm": 0.4403907358646393, "learning_rate": 1.1243243243243244e-06, "loss": 0.6092, "step": 104 }, { "epoch": 0.05676698504234997, "grad_norm": 0.4237998127937317, "learning_rate": 1.1351351351351352e-06, "loss": 0.6304, "step": 105 }, { "epoch": 0.057307622995134255, "grad_norm": 0.411171019077301, "learning_rate": 1.145945945945946e-06, "loss": 0.5931, "step": 106 }, { "epoch": 0.05784826094791854, "grad_norm": 0.4114699065685272, "learning_rate": 1.1567567567567567e-06, "loss": 0.6625, "step": 107 }, { "epoch": 0.058388898900702826, "grad_norm": 0.4349936544895172, "learning_rate": 1.1675675675675676e-06, "loss": 0.6267, "step": 108 }, { "epoch": 0.05892953685348711, "grad_norm": 0.390249103307724, "learning_rate": 1.1783783783783784e-06, "loss": 0.6373, "step": 109 }, { "epoch": 0.0594701748062714, "grad_norm": 0.41917869448661804, "learning_rate": 1.1891891891891893e-06, "loss": 0.6195, "step": 110 }, { "epoch": 0.06001081275905568, "grad_norm": 0.40572988986968994, "learning_rate": 1.2000000000000002e-06, "loss": 0.5918, "step": 111 }, { "epoch": 0.06055145071183997, "grad_norm": 0.41473883390426636, "learning_rate": 1.2108108108108108e-06, "loss": 0.616, "step": 112 }, { "epoch": 0.061092088664624254, "grad_norm": 0.4362037181854248, "learning_rate": 1.2216216216216217e-06, "loss": 0.6047, "step": 113 }, { "epoch": 0.06163272661740854, "grad_norm": 0.4196556508541107, "learning_rate": 1.2324324324324325e-06, "loss": 0.6072, "step": 114 }, { "epoch": 0.062173364570192825, "grad_norm": 0.3973116874694824, "learning_rate": 1.2432432432432434e-06, "loss": 0.5933, "step": 115 }, { "epoch": 0.06271400252297711, "grad_norm": 0.42316290736198425, "learning_rate": 1.254054054054054e-06, "loss": 0.619, "step": 116 }, { "epoch": 0.0632546404757614, "grad_norm": 0.4168538451194763, "learning_rate": 1.264864864864865e-06, "loss": 0.5945, "step": 117 }, { "epoch": 0.06379527842854568, "grad_norm": 0.4332961440086365, "learning_rate": 1.2756756756756757e-06, "loss": 0.6496, "step": 118 }, { "epoch": 0.06433591638132997, "grad_norm": 0.40682727098464966, "learning_rate": 1.2864864864864866e-06, "loss": 0.6178, "step": 119 }, { "epoch": 0.06487655433411425, "grad_norm": 0.4624468982219696, "learning_rate": 1.2972972972972974e-06, "loss": 0.632, "step": 120 }, { "epoch": 0.06541719228689855, "grad_norm": 0.4604220986366272, "learning_rate": 1.308108108108108e-06, "loss": 0.5954, "step": 121 }, { "epoch": 0.06595783023968282, "grad_norm": 0.3809317648410797, "learning_rate": 1.3189189189189192e-06, "loss": 0.5924, "step": 122 }, { "epoch": 0.06649846819246712, "grad_norm": 0.4062090516090393, "learning_rate": 1.3297297297297298e-06, "loss": 0.6023, "step": 123 }, { "epoch": 0.0670391061452514, "grad_norm": 0.37150242924690247, "learning_rate": 1.3405405405405407e-06, "loss": 0.6204, "step": 124 }, { "epoch": 0.06757974409803569, "grad_norm": 0.40651530027389526, "learning_rate": 1.3513513513513515e-06, "loss": 0.6133, "step": 125 }, { "epoch": 0.06812038205081997, "grad_norm": 0.41236740350723267, "learning_rate": 1.3621621621621622e-06, "loss": 0.5861, "step": 126 }, { "epoch": 0.06866102000360426, "grad_norm": 0.35920029878616333, "learning_rate": 1.3729729729729732e-06, "loss": 0.5781, "step": 127 }, { "epoch": 0.06920165795638854, "grad_norm": 0.44092392921447754, "learning_rate": 1.3837837837837839e-06, "loss": 0.6472, "step": 128 }, { "epoch": 0.06974229590917283, "grad_norm": 0.4139423966407776, "learning_rate": 1.3945945945945947e-06, "loss": 0.6242, "step": 129 }, { "epoch": 0.07028293386195711, "grad_norm": 0.3792780041694641, "learning_rate": 1.4054054054054056e-06, "loss": 0.6001, "step": 130 }, { "epoch": 0.0708235718147414, "grad_norm": 0.41122201085090637, "learning_rate": 1.4162162162162162e-06, "loss": 0.583, "step": 131 }, { "epoch": 0.07136420976752568, "grad_norm": 0.37738776206970215, "learning_rate": 1.4270270270270273e-06, "loss": 0.6071, "step": 132 }, { "epoch": 0.07190484772030997, "grad_norm": 0.3700563609600067, "learning_rate": 1.437837837837838e-06, "loss": 0.6138, "step": 133 }, { "epoch": 0.07244548567309425, "grad_norm": 0.4004262387752533, "learning_rate": 1.4486486486486486e-06, "loss": 0.6297, "step": 134 }, { "epoch": 0.07298612362587854, "grad_norm": 0.35735851526260376, "learning_rate": 1.4594594594594596e-06, "loss": 0.6531, "step": 135 }, { "epoch": 0.07352676157866282, "grad_norm": 0.40201085805892944, "learning_rate": 1.4702702702702703e-06, "loss": 0.5891, "step": 136 }, { "epoch": 0.07406739953144711, "grad_norm": 0.42626097798347473, "learning_rate": 1.4810810810810814e-06, "loss": 0.6261, "step": 137 }, { "epoch": 0.0746080374842314, "grad_norm": 0.39403700828552246, "learning_rate": 1.491891891891892e-06, "loss": 0.5996, "step": 138 }, { "epoch": 0.07514867543701567, "grad_norm": 0.3774298429489136, "learning_rate": 1.5027027027027026e-06, "loss": 0.6039, "step": 139 }, { "epoch": 0.07568931338979996, "grad_norm": 0.40128400921821594, "learning_rate": 1.5135135135135137e-06, "loss": 0.6185, "step": 140 }, { "epoch": 0.07622995134258424, "grad_norm": 0.3873974680900574, "learning_rate": 1.5243243243243244e-06, "loss": 0.5823, "step": 141 }, { "epoch": 0.07677058929536854, "grad_norm": 0.3734970688819885, "learning_rate": 1.5351351351351352e-06, "loss": 0.6091, "step": 142 }, { "epoch": 0.07731122724815281, "grad_norm": 0.36722680926322937, "learning_rate": 1.545945945945946e-06, "loss": 0.5835, "step": 143 }, { "epoch": 0.0778518652009371, "grad_norm": 0.4275578260421753, "learning_rate": 1.5567567567567567e-06, "loss": 0.582, "step": 144 }, { "epoch": 0.07839250315372139, "grad_norm": 0.4208986163139343, "learning_rate": 1.5675675675675678e-06, "loss": 0.5632, "step": 145 }, { "epoch": 0.07893314110650568, "grad_norm": 0.3910946547985077, "learning_rate": 1.5783783783783784e-06, "loss": 0.6165, "step": 146 }, { "epoch": 0.07947377905928996, "grad_norm": 0.4066162407398224, "learning_rate": 1.5891891891891893e-06, "loss": 0.6138, "step": 147 }, { "epoch": 0.08001441701207425, "grad_norm": 0.386677622795105, "learning_rate": 1.6000000000000001e-06, "loss": 0.594, "step": 148 }, { "epoch": 0.08055505496485853, "grad_norm": 0.41342198848724365, "learning_rate": 1.6108108108108108e-06, "loss": 0.5992, "step": 149 }, { "epoch": 0.08109569291764282, "grad_norm": 0.3809179365634918, "learning_rate": 1.6216216216216219e-06, "loss": 0.5768, "step": 150 }, { "epoch": 0.0816363308704271, "grad_norm": 0.39420464634895325, "learning_rate": 1.6324324324324325e-06, "loss": 0.5875, "step": 151 }, { "epoch": 0.08217696882321139, "grad_norm": 0.37135252356529236, "learning_rate": 1.6432432432432434e-06, "loss": 0.6015, "step": 152 }, { "epoch": 0.08271760677599567, "grad_norm": 0.39454683661460876, "learning_rate": 1.6540540540540542e-06, "loss": 0.5874, "step": 153 }, { "epoch": 0.08325824472877996, "grad_norm": 0.3885583281517029, "learning_rate": 1.6648648648648649e-06, "loss": 0.5867, "step": 154 }, { "epoch": 0.08379888268156424, "grad_norm": 0.37252458930015564, "learning_rate": 1.675675675675676e-06, "loss": 0.5975, "step": 155 }, { "epoch": 0.08433952063434853, "grad_norm": 0.39173391461372375, "learning_rate": 1.6864864864864866e-06, "loss": 0.585, "step": 156 }, { "epoch": 0.08488015858713281, "grad_norm": 0.4323269724845886, "learning_rate": 1.6972972972972972e-06, "loss": 0.5694, "step": 157 }, { "epoch": 0.0854207965399171, "grad_norm": 0.3776825964450836, "learning_rate": 1.7081081081081083e-06, "loss": 0.5765, "step": 158 }, { "epoch": 0.08596143449270138, "grad_norm": 0.3599972724914551, "learning_rate": 1.718918918918919e-06, "loss": 0.5845, "step": 159 }, { "epoch": 0.08650207244548568, "grad_norm": 0.37649205327033997, "learning_rate": 1.72972972972973e-06, "loss": 0.5836, "step": 160 }, { "epoch": 0.08704271039826995, "grad_norm": 0.3656623065471649, "learning_rate": 1.7405405405405406e-06, "loss": 0.5357, "step": 161 }, { "epoch": 0.08758334835105425, "grad_norm": 0.37666720151901245, "learning_rate": 1.7513513513513513e-06, "loss": 0.5441, "step": 162 }, { "epoch": 0.08812398630383853, "grad_norm": 0.3532339036464691, "learning_rate": 1.7621621621621623e-06, "loss": 0.5623, "step": 163 }, { "epoch": 0.08866462425662282, "grad_norm": 0.3519897758960724, "learning_rate": 1.772972972972973e-06, "loss": 0.5903, "step": 164 }, { "epoch": 0.0892052622094071, "grad_norm": 0.40431109070777893, "learning_rate": 1.783783783783784e-06, "loss": 0.6142, "step": 165 }, { "epoch": 0.08974590016219139, "grad_norm": 0.4137735366821289, "learning_rate": 1.7945945945945947e-06, "loss": 0.5966, "step": 166 }, { "epoch": 0.09028653811497567, "grad_norm": 0.3849121332168579, "learning_rate": 1.8054054054054053e-06, "loss": 0.5876, "step": 167 }, { "epoch": 0.09082717606775996, "grad_norm": 0.37211450934410095, "learning_rate": 1.8162162162162164e-06, "loss": 0.5441, "step": 168 }, { "epoch": 0.09136781402054424, "grad_norm": 0.35666146874427795, "learning_rate": 1.827027027027027e-06, "loss": 0.5828, "step": 169 }, { "epoch": 0.09190845197332853, "grad_norm": 0.3767271637916565, "learning_rate": 1.8378378378378381e-06, "loss": 0.5968, "step": 170 }, { "epoch": 0.09244908992611281, "grad_norm": 0.3989579379558563, "learning_rate": 1.8486486486486488e-06, "loss": 0.5844, "step": 171 }, { "epoch": 0.0929897278788971, "grad_norm": 0.364155113697052, "learning_rate": 1.8594594594594594e-06, "loss": 0.5798, "step": 172 }, { "epoch": 0.09353036583168138, "grad_norm": 0.38305285573005676, "learning_rate": 1.8702702702702705e-06, "loss": 0.5977, "step": 173 }, { "epoch": 0.09407100378446567, "grad_norm": 0.44144946336746216, "learning_rate": 1.8810810810810811e-06, "loss": 0.5732, "step": 174 }, { "epoch": 0.09461164173724995, "grad_norm": 0.35379329323768616, "learning_rate": 1.8918918918918922e-06, "loss": 0.5859, "step": 175 }, { "epoch": 0.09515227969003424, "grad_norm": 0.3723418414592743, "learning_rate": 1.9027027027027028e-06, "loss": 0.5889, "step": 176 }, { "epoch": 0.09569291764281852, "grad_norm": 0.38720229268074036, "learning_rate": 1.9135135135135135e-06, "loss": 0.6096, "step": 177 }, { "epoch": 0.09623355559560282, "grad_norm": 0.384669691324234, "learning_rate": 1.9243243243243243e-06, "loss": 0.5813, "step": 178 }, { "epoch": 0.0967741935483871, "grad_norm": 0.3830939531326294, "learning_rate": 1.935135135135135e-06, "loss": 0.535, "step": 179 }, { "epoch": 0.09731483150117139, "grad_norm": 0.3666214048862457, "learning_rate": 1.945945945945946e-06, "loss": 0.5891, "step": 180 }, { "epoch": 0.09785546945395567, "grad_norm": 0.37354567646980286, "learning_rate": 1.956756756756757e-06, "loss": 0.6001, "step": 181 }, { "epoch": 0.09839610740673996, "grad_norm": 0.3545113205909729, "learning_rate": 1.9675675675675678e-06, "loss": 0.5886, "step": 182 }, { "epoch": 0.09893674535952424, "grad_norm": 0.3804175555706024, "learning_rate": 1.9783783783783786e-06, "loss": 0.5686, "step": 183 }, { "epoch": 0.09947738331230853, "grad_norm": 0.37139418721199036, "learning_rate": 1.9891891891891895e-06, "loss": 0.5788, "step": 184 }, { "epoch": 0.10001802126509281, "grad_norm": 0.34333956241607666, "learning_rate": 2.0000000000000003e-06, "loss": 0.5442, "step": 185 }, { "epoch": 0.1005586592178771, "grad_norm": 0.37467560172080994, "learning_rate": 2.0108108108108108e-06, "loss": 0.5533, "step": 186 }, { "epoch": 0.10109929717066138, "grad_norm": 0.40123438835144043, "learning_rate": 2.0216216216216216e-06, "loss": 0.554, "step": 187 }, { "epoch": 0.10163993512344567, "grad_norm": 0.41539064049720764, "learning_rate": 2.0324324324324325e-06, "loss": 0.5893, "step": 188 }, { "epoch": 0.10218057307622995, "grad_norm": 0.3829135596752167, "learning_rate": 2.0432432432432433e-06, "loss": 0.5845, "step": 189 }, { "epoch": 0.10272121102901424, "grad_norm": 0.3924802541732788, "learning_rate": 2.054054054054054e-06, "loss": 0.5521, "step": 190 }, { "epoch": 0.10326184898179852, "grad_norm": 0.37531155347824097, "learning_rate": 2.064864864864865e-06, "loss": 0.571, "step": 191 }, { "epoch": 0.10380248693458281, "grad_norm": 0.3559173345565796, "learning_rate": 2.075675675675676e-06, "loss": 0.5636, "step": 192 }, { "epoch": 0.10434312488736709, "grad_norm": 0.3618552088737488, "learning_rate": 2.0864864864864868e-06, "loss": 0.5587, "step": 193 }, { "epoch": 0.10488376284015138, "grad_norm": 0.36590248346328735, "learning_rate": 2.0972972972972976e-06, "loss": 0.557, "step": 194 }, { "epoch": 0.10542440079293566, "grad_norm": 0.3591752052307129, "learning_rate": 2.1081081081081085e-06, "loss": 0.5506, "step": 195 }, { "epoch": 0.10596503874571996, "grad_norm": 0.41891753673553467, "learning_rate": 2.118918918918919e-06, "loss": 0.5663, "step": 196 }, { "epoch": 0.10650567669850423, "grad_norm": 0.37557801604270935, "learning_rate": 2.1297297297297298e-06, "loss": 0.5797, "step": 197 }, { "epoch": 0.10704631465128853, "grad_norm": 0.3890296220779419, "learning_rate": 2.1405405405405406e-06, "loss": 0.5387, "step": 198 }, { "epoch": 0.1075869526040728, "grad_norm": 0.43507513403892517, "learning_rate": 2.1513513513513515e-06, "loss": 0.5988, "step": 199 }, { "epoch": 0.1081275905568571, "grad_norm": 0.3923652470111847, "learning_rate": 2.1621621621621623e-06, "loss": 0.6047, "step": 200 }, { "epoch": 0.10866822850964138, "grad_norm": 0.38558241724967957, "learning_rate": 2.172972972972973e-06, "loss": 0.5726, "step": 201 }, { "epoch": 0.10920886646242567, "grad_norm": 0.3587067425251007, "learning_rate": 2.183783783783784e-06, "loss": 0.5832, "step": 202 }, { "epoch": 0.10974950441520995, "grad_norm": 0.3850076198577881, "learning_rate": 2.194594594594595e-06, "loss": 0.556, "step": 203 }, { "epoch": 0.11029014236799424, "grad_norm": 0.38720348477363586, "learning_rate": 2.2054054054054058e-06, "loss": 0.5684, "step": 204 }, { "epoch": 0.11083078032077852, "grad_norm": 0.40378227829933167, "learning_rate": 2.2162162162162166e-06, "loss": 0.5897, "step": 205 }, { "epoch": 0.1113714182735628, "grad_norm": 0.4102720320224762, "learning_rate": 2.227027027027027e-06, "loss": 0.5628, "step": 206 }, { "epoch": 0.11191205622634709, "grad_norm": 0.36637264490127563, "learning_rate": 2.237837837837838e-06, "loss": 0.5643, "step": 207 }, { "epoch": 0.11245269417913137, "grad_norm": 0.4347660541534424, "learning_rate": 2.2486486486486488e-06, "loss": 0.5591, "step": 208 }, { "epoch": 0.11299333213191566, "grad_norm": 0.3727447986602783, "learning_rate": 2.2594594594594596e-06, "loss": 0.5625, "step": 209 }, { "epoch": 0.11353397008469994, "grad_norm": 0.3756256103515625, "learning_rate": 2.2702702702702705e-06, "loss": 0.5595, "step": 210 }, { "epoch": 0.11407460803748423, "grad_norm": 0.40890535712242126, "learning_rate": 2.2810810810810813e-06, "loss": 0.5918, "step": 211 }, { "epoch": 0.11461524599026851, "grad_norm": 0.4243277907371521, "learning_rate": 2.291891891891892e-06, "loss": 0.5755, "step": 212 }, { "epoch": 0.1151558839430528, "grad_norm": 0.41777583956718445, "learning_rate": 2.302702702702703e-06, "loss": 0.5478, "step": 213 }, { "epoch": 0.11569652189583708, "grad_norm": 0.4105633497238159, "learning_rate": 2.3135135135135135e-06, "loss": 0.5595, "step": 214 }, { "epoch": 0.11623715984862137, "grad_norm": 0.40353402495384216, "learning_rate": 2.3243243243243247e-06, "loss": 0.5876, "step": 215 }, { "epoch": 0.11677779780140565, "grad_norm": 0.3937397301197052, "learning_rate": 2.335135135135135e-06, "loss": 0.5392, "step": 216 }, { "epoch": 0.11731843575418995, "grad_norm": 0.4327187240123749, "learning_rate": 2.345945945945946e-06, "loss": 0.5869, "step": 217 }, { "epoch": 0.11785907370697422, "grad_norm": 0.42935019731521606, "learning_rate": 2.356756756756757e-06, "loss": 0.5673, "step": 218 }, { "epoch": 0.11839971165975852, "grad_norm": 0.3601163923740387, "learning_rate": 2.3675675675675677e-06, "loss": 0.5625, "step": 219 }, { "epoch": 0.1189403496125428, "grad_norm": 0.4096478223800659, "learning_rate": 2.3783783783783786e-06, "loss": 0.5521, "step": 220 }, { "epoch": 0.11948098756532709, "grad_norm": 0.36564552783966064, "learning_rate": 2.3891891891891895e-06, "loss": 0.554, "step": 221 }, { "epoch": 0.12002162551811137, "grad_norm": 0.39213013648986816, "learning_rate": 2.4000000000000003e-06, "loss": 0.5721, "step": 222 }, { "epoch": 0.12056226347089566, "grad_norm": 0.37426066398620605, "learning_rate": 2.410810810810811e-06, "loss": 0.5603, "step": 223 }, { "epoch": 0.12110290142367994, "grad_norm": 0.42405861616134644, "learning_rate": 2.4216216216216216e-06, "loss": 0.5629, "step": 224 }, { "epoch": 0.12164353937646423, "grad_norm": 0.39380067586898804, "learning_rate": 2.432432432432433e-06, "loss": 0.5963, "step": 225 }, { "epoch": 0.12218417732924851, "grad_norm": 0.3430948257446289, "learning_rate": 2.4432432432432433e-06, "loss": 0.5709, "step": 226 }, { "epoch": 0.1227248152820328, "grad_norm": 0.3845606744289398, "learning_rate": 2.454054054054054e-06, "loss": 0.5535, "step": 227 }, { "epoch": 0.12326545323481708, "grad_norm": 0.3496875762939453, "learning_rate": 2.464864864864865e-06, "loss": 0.5422, "step": 228 }, { "epoch": 0.12380609118760137, "grad_norm": 0.3898939788341522, "learning_rate": 2.475675675675676e-06, "loss": 0.574, "step": 229 }, { "epoch": 0.12434672914038565, "grad_norm": 0.4322075843811035, "learning_rate": 2.4864864864864867e-06, "loss": 0.5608, "step": 230 }, { "epoch": 0.12488736709316994, "grad_norm": 0.34907200932502747, "learning_rate": 2.4972972972972976e-06, "loss": 0.5549, "step": 231 }, { "epoch": 0.12542800504595422, "grad_norm": 0.3408522307872772, "learning_rate": 2.508108108108108e-06, "loss": 0.5739, "step": 232 }, { "epoch": 0.1259686429987385, "grad_norm": 0.3903674781322479, "learning_rate": 2.518918918918919e-06, "loss": 0.5778, "step": 233 }, { "epoch": 0.1265092809515228, "grad_norm": 0.3834710717201233, "learning_rate": 2.52972972972973e-06, "loss": 0.5541, "step": 234 }, { "epoch": 0.12704991890430709, "grad_norm": 0.39471179246902466, "learning_rate": 2.540540540540541e-06, "loss": 0.5499, "step": 235 }, { "epoch": 0.12759055685709136, "grad_norm": 0.3803953230381012, "learning_rate": 2.5513513513513515e-06, "loss": 0.5729, "step": 236 }, { "epoch": 0.12813119480987564, "grad_norm": 0.37784209847450256, "learning_rate": 2.5621621621621623e-06, "loss": 0.5713, "step": 237 }, { "epoch": 0.12867183276265995, "grad_norm": 0.43192943930625916, "learning_rate": 2.572972972972973e-06, "loss": 0.5796, "step": 238 }, { "epoch": 0.12921247071544423, "grad_norm": 0.35144469141960144, "learning_rate": 2.5837837837837844e-06, "loss": 0.5362, "step": 239 }, { "epoch": 0.1297531086682285, "grad_norm": 0.40244975686073303, "learning_rate": 2.594594594594595e-06, "loss": 0.5254, "step": 240 }, { "epoch": 0.13029374662101278, "grad_norm": 0.3917606770992279, "learning_rate": 2.6054054054054057e-06, "loss": 0.5728, "step": 241 }, { "epoch": 0.1308343845737971, "grad_norm": 0.423109233379364, "learning_rate": 2.616216216216216e-06, "loss": 0.5823, "step": 242 }, { "epoch": 0.13137502252658137, "grad_norm": 0.3996261954307556, "learning_rate": 2.627027027027027e-06, "loss": 0.5372, "step": 243 }, { "epoch": 0.13191566047936565, "grad_norm": 0.41690677404403687, "learning_rate": 2.6378378378378383e-06, "loss": 0.5515, "step": 244 }, { "epoch": 0.13245629843214993, "grad_norm": 0.3668651878833771, "learning_rate": 2.648648648648649e-06, "loss": 0.5446, "step": 245 }, { "epoch": 0.13299693638493423, "grad_norm": 0.33522850275039673, "learning_rate": 2.6594594594594596e-06, "loss": 0.5253, "step": 246 }, { "epoch": 0.1335375743377185, "grad_norm": 0.38769128918647766, "learning_rate": 2.6702702702702704e-06, "loss": 0.5796, "step": 247 }, { "epoch": 0.1340782122905028, "grad_norm": 0.4321063160896301, "learning_rate": 2.6810810810810813e-06, "loss": 0.545, "step": 248 }, { "epoch": 0.13461885024328707, "grad_norm": 0.35301586985588074, "learning_rate": 2.6918918918918926e-06, "loss": 0.5416, "step": 249 }, { "epoch": 0.13515948819607138, "grad_norm": 0.39626818895339966, "learning_rate": 2.702702702702703e-06, "loss": 0.5494, "step": 250 }, { "epoch": 0.13570012614885565, "grad_norm": 0.3496905565261841, "learning_rate": 2.713513513513514e-06, "loss": 0.5474, "step": 251 }, { "epoch": 0.13624076410163993, "grad_norm": 0.4058341383934021, "learning_rate": 2.7243243243243243e-06, "loss": 0.5699, "step": 252 }, { "epoch": 0.1367814020544242, "grad_norm": 0.39988845586776733, "learning_rate": 2.735135135135135e-06, "loss": 0.5506, "step": 253 }, { "epoch": 0.13732204000720852, "grad_norm": 0.3820221722126007, "learning_rate": 2.7459459459459464e-06, "loss": 0.5321, "step": 254 }, { "epoch": 0.1378626779599928, "grad_norm": 0.38915273547172546, "learning_rate": 2.7567567567567573e-06, "loss": 0.5377, "step": 255 }, { "epoch": 0.13840331591277708, "grad_norm": 0.35650572180747986, "learning_rate": 2.7675675675675677e-06, "loss": 0.5215, "step": 256 }, { "epoch": 0.13894395386556135, "grad_norm": 0.4189383387565613, "learning_rate": 2.7783783783783786e-06, "loss": 0.5357, "step": 257 }, { "epoch": 0.13948459181834566, "grad_norm": 0.367410808801651, "learning_rate": 2.7891891891891894e-06, "loss": 0.5716, "step": 258 }, { "epoch": 0.14002522977112994, "grad_norm": 0.4189624488353729, "learning_rate": 2.8000000000000003e-06, "loss": 0.5677, "step": 259 }, { "epoch": 0.14056586772391422, "grad_norm": 0.4024697542190552, "learning_rate": 2.810810810810811e-06, "loss": 0.5188, "step": 260 }, { "epoch": 0.1411065056766985, "grad_norm": 0.4311826229095459, "learning_rate": 2.821621621621622e-06, "loss": 0.5732, "step": 261 }, { "epoch": 0.1416471436294828, "grad_norm": 0.373671293258667, "learning_rate": 2.8324324324324324e-06, "loss": 0.5558, "step": 262 }, { "epoch": 0.14218778158226708, "grad_norm": 0.39763349294662476, "learning_rate": 2.8432432432432433e-06, "loss": 0.5529, "step": 263 }, { "epoch": 0.14272841953505136, "grad_norm": 0.3955563008785248, "learning_rate": 2.8540540540540546e-06, "loss": 0.5517, "step": 264 }, { "epoch": 0.14326905748783564, "grad_norm": 0.3701838552951813, "learning_rate": 2.8648648648648654e-06, "loss": 0.5422, "step": 265 }, { "epoch": 0.14380969544061994, "grad_norm": 0.38581880927085876, "learning_rate": 2.875675675675676e-06, "loss": 0.5332, "step": 266 }, { "epoch": 0.14435033339340422, "grad_norm": 0.3548599183559418, "learning_rate": 2.8864864864864867e-06, "loss": 0.5484, "step": 267 }, { "epoch": 0.1448909713461885, "grad_norm": 0.37688708305358887, "learning_rate": 2.897297297297297e-06, "loss": 0.5266, "step": 268 }, { "epoch": 0.14543160929897278, "grad_norm": 0.36959773302078247, "learning_rate": 2.9081081081081084e-06, "loss": 0.5356, "step": 269 }, { "epoch": 0.1459722472517571, "grad_norm": 0.3831232786178589, "learning_rate": 2.9189189189189193e-06, "loss": 0.5296, "step": 270 }, { "epoch": 0.14651288520454137, "grad_norm": 0.38201797008514404, "learning_rate": 2.92972972972973e-06, "loss": 0.5175, "step": 271 }, { "epoch": 0.14705352315732564, "grad_norm": 0.36365392804145813, "learning_rate": 2.9405405405405406e-06, "loss": 0.5827, "step": 272 }, { "epoch": 0.14759416111010992, "grad_norm": 0.3701610565185547, "learning_rate": 2.9513513513513514e-06, "loss": 0.554, "step": 273 }, { "epoch": 0.14813479906289423, "grad_norm": 0.4099448025226593, "learning_rate": 2.9621621621621627e-06, "loss": 0.5455, "step": 274 }, { "epoch": 0.1486754370156785, "grad_norm": 0.3831491470336914, "learning_rate": 2.9729729729729736e-06, "loss": 0.5252, "step": 275 }, { "epoch": 0.1492160749684628, "grad_norm": 0.3430909812450409, "learning_rate": 2.983783783783784e-06, "loss": 0.5578, "step": 276 }, { "epoch": 0.14975671292124706, "grad_norm": 0.39842531085014343, "learning_rate": 2.994594594594595e-06, "loss": 0.5418, "step": 277 }, { "epoch": 0.15029735087403134, "grad_norm": 0.37241166830062866, "learning_rate": 3.0054054054054053e-06, "loss": 0.5336, "step": 278 }, { "epoch": 0.15083798882681565, "grad_norm": 0.4128376543521881, "learning_rate": 3.016216216216216e-06, "loss": 0.5677, "step": 279 }, { "epoch": 0.15137862677959993, "grad_norm": 0.42050155997276306, "learning_rate": 3.0270270270270274e-06, "loss": 0.5476, "step": 280 }, { "epoch": 0.1519192647323842, "grad_norm": 0.40356847643852234, "learning_rate": 3.0378378378378383e-06, "loss": 0.5339, "step": 281 }, { "epoch": 0.15245990268516849, "grad_norm": 0.38171207904815674, "learning_rate": 3.0486486486486487e-06, "loss": 0.5655, "step": 282 }, { "epoch": 0.1530005406379528, "grad_norm": 0.34982213377952576, "learning_rate": 3.0594594594594596e-06, "loss": 0.557, "step": 283 }, { "epoch": 0.15354117859073707, "grad_norm": 0.42924508452415466, "learning_rate": 3.0702702702702704e-06, "loss": 0.5474, "step": 284 }, { "epoch": 0.15408181654352135, "grad_norm": 0.3678603172302246, "learning_rate": 3.0810810810810817e-06, "loss": 0.5421, "step": 285 }, { "epoch": 0.15462245449630563, "grad_norm": 0.3565238118171692, "learning_rate": 3.091891891891892e-06, "loss": 0.5412, "step": 286 }, { "epoch": 0.15516309244908993, "grad_norm": 0.36498183012008667, "learning_rate": 3.102702702702703e-06, "loss": 0.5455, "step": 287 }, { "epoch": 0.1557037304018742, "grad_norm": 0.38334113359451294, "learning_rate": 3.1135135135135134e-06, "loss": 0.5392, "step": 288 }, { "epoch": 0.1562443683546585, "grad_norm": 0.36851766705513, "learning_rate": 3.1243243243243243e-06, "loss": 0.5403, "step": 289 }, { "epoch": 0.15678500630744277, "grad_norm": 0.384204626083374, "learning_rate": 3.1351351351351356e-06, "loss": 0.5414, "step": 290 }, { "epoch": 0.15732564426022708, "grad_norm": 0.35691317915916443, "learning_rate": 3.1459459459459464e-06, "loss": 0.5319, "step": 291 }, { "epoch": 0.15786628221301136, "grad_norm": 0.41789743304252625, "learning_rate": 3.156756756756757e-06, "loss": 0.5399, "step": 292 }, { "epoch": 0.15840692016579563, "grad_norm": 0.370802104473114, "learning_rate": 3.1675675675675677e-06, "loss": 0.5379, "step": 293 }, { "epoch": 0.1589475581185799, "grad_norm": 0.42494484782218933, "learning_rate": 3.1783783783783786e-06, "loss": 0.5087, "step": 294 }, { "epoch": 0.15948819607136422, "grad_norm": 0.38942399621009827, "learning_rate": 3.1891891891891894e-06, "loss": 0.549, "step": 295 }, { "epoch": 0.1600288340241485, "grad_norm": 0.4098314344882965, "learning_rate": 3.2000000000000003e-06, "loss": 0.5276, "step": 296 }, { "epoch": 0.16056947197693278, "grad_norm": 0.4091181457042694, "learning_rate": 3.210810810810811e-06, "loss": 0.5031, "step": 297 }, { "epoch": 0.16111010992971705, "grad_norm": 0.4209524691104889, "learning_rate": 3.2216216216216216e-06, "loss": 0.5755, "step": 298 }, { "epoch": 0.16165074788250136, "grad_norm": 0.46801844239234924, "learning_rate": 3.2324324324324324e-06, "loss": 0.5484, "step": 299 }, { "epoch": 0.16219138583528564, "grad_norm": 0.3911043703556061, "learning_rate": 3.2432432432432437e-06, "loss": 0.5408, "step": 300 }, { "epoch": 0.16273202378806992, "grad_norm": 0.3921964466571808, "learning_rate": 3.2540540540540546e-06, "loss": 0.5569, "step": 301 }, { "epoch": 0.1632726617408542, "grad_norm": 0.4882521331310272, "learning_rate": 3.264864864864865e-06, "loss": 0.5486, "step": 302 }, { "epoch": 0.1638132996936385, "grad_norm": 0.511541485786438, "learning_rate": 3.275675675675676e-06, "loss": 0.556, "step": 303 }, { "epoch": 0.16435393764642278, "grad_norm": 0.3747992217540741, "learning_rate": 3.2864864864864867e-06, "loss": 0.5433, "step": 304 }, { "epoch": 0.16489457559920706, "grad_norm": 0.4771915078163147, "learning_rate": 3.2972972972972976e-06, "loss": 0.5317, "step": 305 }, { "epoch": 0.16543521355199134, "grad_norm": 0.4885832965373993, "learning_rate": 3.3081081081081084e-06, "loss": 0.5573, "step": 306 }, { "epoch": 0.16597585150477565, "grad_norm": 0.4261198341846466, "learning_rate": 3.3189189189189193e-06, "loss": 0.5403, "step": 307 }, { "epoch": 0.16651648945755992, "grad_norm": 0.45114848017692566, "learning_rate": 3.3297297297297297e-06, "loss": 0.5356, "step": 308 }, { "epoch": 0.1670571274103442, "grad_norm": 0.4223145842552185, "learning_rate": 3.3405405405405406e-06, "loss": 0.5298, "step": 309 }, { "epoch": 0.16759776536312848, "grad_norm": 0.4394230842590332, "learning_rate": 3.351351351351352e-06, "loss": 0.5417, "step": 310 }, { "epoch": 0.1681384033159128, "grad_norm": 0.46592485904693604, "learning_rate": 3.3621621621621627e-06, "loss": 0.5412, "step": 311 }, { "epoch": 0.16867904126869707, "grad_norm": 0.42755240201950073, "learning_rate": 3.372972972972973e-06, "loss": 0.5346, "step": 312 }, { "epoch": 0.16921967922148134, "grad_norm": 0.37076422572135925, "learning_rate": 3.383783783783784e-06, "loss": 0.5483, "step": 313 }, { "epoch": 0.16976031717426562, "grad_norm": 0.4419684112071991, "learning_rate": 3.3945945945945944e-06, "loss": 0.5465, "step": 314 }, { "epoch": 0.17030095512704993, "grad_norm": 0.4431931972503662, "learning_rate": 3.4054054054054057e-06, "loss": 0.5445, "step": 315 }, { "epoch": 0.1708415930798342, "grad_norm": 0.43325522541999817, "learning_rate": 3.4162162162162166e-06, "loss": 0.5653, "step": 316 }, { "epoch": 0.1713822310326185, "grad_norm": 0.37014782428741455, "learning_rate": 3.4270270270270274e-06, "loss": 0.5208, "step": 317 }, { "epoch": 0.17192286898540277, "grad_norm": 0.43956634402275085, "learning_rate": 3.437837837837838e-06, "loss": 0.5343, "step": 318 }, { "epoch": 0.17246350693818707, "grad_norm": 0.38273492455482483, "learning_rate": 3.4486486486486487e-06, "loss": 0.5368, "step": 319 }, { "epoch": 0.17300414489097135, "grad_norm": 0.3921017348766327, "learning_rate": 3.45945945945946e-06, "loss": 0.5535, "step": 320 }, { "epoch": 0.17354478284375563, "grad_norm": 0.3745984137058258, "learning_rate": 3.470270270270271e-06, "loss": 0.5361, "step": 321 }, { "epoch": 0.1740854207965399, "grad_norm": 0.40335318446159363, "learning_rate": 3.4810810810810813e-06, "loss": 0.5894, "step": 322 }, { "epoch": 0.17462605874932421, "grad_norm": 0.35682252049446106, "learning_rate": 3.491891891891892e-06, "loss": 0.5241, "step": 323 }, { "epoch": 0.1751666967021085, "grad_norm": 0.37713858485221863, "learning_rate": 3.5027027027027026e-06, "loss": 0.5321, "step": 324 }, { "epoch": 0.17570733465489277, "grad_norm": 0.449979692697525, "learning_rate": 3.513513513513514e-06, "loss": 0.531, "step": 325 }, { "epoch": 0.17624797260767705, "grad_norm": 0.40404993295669556, "learning_rate": 3.5243243243243247e-06, "loss": 0.5409, "step": 326 }, { "epoch": 0.17678861056046136, "grad_norm": 0.36095544695854187, "learning_rate": 3.5351351351351355e-06, "loss": 0.5302, "step": 327 }, { "epoch": 0.17732924851324564, "grad_norm": 0.4284367561340332, "learning_rate": 3.545945945945946e-06, "loss": 0.5538, "step": 328 }, { "epoch": 0.17786988646602991, "grad_norm": 0.40965935587882996, "learning_rate": 3.556756756756757e-06, "loss": 0.518, "step": 329 }, { "epoch": 0.1784105244188142, "grad_norm": 0.39643898606300354, "learning_rate": 3.567567567567568e-06, "loss": 0.5696, "step": 330 }, { "epoch": 0.1789511623715985, "grad_norm": 0.4323926270008087, "learning_rate": 3.5783783783783785e-06, "loss": 0.523, "step": 331 }, { "epoch": 0.17949180032438278, "grad_norm": 0.45723623037338257, "learning_rate": 3.5891891891891894e-06, "loss": 0.5406, "step": 332 }, { "epoch": 0.18003243827716706, "grad_norm": 0.4142730236053467, "learning_rate": 3.6000000000000003e-06, "loss": 0.5098, "step": 333 }, { "epoch": 0.18057307622995133, "grad_norm": 0.4000495672225952, "learning_rate": 3.6108108108108107e-06, "loss": 0.5312, "step": 334 }, { "epoch": 0.18111371418273564, "grad_norm": 0.391166090965271, "learning_rate": 3.621621621621622e-06, "loss": 0.5144, "step": 335 }, { "epoch": 0.18165435213551992, "grad_norm": 0.4517064392566681, "learning_rate": 3.632432432432433e-06, "loss": 0.5252, "step": 336 }, { "epoch": 0.1821949900883042, "grad_norm": 0.3642129898071289, "learning_rate": 3.6432432432432437e-06, "loss": 0.5576, "step": 337 }, { "epoch": 0.18273562804108848, "grad_norm": 0.3912559449672699, "learning_rate": 3.654054054054054e-06, "loss": 0.4994, "step": 338 }, { "epoch": 0.18327626599387278, "grad_norm": 0.39499011635780334, "learning_rate": 3.664864864864865e-06, "loss": 0.5234, "step": 339 }, { "epoch": 0.18381690394665706, "grad_norm": 0.44011837244033813, "learning_rate": 3.6756756756756763e-06, "loss": 0.536, "step": 340 }, { "epoch": 0.18435754189944134, "grad_norm": 0.4400962293148041, "learning_rate": 3.6864864864864867e-06, "loss": 0.5515, "step": 341 }, { "epoch": 0.18489817985222562, "grad_norm": 0.37328705191612244, "learning_rate": 3.6972972972972975e-06, "loss": 0.5448, "step": 342 }, { "epoch": 0.1854388178050099, "grad_norm": 0.4239843785762787, "learning_rate": 3.7081081081081084e-06, "loss": 0.5415, "step": 343 }, { "epoch": 0.1859794557577942, "grad_norm": 0.37935659289360046, "learning_rate": 3.718918918918919e-06, "loss": 0.5329, "step": 344 }, { "epoch": 0.18652009371057848, "grad_norm": 0.4207356870174408, "learning_rate": 3.72972972972973e-06, "loss": 0.5708, "step": 345 }, { "epoch": 0.18706073166336276, "grad_norm": 0.39368534088134766, "learning_rate": 3.740540540540541e-06, "loss": 0.5069, "step": 346 }, { "epoch": 0.18760136961614704, "grad_norm": 0.4316987693309784, "learning_rate": 3.751351351351352e-06, "loss": 0.5682, "step": 347 }, { "epoch": 0.18814200756893135, "grad_norm": 0.4061681032180786, "learning_rate": 3.7621621621621623e-06, "loss": 0.5243, "step": 348 }, { "epoch": 0.18868264552171563, "grad_norm": 0.41535401344299316, "learning_rate": 3.772972972972973e-06, "loss": 0.5242, "step": 349 }, { "epoch": 0.1892232834744999, "grad_norm": 0.4037801921367645, "learning_rate": 3.7837837837837844e-06, "loss": 0.5053, "step": 350 }, { "epoch": 0.18976392142728418, "grad_norm": 0.38925549387931824, "learning_rate": 3.794594594594595e-06, "loss": 0.5159, "step": 351 }, { "epoch": 0.1903045593800685, "grad_norm": 0.42589956521987915, "learning_rate": 3.8054054054054057e-06, "loss": 0.5292, "step": 352 }, { "epoch": 0.19084519733285277, "grad_norm": 0.4325747787952423, "learning_rate": 3.8162162162162165e-06, "loss": 0.5386, "step": 353 }, { "epoch": 0.19138583528563705, "grad_norm": 0.4532448649406433, "learning_rate": 3.827027027027027e-06, "loss": 0.4949, "step": 354 }, { "epoch": 0.19192647323842132, "grad_norm": 0.4209156036376953, "learning_rate": 3.837837837837838e-06, "loss": 0.568, "step": 355 }, { "epoch": 0.19246711119120563, "grad_norm": 0.4481404423713684, "learning_rate": 3.848648648648649e-06, "loss": 0.5192, "step": 356 }, { "epoch": 0.1930077491439899, "grad_norm": 0.46470949053764343, "learning_rate": 3.85945945945946e-06, "loss": 0.5272, "step": 357 }, { "epoch": 0.1935483870967742, "grad_norm": 0.39657002687454224, "learning_rate": 3.87027027027027e-06, "loss": 0.5328, "step": 358 }, { "epoch": 0.19408902504955847, "grad_norm": 0.4064314365386963, "learning_rate": 3.881081081081081e-06, "loss": 0.5537, "step": 359 }, { "epoch": 0.19462966300234277, "grad_norm": 0.4129345417022705, "learning_rate": 3.891891891891892e-06, "loss": 0.5393, "step": 360 }, { "epoch": 0.19517030095512705, "grad_norm": 0.4236624538898468, "learning_rate": 3.902702702702703e-06, "loss": 0.5184, "step": 361 }, { "epoch": 0.19571093890791133, "grad_norm": 0.4176543354988098, "learning_rate": 3.913513513513514e-06, "loss": 0.5385, "step": 362 }, { "epoch": 0.1962515768606956, "grad_norm": 0.380862832069397, "learning_rate": 3.924324324324324e-06, "loss": 0.5184, "step": 363 }, { "epoch": 0.19679221481347992, "grad_norm": 0.4342859983444214, "learning_rate": 3.9351351351351355e-06, "loss": 0.5366, "step": 364 }, { "epoch": 0.1973328527662642, "grad_norm": 0.42140740156173706, "learning_rate": 3.945945945945947e-06, "loss": 0.5391, "step": 365 }, { "epoch": 0.19787349071904847, "grad_norm": 0.4249398410320282, "learning_rate": 3.956756756756757e-06, "loss": 0.5344, "step": 366 }, { "epoch": 0.19841412867183275, "grad_norm": 0.41181671619415283, "learning_rate": 3.967567567567568e-06, "loss": 0.5461, "step": 367 }, { "epoch": 0.19895476662461706, "grad_norm": 0.4575495719909668, "learning_rate": 3.978378378378379e-06, "loss": 0.5401, "step": 368 }, { "epoch": 0.19949540457740134, "grad_norm": 0.40454474091529846, "learning_rate": 3.989189189189189e-06, "loss": 0.5209, "step": 369 }, { "epoch": 0.20003604253018561, "grad_norm": 0.3996911346912384, "learning_rate": 4.000000000000001e-06, "loss": 0.5215, "step": 370 }, { "epoch": 0.2005766804829699, "grad_norm": 0.3987369239330292, "learning_rate": 4.010810810810811e-06, "loss": 0.5282, "step": 371 }, { "epoch": 0.2011173184357542, "grad_norm": 0.426537424325943, "learning_rate": 4.0216216216216215e-06, "loss": 0.5152, "step": 372 }, { "epoch": 0.20165795638853848, "grad_norm": 0.4209159314632416, "learning_rate": 4.032432432432433e-06, "loss": 0.4976, "step": 373 }, { "epoch": 0.20219859434132276, "grad_norm": 0.39458101987838745, "learning_rate": 4.043243243243243e-06, "loss": 0.5333, "step": 374 }, { "epoch": 0.20273923229410704, "grad_norm": 0.4080427587032318, "learning_rate": 4.0540540540540545e-06, "loss": 0.5364, "step": 375 }, { "epoch": 0.20327987024689134, "grad_norm": 0.41344642639160156, "learning_rate": 4.064864864864865e-06, "loss": 0.5157, "step": 376 }, { "epoch": 0.20382050819967562, "grad_norm": 0.4057735502719879, "learning_rate": 4.075675675675676e-06, "loss": 0.546, "step": 377 }, { "epoch": 0.2043611461524599, "grad_norm": 0.41593965888023376, "learning_rate": 4.086486486486487e-06, "loss": 0.5241, "step": 378 }, { "epoch": 0.20490178410524418, "grad_norm": 0.4008232653141022, "learning_rate": 4.097297297297297e-06, "loss": 0.5214, "step": 379 }, { "epoch": 0.20544242205802848, "grad_norm": 0.4257887303829193, "learning_rate": 4.108108108108108e-06, "loss": 0.558, "step": 380 }, { "epoch": 0.20598306001081276, "grad_norm": 0.3643846809864044, "learning_rate": 4.11891891891892e-06, "loss": 0.5604, "step": 381 }, { "epoch": 0.20652369796359704, "grad_norm": 0.4190754294395447, "learning_rate": 4.12972972972973e-06, "loss": 0.4863, "step": 382 }, { "epoch": 0.20706433591638132, "grad_norm": 0.3812675476074219, "learning_rate": 4.1405405405405405e-06, "loss": 0.528, "step": 383 }, { "epoch": 0.20760497386916563, "grad_norm": 0.36235401034355164, "learning_rate": 4.151351351351352e-06, "loss": 0.5291, "step": 384 }, { "epoch": 0.2081456118219499, "grad_norm": 0.4425322115421295, "learning_rate": 4.162162162162163e-06, "loss": 0.525, "step": 385 }, { "epoch": 0.20868624977473418, "grad_norm": 0.4001314342021942, "learning_rate": 4.1729729729729735e-06, "loss": 0.5457, "step": 386 }, { "epoch": 0.20922688772751846, "grad_norm": 0.36931946873664856, "learning_rate": 4.183783783783784e-06, "loss": 0.5271, "step": 387 }, { "epoch": 0.20976752568030277, "grad_norm": 0.4713948965072632, "learning_rate": 4.194594594594595e-06, "loss": 0.5195, "step": 388 }, { "epoch": 0.21030816363308705, "grad_norm": 0.38855504989624023, "learning_rate": 4.205405405405406e-06, "loss": 0.565, "step": 389 }, { "epoch": 0.21084880158587133, "grad_norm": 0.4155072867870331, "learning_rate": 4.216216216216217e-06, "loss": 0.5278, "step": 390 }, { "epoch": 0.2113894395386556, "grad_norm": 0.42699480056762695, "learning_rate": 4.227027027027027e-06, "loss": 0.5426, "step": 391 }, { "epoch": 0.2119300774914399, "grad_norm": 0.4101499915122986, "learning_rate": 4.237837837837838e-06, "loss": 0.5184, "step": 392 }, { "epoch": 0.2124707154442242, "grad_norm": 0.41484978795051575, "learning_rate": 4.248648648648649e-06, "loss": 0.5373, "step": 393 }, { "epoch": 0.21301135339700847, "grad_norm": 0.36816850304603577, "learning_rate": 4.2594594594594595e-06, "loss": 0.5512, "step": 394 }, { "epoch": 0.21355199134979275, "grad_norm": 0.4117318391799927, "learning_rate": 4.270270270270271e-06, "loss": 0.5204, "step": 395 }, { "epoch": 0.21409262930257705, "grad_norm": 0.38216206431388855, "learning_rate": 4.281081081081081e-06, "loss": 0.5353, "step": 396 }, { "epoch": 0.21463326725536133, "grad_norm": 0.3792795240879059, "learning_rate": 4.2918918918918925e-06, "loss": 0.5727, "step": 397 }, { "epoch": 0.2151739052081456, "grad_norm": 0.41061505675315857, "learning_rate": 4.302702702702703e-06, "loss": 0.5343, "step": 398 }, { "epoch": 0.2157145431609299, "grad_norm": 0.39943134784698486, "learning_rate": 4.313513513513513e-06, "loss": 0.4986, "step": 399 }, { "epoch": 0.2162551811137142, "grad_norm": 0.41517776250839233, "learning_rate": 4.324324324324325e-06, "loss": 0.5402, "step": 400 }, { "epoch": 0.21679581906649847, "grad_norm": 0.45214927196502686, "learning_rate": 4.335135135135136e-06, "loss": 0.5219, "step": 401 }, { "epoch": 0.21733645701928275, "grad_norm": 0.36262819170951843, "learning_rate": 4.345945945945946e-06, "loss": 0.5203, "step": 402 }, { "epoch": 0.21787709497206703, "grad_norm": 0.3521535396575928, "learning_rate": 4.356756756756757e-06, "loss": 0.4823, "step": 403 }, { "epoch": 0.21841773292485134, "grad_norm": 0.4195312559604645, "learning_rate": 4.367567567567568e-06, "loss": 0.5261, "step": 404 }, { "epoch": 0.21895837087763562, "grad_norm": 0.42293110489845276, "learning_rate": 4.378378378378379e-06, "loss": 0.5414, "step": 405 }, { "epoch": 0.2194990088304199, "grad_norm": 0.38924261927604675, "learning_rate": 4.38918918918919e-06, "loss": 0.5251, "step": 406 }, { "epoch": 0.22003964678320417, "grad_norm": 0.42991113662719727, "learning_rate": 4.4e-06, "loss": 0.5396, "step": 407 }, { "epoch": 0.22058028473598848, "grad_norm": 0.37058499455451965, "learning_rate": 4.4108108108108115e-06, "loss": 0.5398, "step": 408 }, { "epoch": 0.22112092268877276, "grad_norm": 0.3862038254737854, "learning_rate": 4.421621621621622e-06, "loss": 0.5369, "step": 409 }, { "epoch": 0.22166156064155704, "grad_norm": 0.39511534571647644, "learning_rate": 4.432432432432433e-06, "loss": 0.5137, "step": 410 }, { "epoch": 0.22220219859434132, "grad_norm": 0.4125954210758209, "learning_rate": 4.443243243243244e-06, "loss": 0.4952, "step": 411 }, { "epoch": 0.2227428365471256, "grad_norm": 0.3931781053543091, "learning_rate": 4.454054054054054e-06, "loss": 0.4975, "step": 412 }, { "epoch": 0.2232834744999099, "grad_norm": 0.41780611872673035, "learning_rate": 4.464864864864865e-06, "loss": 0.5341, "step": 413 }, { "epoch": 0.22382411245269418, "grad_norm": 0.4228370487689972, "learning_rate": 4.475675675675676e-06, "loss": 0.5232, "step": 414 }, { "epoch": 0.22436475040547846, "grad_norm": 0.4475858509540558, "learning_rate": 4.486486486486487e-06, "loss": 0.5384, "step": 415 }, { "epoch": 0.22490538835826274, "grad_norm": 0.41531985998153687, "learning_rate": 4.4972972972972975e-06, "loss": 0.5101, "step": 416 }, { "epoch": 0.22544602631104704, "grad_norm": 0.35512199997901917, "learning_rate": 4.508108108108109e-06, "loss": 0.5023, "step": 417 }, { "epoch": 0.22598666426383132, "grad_norm": 0.40374282002449036, "learning_rate": 4.518918918918919e-06, "loss": 0.5207, "step": 418 }, { "epoch": 0.2265273022166156, "grad_norm": 0.4103836119174957, "learning_rate": 4.52972972972973e-06, "loss": 0.5279, "step": 419 }, { "epoch": 0.22706794016939988, "grad_norm": 0.4135481119155884, "learning_rate": 4.540540540540541e-06, "loss": 0.5147, "step": 420 }, { "epoch": 0.22760857812218419, "grad_norm": 0.39813584089279175, "learning_rate": 4.551351351351352e-06, "loss": 0.5228, "step": 421 }, { "epoch": 0.22814921607496846, "grad_norm": 0.38910800218582153, "learning_rate": 4.562162162162163e-06, "loss": 0.5271, "step": 422 }, { "epoch": 0.22868985402775274, "grad_norm": 0.45979151129722595, "learning_rate": 4.572972972972973e-06, "loss": 0.5279, "step": 423 }, { "epoch": 0.22923049198053702, "grad_norm": 0.3684897720813751, "learning_rate": 4.583783783783784e-06, "loss": 0.5373, "step": 424 }, { "epoch": 0.22977112993332133, "grad_norm": 0.4329938292503357, "learning_rate": 4.594594594594596e-06, "loss": 0.5363, "step": 425 }, { "epoch": 0.2303117678861056, "grad_norm": 0.37203264236450195, "learning_rate": 4.605405405405406e-06, "loss": 0.5156, "step": 426 }, { "epoch": 0.23085240583888988, "grad_norm": 0.39848098158836365, "learning_rate": 4.6162162162162165e-06, "loss": 0.5154, "step": 427 }, { "epoch": 0.23139304379167416, "grad_norm": 0.41350409388542175, "learning_rate": 4.627027027027027e-06, "loss": 0.5437, "step": 428 }, { "epoch": 0.23193368174445847, "grad_norm": 0.4166233539581299, "learning_rate": 4.637837837837838e-06, "loss": 0.4931, "step": 429 }, { "epoch": 0.23247431969724275, "grad_norm": 0.42275962233543396, "learning_rate": 4.6486486486486495e-06, "loss": 0.518, "step": 430 }, { "epoch": 0.23301495765002703, "grad_norm": 0.4211007356643677, "learning_rate": 4.65945945945946e-06, "loss": 0.5162, "step": 431 }, { "epoch": 0.2335555956028113, "grad_norm": 0.5026114583015442, "learning_rate": 4.67027027027027e-06, "loss": 0.5065, "step": 432 }, { "epoch": 0.2340962335555956, "grad_norm": 0.3831497132778168, "learning_rate": 4.681081081081082e-06, "loss": 0.534, "step": 433 }, { "epoch": 0.2346368715083799, "grad_norm": 0.39763692021369934, "learning_rate": 4.691891891891892e-06, "loss": 0.5248, "step": 434 }, { "epoch": 0.23517750946116417, "grad_norm": 0.4102565050125122, "learning_rate": 4.702702702702703e-06, "loss": 0.5093, "step": 435 }, { "epoch": 0.23571814741394845, "grad_norm": 0.4028921127319336, "learning_rate": 4.713513513513514e-06, "loss": 0.508, "step": 436 }, { "epoch": 0.23625878536673275, "grad_norm": 0.42078977823257446, "learning_rate": 4.724324324324325e-06, "loss": 0.5275, "step": 437 }, { "epoch": 0.23679942331951703, "grad_norm": 0.38161078095436096, "learning_rate": 4.7351351351351355e-06, "loss": 0.5021, "step": 438 }, { "epoch": 0.2373400612723013, "grad_norm": 0.3946899473667145, "learning_rate": 4.745945945945946e-06, "loss": 0.5612, "step": 439 }, { "epoch": 0.2378806992250856, "grad_norm": 0.36809343099594116, "learning_rate": 4.756756756756757e-06, "loss": 0.5318, "step": 440 }, { "epoch": 0.2384213371778699, "grad_norm": 0.45496001839637756, "learning_rate": 4.7675675675675685e-06, "loss": 0.543, "step": 441 }, { "epoch": 0.23896197513065418, "grad_norm": 0.409921258687973, "learning_rate": 4.778378378378379e-06, "loss": 0.5507, "step": 442 }, { "epoch": 0.23950261308343845, "grad_norm": 0.36140987277030945, "learning_rate": 4.789189189189189e-06, "loss": 0.4944, "step": 443 }, { "epoch": 0.24004325103622273, "grad_norm": 0.4025464951992035, "learning_rate": 4.800000000000001e-06, "loss": 0.5327, "step": 444 }, { "epoch": 0.24058388898900704, "grad_norm": 0.43016675114631653, "learning_rate": 4.810810810810811e-06, "loss": 0.5118, "step": 445 }, { "epoch": 0.24112452694179132, "grad_norm": 0.4288389980792999, "learning_rate": 4.821621621621622e-06, "loss": 0.5224, "step": 446 }, { "epoch": 0.2416651648945756, "grad_norm": 0.3723931610584259, "learning_rate": 4.832432432432433e-06, "loss": 0.5475, "step": 447 }, { "epoch": 0.24220580284735987, "grad_norm": 0.4410860538482666, "learning_rate": 4.843243243243243e-06, "loss": 0.5215, "step": 448 }, { "epoch": 0.24274644080014418, "grad_norm": 0.3706596791744232, "learning_rate": 4.8540540540540545e-06, "loss": 0.5163, "step": 449 }, { "epoch": 0.24328707875292846, "grad_norm": 0.39684516191482544, "learning_rate": 4.864864864864866e-06, "loss": 0.5322, "step": 450 }, { "epoch": 0.24382771670571274, "grad_norm": 0.36360347270965576, "learning_rate": 4.875675675675676e-06, "loss": 0.5238, "step": 451 }, { "epoch": 0.24436835465849702, "grad_norm": 0.39696750044822693, "learning_rate": 4.886486486486487e-06, "loss": 0.5106, "step": 452 }, { "epoch": 0.24490899261128132, "grad_norm": 0.3955710828304291, "learning_rate": 4.897297297297298e-06, "loss": 0.5243, "step": 453 }, { "epoch": 0.2454496305640656, "grad_norm": 0.44684091210365295, "learning_rate": 4.908108108108108e-06, "loss": 0.5061, "step": 454 }, { "epoch": 0.24599026851684988, "grad_norm": 0.3783811032772064, "learning_rate": 4.91891891891892e-06, "loss": 0.5005, "step": 455 }, { "epoch": 0.24653090646963416, "grad_norm": 0.37094858288764954, "learning_rate": 4.92972972972973e-06, "loss": 0.5342, "step": 456 }, { "epoch": 0.24707154442241847, "grad_norm": 0.4043397307395935, "learning_rate": 4.940540540540541e-06, "loss": 0.5345, "step": 457 }, { "epoch": 0.24761218237520274, "grad_norm": 0.3585631549358368, "learning_rate": 4.951351351351352e-06, "loss": 0.5163, "step": 458 }, { "epoch": 0.24815282032798702, "grad_norm": 0.39285627007484436, "learning_rate": 4.962162162162162e-06, "loss": 0.5038, "step": 459 }, { "epoch": 0.2486934582807713, "grad_norm": 0.4428061842918396, "learning_rate": 4.9729729729729735e-06, "loss": 0.523, "step": 460 }, { "epoch": 0.2492340962335556, "grad_norm": 0.42921119928359985, "learning_rate": 4.983783783783785e-06, "loss": 0.5051, "step": 461 }, { "epoch": 0.2497747341863399, "grad_norm": 0.40692782402038574, "learning_rate": 4.994594594594595e-06, "loss": 0.5137, "step": 462 }, { "epoch": 0.2503153721391242, "grad_norm": 0.44367969036102295, "learning_rate": 5.005405405405406e-06, "loss": 0.5026, "step": 463 }, { "epoch": 0.25085601009190844, "grad_norm": 0.42934948205947876, "learning_rate": 5.016216216216216e-06, "loss": 0.5215, "step": 464 }, { "epoch": 0.25139664804469275, "grad_norm": 0.4149029552936554, "learning_rate": 5.027027027027027e-06, "loss": 0.5419, "step": 465 }, { "epoch": 0.251937285997477, "grad_norm": 0.38296329975128174, "learning_rate": 5.037837837837838e-06, "loss": 0.4804, "step": 466 }, { "epoch": 0.2524779239502613, "grad_norm": 0.3958582580089569, "learning_rate": 5.048648648648648e-06, "loss": 0.4778, "step": 467 }, { "epoch": 0.2530185619030456, "grad_norm": 0.4397348165512085, "learning_rate": 5.05945945945946e-06, "loss": 0.5233, "step": 468 }, { "epoch": 0.25355919985582986, "grad_norm": 0.3755987286567688, "learning_rate": 5.070270270270271e-06, "loss": 0.5157, "step": 469 }, { "epoch": 0.25409983780861417, "grad_norm": 0.3970812261104584, "learning_rate": 5.081081081081082e-06, "loss": 0.5501, "step": 470 }, { "epoch": 0.2546404757613985, "grad_norm": 0.4246158003807068, "learning_rate": 5.0918918918918925e-06, "loss": 0.5231, "step": 471 }, { "epoch": 0.25518111371418273, "grad_norm": 0.4050613343715668, "learning_rate": 5.102702702702703e-06, "loss": 0.5035, "step": 472 }, { "epoch": 0.25572175166696703, "grad_norm": 0.40757644176483154, "learning_rate": 5.113513513513514e-06, "loss": 0.5176, "step": 473 }, { "epoch": 0.2562623896197513, "grad_norm": 0.4407704174518585, "learning_rate": 5.124324324324325e-06, "loss": 0.4858, "step": 474 }, { "epoch": 0.2568030275725356, "grad_norm": 0.418594092130661, "learning_rate": 5.135135135135135e-06, "loss": 0.5224, "step": 475 }, { "epoch": 0.2573436655253199, "grad_norm": 0.3590792119503021, "learning_rate": 5.145945945945946e-06, "loss": 0.4955, "step": 476 }, { "epoch": 0.25788430347810415, "grad_norm": 0.42272108793258667, "learning_rate": 5.156756756756757e-06, "loss": 0.5289, "step": 477 }, { "epoch": 0.25842494143088846, "grad_norm": 0.41260215640068054, "learning_rate": 5.167567567567569e-06, "loss": 0.5104, "step": 478 }, { "epoch": 0.25896557938367276, "grad_norm": 0.4283524751663208, "learning_rate": 5.178378378378379e-06, "loss": 0.5138, "step": 479 }, { "epoch": 0.259506217336457, "grad_norm": 0.44842174649238586, "learning_rate": 5.18918918918919e-06, "loss": 0.5199, "step": 480 }, { "epoch": 0.2600468552892413, "grad_norm": 0.41024303436279297, "learning_rate": 5.2e-06, "loss": 0.5197, "step": 481 }, { "epoch": 0.26058749324202557, "grad_norm": 0.41243913769721985, "learning_rate": 5.2108108108108115e-06, "loss": 0.5571, "step": 482 }, { "epoch": 0.2611281311948099, "grad_norm": 0.48198091983795166, "learning_rate": 5.221621621621622e-06, "loss": 0.5142, "step": 483 }, { "epoch": 0.2616687691475942, "grad_norm": 0.43659496307373047, "learning_rate": 5.232432432432432e-06, "loss": 0.5217, "step": 484 }, { "epoch": 0.26220940710037843, "grad_norm": 0.4106965959072113, "learning_rate": 5.243243243243244e-06, "loss": 0.5165, "step": 485 }, { "epoch": 0.26275004505316274, "grad_norm": 0.3779434561729431, "learning_rate": 5.254054054054054e-06, "loss": 0.542, "step": 486 }, { "epoch": 0.263290683005947, "grad_norm": 0.4298580288887024, "learning_rate": 5.2648648648648645e-06, "loss": 0.5366, "step": 487 }, { "epoch": 0.2638313209587313, "grad_norm": 0.41386786103248596, "learning_rate": 5.275675675675677e-06, "loss": 0.5227, "step": 488 }, { "epoch": 0.2643719589115156, "grad_norm": 0.36224329471588135, "learning_rate": 5.286486486486487e-06, "loss": 0.4809, "step": 489 }, { "epoch": 0.26491259686429985, "grad_norm": 0.41032809019088745, "learning_rate": 5.297297297297298e-06, "loss": 0.4871, "step": 490 }, { "epoch": 0.26545323481708416, "grad_norm": 0.4151748716831207, "learning_rate": 5.308108108108109e-06, "loss": 0.5274, "step": 491 }, { "epoch": 0.26599387276986847, "grad_norm": 0.45982950925827026, "learning_rate": 5.318918918918919e-06, "loss": 0.5358, "step": 492 }, { "epoch": 0.2665345107226527, "grad_norm": 0.43396931886672974, "learning_rate": 5.3297297297297305e-06, "loss": 0.5012, "step": 493 }, { "epoch": 0.267075148675437, "grad_norm": 0.4250200688838959, "learning_rate": 5.340540540540541e-06, "loss": 0.4813, "step": 494 }, { "epoch": 0.2676157866282213, "grad_norm": 0.43627679347991943, "learning_rate": 5.351351351351351e-06, "loss": 0.5231, "step": 495 }, { "epoch": 0.2681564245810056, "grad_norm": 0.4634500741958618, "learning_rate": 5.362162162162163e-06, "loss": 0.539, "step": 496 }, { "epoch": 0.2686970625337899, "grad_norm": 0.39024657011032104, "learning_rate": 5.372972972972973e-06, "loss": 0.5073, "step": 497 }, { "epoch": 0.26923770048657414, "grad_norm": 0.5674286484718323, "learning_rate": 5.383783783783785e-06, "loss": 0.5221, "step": 498 }, { "epoch": 0.26977833843935844, "grad_norm": 0.37081941962242126, "learning_rate": 5.394594594594596e-06, "loss": 0.5141, "step": 499 }, { "epoch": 0.27031897639214275, "grad_norm": 0.4416964650154114, "learning_rate": 5.405405405405406e-06, "loss": 0.5246, "step": 500 }, { "epoch": 0.270859614344927, "grad_norm": 0.4249023199081421, "learning_rate": 5.4162162162162165e-06, "loss": 0.5269, "step": 501 }, { "epoch": 0.2714002522977113, "grad_norm": 0.40850886702537537, "learning_rate": 5.427027027027028e-06, "loss": 0.5444, "step": 502 }, { "epoch": 0.27194089025049556, "grad_norm": 0.45538514852523804, "learning_rate": 5.437837837837838e-06, "loss": 0.5308, "step": 503 }, { "epoch": 0.27248152820327987, "grad_norm": 0.4299362301826477, "learning_rate": 5.448648648648649e-06, "loss": 0.4999, "step": 504 }, { "epoch": 0.27302216615606417, "grad_norm": 0.36193957924842834, "learning_rate": 5.45945945945946e-06, "loss": 0.4833, "step": 505 }, { "epoch": 0.2735628041088484, "grad_norm": 0.4897722005844116, "learning_rate": 5.47027027027027e-06, "loss": 0.5118, "step": 506 }, { "epoch": 0.27410344206163273, "grad_norm": 0.4764401614665985, "learning_rate": 5.481081081081081e-06, "loss": 0.5213, "step": 507 }, { "epoch": 0.27464408001441704, "grad_norm": 0.42601868510246277, "learning_rate": 5.491891891891893e-06, "loss": 0.4803, "step": 508 }, { "epoch": 0.2751847179672013, "grad_norm": 0.46341875195503235, "learning_rate": 5.502702702702703e-06, "loss": 0.5192, "step": 509 }, { "epoch": 0.2757253559199856, "grad_norm": 0.4102022349834442, "learning_rate": 5.513513513513515e-06, "loss": 0.5356, "step": 510 }, { "epoch": 0.27626599387276984, "grad_norm": 0.3962627649307251, "learning_rate": 5.524324324324325e-06, "loss": 0.5035, "step": 511 }, { "epoch": 0.27680663182555415, "grad_norm": 0.4694930613040924, "learning_rate": 5.5351351351351355e-06, "loss": 0.4978, "step": 512 }, { "epoch": 0.27734726977833846, "grad_norm": 0.37531691789627075, "learning_rate": 5.545945945945947e-06, "loss": 0.4892, "step": 513 }, { "epoch": 0.2778879077311227, "grad_norm": 0.4179112911224365, "learning_rate": 5.556756756756757e-06, "loss": 0.5121, "step": 514 }, { "epoch": 0.278428545683907, "grad_norm": 0.49263471364974976, "learning_rate": 5.567567567567568e-06, "loss": 0.55, "step": 515 }, { "epoch": 0.2789691836366913, "grad_norm": 0.38340288400650024, "learning_rate": 5.578378378378379e-06, "loss": 0.5081, "step": 516 }, { "epoch": 0.27950982158947557, "grad_norm": 0.41528379917144775, "learning_rate": 5.589189189189189e-06, "loss": 0.5343, "step": 517 }, { "epoch": 0.2800504595422599, "grad_norm": 0.45989990234375, "learning_rate": 5.600000000000001e-06, "loss": 0.4923, "step": 518 }, { "epoch": 0.28059109749504413, "grad_norm": 0.4144502580165863, "learning_rate": 5.610810810810812e-06, "loss": 0.498, "step": 519 }, { "epoch": 0.28113173544782843, "grad_norm": 0.4108593463897705, "learning_rate": 5.621621621621622e-06, "loss": 0.5354, "step": 520 }, { "epoch": 0.28167237340061274, "grad_norm": 0.4681094288825989, "learning_rate": 5.632432432432433e-06, "loss": 0.5247, "step": 521 }, { "epoch": 0.282213011353397, "grad_norm": 0.42972666025161743, "learning_rate": 5.643243243243244e-06, "loss": 0.5047, "step": 522 }, { "epoch": 0.2827536493061813, "grad_norm": 0.39277395606040955, "learning_rate": 5.6540540540540545e-06, "loss": 0.5076, "step": 523 }, { "epoch": 0.2832942872589656, "grad_norm": 0.3716997802257538, "learning_rate": 5.664864864864865e-06, "loss": 0.5112, "step": 524 }, { "epoch": 0.28383492521174986, "grad_norm": 0.45764875411987305, "learning_rate": 5.675675675675676e-06, "loss": 0.5132, "step": 525 }, { "epoch": 0.28437556316453416, "grad_norm": 0.3713022470474243, "learning_rate": 5.686486486486487e-06, "loss": 0.5225, "step": 526 }, { "epoch": 0.2849162011173184, "grad_norm": 0.38339367508888245, "learning_rate": 5.697297297297297e-06, "loss": 0.5333, "step": 527 }, { "epoch": 0.2854568390701027, "grad_norm": 0.44646668434143066, "learning_rate": 5.708108108108109e-06, "loss": 0.5145, "step": 528 }, { "epoch": 0.285997477022887, "grad_norm": 0.38393041491508484, "learning_rate": 5.71891891891892e-06, "loss": 0.5363, "step": 529 }, { "epoch": 0.2865381149756713, "grad_norm": 0.43032675981521606, "learning_rate": 5.729729729729731e-06, "loss": 0.5306, "step": 530 }, { "epoch": 0.2870787529284556, "grad_norm": 0.39705681800842285, "learning_rate": 5.740540540540541e-06, "loss": 0.5062, "step": 531 }, { "epoch": 0.2876193908812399, "grad_norm": 0.3787713050842285, "learning_rate": 5.751351351351352e-06, "loss": 0.5135, "step": 532 }, { "epoch": 0.28816002883402414, "grad_norm": 0.4790848195552826, "learning_rate": 5.762162162162163e-06, "loss": 0.5115, "step": 533 }, { "epoch": 0.28870066678680845, "grad_norm": 0.41118842363357544, "learning_rate": 5.7729729729729734e-06, "loss": 0.5205, "step": 534 }, { "epoch": 0.2892413047395927, "grad_norm": 0.43289870023727417, "learning_rate": 5.783783783783784e-06, "loss": 0.4883, "step": 535 }, { "epoch": 0.289781942692377, "grad_norm": 0.4232019782066345, "learning_rate": 5.794594594594594e-06, "loss": 0.49, "step": 536 }, { "epoch": 0.2903225806451613, "grad_norm": 0.4845992922782898, "learning_rate": 5.805405405405406e-06, "loss": 0.5161, "step": 537 }, { "epoch": 0.29086321859794556, "grad_norm": 0.4551534354686737, "learning_rate": 5.816216216216217e-06, "loss": 0.4807, "step": 538 }, { "epoch": 0.29140385655072987, "grad_norm": 0.4931628704071045, "learning_rate": 5.827027027027028e-06, "loss": 0.5086, "step": 539 }, { "epoch": 0.2919444945035142, "grad_norm": 0.4340893626213074, "learning_rate": 5.837837837837839e-06, "loss": 0.5285, "step": 540 }, { "epoch": 0.2924851324562984, "grad_norm": 0.43022266030311584, "learning_rate": 5.848648648648649e-06, "loss": 0.5156, "step": 541 }, { "epoch": 0.29302577040908273, "grad_norm": 0.425538569688797, "learning_rate": 5.85945945945946e-06, "loss": 0.4961, "step": 542 }, { "epoch": 0.293566408361867, "grad_norm": 0.3988337218761444, "learning_rate": 5.870270270270271e-06, "loss": 0.511, "step": 543 }, { "epoch": 0.2941070463146513, "grad_norm": 0.4838657081127167, "learning_rate": 5.881081081081081e-06, "loss": 0.5001, "step": 544 }, { "epoch": 0.2946476842674356, "grad_norm": 0.3641073703765869, "learning_rate": 5.8918918918918924e-06, "loss": 0.5078, "step": 545 }, { "epoch": 0.29518832222021985, "grad_norm": 0.4349755644798279, "learning_rate": 5.902702702702703e-06, "loss": 0.508, "step": 546 }, { "epoch": 0.29572896017300415, "grad_norm": 0.4596833884716034, "learning_rate": 5.913513513513513e-06, "loss": 0.5187, "step": 547 }, { "epoch": 0.29626959812578846, "grad_norm": 0.38194069266319275, "learning_rate": 5.9243243243243254e-06, "loss": 0.5192, "step": 548 }, { "epoch": 0.2968102360785727, "grad_norm": 0.46379154920578003, "learning_rate": 5.935135135135136e-06, "loss": 0.5088, "step": 549 }, { "epoch": 0.297350874031357, "grad_norm": 0.39888012409210205, "learning_rate": 5.945945945945947e-06, "loss": 0.502, "step": 550 }, { "epoch": 0.29789151198414127, "grad_norm": 0.4444519877433777, "learning_rate": 5.9567567567567576e-06, "loss": 0.5161, "step": 551 }, { "epoch": 0.2984321499369256, "grad_norm": 0.39441052079200745, "learning_rate": 5.967567567567568e-06, "loss": 0.5412, "step": 552 }, { "epoch": 0.2989727878897099, "grad_norm": 0.38844984769821167, "learning_rate": 5.978378378378379e-06, "loss": 0.5206, "step": 553 }, { "epoch": 0.29951342584249413, "grad_norm": 0.4147244989871979, "learning_rate": 5.98918918918919e-06, "loss": 0.4934, "step": 554 }, { "epoch": 0.30005406379527844, "grad_norm": 0.4128243327140808, "learning_rate": 6e-06, "loss": 0.5326, "step": 555 }, { "epoch": 0.3005947017480627, "grad_norm": 0.42865583300590515, "learning_rate": 6.010810810810811e-06, "loss": 0.5191, "step": 556 }, { "epoch": 0.301135339700847, "grad_norm": 0.4863196015357971, "learning_rate": 6.021621621621622e-06, "loss": 0.5329, "step": 557 }, { "epoch": 0.3016759776536313, "grad_norm": 0.4681631326675415, "learning_rate": 6.032432432432432e-06, "loss": 0.5294, "step": 558 }, { "epoch": 0.30221661560641555, "grad_norm": 0.43204593658447266, "learning_rate": 6.043243243243244e-06, "loss": 0.4957, "step": 559 }, { "epoch": 0.30275725355919986, "grad_norm": 0.4105343520641327, "learning_rate": 6.054054054054055e-06, "loss": 0.5097, "step": 560 }, { "epoch": 0.30329789151198416, "grad_norm": 0.47166261076927185, "learning_rate": 6.064864864864865e-06, "loss": 0.475, "step": 561 }, { "epoch": 0.3038385294647684, "grad_norm": 0.3812154531478882, "learning_rate": 6.0756756756756766e-06, "loss": 0.5167, "step": 562 }, { "epoch": 0.3043791674175527, "grad_norm": 0.47877979278564453, "learning_rate": 6.086486486486487e-06, "loss": 0.5109, "step": 563 }, { "epoch": 0.30491980537033697, "grad_norm": 0.4922278821468353, "learning_rate": 6.0972972972972974e-06, "loss": 0.5142, "step": 564 }, { "epoch": 0.3054604433231213, "grad_norm": 0.4220641851425171, "learning_rate": 6.108108108108109e-06, "loss": 0.5106, "step": 565 }, { "epoch": 0.3060010812759056, "grad_norm": 0.45131972432136536, "learning_rate": 6.118918918918919e-06, "loss": 0.503, "step": 566 }, { "epoch": 0.30654171922868984, "grad_norm": 0.4449264407157898, "learning_rate": 6.12972972972973e-06, "loss": 0.524, "step": 567 }, { "epoch": 0.30708235718147414, "grad_norm": 0.4497717618942261, "learning_rate": 6.140540540540541e-06, "loss": 0.4707, "step": 568 }, { "epoch": 0.30762299513425845, "grad_norm": 0.41556915640830994, "learning_rate": 6.151351351351352e-06, "loss": 0.5225, "step": 569 }, { "epoch": 0.3081636330870427, "grad_norm": 0.4636322855949402, "learning_rate": 6.162162162162163e-06, "loss": 0.5551, "step": 570 }, { "epoch": 0.308704271039827, "grad_norm": 0.43292292952537537, "learning_rate": 6.172972972972974e-06, "loss": 0.5122, "step": 571 }, { "epoch": 0.30924490899261126, "grad_norm": 0.41534942388534546, "learning_rate": 6.183783783783784e-06, "loss": 0.5409, "step": 572 }, { "epoch": 0.30978554694539556, "grad_norm": 0.511157214641571, "learning_rate": 6.194594594594595e-06, "loss": 0.5344, "step": 573 }, { "epoch": 0.31032618489817987, "grad_norm": 0.3864719271659851, "learning_rate": 6.205405405405406e-06, "loss": 0.5174, "step": 574 }, { "epoch": 0.3108668228509641, "grad_norm": 0.497886061668396, "learning_rate": 6.2162162162162164e-06, "loss": 0.5055, "step": 575 }, { "epoch": 0.3114074608037484, "grad_norm": 0.4637546241283417, "learning_rate": 6.227027027027027e-06, "loss": 0.5136, "step": 576 }, { "epoch": 0.31194809875653273, "grad_norm": 0.3968018591403961, "learning_rate": 6.237837837837838e-06, "loss": 0.4929, "step": 577 }, { "epoch": 0.312488736709317, "grad_norm": 0.425836443901062, "learning_rate": 6.2486486486486486e-06, "loss": 0.5039, "step": 578 }, { "epoch": 0.3130293746621013, "grad_norm": 0.4729524850845337, "learning_rate": 6.259459459459461e-06, "loss": 0.5123, "step": 579 }, { "epoch": 0.31357001261488554, "grad_norm": 0.39072057604789734, "learning_rate": 6.270270270270271e-06, "loss": 0.5353, "step": 580 }, { "epoch": 0.31411065056766985, "grad_norm": 0.42268452048301697, "learning_rate": 6.2810810810810816e-06, "loss": 0.4989, "step": 581 }, { "epoch": 0.31465128852045415, "grad_norm": 0.4206162989139557, "learning_rate": 6.291891891891893e-06, "loss": 0.4921, "step": 582 }, { "epoch": 0.3151919264732384, "grad_norm": 0.432198166847229, "learning_rate": 6.302702702702703e-06, "loss": 0.5086, "step": 583 }, { "epoch": 0.3157325644260227, "grad_norm": 0.4336259663105011, "learning_rate": 6.313513513513514e-06, "loss": 0.5164, "step": 584 }, { "epoch": 0.316273202378807, "grad_norm": 0.40399715304374695, "learning_rate": 6.324324324324325e-06, "loss": 0.5118, "step": 585 }, { "epoch": 0.31681384033159127, "grad_norm": 0.4406914710998535, "learning_rate": 6.335135135135135e-06, "loss": 0.5025, "step": 586 }, { "epoch": 0.3173544782843756, "grad_norm": 0.39285317063331604, "learning_rate": 6.345945945945946e-06, "loss": 0.4876, "step": 587 }, { "epoch": 0.3178951162371598, "grad_norm": 0.3913812041282654, "learning_rate": 6.356756756756757e-06, "loss": 0.51, "step": 588 }, { "epoch": 0.31843575418994413, "grad_norm": 0.4313332736492157, "learning_rate": 6.367567567567568e-06, "loss": 0.5002, "step": 589 }, { "epoch": 0.31897639214272844, "grad_norm": 0.43284839391708374, "learning_rate": 6.378378378378379e-06, "loss": 0.5064, "step": 590 }, { "epoch": 0.3195170300955127, "grad_norm": 0.46096357703208923, "learning_rate": 6.38918918918919e-06, "loss": 0.5427, "step": 591 }, { "epoch": 0.320057668048297, "grad_norm": 0.381386399269104, "learning_rate": 6.4000000000000006e-06, "loss": 0.525, "step": 592 }, { "epoch": 0.3205983060010813, "grad_norm": 0.3691636025905609, "learning_rate": 6.410810810810811e-06, "loss": 0.5181, "step": 593 }, { "epoch": 0.32113894395386555, "grad_norm": 0.43879830837249756, "learning_rate": 6.421621621621622e-06, "loss": 0.5185, "step": 594 }, { "epoch": 0.32167958190664986, "grad_norm": 0.39650213718414307, "learning_rate": 6.432432432432433e-06, "loss": 0.5095, "step": 595 }, { "epoch": 0.3222202198594341, "grad_norm": 0.4146016538143158, "learning_rate": 6.443243243243243e-06, "loss": 0.4853, "step": 596 }, { "epoch": 0.3227608578122184, "grad_norm": 0.45325127243995667, "learning_rate": 6.454054054054054e-06, "loss": 0.5022, "step": 597 }, { "epoch": 0.3233014957650027, "grad_norm": 0.4641883373260498, "learning_rate": 6.464864864864865e-06, "loss": 0.4808, "step": 598 }, { "epoch": 0.323842133717787, "grad_norm": 0.47337257862091064, "learning_rate": 6.475675675675677e-06, "loss": 0.4783, "step": 599 }, { "epoch": 0.3243827716705713, "grad_norm": 0.43825823068618774, "learning_rate": 6.486486486486487e-06, "loss": 0.4901, "step": 600 }, { "epoch": 0.3249234096233556, "grad_norm": 0.45925870537757874, "learning_rate": 6.497297297297298e-06, "loss": 0.5031, "step": 601 }, { "epoch": 0.32546404757613984, "grad_norm": 0.3915764391422272, "learning_rate": 6.508108108108109e-06, "loss": 0.5013, "step": 602 }, { "epoch": 0.32600468552892414, "grad_norm": 0.4049145579338074, "learning_rate": 6.5189189189189196e-06, "loss": 0.5256, "step": 603 }, { "epoch": 0.3265453234817084, "grad_norm": 0.4432182312011719, "learning_rate": 6.52972972972973e-06, "loss": 0.5022, "step": 604 }, { "epoch": 0.3270859614344927, "grad_norm": 0.406897634267807, "learning_rate": 6.540540540540541e-06, "loss": 0.4814, "step": 605 }, { "epoch": 0.327626599387277, "grad_norm": 0.42340806126594543, "learning_rate": 6.551351351351352e-06, "loss": 0.4999, "step": 606 }, { "epoch": 0.32816723734006126, "grad_norm": 0.3778286874294281, "learning_rate": 6.562162162162162e-06, "loss": 0.4939, "step": 607 }, { "epoch": 0.32870787529284556, "grad_norm": 0.4262266159057617, "learning_rate": 6.572972972972973e-06, "loss": 0.5157, "step": 608 }, { "epoch": 0.32924851324562987, "grad_norm": 0.36879613995552063, "learning_rate": 6.583783783783785e-06, "loss": 0.4924, "step": 609 }, { "epoch": 0.3297891511984141, "grad_norm": 0.4040409028530121, "learning_rate": 6.594594594594595e-06, "loss": 0.5092, "step": 610 }, { "epoch": 0.3303297891511984, "grad_norm": 0.46349969506263733, "learning_rate": 6.605405405405406e-06, "loss": 0.5096, "step": 611 }, { "epoch": 0.3308704271039827, "grad_norm": 0.38087141513824463, "learning_rate": 6.616216216216217e-06, "loss": 0.4965, "step": 612 }, { "epoch": 0.331411065056767, "grad_norm": 0.46425509452819824, "learning_rate": 6.627027027027027e-06, "loss": 0.5177, "step": 613 }, { "epoch": 0.3319517030095513, "grad_norm": 0.3941066563129425, "learning_rate": 6.6378378378378385e-06, "loss": 0.5098, "step": 614 }, { "epoch": 0.33249234096233554, "grad_norm": 0.448647141456604, "learning_rate": 6.648648648648649e-06, "loss": 0.5162, "step": 615 }, { "epoch": 0.33303297891511985, "grad_norm": 0.4808944761753082, "learning_rate": 6.659459459459459e-06, "loss": 0.5126, "step": 616 }, { "epoch": 0.3335736168679041, "grad_norm": 0.3767862021923065, "learning_rate": 6.670270270270271e-06, "loss": 0.4942, "step": 617 }, { "epoch": 0.3341142548206884, "grad_norm": 0.45056983828544617, "learning_rate": 6.681081081081081e-06, "loss": 0.5181, "step": 618 }, { "epoch": 0.3346548927734727, "grad_norm": 0.4133990705013275, "learning_rate": 6.691891891891893e-06, "loss": 0.5102, "step": 619 }, { "epoch": 0.33519553072625696, "grad_norm": 0.4201817512512207, "learning_rate": 6.702702702702704e-06, "loss": 0.5051, "step": 620 }, { "epoch": 0.33573616867904127, "grad_norm": 0.42155951261520386, "learning_rate": 6.713513513513514e-06, "loss": 0.5313, "step": 621 }, { "epoch": 0.3362768066318256, "grad_norm": 0.4288482367992401, "learning_rate": 6.724324324324325e-06, "loss": 0.5151, "step": 622 }, { "epoch": 0.3368174445846098, "grad_norm": 0.4295049011707306, "learning_rate": 6.735135135135136e-06, "loss": 0.5024, "step": 623 }, { "epoch": 0.33735808253739413, "grad_norm": 0.49055394530296326, "learning_rate": 6.745945945945946e-06, "loss": 0.4825, "step": 624 }, { "epoch": 0.3378987204901784, "grad_norm": 0.448925644159317, "learning_rate": 6.7567567567567575e-06, "loss": 0.5031, "step": 625 }, { "epoch": 0.3384393584429627, "grad_norm": 0.4884868562221527, "learning_rate": 6.767567567567568e-06, "loss": 0.5085, "step": 626 }, { "epoch": 0.338979996395747, "grad_norm": 0.42473411560058594, "learning_rate": 6.778378378378378e-06, "loss": 0.5232, "step": 627 }, { "epoch": 0.33952063434853125, "grad_norm": 0.41970014572143555, "learning_rate": 6.789189189189189e-06, "loss": 0.486, "step": 628 }, { "epoch": 0.34006127230131555, "grad_norm": 0.5064523220062256, "learning_rate": 6.800000000000001e-06, "loss": 0.5403, "step": 629 }, { "epoch": 0.34060191025409986, "grad_norm": 0.4276356101036072, "learning_rate": 6.810810810810811e-06, "loss": 0.5248, "step": 630 }, { "epoch": 0.3411425482068841, "grad_norm": 0.42455577850341797, "learning_rate": 6.821621621621623e-06, "loss": 0.493, "step": 631 }, { "epoch": 0.3416831861596684, "grad_norm": 0.41362032294273376, "learning_rate": 6.832432432432433e-06, "loss": 0.5029, "step": 632 }, { "epoch": 0.34222382411245267, "grad_norm": 0.4547756016254425, "learning_rate": 6.8432432432432435e-06, "loss": 0.5121, "step": 633 }, { "epoch": 0.342764462065237, "grad_norm": 0.4629051387310028, "learning_rate": 6.854054054054055e-06, "loss": 0.5363, "step": 634 }, { "epoch": 0.3433051000180213, "grad_norm": 0.40793025493621826, "learning_rate": 6.864864864864865e-06, "loss": 0.5012, "step": 635 }, { "epoch": 0.34384573797080553, "grad_norm": 0.4819142520427704, "learning_rate": 6.875675675675676e-06, "loss": 0.5064, "step": 636 }, { "epoch": 0.34438637592358984, "grad_norm": 0.38700756430625916, "learning_rate": 6.886486486486487e-06, "loss": 0.4911, "step": 637 }, { "epoch": 0.34492701387637414, "grad_norm": 0.39876338839530945, "learning_rate": 6.897297297297297e-06, "loss": 0.4797, "step": 638 }, { "epoch": 0.3454676518291584, "grad_norm": 0.4039168953895569, "learning_rate": 6.9081081081081095e-06, "loss": 0.5161, "step": 639 }, { "epoch": 0.3460082897819427, "grad_norm": 0.38275012373924255, "learning_rate": 6.91891891891892e-06, "loss": 0.4998, "step": 640 }, { "epoch": 0.34654892773472695, "grad_norm": 0.3935031294822693, "learning_rate": 6.92972972972973e-06, "loss": 0.4834, "step": 641 }, { "epoch": 0.34708956568751126, "grad_norm": 0.39518025517463684, "learning_rate": 6.940540540540542e-06, "loss": 0.5069, "step": 642 }, { "epoch": 0.34763020364029557, "grad_norm": 0.43953222036361694, "learning_rate": 6.951351351351352e-06, "loss": 0.5199, "step": 643 }, { "epoch": 0.3481708415930798, "grad_norm": 0.37210172414779663, "learning_rate": 6.9621621621621625e-06, "loss": 0.4859, "step": 644 }, { "epoch": 0.3487114795458641, "grad_norm": 0.394963800907135, "learning_rate": 6.972972972972973e-06, "loss": 0.5049, "step": 645 }, { "epoch": 0.34925211749864843, "grad_norm": 0.3912118971347809, "learning_rate": 6.983783783783784e-06, "loss": 0.5092, "step": 646 }, { "epoch": 0.3497927554514327, "grad_norm": 0.35926076769828796, "learning_rate": 6.994594594594595e-06, "loss": 0.4674, "step": 647 }, { "epoch": 0.350333393404217, "grad_norm": 0.41905924677848816, "learning_rate": 7.005405405405405e-06, "loss": 0.5523, "step": 648 }, { "epoch": 0.35087403135700124, "grad_norm": 0.4475938677787781, "learning_rate": 7.016216216216217e-06, "loss": 0.4922, "step": 649 }, { "epoch": 0.35141466930978554, "grad_norm": 0.41779378056526184, "learning_rate": 7.027027027027028e-06, "loss": 0.48, "step": 650 }, { "epoch": 0.35195530726256985, "grad_norm": 0.514970600605011, "learning_rate": 7.037837837837839e-06, "loss": 0.4898, "step": 651 }, { "epoch": 0.3524959452153541, "grad_norm": 0.4767131209373474, "learning_rate": 7.048648648648649e-06, "loss": 0.4883, "step": 652 }, { "epoch": 0.3530365831681384, "grad_norm": 0.41229724884033203, "learning_rate": 7.05945945945946e-06, "loss": 0.5173, "step": 653 }, { "epoch": 0.3535772211209227, "grad_norm": 0.5163008570671082, "learning_rate": 7.070270270270271e-06, "loss": 0.5112, "step": 654 }, { "epoch": 0.35411785907370696, "grad_norm": 0.4336077570915222, "learning_rate": 7.0810810810810815e-06, "loss": 0.4909, "step": 655 }, { "epoch": 0.35465849702649127, "grad_norm": 0.427859902381897, "learning_rate": 7.091891891891892e-06, "loss": 0.505, "step": 656 }, { "epoch": 0.3551991349792755, "grad_norm": 0.5053046941757202, "learning_rate": 7.102702702702703e-06, "loss": 0.5302, "step": 657 }, { "epoch": 0.35573977293205983, "grad_norm": 0.45524269342422485, "learning_rate": 7.113513513513514e-06, "loss": 0.5067, "step": 658 }, { "epoch": 0.35628041088484413, "grad_norm": 0.4407271444797516, "learning_rate": 7.124324324324326e-06, "loss": 0.4893, "step": 659 }, { "epoch": 0.3568210488376284, "grad_norm": 0.6443756222724915, "learning_rate": 7.135135135135136e-06, "loss": 0.5403, "step": 660 }, { "epoch": 0.3573616867904127, "grad_norm": 0.4299008548259735, "learning_rate": 7.145945945945947e-06, "loss": 0.5098, "step": 661 }, { "epoch": 0.357902324743197, "grad_norm": 0.5455183386802673, "learning_rate": 7.156756756756757e-06, "loss": 0.5076, "step": 662 }, { "epoch": 0.35844296269598125, "grad_norm": 0.44949883222579956, "learning_rate": 7.167567567567568e-06, "loss": 0.4943, "step": 663 }, { "epoch": 0.35898360064876556, "grad_norm": 0.5578931570053101, "learning_rate": 7.178378378378379e-06, "loss": 0.5043, "step": 664 }, { "epoch": 0.3595242386015498, "grad_norm": 0.5532004833221436, "learning_rate": 7.189189189189189e-06, "loss": 0.5407, "step": 665 }, { "epoch": 0.3600648765543341, "grad_norm": 0.5385209321975708, "learning_rate": 7.2000000000000005e-06, "loss": 0.5239, "step": 666 }, { "epoch": 0.3606055145071184, "grad_norm": 0.4979776442050934, "learning_rate": 7.210810810810811e-06, "loss": 0.4752, "step": 667 }, { "epoch": 0.36114615245990267, "grad_norm": 0.4731312692165375, "learning_rate": 7.221621621621621e-06, "loss": 0.5116, "step": 668 }, { "epoch": 0.361686790412687, "grad_norm": 0.40968212485313416, "learning_rate": 7.2324324324324335e-06, "loss": 0.5132, "step": 669 }, { "epoch": 0.3622274283654713, "grad_norm": 0.42712947726249695, "learning_rate": 7.243243243243244e-06, "loss": 0.4824, "step": 670 }, { "epoch": 0.36276806631825553, "grad_norm": 0.4217482805252075, "learning_rate": 7.254054054054055e-06, "loss": 0.5044, "step": 671 }, { "epoch": 0.36330870427103984, "grad_norm": 0.38178786635398865, "learning_rate": 7.264864864864866e-06, "loss": 0.4892, "step": 672 }, { "epoch": 0.3638493422238241, "grad_norm": 0.3964153230190277, "learning_rate": 7.275675675675676e-06, "loss": 0.5067, "step": 673 }, { "epoch": 0.3643899801766084, "grad_norm": 0.40324804186820984, "learning_rate": 7.286486486486487e-06, "loss": 0.5193, "step": 674 }, { "epoch": 0.3649306181293927, "grad_norm": 0.42960992455482483, "learning_rate": 7.297297297297298e-06, "loss": 0.5044, "step": 675 }, { "epoch": 0.36547125608217695, "grad_norm": 0.3774147629737854, "learning_rate": 7.308108108108108e-06, "loss": 0.4853, "step": 676 }, { "epoch": 0.36601189403496126, "grad_norm": 0.4166586399078369, "learning_rate": 7.3189189189189195e-06, "loss": 0.4982, "step": 677 }, { "epoch": 0.36655253198774557, "grad_norm": 0.4181104004383087, "learning_rate": 7.32972972972973e-06, "loss": 0.5031, "step": 678 }, { "epoch": 0.3670931699405298, "grad_norm": 0.4069172143936157, "learning_rate": 7.340540540540542e-06, "loss": 0.4984, "step": 679 }, { "epoch": 0.3676338078933141, "grad_norm": 0.3852164149284363, "learning_rate": 7.3513513513513525e-06, "loss": 0.4852, "step": 680 }, { "epoch": 0.3681744458460984, "grad_norm": 0.41952404379844666, "learning_rate": 7.362162162162163e-06, "loss": 0.5073, "step": 681 }, { "epoch": 0.3687150837988827, "grad_norm": 0.41212549805641174, "learning_rate": 7.372972972972973e-06, "loss": 0.5102, "step": 682 }, { "epoch": 0.369255721751667, "grad_norm": 0.4410938322544098, "learning_rate": 7.383783783783785e-06, "loss": 0.5301, "step": 683 }, { "epoch": 0.36979635970445124, "grad_norm": 0.41383302211761475, "learning_rate": 7.394594594594595e-06, "loss": 0.4985, "step": 684 }, { "epoch": 0.37033699765723554, "grad_norm": 0.4224023222923279, "learning_rate": 7.4054054054054055e-06, "loss": 0.508, "step": 685 }, { "epoch": 0.3708776356100198, "grad_norm": 0.4155189096927643, "learning_rate": 7.416216216216217e-06, "loss": 0.5059, "step": 686 }, { "epoch": 0.3714182735628041, "grad_norm": 0.4235122501850128, "learning_rate": 7.427027027027027e-06, "loss": 0.5151, "step": 687 }, { "epoch": 0.3719589115155884, "grad_norm": 0.45957595109939575, "learning_rate": 7.437837837837838e-06, "loss": 0.5124, "step": 688 }, { "epoch": 0.37249954946837266, "grad_norm": 0.5240591764450073, "learning_rate": 7.44864864864865e-06, "loss": 0.509, "step": 689 }, { "epoch": 0.37304018742115697, "grad_norm": 0.49150049686431885, "learning_rate": 7.45945945945946e-06, "loss": 0.5061, "step": 690 }, { "epoch": 0.37358082537394127, "grad_norm": 0.4253467619419098, "learning_rate": 7.4702702702702715e-06, "loss": 0.5015, "step": 691 }, { "epoch": 0.3741214633267255, "grad_norm": 0.41869038343429565, "learning_rate": 7.481081081081082e-06, "loss": 0.4868, "step": 692 }, { "epoch": 0.37466210127950983, "grad_norm": 0.42171812057495117, "learning_rate": 7.491891891891892e-06, "loss": 0.4947, "step": 693 }, { "epoch": 0.3752027392322941, "grad_norm": 0.4336179494857788, "learning_rate": 7.502702702702704e-06, "loss": 0.4918, "step": 694 }, { "epoch": 0.3757433771850784, "grad_norm": 0.42052584886550903, "learning_rate": 7.513513513513514e-06, "loss": 0.4798, "step": 695 }, { "epoch": 0.3762840151378627, "grad_norm": 0.45332416892051697, "learning_rate": 7.5243243243243245e-06, "loss": 0.4838, "step": 696 }, { "epoch": 0.37682465309064694, "grad_norm": 0.4129831790924072, "learning_rate": 7.535135135135136e-06, "loss": 0.4943, "step": 697 }, { "epoch": 0.37736529104343125, "grad_norm": 0.4154702126979828, "learning_rate": 7.545945945945946e-06, "loss": 0.5256, "step": 698 }, { "epoch": 0.37790592899621556, "grad_norm": 0.4334423840045929, "learning_rate": 7.5567567567567575e-06, "loss": 0.4954, "step": 699 }, { "epoch": 0.3784465669489998, "grad_norm": 0.43977442383766174, "learning_rate": 7.567567567567569e-06, "loss": 0.5084, "step": 700 }, { "epoch": 0.3789872049017841, "grad_norm": 0.4473543167114258, "learning_rate": 7.578378378378379e-06, "loss": 0.5121, "step": 701 }, { "epoch": 0.37952784285456836, "grad_norm": 0.3824658691883087, "learning_rate": 7.58918918918919e-06, "loss": 0.4946, "step": 702 }, { "epoch": 0.38006848080735267, "grad_norm": 0.43371015787124634, "learning_rate": 7.600000000000001e-06, "loss": 0.4827, "step": 703 }, { "epoch": 0.380609118760137, "grad_norm": 0.4218309819698334, "learning_rate": 7.610810810810811e-06, "loss": 0.4795, "step": 704 }, { "epoch": 0.38114975671292123, "grad_norm": 0.42625129222869873, "learning_rate": 7.621621621621622e-06, "loss": 0.4908, "step": 705 }, { "epoch": 0.38169039466570553, "grad_norm": 0.42555558681488037, "learning_rate": 7.632432432432433e-06, "loss": 0.5112, "step": 706 }, { "epoch": 0.38223103261848984, "grad_norm": 0.4020969867706299, "learning_rate": 7.643243243243244e-06, "loss": 0.5084, "step": 707 }, { "epoch": 0.3827716705712741, "grad_norm": 0.38164466619491577, "learning_rate": 7.654054054054054e-06, "loss": 0.477, "step": 708 }, { "epoch": 0.3833123085240584, "grad_norm": 0.3920608162879944, "learning_rate": 7.664864864864866e-06, "loss": 0.4878, "step": 709 }, { "epoch": 0.38385294647684265, "grad_norm": 0.4406064748764038, "learning_rate": 7.675675675675676e-06, "loss": 0.5438, "step": 710 }, { "epoch": 0.38439358442962696, "grad_norm": 0.45603087544441223, "learning_rate": 7.686486486486487e-06, "loss": 0.5086, "step": 711 }, { "epoch": 0.38493422238241126, "grad_norm": 0.391500324010849, "learning_rate": 7.697297297297297e-06, "loss": 0.4899, "step": 712 }, { "epoch": 0.3854748603351955, "grad_norm": 0.3915819823741913, "learning_rate": 7.70810810810811e-06, "loss": 0.5003, "step": 713 }, { "epoch": 0.3860154982879798, "grad_norm": 0.4155017137527466, "learning_rate": 7.71891891891892e-06, "loss": 0.5054, "step": 714 }, { "epoch": 0.3865561362407641, "grad_norm": 0.42171376943588257, "learning_rate": 7.72972972972973e-06, "loss": 0.5209, "step": 715 }, { "epoch": 0.3870967741935484, "grad_norm": 0.4514196217060089, "learning_rate": 7.74054054054054e-06, "loss": 0.4754, "step": 716 }, { "epoch": 0.3876374121463327, "grad_norm": 0.4144493043422699, "learning_rate": 7.751351351351351e-06, "loss": 0.4744, "step": 717 }, { "epoch": 0.38817805009911693, "grad_norm": 0.43851664662361145, "learning_rate": 7.762162162162162e-06, "loss": 0.4882, "step": 718 }, { "epoch": 0.38871868805190124, "grad_norm": 0.42534932494163513, "learning_rate": 7.772972972972974e-06, "loss": 0.5156, "step": 719 }, { "epoch": 0.38925932600468555, "grad_norm": 0.4332650303840637, "learning_rate": 7.783783783783784e-06, "loss": 0.5038, "step": 720 }, { "epoch": 0.3897999639574698, "grad_norm": 0.42367222905158997, "learning_rate": 7.794594594594596e-06, "loss": 0.4778, "step": 721 }, { "epoch": 0.3903406019102541, "grad_norm": 0.38908496499061584, "learning_rate": 7.805405405405407e-06, "loss": 0.5106, "step": 722 }, { "epoch": 0.3908812398630384, "grad_norm": 0.41853058338165283, "learning_rate": 7.816216216216217e-06, "loss": 0.5044, "step": 723 }, { "epoch": 0.39142187781582266, "grad_norm": 0.3883611261844635, "learning_rate": 7.827027027027028e-06, "loss": 0.5226, "step": 724 }, { "epoch": 0.39196251576860697, "grad_norm": 0.41836339235305786, "learning_rate": 7.837837837837838e-06, "loss": 0.4814, "step": 725 }, { "epoch": 0.3925031537213912, "grad_norm": 0.42219579219818115, "learning_rate": 7.848648648648648e-06, "loss": 0.4845, "step": 726 }, { "epoch": 0.3930437916741755, "grad_norm": 0.42650148272514343, "learning_rate": 7.859459459459459e-06, "loss": 0.4963, "step": 727 }, { "epoch": 0.39358442962695983, "grad_norm": 0.4264717102050781, "learning_rate": 7.870270270270271e-06, "loss": 0.4947, "step": 728 }, { "epoch": 0.3941250675797441, "grad_norm": 0.4335402250289917, "learning_rate": 7.881081081081081e-06, "loss": 0.5013, "step": 729 }, { "epoch": 0.3946657055325284, "grad_norm": 0.3991732597351074, "learning_rate": 7.891891891891894e-06, "loss": 0.4918, "step": 730 }, { "epoch": 0.3952063434853127, "grad_norm": 0.4234263300895691, "learning_rate": 7.902702702702704e-06, "loss": 0.4999, "step": 731 }, { "epoch": 0.39574698143809695, "grad_norm": 0.453852117061615, "learning_rate": 7.913513513513514e-06, "loss": 0.4612, "step": 732 }, { "epoch": 0.39628761939088125, "grad_norm": 0.38619935512542725, "learning_rate": 7.924324324324325e-06, "loss": 0.5215, "step": 733 }, { "epoch": 0.3968282573436655, "grad_norm": 0.48475557565689087, "learning_rate": 7.935135135135135e-06, "loss": 0.4862, "step": 734 }, { "epoch": 0.3973688952964498, "grad_norm": 0.46672797203063965, "learning_rate": 7.945945945945946e-06, "loss": 0.5342, "step": 735 }, { "epoch": 0.3979095332492341, "grad_norm": 0.3979105055332184, "learning_rate": 7.956756756756758e-06, "loss": 0.5131, "step": 736 }, { "epoch": 0.39845017120201837, "grad_norm": 0.4489961266517639, "learning_rate": 7.967567567567568e-06, "loss": 0.5055, "step": 737 }, { "epoch": 0.3989908091548027, "grad_norm": 0.47179901599884033, "learning_rate": 7.978378378378379e-06, "loss": 0.4689, "step": 738 }, { "epoch": 0.399531447107587, "grad_norm": 0.4245077073574066, "learning_rate": 7.989189189189191e-06, "loss": 0.4865, "step": 739 }, { "epoch": 0.40007208506037123, "grad_norm": 0.44382888078689575, "learning_rate": 8.000000000000001e-06, "loss": 0.5002, "step": 740 }, { "epoch": 0.40061272301315554, "grad_norm": 0.36594852805137634, "learning_rate": 8.010810810810812e-06, "loss": 0.5004, "step": 741 }, { "epoch": 0.4011533609659398, "grad_norm": 0.41024965047836304, "learning_rate": 8.021621621621622e-06, "loss": 0.4795, "step": 742 }, { "epoch": 0.4016939989187241, "grad_norm": 0.40164679288864136, "learning_rate": 8.032432432432433e-06, "loss": 0.5139, "step": 743 }, { "epoch": 0.4022346368715084, "grad_norm": 0.36995360255241394, "learning_rate": 8.043243243243243e-06, "loss": 0.4804, "step": 744 }, { "epoch": 0.40277527482429265, "grad_norm": 0.47509828209877014, "learning_rate": 8.054054054054055e-06, "loss": 0.5083, "step": 745 }, { "epoch": 0.40331591277707696, "grad_norm": 0.4017750918865204, "learning_rate": 8.064864864864866e-06, "loss": 0.4956, "step": 746 }, { "epoch": 0.40385655072986126, "grad_norm": 0.4321381449699402, "learning_rate": 8.075675675675676e-06, "loss": 0.4857, "step": 747 }, { "epoch": 0.4043971886826455, "grad_norm": 0.44780081510543823, "learning_rate": 8.086486486486486e-06, "loss": 0.4923, "step": 748 }, { "epoch": 0.4049378266354298, "grad_norm": 0.4115518033504486, "learning_rate": 8.097297297297297e-06, "loss": 0.5112, "step": 749 }, { "epoch": 0.40547846458821407, "grad_norm": 0.42116203904151917, "learning_rate": 8.108108108108109e-06, "loss": 0.4866, "step": 750 }, { "epoch": 0.4060191025409984, "grad_norm": 0.4541042149066925, "learning_rate": 8.11891891891892e-06, "loss": 0.4817, "step": 751 }, { "epoch": 0.4065597404937827, "grad_norm": 0.4548441469669342, "learning_rate": 8.12972972972973e-06, "loss": 0.506, "step": 752 }, { "epoch": 0.40710037844656694, "grad_norm": 0.4485017657279968, "learning_rate": 8.140540540540542e-06, "loss": 0.524, "step": 753 }, { "epoch": 0.40764101639935124, "grad_norm": 0.45467260479927063, "learning_rate": 8.151351351351352e-06, "loss": 0.4907, "step": 754 }, { "epoch": 0.4081816543521355, "grad_norm": 0.39999452233314514, "learning_rate": 8.162162162162163e-06, "loss": 0.4985, "step": 755 }, { "epoch": 0.4087222923049198, "grad_norm": 0.48386865854263306, "learning_rate": 8.172972972972973e-06, "loss": 0.494, "step": 756 }, { "epoch": 0.4092629302577041, "grad_norm": 0.39953333139419556, "learning_rate": 8.183783783783784e-06, "loss": 0.4965, "step": 757 }, { "epoch": 0.40980356821048836, "grad_norm": 0.46499499678611755, "learning_rate": 8.194594594594594e-06, "loss": 0.484, "step": 758 }, { "epoch": 0.41034420616327266, "grad_norm": 0.4665820598602295, "learning_rate": 8.205405405405406e-06, "loss": 0.4871, "step": 759 }, { "epoch": 0.41088484411605697, "grad_norm": 0.4831865429878235, "learning_rate": 8.216216216216217e-06, "loss": 0.4941, "step": 760 }, { "epoch": 0.4114254820688412, "grad_norm": 0.4756391644477844, "learning_rate": 8.227027027027029e-06, "loss": 0.5108, "step": 761 }, { "epoch": 0.4119661200216255, "grad_norm": 0.5320138931274414, "learning_rate": 8.23783783783784e-06, "loss": 0.4794, "step": 762 }, { "epoch": 0.4125067579744098, "grad_norm": 0.4134671688079834, "learning_rate": 8.24864864864865e-06, "loss": 0.4939, "step": 763 }, { "epoch": 0.4130473959271941, "grad_norm": 0.48983827233314514, "learning_rate": 8.25945945945946e-06, "loss": 0.5266, "step": 764 }, { "epoch": 0.4135880338799784, "grad_norm": 0.44971051812171936, "learning_rate": 8.27027027027027e-06, "loss": 0.5201, "step": 765 }, { "epoch": 0.41412867183276264, "grad_norm": 0.43499618768692017, "learning_rate": 8.281081081081081e-06, "loss": 0.4692, "step": 766 }, { "epoch": 0.41466930978554695, "grad_norm": 0.3952126204967499, "learning_rate": 8.291891891891891e-06, "loss": 0.4571, "step": 767 }, { "epoch": 0.41520994773833125, "grad_norm": 0.43571987748146057, "learning_rate": 8.302702702702704e-06, "loss": 0.5128, "step": 768 }, { "epoch": 0.4157505856911155, "grad_norm": 0.41786375641822815, "learning_rate": 8.313513513513514e-06, "loss": 0.5126, "step": 769 }, { "epoch": 0.4162912236438998, "grad_norm": 0.40955227613449097, "learning_rate": 8.324324324324326e-06, "loss": 0.5046, "step": 770 }, { "epoch": 0.41683186159668406, "grad_norm": 0.4118677079677582, "learning_rate": 8.335135135135137e-06, "loss": 0.5037, "step": 771 }, { "epoch": 0.41737249954946837, "grad_norm": 0.43250730633735657, "learning_rate": 8.345945945945947e-06, "loss": 0.4943, "step": 772 }, { "epoch": 0.4179131375022527, "grad_norm": 0.4406627416610718, "learning_rate": 8.356756756756757e-06, "loss": 0.4897, "step": 773 }, { "epoch": 0.4184537754550369, "grad_norm": 0.42739009857177734, "learning_rate": 8.367567567567568e-06, "loss": 0.4963, "step": 774 }, { "epoch": 0.41899441340782123, "grad_norm": 0.44201985001564026, "learning_rate": 8.378378378378378e-06, "loss": 0.5287, "step": 775 }, { "epoch": 0.41953505136060554, "grad_norm": 0.4748345613479614, "learning_rate": 8.38918918918919e-06, "loss": 0.4876, "step": 776 }, { "epoch": 0.4200756893133898, "grad_norm": 0.3963128924369812, "learning_rate": 8.400000000000001e-06, "loss": 0.4826, "step": 777 }, { "epoch": 0.4206163272661741, "grad_norm": 0.4639700651168823, "learning_rate": 8.410810810810811e-06, "loss": 0.4927, "step": 778 }, { "epoch": 0.42115696521895835, "grad_norm": 0.4406186044216156, "learning_rate": 8.421621621621622e-06, "loss": 0.506, "step": 779 }, { "epoch": 0.42169760317174265, "grad_norm": 0.42991262674331665, "learning_rate": 8.432432432432434e-06, "loss": 0.4856, "step": 780 }, { "epoch": 0.42223824112452696, "grad_norm": 0.4585091769695282, "learning_rate": 8.443243243243244e-06, "loss": 0.5051, "step": 781 }, { "epoch": 0.4227788790773112, "grad_norm": 0.4431307911872864, "learning_rate": 8.454054054054055e-06, "loss": 0.4688, "step": 782 }, { "epoch": 0.4233195170300955, "grad_norm": 0.3975408375263214, "learning_rate": 8.464864864864865e-06, "loss": 0.4898, "step": 783 }, { "epoch": 0.4238601549828798, "grad_norm": 0.5118733644485474, "learning_rate": 8.475675675675676e-06, "loss": 0.5056, "step": 784 }, { "epoch": 0.4244007929356641, "grad_norm": 0.3885602056980133, "learning_rate": 8.486486486486488e-06, "loss": 0.4638, "step": 785 }, { "epoch": 0.4249414308884484, "grad_norm": 0.5289828181266785, "learning_rate": 8.497297297297298e-06, "loss": 0.5084, "step": 786 }, { "epoch": 0.42548206884123263, "grad_norm": 0.4151102602481842, "learning_rate": 8.508108108108109e-06, "loss": 0.4546, "step": 787 }, { "epoch": 0.42602270679401694, "grad_norm": 0.47288182377815247, "learning_rate": 8.518918918918919e-06, "loss": 0.4731, "step": 788 }, { "epoch": 0.42656334474680124, "grad_norm": 0.5048025250434875, "learning_rate": 8.52972972972973e-06, "loss": 0.4979, "step": 789 }, { "epoch": 0.4271039826995855, "grad_norm": 0.4965549111366272, "learning_rate": 8.540540540540542e-06, "loss": 0.4864, "step": 790 }, { "epoch": 0.4276446206523698, "grad_norm": 0.46014106273651123, "learning_rate": 8.551351351351352e-06, "loss": 0.5041, "step": 791 }, { "epoch": 0.4281852586051541, "grad_norm": 0.4505784809589386, "learning_rate": 8.562162162162162e-06, "loss": 0.4712, "step": 792 }, { "epoch": 0.42872589655793836, "grad_norm": 0.42782407999038696, "learning_rate": 8.572972972972975e-06, "loss": 0.4536, "step": 793 }, { "epoch": 0.42926653451072266, "grad_norm": 0.47117555141448975, "learning_rate": 8.583783783783785e-06, "loss": 0.4968, "step": 794 }, { "epoch": 0.4298071724635069, "grad_norm": 0.47286736965179443, "learning_rate": 8.594594594594595e-06, "loss": 0.5189, "step": 795 }, { "epoch": 0.4303478104162912, "grad_norm": 0.4804339110851288, "learning_rate": 8.605405405405406e-06, "loss": 0.4976, "step": 796 }, { "epoch": 0.4308884483690755, "grad_norm": 0.4585397243499756, "learning_rate": 8.616216216216216e-06, "loss": 0.4704, "step": 797 }, { "epoch": 0.4314290863218598, "grad_norm": 0.46230971813201904, "learning_rate": 8.627027027027027e-06, "loss": 0.464, "step": 798 }, { "epoch": 0.4319697242746441, "grad_norm": 0.47910165786743164, "learning_rate": 8.637837837837837e-06, "loss": 0.49, "step": 799 }, { "epoch": 0.4325103622274284, "grad_norm": 0.4503049850463867, "learning_rate": 8.64864864864865e-06, "loss": 0.468, "step": 800 }, { "epoch": 0.43305100018021264, "grad_norm": 0.4591892659664154, "learning_rate": 8.65945945945946e-06, "loss": 0.4667, "step": 801 }, { "epoch": 0.43359163813299695, "grad_norm": 0.4422140121459961, "learning_rate": 8.670270270270272e-06, "loss": 0.4797, "step": 802 }, { "epoch": 0.4341322760857812, "grad_norm": 0.39958667755126953, "learning_rate": 8.681081081081082e-06, "loss": 0.4966, "step": 803 }, { "epoch": 0.4346729140385655, "grad_norm": 0.41113945841789246, "learning_rate": 8.691891891891893e-06, "loss": 0.4689, "step": 804 }, { "epoch": 0.4352135519913498, "grad_norm": 0.412852942943573, "learning_rate": 8.702702702702703e-06, "loss": 0.4716, "step": 805 }, { "epoch": 0.43575418994413406, "grad_norm": 0.39494407176971436, "learning_rate": 8.713513513513514e-06, "loss": 0.4642, "step": 806 }, { "epoch": 0.43629482789691837, "grad_norm": 0.48442819714546204, "learning_rate": 8.724324324324324e-06, "loss": 0.5024, "step": 807 }, { "epoch": 0.4368354658497027, "grad_norm": 0.435715913772583, "learning_rate": 8.735135135135136e-06, "loss": 0.4853, "step": 808 }, { "epoch": 0.4373761038024869, "grad_norm": 0.48299872875213623, "learning_rate": 8.745945945945947e-06, "loss": 0.4968, "step": 809 }, { "epoch": 0.43791674175527123, "grad_norm": 0.44625693559646606, "learning_rate": 8.756756756756759e-06, "loss": 0.5115, "step": 810 }, { "epoch": 0.4384573797080555, "grad_norm": 0.4905652403831482, "learning_rate": 8.767567567567569e-06, "loss": 0.5058, "step": 811 }, { "epoch": 0.4389980176608398, "grad_norm": 0.5242207050323486, "learning_rate": 8.77837837837838e-06, "loss": 0.4825, "step": 812 }, { "epoch": 0.4395386556136241, "grad_norm": 0.3821278214454651, "learning_rate": 8.78918918918919e-06, "loss": 0.4632, "step": 813 }, { "epoch": 0.44007929356640835, "grad_norm": 0.5015333890914917, "learning_rate": 8.8e-06, "loss": 0.4676, "step": 814 }, { "epoch": 0.44061993151919265, "grad_norm": 0.40313366055488586, "learning_rate": 8.810810810810811e-06, "loss": 0.4801, "step": 815 }, { "epoch": 0.44116056947197696, "grad_norm": 0.44474324584007263, "learning_rate": 8.821621621621623e-06, "loss": 0.4948, "step": 816 }, { "epoch": 0.4417012074247612, "grad_norm": 0.38576412200927734, "learning_rate": 8.832432432432433e-06, "loss": 0.4921, "step": 817 }, { "epoch": 0.4422418453775455, "grad_norm": 0.4141075611114502, "learning_rate": 8.843243243243244e-06, "loss": 0.4777, "step": 818 }, { "epoch": 0.44278248333032977, "grad_norm": 0.39242422580718994, "learning_rate": 8.854054054054054e-06, "loss": 0.5041, "step": 819 }, { "epoch": 0.4433231212831141, "grad_norm": 0.3840166926383972, "learning_rate": 8.864864864864866e-06, "loss": 0.4682, "step": 820 }, { "epoch": 0.4438637592358984, "grad_norm": 0.40388667583465576, "learning_rate": 8.875675675675677e-06, "loss": 0.5043, "step": 821 }, { "epoch": 0.44440439718868263, "grad_norm": 0.39842742681503296, "learning_rate": 8.886486486486487e-06, "loss": 0.4772, "step": 822 }, { "epoch": 0.44494503514146694, "grad_norm": 0.45574501156806946, "learning_rate": 8.897297297297298e-06, "loss": 0.5023, "step": 823 }, { "epoch": 0.4454856730942512, "grad_norm": 0.43097352981567383, "learning_rate": 8.908108108108108e-06, "loss": 0.4865, "step": 824 }, { "epoch": 0.4460263110470355, "grad_norm": 0.45888960361480713, "learning_rate": 8.91891891891892e-06, "loss": 0.4877, "step": 825 }, { "epoch": 0.4465669489998198, "grad_norm": 0.4810618758201599, "learning_rate": 8.92972972972973e-06, "loss": 0.5086, "step": 826 }, { "epoch": 0.44710758695260405, "grad_norm": 0.4267573952674866, "learning_rate": 8.940540540540541e-06, "loss": 0.4821, "step": 827 }, { "epoch": 0.44764822490538836, "grad_norm": 0.41196686029434204, "learning_rate": 8.951351351351352e-06, "loss": 0.4911, "step": 828 }, { "epoch": 0.44818886285817267, "grad_norm": 0.5312232971191406, "learning_rate": 8.962162162162162e-06, "loss": 0.4836, "step": 829 }, { "epoch": 0.4487295008109569, "grad_norm": 0.42353129386901855, "learning_rate": 8.972972972972974e-06, "loss": 0.5265, "step": 830 }, { "epoch": 0.4492701387637412, "grad_norm": 0.45074447989463806, "learning_rate": 8.983783783783785e-06, "loss": 0.4786, "step": 831 }, { "epoch": 0.4498107767165255, "grad_norm": 0.43651434779167175, "learning_rate": 8.994594594594595e-06, "loss": 0.4862, "step": 832 }, { "epoch": 0.4503514146693098, "grad_norm": 0.4827129542827606, "learning_rate": 9.005405405405407e-06, "loss": 0.4819, "step": 833 }, { "epoch": 0.4508920526220941, "grad_norm": 0.5431792736053467, "learning_rate": 9.016216216216218e-06, "loss": 0.4788, "step": 834 }, { "epoch": 0.45143269057487834, "grad_norm": 0.4722309708595276, "learning_rate": 9.027027027027028e-06, "loss": 0.4944, "step": 835 }, { "epoch": 0.45197332852766264, "grad_norm": 0.5574517846107483, "learning_rate": 9.037837837837838e-06, "loss": 0.488, "step": 836 }, { "epoch": 0.45251396648044695, "grad_norm": 0.5468060970306396, "learning_rate": 9.048648648648649e-06, "loss": 0.4972, "step": 837 }, { "epoch": 0.4530546044332312, "grad_norm": 0.4097936451435089, "learning_rate": 9.05945945945946e-06, "loss": 0.4536, "step": 838 }, { "epoch": 0.4535952423860155, "grad_norm": 0.44951799511909485, "learning_rate": 9.07027027027027e-06, "loss": 0.5394, "step": 839 }, { "epoch": 0.45413588033879976, "grad_norm": 0.5354393124580383, "learning_rate": 9.081081081081082e-06, "loss": 0.4717, "step": 840 }, { "epoch": 0.45467651829158406, "grad_norm": 0.38110360503196716, "learning_rate": 9.091891891891892e-06, "loss": 0.4923, "step": 841 }, { "epoch": 0.45521715624436837, "grad_norm": 0.5034909248352051, "learning_rate": 9.102702702702704e-06, "loss": 0.5018, "step": 842 }, { "epoch": 0.4557577941971526, "grad_norm": 0.42320170998573303, "learning_rate": 9.113513513513515e-06, "loss": 0.4791, "step": 843 }, { "epoch": 0.45629843214993693, "grad_norm": 0.4968869388103485, "learning_rate": 9.124324324324325e-06, "loss": 0.5325, "step": 844 }, { "epoch": 0.45683907010272123, "grad_norm": 0.4150351583957672, "learning_rate": 9.135135135135136e-06, "loss": 0.5068, "step": 845 }, { "epoch": 0.4573797080555055, "grad_norm": 0.541016161441803, "learning_rate": 9.145945945945946e-06, "loss": 0.4865, "step": 846 }, { "epoch": 0.4579203460082898, "grad_norm": 0.418040931224823, "learning_rate": 9.156756756756757e-06, "loss": 0.4926, "step": 847 }, { "epoch": 0.45846098396107404, "grad_norm": 0.5246473550796509, "learning_rate": 9.167567567567569e-06, "loss": 0.4897, "step": 848 }, { "epoch": 0.45900162191385835, "grad_norm": 0.4080889821052551, "learning_rate": 9.178378378378379e-06, "loss": 0.5093, "step": 849 }, { "epoch": 0.45954225986664266, "grad_norm": 0.5630201697349548, "learning_rate": 9.189189189189191e-06, "loss": 0.4847, "step": 850 }, { "epoch": 0.4600828978194269, "grad_norm": 0.4602736234664917, "learning_rate": 9.200000000000002e-06, "loss": 0.4777, "step": 851 }, { "epoch": 0.4606235357722112, "grad_norm": 0.5108170509338379, "learning_rate": 9.210810810810812e-06, "loss": 0.4929, "step": 852 }, { "epoch": 0.4611641737249955, "grad_norm": 0.4432177245616913, "learning_rate": 9.221621621621623e-06, "loss": 0.475, "step": 853 }, { "epoch": 0.46170481167777977, "grad_norm": 0.46254798769950867, "learning_rate": 9.232432432432433e-06, "loss": 0.5245, "step": 854 }, { "epoch": 0.4622454496305641, "grad_norm": 0.45064395666122437, "learning_rate": 9.243243243243243e-06, "loss": 0.4997, "step": 855 }, { "epoch": 0.4627860875833483, "grad_norm": 0.5564315915107727, "learning_rate": 9.254054054054054e-06, "loss": 0.5174, "step": 856 }, { "epoch": 0.46332672553613263, "grad_norm": 0.40368762612342834, "learning_rate": 9.264864864864866e-06, "loss": 0.4819, "step": 857 }, { "epoch": 0.46386736348891694, "grad_norm": 0.528178334236145, "learning_rate": 9.275675675675676e-06, "loss": 0.5142, "step": 858 }, { "epoch": 0.4644080014417012, "grad_norm": 0.39754781126976013, "learning_rate": 9.286486486486487e-06, "loss": 0.4875, "step": 859 }, { "epoch": 0.4649486393944855, "grad_norm": 0.49752360582351685, "learning_rate": 9.297297297297299e-06, "loss": 0.529, "step": 860 }, { "epoch": 0.4654892773472698, "grad_norm": 0.4451174736022949, "learning_rate": 9.30810810810811e-06, "loss": 0.508, "step": 861 }, { "epoch": 0.46602991530005405, "grad_norm": 0.42956191301345825, "learning_rate": 9.31891891891892e-06, "loss": 0.4789, "step": 862 }, { "epoch": 0.46657055325283836, "grad_norm": 0.5587397813796997, "learning_rate": 9.32972972972973e-06, "loss": 0.4988, "step": 863 }, { "epoch": 0.4671111912056226, "grad_norm": 0.47554630041122437, "learning_rate": 9.34054054054054e-06, "loss": 0.4953, "step": 864 }, { "epoch": 0.4676518291584069, "grad_norm": 0.47560325264930725, "learning_rate": 9.351351351351353e-06, "loss": 0.4597, "step": 865 }, { "epoch": 0.4681924671111912, "grad_norm": 0.5811336636543274, "learning_rate": 9.362162162162163e-06, "loss": 0.5106, "step": 866 }, { "epoch": 0.4687331050639755, "grad_norm": 0.4019627571105957, "learning_rate": 9.372972972972974e-06, "loss": 0.509, "step": 867 }, { "epoch": 0.4692737430167598, "grad_norm": 0.46386411786079407, "learning_rate": 9.383783783783784e-06, "loss": 0.4927, "step": 868 }, { "epoch": 0.4698143809695441, "grad_norm": 0.4141891300678253, "learning_rate": 9.394594594594595e-06, "loss": 0.4789, "step": 869 }, { "epoch": 0.47035501892232834, "grad_norm": 0.45206698775291443, "learning_rate": 9.405405405405407e-06, "loss": 0.5101, "step": 870 }, { "epoch": 0.47089565687511264, "grad_norm": 0.44673988223075867, "learning_rate": 9.416216216216217e-06, "loss": 0.4695, "step": 871 }, { "epoch": 0.4714362948278969, "grad_norm": 0.3691551089286804, "learning_rate": 9.427027027027028e-06, "loss": 0.4872, "step": 872 }, { "epoch": 0.4719769327806812, "grad_norm": 0.46097657084465027, "learning_rate": 9.437837837837838e-06, "loss": 0.499, "step": 873 }, { "epoch": 0.4725175707334655, "grad_norm": 0.4701019823551178, "learning_rate": 9.44864864864865e-06, "loss": 0.4717, "step": 874 }, { "epoch": 0.47305820868624976, "grad_norm": 0.40333500504493713, "learning_rate": 9.45945945945946e-06, "loss": 0.4815, "step": 875 }, { "epoch": 0.47359884663903407, "grad_norm": 0.5153550505638123, "learning_rate": 9.470270270270271e-06, "loss": 0.4859, "step": 876 }, { "epoch": 0.47413948459181837, "grad_norm": 0.43943801522254944, "learning_rate": 9.481081081081081e-06, "loss": 0.495, "step": 877 }, { "epoch": 0.4746801225446026, "grad_norm": 0.4598594307899475, "learning_rate": 9.491891891891892e-06, "loss": 0.4883, "step": 878 }, { "epoch": 0.47522076049738693, "grad_norm": 0.49989616870880127, "learning_rate": 9.502702702702702e-06, "loss": 0.4905, "step": 879 }, { "epoch": 0.4757613984501712, "grad_norm": 0.4743301272392273, "learning_rate": 9.513513513513514e-06, "loss": 0.5096, "step": 880 }, { "epoch": 0.4763020364029555, "grad_norm": 0.47162696719169617, "learning_rate": 9.524324324324325e-06, "loss": 0.5105, "step": 881 }, { "epoch": 0.4768426743557398, "grad_norm": 0.4532954692840576, "learning_rate": 9.535135135135137e-06, "loss": 0.493, "step": 882 }, { "epoch": 0.47738331230852404, "grad_norm": 0.4708048403263092, "learning_rate": 9.545945945945947e-06, "loss": 0.4864, "step": 883 }, { "epoch": 0.47792395026130835, "grad_norm": 0.49821099638938904, "learning_rate": 9.556756756756758e-06, "loss": 0.4927, "step": 884 }, { "epoch": 0.47846458821409266, "grad_norm": 0.4563126564025879, "learning_rate": 9.567567567567568e-06, "loss": 0.4955, "step": 885 }, { "epoch": 0.4790052261668769, "grad_norm": 0.600861132144928, "learning_rate": 9.578378378378379e-06, "loss": 0.4744, "step": 886 }, { "epoch": 0.4795458641196612, "grad_norm": 0.600531816482544, "learning_rate": 9.589189189189189e-06, "loss": 0.5105, "step": 887 }, { "epoch": 0.48008650207244546, "grad_norm": 0.44443172216415405, "learning_rate": 9.600000000000001e-06, "loss": 0.5067, "step": 888 }, { "epoch": 0.48062714002522977, "grad_norm": 0.5058892369270325, "learning_rate": 9.610810810810812e-06, "loss": 0.4788, "step": 889 }, { "epoch": 0.4811677779780141, "grad_norm": 0.4944954812526703, "learning_rate": 9.621621621621622e-06, "loss": 0.5049, "step": 890 }, { "epoch": 0.48170841593079833, "grad_norm": 0.48458966612815857, "learning_rate": 9.632432432432434e-06, "loss": 0.497, "step": 891 }, { "epoch": 0.48224905388358263, "grad_norm": 0.4858192503452301, "learning_rate": 9.643243243243245e-06, "loss": 0.5056, "step": 892 }, { "epoch": 0.4827896918363669, "grad_norm": 0.5097047686576843, "learning_rate": 9.654054054054055e-06, "loss": 0.5185, "step": 893 }, { "epoch": 0.4833303297891512, "grad_norm": 0.441301167011261, "learning_rate": 9.664864864864866e-06, "loss": 0.4605, "step": 894 }, { "epoch": 0.4838709677419355, "grad_norm": 0.47697439789772034, "learning_rate": 9.675675675675676e-06, "loss": 0.4996, "step": 895 }, { "epoch": 0.48441160569471975, "grad_norm": 0.4510503113269806, "learning_rate": 9.686486486486486e-06, "loss": 0.4805, "step": 896 }, { "epoch": 0.48495224364750406, "grad_norm": 0.4589500427246094, "learning_rate": 9.697297297297299e-06, "loss": 0.492, "step": 897 }, { "epoch": 0.48549288160028836, "grad_norm": 0.43563947081565857, "learning_rate": 9.708108108108109e-06, "loss": 0.5112, "step": 898 }, { "epoch": 0.4860335195530726, "grad_norm": 0.4395964741706848, "learning_rate": 9.71891891891892e-06, "loss": 0.5, "step": 899 }, { "epoch": 0.4865741575058569, "grad_norm": 0.44560515880584717, "learning_rate": 9.729729729729732e-06, "loss": 0.4992, "step": 900 }, { "epoch": 0.48711479545864117, "grad_norm": 0.4070109724998474, "learning_rate": 9.740540540540542e-06, "loss": 0.4875, "step": 901 }, { "epoch": 0.4876554334114255, "grad_norm": 0.47943007946014404, "learning_rate": 9.751351351351352e-06, "loss": 0.4986, "step": 902 }, { "epoch": 0.4881960713642098, "grad_norm": 0.3845299184322357, "learning_rate": 9.762162162162163e-06, "loss": 0.4804, "step": 903 }, { "epoch": 0.48873670931699403, "grad_norm": 0.43732935190200806, "learning_rate": 9.772972972972973e-06, "loss": 0.5169, "step": 904 }, { "epoch": 0.48927734726977834, "grad_norm": 0.4056658446788788, "learning_rate": 9.783783783783785e-06, "loss": 0.4862, "step": 905 }, { "epoch": 0.48981798522256265, "grad_norm": 0.4409744441509247, "learning_rate": 9.794594594594596e-06, "loss": 0.491, "step": 906 }, { "epoch": 0.4903586231753469, "grad_norm": 0.41933363676071167, "learning_rate": 9.805405405405406e-06, "loss": 0.4925, "step": 907 }, { "epoch": 0.4908992611281312, "grad_norm": 0.5303579568862915, "learning_rate": 9.816216216216217e-06, "loss": 0.5048, "step": 908 }, { "epoch": 0.49143989908091545, "grad_norm": 0.40795671939849854, "learning_rate": 9.827027027027027e-06, "loss": 0.4779, "step": 909 }, { "epoch": 0.49198053703369976, "grad_norm": 0.4110850989818573, "learning_rate": 9.83783783783784e-06, "loss": 0.4776, "step": 910 }, { "epoch": 0.49252117498648407, "grad_norm": 0.42300498485565186, "learning_rate": 9.84864864864865e-06, "loss": 0.4823, "step": 911 }, { "epoch": 0.4930618129392683, "grad_norm": 0.4710748791694641, "learning_rate": 9.85945945945946e-06, "loss": 0.4955, "step": 912 }, { "epoch": 0.4936024508920526, "grad_norm": 0.42153269052505493, "learning_rate": 9.87027027027027e-06, "loss": 0.5232, "step": 913 }, { "epoch": 0.49414308884483693, "grad_norm": 0.3920591175556183, "learning_rate": 9.881081081081083e-06, "loss": 0.4944, "step": 914 }, { "epoch": 0.4946837267976212, "grad_norm": 0.39492344856262207, "learning_rate": 9.891891891891893e-06, "loss": 0.5115, "step": 915 }, { "epoch": 0.4952243647504055, "grad_norm": 0.4347766041755676, "learning_rate": 9.902702702702704e-06, "loss": 0.4869, "step": 916 }, { "epoch": 0.49576500270318974, "grad_norm": 0.41567090153694153, "learning_rate": 9.913513513513514e-06, "loss": 0.4873, "step": 917 }, { "epoch": 0.49630564065597405, "grad_norm": 0.3792874813079834, "learning_rate": 9.924324324324324e-06, "loss": 0.494, "step": 918 }, { "epoch": 0.49684627860875835, "grad_norm": 0.47275909781455994, "learning_rate": 9.935135135135135e-06, "loss": 0.504, "step": 919 }, { "epoch": 0.4973869165615426, "grad_norm": 0.4395892918109894, "learning_rate": 9.945945945945947e-06, "loss": 0.4731, "step": 920 }, { "epoch": 0.4979275545143269, "grad_norm": 0.5087149143218994, "learning_rate": 9.956756756756757e-06, "loss": 0.4962, "step": 921 }, { "epoch": 0.4984681924671112, "grad_norm": 0.4961149990558624, "learning_rate": 9.96756756756757e-06, "loss": 0.505, "step": 922 }, { "epoch": 0.49900883041989547, "grad_norm": 0.4813699424266815, "learning_rate": 9.97837837837838e-06, "loss": 0.4839, "step": 923 }, { "epoch": 0.4995494683726798, "grad_norm": 0.5078656077384949, "learning_rate": 9.98918918918919e-06, "loss": 0.4867, "step": 924 }, { "epoch": 0.500090106325464, "grad_norm": 0.4412589371204376, "learning_rate": 1e-05, "loss": 0.5309, "step": 925 }, { "epoch": 0.5006307442782484, "grad_norm": 0.46785590052604675, "learning_rate": 9.999999643554535e-06, "loss": 0.4825, "step": 926 }, { "epoch": 0.5011713822310326, "grad_norm": 0.5026047229766846, "learning_rate": 9.999998574218182e-06, "loss": 0.4846, "step": 927 }, { "epoch": 0.5017120201838169, "grad_norm": 0.42060837149620056, "learning_rate": 9.9999967919911e-06, "loss": 0.4552, "step": 928 }, { "epoch": 0.5022526581366011, "grad_norm": 0.48285382986068726, "learning_rate": 9.999994296873541e-06, "loss": 0.4936, "step": 929 }, { "epoch": 0.5027932960893855, "grad_norm": 0.3900724947452545, "learning_rate": 9.999991088865861e-06, "loss": 0.4824, "step": 930 }, { "epoch": 0.5033339340421698, "grad_norm": 0.45784997940063477, "learning_rate": 9.999987167968517e-06, "loss": 0.4894, "step": 931 }, { "epoch": 0.503874571994954, "grad_norm": 0.4959925711154938, "learning_rate": 9.999982534182068e-06, "loss": 0.4813, "step": 932 }, { "epoch": 0.5044152099477384, "grad_norm": 0.39042192697525024, "learning_rate": 9.999977187507175e-06, "loss": 0.5168, "step": 933 }, { "epoch": 0.5049558479005226, "grad_norm": 0.4453545808792114, "learning_rate": 9.9999711279446e-06, "loss": 0.471, "step": 934 }, { "epoch": 0.5054964858533069, "grad_norm": 0.4667404890060425, "learning_rate": 9.999964355495207e-06, "loss": 0.4893, "step": 935 }, { "epoch": 0.5060371238060912, "grad_norm": 0.39634689688682556, "learning_rate": 9.999956870159961e-06, "loss": 0.4771, "step": 936 }, { "epoch": 0.5065777617588755, "grad_norm": 0.5295143723487854, "learning_rate": 9.999948671939931e-06, "loss": 0.5007, "step": 937 }, { "epoch": 0.5071183997116597, "grad_norm": 0.4279506206512451, "learning_rate": 9.999939760836287e-06, "loss": 0.4681, "step": 938 }, { "epoch": 0.5076590376644441, "grad_norm": 0.47725000977516174, "learning_rate": 9.999930136850293e-06, "loss": 0.5091, "step": 939 }, { "epoch": 0.5081996756172283, "grad_norm": 0.4600881338119507, "learning_rate": 9.999919799983327e-06, "loss": 0.499, "step": 940 }, { "epoch": 0.5087403135700126, "grad_norm": 0.43939170241355896, "learning_rate": 9.999908750236862e-06, "loss": 0.5016, "step": 941 }, { "epoch": 0.509280951522797, "grad_norm": 0.4821349084377289, "learning_rate": 9.999896987612473e-06, "loss": 0.4762, "step": 942 }, { "epoch": 0.5098215894755812, "grad_norm": 0.4333769381046295, "learning_rate": 9.999884512111837e-06, "loss": 0.4742, "step": 943 }, { "epoch": 0.5103622274283655, "grad_norm": 0.4694845974445343, "learning_rate": 9.999871323736732e-06, "loss": 0.4831, "step": 944 }, { "epoch": 0.5109028653811497, "grad_norm": 0.4363771378993988, "learning_rate": 9.99985742248904e-06, "loss": 0.4514, "step": 945 }, { "epoch": 0.5114435033339341, "grad_norm": 0.45749279856681824, "learning_rate": 9.99984280837074e-06, "loss": 0.4961, "step": 946 }, { "epoch": 0.5119841412867183, "grad_norm": 0.4644739329814911, "learning_rate": 9.999827481383919e-06, "loss": 0.5025, "step": 947 }, { "epoch": 0.5125247792395026, "grad_norm": 0.45313525199890137, "learning_rate": 9.999811441530761e-06, "loss": 0.5196, "step": 948 }, { "epoch": 0.5130654171922869, "grad_norm": 0.38895201683044434, "learning_rate": 9.999794688813551e-06, "loss": 0.4669, "step": 949 }, { "epoch": 0.5136060551450712, "grad_norm": 0.5269231796264648, "learning_rate": 9.999777223234682e-06, "loss": 0.5097, "step": 950 }, { "epoch": 0.5141466930978554, "grad_norm": 0.39397677779197693, "learning_rate": 9.99975904479664e-06, "loss": 0.5056, "step": 951 }, { "epoch": 0.5146873310506398, "grad_norm": 0.4682712256908417, "learning_rate": 9.99974015350202e-06, "loss": 0.4958, "step": 952 }, { "epoch": 0.515227969003424, "grad_norm": 0.37193721532821655, "learning_rate": 9.999720549353513e-06, "loss": 0.4654, "step": 953 }, { "epoch": 0.5157686069562083, "grad_norm": 0.4029079079627991, "learning_rate": 9.999700232353916e-06, "loss": 0.4991, "step": 954 }, { "epoch": 0.5163092449089927, "grad_norm": 0.4342360198497772, "learning_rate": 9.999679202506126e-06, "loss": 0.5069, "step": 955 }, { "epoch": 0.5168498828617769, "grad_norm": 0.4561976492404938, "learning_rate": 9.999657459813137e-06, "loss": 0.4831, "step": 956 }, { "epoch": 0.5173905208145612, "grad_norm": 0.4105386734008789, "learning_rate": 9.999635004278054e-06, "loss": 0.4802, "step": 957 }, { "epoch": 0.5179311587673455, "grad_norm": 0.4714571535587311, "learning_rate": 9.999611835904078e-06, "loss": 0.4846, "step": 958 }, { "epoch": 0.5184717967201298, "grad_norm": 0.41593897342681885, "learning_rate": 9.99958795469451e-06, "loss": 0.4963, "step": 959 }, { "epoch": 0.519012434672914, "grad_norm": 0.5482795834541321, "learning_rate": 9.999563360652757e-06, "loss": 0.4869, "step": 960 }, { "epoch": 0.5195530726256983, "grad_norm": 0.4962485134601593, "learning_rate": 9.999538053782323e-06, "loss": 0.5279, "step": 961 }, { "epoch": 0.5200937105784826, "grad_norm": 0.5878384113311768, "learning_rate": 9.99951203408682e-06, "loss": 0.518, "step": 962 }, { "epoch": 0.5206343485312669, "grad_norm": 0.4186796545982361, "learning_rate": 9.999485301569955e-06, "loss": 0.4825, "step": 963 }, { "epoch": 0.5211749864840511, "grad_norm": 0.5061416029930115, "learning_rate": 9.999457856235542e-06, "loss": 0.4795, "step": 964 }, { "epoch": 0.5217156244368355, "grad_norm": 0.5024672746658325, "learning_rate": 9.999429698087491e-06, "loss": 0.4988, "step": 965 }, { "epoch": 0.5222562623896198, "grad_norm": 0.5579217076301575, "learning_rate": 9.999400827129817e-06, "loss": 0.4675, "step": 966 }, { "epoch": 0.522796900342404, "grad_norm": 0.5686841607093811, "learning_rate": 9.99937124336664e-06, "loss": 0.5074, "step": 967 }, { "epoch": 0.5233375382951884, "grad_norm": 0.4582173824310303, "learning_rate": 9.999340946802173e-06, "loss": 0.4826, "step": 968 }, { "epoch": 0.5238781762479726, "grad_norm": 0.63434898853302, "learning_rate": 9.99930993744074e-06, "loss": 0.4696, "step": 969 }, { "epoch": 0.5244188142007569, "grad_norm": 0.46918338537216187, "learning_rate": 9.99927821528676e-06, "loss": 0.4864, "step": 970 }, { "epoch": 0.5249594521535412, "grad_norm": 0.5041794180870056, "learning_rate": 9.999245780344758e-06, "loss": 0.4982, "step": 971 }, { "epoch": 0.5255000901063255, "grad_norm": 0.5856842994689941, "learning_rate": 9.999212632619356e-06, "loss": 0.4844, "step": 972 }, { "epoch": 0.5260407280591097, "grad_norm": 0.46322301030158997, "learning_rate": 9.999178772115279e-06, "loss": 0.5029, "step": 973 }, { "epoch": 0.526581366011894, "grad_norm": 0.6072745323181152, "learning_rate": 9.999144198837358e-06, "loss": 0.4939, "step": 974 }, { "epoch": 0.5271220039646783, "grad_norm": 0.49522969126701355, "learning_rate": 9.999108912790521e-06, "loss": 0.4633, "step": 975 }, { "epoch": 0.5276626419174626, "grad_norm": 0.48789697885513306, "learning_rate": 9.9990729139798e-06, "loss": 0.5224, "step": 976 }, { "epoch": 0.5282032798702468, "grad_norm": 0.5480666160583496, "learning_rate": 9.999036202410324e-06, "loss": 0.5199, "step": 977 }, { "epoch": 0.5287439178230312, "grad_norm": 0.43815743923187256, "learning_rate": 9.998998778087333e-06, "loss": 0.4733, "step": 978 }, { "epoch": 0.5292845557758155, "grad_norm": 0.41781720519065857, "learning_rate": 9.99896064101616e-06, "loss": 0.4953, "step": 979 }, { "epoch": 0.5298251937285997, "grad_norm": 0.5263339877128601, "learning_rate": 9.99892179120224e-06, "loss": 0.5135, "step": 980 }, { "epoch": 0.5303658316813841, "grad_norm": 0.43066519498825073, "learning_rate": 9.998882228651117e-06, "loss": 0.4939, "step": 981 }, { "epoch": 0.5309064696341683, "grad_norm": 0.4255053400993347, "learning_rate": 9.99884195336843e-06, "loss": 0.4641, "step": 982 }, { "epoch": 0.5314471075869526, "grad_norm": 0.4449794292449951, "learning_rate": 9.998800965359918e-06, "loss": 0.4738, "step": 983 }, { "epoch": 0.5319877455397369, "grad_norm": 0.378813773393631, "learning_rate": 9.99875926463143e-06, "loss": 0.4687, "step": 984 }, { "epoch": 0.5325283834925212, "grad_norm": 0.4286113679409027, "learning_rate": 9.99871685118891e-06, "loss": 0.4721, "step": 985 }, { "epoch": 0.5330690214453054, "grad_norm": 0.37922412157058716, "learning_rate": 9.998673725038401e-06, "loss": 0.4848, "step": 986 }, { "epoch": 0.5336096593980898, "grad_norm": 0.3998044729232788, "learning_rate": 9.998629886186058e-06, "loss": 0.4849, "step": 987 }, { "epoch": 0.534150297350874, "grad_norm": 0.40176922082901, "learning_rate": 9.998585334638128e-06, "loss": 0.5059, "step": 988 }, { "epoch": 0.5346909353036583, "grad_norm": 0.37144216895103455, "learning_rate": 9.998540070400966e-06, "loss": 0.5057, "step": 989 }, { "epoch": 0.5352315732564425, "grad_norm": 0.3971169888973236, "learning_rate": 9.998494093481022e-06, "loss": 0.4729, "step": 990 }, { "epoch": 0.5357722112092269, "grad_norm": 0.39043352007865906, "learning_rate": 9.998447403884853e-06, "loss": 0.504, "step": 991 }, { "epoch": 0.5363128491620112, "grad_norm": 0.41439974308013916, "learning_rate": 9.998400001619116e-06, "loss": 0.4721, "step": 992 }, { "epoch": 0.5368534871147954, "grad_norm": 0.4104996621608734, "learning_rate": 9.998351886690569e-06, "loss": 0.4801, "step": 993 }, { "epoch": 0.5373941250675798, "grad_norm": 0.5444105863571167, "learning_rate": 9.998303059106073e-06, "loss": 0.4981, "step": 994 }, { "epoch": 0.537934763020364, "grad_norm": 0.3640936613082886, "learning_rate": 9.998253518872592e-06, "loss": 0.4988, "step": 995 }, { "epoch": 0.5384754009731483, "grad_norm": 0.456035852432251, "learning_rate": 9.998203265997184e-06, "loss": 0.4709, "step": 996 }, { "epoch": 0.5390160389259326, "grad_norm": 0.403956800699234, "learning_rate": 9.998152300487016e-06, "loss": 0.4912, "step": 997 }, { "epoch": 0.5395566768787169, "grad_norm": 0.40325304865837097, "learning_rate": 9.998100622349355e-06, "loss": 0.5039, "step": 998 }, { "epoch": 0.5400973148315011, "grad_norm": 0.3739657402038574, "learning_rate": 9.998048231591572e-06, "loss": 0.4892, "step": 999 }, { "epoch": 0.5406379527842855, "grad_norm": 0.4549963176250458, "learning_rate": 9.997995128221131e-06, "loss": 0.5058, "step": 1000 }, { "epoch": 0.5411785907370698, "grad_norm": 0.44868186116218567, "learning_rate": 9.99794131224561e-06, "loss": 0.4867, "step": 1001 }, { "epoch": 0.541719228689854, "grad_norm": 0.43002334237098694, "learning_rate": 9.997886783672677e-06, "loss": 0.4481, "step": 1002 }, { "epoch": 0.5422598666426384, "grad_norm": 0.4119184613227844, "learning_rate": 9.997831542510107e-06, "loss": 0.4738, "step": 1003 }, { "epoch": 0.5428005045954226, "grad_norm": 0.38203638792037964, "learning_rate": 9.997775588765779e-06, "loss": 0.4523, "step": 1004 }, { "epoch": 0.5433411425482069, "grad_norm": 0.4788086414337158, "learning_rate": 9.997718922447669e-06, "loss": 0.4946, "step": 1005 }, { "epoch": 0.5438817805009911, "grad_norm": 0.38479748368263245, "learning_rate": 9.997661543563855e-06, "loss": 0.4975, "step": 1006 }, { "epoch": 0.5444224184537755, "grad_norm": 0.4428192973136902, "learning_rate": 9.99760345212252e-06, "loss": 0.4737, "step": 1007 }, { "epoch": 0.5449630564065597, "grad_norm": 0.36287838220596313, "learning_rate": 9.997544648131946e-06, "loss": 0.4715, "step": 1008 }, { "epoch": 0.545503694359344, "grad_norm": 0.45783060789108276, "learning_rate": 9.997485131600517e-06, "loss": 0.4992, "step": 1009 }, { "epoch": 0.5460443323121283, "grad_norm": 0.4367043077945709, "learning_rate": 9.99742490253672e-06, "loss": 0.4823, "step": 1010 }, { "epoch": 0.5465849702649126, "grad_norm": 0.4037528932094574, "learning_rate": 9.99736396094914e-06, "loss": 0.4802, "step": 1011 }, { "epoch": 0.5471256082176968, "grad_norm": 0.4337640702724457, "learning_rate": 9.997302306846468e-06, "loss": 0.4825, "step": 1012 }, { "epoch": 0.5476662461704812, "grad_norm": 0.3727249205112457, "learning_rate": 9.997239940237495e-06, "loss": 0.5, "step": 1013 }, { "epoch": 0.5482068841232655, "grad_norm": 0.46426814794540405, "learning_rate": 9.99717686113111e-06, "loss": 0.5231, "step": 1014 }, { "epoch": 0.5487475220760497, "grad_norm": 0.41532108187675476, "learning_rate": 9.99711306953631e-06, "loss": 0.4877, "step": 1015 }, { "epoch": 0.5492881600288341, "grad_norm": 0.404090940952301, "learning_rate": 9.997048565462188e-06, "loss": 0.4797, "step": 1016 }, { "epoch": 0.5498287979816183, "grad_norm": 0.37468329071998596, "learning_rate": 9.996983348917941e-06, "loss": 0.4654, "step": 1017 }, { "epoch": 0.5503694359344026, "grad_norm": 0.42747071385383606, "learning_rate": 9.996917419912869e-06, "loss": 0.4719, "step": 1018 }, { "epoch": 0.5509100738871869, "grad_norm": 0.38618093729019165, "learning_rate": 9.996850778456371e-06, "loss": 0.486, "step": 1019 }, { "epoch": 0.5514507118399712, "grad_norm": 0.4205274283885956, "learning_rate": 9.99678342455795e-06, "loss": 0.4851, "step": 1020 }, { "epoch": 0.5519913497927554, "grad_norm": 0.39830052852630615, "learning_rate": 9.996715358227208e-06, "loss": 0.4658, "step": 1021 }, { "epoch": 0.5525319877455397, "grad_norm": 0.4612605571746826, "learning_rate": 9.996646579473848e-06, "loss": 0.5131, "step": 1022 }, { "epoch": 0.553072625698324, "grad_norm": 0.3865067958831787, "learning_rate": 9.99657708830768e-06, "loss": 0.4773, "step": 1023 }, { "epoch": 0.5536132636511083, "grad_norm": 0.48155269026756287, "learning_rate": 9.99650688473861e-06, "loss": 0.4887, "step": 1024 }, { "epoch": 0.5541539016038926, "grad_norm": 0.4000060558319092, "learning_rate": 9.996435968776646e-06, "loss": 0.4736, "step": 1025 }, { "epoch": 0.5546945395566769, "grad_norm": 0.3519155979156494, "learning_rate": 9.9963643404319e-06, "loss": 0.495, "step": 1026 }, { "epoch": 0.5552351775094612, "grad_norm": 0.4599016010761261, "learning_rate": 9.99629199971459e-06, "loss": 0.5075, "step": 1027 }, { "epoch": 0.5557758154622454, "grad_norm": 0.4261663258075714, "learning_rate": 9.996218946635021e-06, "loss": 0.4885, "step": 1028 }, { "epoch": 0.5563164534150298, "grad_norm": 0.433912992477417, "learning_rate": 9.996145181203616e-06, "loss": 0.5027, "step": 1029 }, { "epoch": 0.556857091367814, "grad_norm": 0.4443386495113373, "learning_rate": 9.996070703430888e-06, "loss": 0.4919, "step": 1030 }, { "epoch": 0.5573977293205983, "grad_norm": 0.36704230308532715, "learning_rate": 9.995995513327459e-06, "loss": 0.4832, "step": 1031 }, { "epoch": 0.5579383672733826, "grad_norm": 0.41120779514312744, "learning_rate": 9.995919610904045e-06, "loss": 0.4813, "step": 1032 }, { "epoch": 0.5584790052261669, "grad_norm": 0.42510706186294556, "learning_rate": 9.995842996171475e-06, "loss": 0.4859, "step": 1033 }, { "epoch": 0.5590196431789511, "grad_norm": 0.38486090302467346, "learning_rate": 9.995765669140668e-06, "loss": 0.4739, "step": 1034 }, { "epoch": 0.5595602811317354, "grad_norm": 0.42321518063545227, "learning_rate": 9.995687629822647e-06, "loss": 0.5091, "step": 1035 }, { "epoch": 0.5601009190845198, "grad_norm": 0.4389384388923645, "learning_rate": 9.995608878228544e-06, "loss": 0.4745, "step": 1036 }, { "epoch": 0.560641557037304, "grad_norm": 0.41088220477104187, "learning_rate": 9.995529414369582e-06, "loss": 0.4879, "step": 1037 }, { "epoch": 0.5611821949900883, "grad_norm": 0.4821391999721527, "learning_rate": 9.995449238257097e-06, "loss": 0.4993, "step": 1038 }, { "epoch": 0.5617228329428726, "grad_norm": 0.39829981327056885, "learning_rate": 9.995368349902514e-06, "loss": 0.4885, "step": 1039 }, { "epoch": 0.5622634708956569, "grad_norm": 0.48630595207214355, "learning_rate": 9.99528674931737e-06, "loss": 0.4893, "step": 1040 }, { "epoch": 0.5628041088484411, "grad_norm": 0.3742629587650299, "learning_rate": 9.9952044365133e-06, "loss": 0.4858, "step": 1041 }, { "epoch": 0.5633447468012255, "grad_norm": 0.4473123550415039, "learning_rate": 9.995121411502037e-06, "loss": 0.4932, "step": 1042 }, { "epoch": 0.5638853847540097, "grad_norm": 0.44922560453414917, "learning_rate": 9.995037674295419e-06, "loss": 0.5062, "step": 1043 }, { "epoch": 0.564426022706794, "grad_norm": 0.43669557571411133, "learning_rate": 9.994953224905387e-06, "loss": 0.4833, "step": 1044 }, { "epoch": 0.5649666606595783, "grad_norm": 0.4496200382709503, "learning_rate": 9.99486806334398e-06, "loss": 0.4857, "step": 1045 }, { "epoch": 0.5655072986123626, "grad_norm": 0.47932863235473633, "learning_rate": 9.994782189623342e-06, "loss": 0.4964, "step": 1046 }, { "epoch": 0.5660479365651468, "grad_norm": 0.41345056891441345, "learning_rate": 9.994695603755714e-06, "loss": 0.4876, "step": 1047 }, { "epoch": 0.5665885745179312, "grad_norm": 0.38490182161331177, "learning_rate": 9.994608305753443e-06, "loss": 0.5018, "step": 1048 }, { "epoch": 0.5671292124707155, "grad_norm": 0.4293023943901062, "learning_rate": 9.994520295628976e-06, "loss": 0.4534, "step": 1049 }, { "epoch": 0.5676698504234997, "grad_norm": 0.4372541010379791, "learning_rate": 9.994431573394861e-06, "loss": 0.5271, "step": 1050 }, { "epoch": 0.568210488376284, "grad_norm": 0.4250541031360626, "learning_rate": 9.994342139063748e-06, "loss": 0.4934, "step": 1051 }, { "epoch": 0.5687511263290683, "grad_norm": 0.4278985261917114, "learning_rate": 9.994251992648386e-06, "loss": 0.5077, "step": 1052 }, { "epoch": 0.5692917642818526, "grad_norm": 0.4010027348995209, "learning_rate": 9.994161134161635e-06, "loss": 0.4575, "step": 1053 }, { "epoch": 0.5698324022346368, "grad_norm": 0.40400251746177673, "learning_rate": 9.99406956361644e-06, "loss": 0.4913, "step": 1054 }, { "epoch": 0.5703730401874212, "grad_norm": 0.42402681708335876, "learning_rate": 9.993977281025862e-06, "loss": 0.4637, "step": 1055 }, { "epoch": 0.5709136781402054, "grad_norm": 0.5451776385307312, "learning_rate": 9.99388428640306e-06, "loss": 0.5021, "step": 1056 }, { "epoch": 0.5714543160929897, "grad_norm": 0.40865638852119446, "learning_rate": 9.99379057976129e-06, "loss": 0.4933, "step": 1057 }, { "epoch": 0.571994954045774, "grad_norm": 0.457672655582428, "learning_rate": 9.993696161113913e-06, "loss": 0.4834, "step": 1058 }, { "epoch": 0.5725355919985583, "grad_norm": 0.4288524389266968, "learning_rate": 9.993601030474392e-06, "loss": 0.5025, "step": 1059 }, { "epoch": 0.5730762299513426, "grad_norm": 0.4974946081638336, "learning_rate": 9.993505187856289e-06, "loss": 0.494, "step": 1060 }, { "epoch": 0.5736168679041269, "grad_norm": 0.4515710771083832, "learning_rate": 9.99340863327327e-06, "loss": 0.4805, "step": 1061 }, { "epoch": 0.5741575058569112, "grad_norm": 0.44452914595603943, "learning_rate": 9.993311366739103e-06, "loss": 0.4488, "step": 1062 }, { "epoch": 0.5746981438096954, "grad_norm": 0.447654664516449, "learning_rate": 9.993213388267653e-06, "loss": 0.454, "step": 1063 }, { "epoch": 0.5752387817624798, "grad_norm": 0.5111740827560425, "learning_rate": 9.993114697872894e-06, "loss": 0.4944, "step": 1064 }, { "epoch": 0.575779419715264, "grad_norm": 0.41599783301353455, "learning_rate": 9.993015295568893e-06, "loss": 0.4739, "step": 1065 }, { "epoch": 0.5763200576680483, "grad_norm": 0.4951651990413666, "learning_rate": 9.992915181369823e-06, "loss": 0.4785, "step": 1066 }, { "epoch": 0.5768606956208325, "grad_norm": 0.45784544944763184, "learning_rate": 9.99281435528996e-06, "loss": 0.4757, "step": 1067 }, { "epoch": 0.5774013335736169, "grad_norm": 0.45814573764801025, "learning_rate": 9.99271281734368e-06, "loss": 0.4734, "step": 1068 }, { "epoch": 0.5779419715264011, "grad_norm": 0.422540545463562, "learning_rate": 9.992610567545458e-06, "loss": 0.4826, "step": 1069 }, { "epoch": 0.5784826094791854, "grad_norm": 0.3874336779117584, "learning_rate": 9.992507605909873e-06, "loss": 0.4722, "step": 1070 }, { "epoch": 0.5790232474319698, "grad_norm": 0.42774495482444763, "learning_rate": 9.992403932451605e-06, "loss": 0.4768, "step": 1071 }, { "epoch": 0.579563885384754, "grad_norm": 0.4025377035140991, "learning_rate": 9.992299547185439e-06, "loss": 0.4956, "step": 1072 }, { "epoch": 0.5801045233375383, "grad_norm": 0.3693923354148865, "learning_rate": 9.992194450126252e-06, "loss": 0.4696, "step": 1073 }, { "epoch": 0.5806451612903226, "grad_norm": 0.3981197476387024, "learning_rate": 9.992088641289033e-06, "loss": 0.4505, "step": 1074 }, { "epoch": 0.5811857992431069, "grad_norm": 0.4446949362754822, "learning_rate": 9.991982120688865e-06, "loss": 0.4714, "step": 1075 }, { "epoch": 0.5817264371958911, "grad_norm": 0.4017349183559418, "learning_rate": 9.99187488834094e-06, "loss": 0.4514, "step": 1076 }, { "epoch": 0.5822670751486755, "grad_norm": 0.4286927282810211, "learning_rate": 9.991766944260544e-06, "loss": 0.479, "step": 1077 }, { "epoch": 0.5828077131014597, "grad_norm": 0.41668716073036194, "learning_rate": 9.991658288463067e-06, "loss": 0.4992, "step": 1078 }, { "epoch": 0.583348351054244, "grad_norm": 0.4160311222076416, "learning_rate": 9.991548920964001e-06, "loss": 0.5021, "step": 1079 }, { "epoch": 0.5838889890070283, "grad_norm": 0.46067795157432556, "learning_rate": 9.99143884177894e-06, "loss": 0.5154, "step": 1080 }, { "epoch": 0.5844296269598126, "grad_norm": 0.38376685976982117, "learning_rate": 9.99132805092358e-06, "loss": 0.4836, "step": 1081 }, { "epoch": 0.5849702649125968, "grad_norm": 0.37903881072998047, "learning_rate": 9.991216548413715e-06, "loss": 0.5008, "step": 1082 }, { "epoch": 0.5855109028653811, "grad_norm": 0.41807129979133606, "learning_rate": 9.991104334265246e-06, "loss": 0.4646, "step": 1083 }, { "epoch": 0.5860515408181655, "grad_norm": 0.46291136741638184, "learning_rate": 9.99099140849417e-06, "loss": 0.4829, "step": 1084 }, { "epoch": 0.5865921787709497, "grad_norm": 0.4093681573867798, "learning_rate": 9.990877771116588e-06, "loss": 0.465, "step": 1085 }, { "epoch": 0.587132816723734, "grad_norm": 0.4017224609851837, "learning_rate": 9.990763422148703e-06, "loss": 0.4684, "step": 1086 }, { "epoch": 0.5876734546765183, "grad_norm": 0.49730250239372253, "learning_rate": 9.990648361606815e-06, "loss": 0.4807, "step": 1087 }, { "epoch": 0.5882140926293026, "grad_norm": 0.4142555892467499, "learning_rate": 9.990532589507336e-06, "loss": 0.4974, "step": 1088 }, { "epoch": 0.5887547305820868, "grad_norm": 0.4138866364955902, "learning_rate": 9.990416105866768e-06, "loss": 0.4708, "step": 1089 }, { "epoch": 0.5892953685348712, "grad_norm": 0.4031008780002594, "learning_rate": 9.99029891070172e-06, "loss": 0.459, "step": 1090 }, { "epoch": 0.5898360064876554, "grad_norm": 0.4528948664665222, "learning_rate": 9.9901810040289e-06, "loss": 0.481, "step": 1091 }, { "epoch": 0.5903766444404397, "grad_norm": 0.4991556406021118, "learning_rate": 9.990062385865121e-06, "loss": 0.515, "step": 1092 }, { "epoch": 0.590917282393224, "grad_norm": 0.39666545391082764, "learning_rate": 9.989943056227294e-06, "loss": 0.4718, "step": 1093 }, { "epoch": 0.5914579203460083, "grad_norm": 0.4354439973831177, "learning_rate": 9.989823015132433e-06, "loss": 0.4642, "step": 1094 }, { "epoch": 0.5919985582987926, "grad_norm": 0.4511450529098511, "learning_rate": 9.989702262597656e-06, "loss": 0.5067, "step": 1095 }, { "epoch": 0.5925391962515769, "grad_norm": 0.42287683486938477, "learning_rate": 9.989580798640175e-06, "loss": 0.4525, "step": 1096 }, { "epoch": 0.5930798342043612, "grad_norm": 0.38862234354019165, "learning_rate": 9.98945862327731e-06, "loss": 0.4897, "step": 1097 }, { "epoch": 0.5936204721571454, "grad_norm": 0.46156349778175354, "learning_rate": 9.989335736526483e-06, "loss": 0.4876, "step": 1098 }, { "epoch": 0.5941611101099297, "grad_norm": 0.37772494554519653, "learning_rate": 9.989212138405213e-06, "loss": 0.4678, "step": 1099 }, { "epoch": 0.594701748062714, "grad_norm": 0.4161140024662018, "learning_rate": 9.989087828931121e-06, "loss": 0.4855, "step": 1100 }, { "epoch": 0.5952423860154983, "grad_norm": 0.4513489007949829, "learning_rate": 9.988962808121932e-06, "loss": 0.5092, "step": 1101 }, { "epoch": 0.5957830239682825, "grad_norm": 0.4317286014556885, "learning_rate": 9.988837075995472e-06, "loss": 0.4915, "step": 1102 }, { "epoch": 0.5963236619210669, "grad_norm": 0.4530947804450989, "learning_rate": 9.988710632569667e-06, "loss": 0.4924, "step": 1103 }, { "epoch": 0.5968642998738511, "grad_norm": 0.42525094747543335, "learning_rate": 9.988583477862544e-06, "loss": 0.5151, "step": 1104 }, { "epoch": 0.5974049378266354, "grad_norm": 0.41204145550727844, "learning_rate": 9.988455611892237e-06, "loss": 0.4995, "step": 1105 }, { "epoch": 0.5979455757794198, "grad_norm": 0.45464006066322327, "learning_rate": 9.98832703467697e-06, "loss": 0.4783, "step": 1106 }, { "epoch": 0.598486213732204, "grad_norm": 0.3779187500476837, "learning_rate": 9.98819774623508e-06, "loss": 0.4559, "step": 1107 }, { "epoch": 0.5990268516849883, "grad_norm": 0.4304542541503906, "learning_rate": 9.988067746584999e-06, "loss": 0.4827, "step": 1108 }, { "epoch": 0.5995674896377726, "grad_norm": 0.4540359079837799, "learning_rate": 9.987937035745264e-06, "loss": 0.471, "step": 1109 }, { "epoch": 0.6001081275905569, "grad_norm": 0.4767281115055084, "learning_rate": 9.987805613734508e-06, "loss": 0.4547, "step": 1110 }, { "epoch": 0.6006487655433411, "grad_norm": 0.40696877241134644, "learning_rate": 9.987673480571472e-06, "loss": 0.4869, "step": 1111 }, { "epoch": 0.6011894034961254, "grad_norm": 0.3924008011817932, "learning_rate": 9.987540636274995e-06, "loss": 0.4839, "step": 1112 }, { "epoch": 0.6017300414489097, "grad_norm": 0.40512779355049133, "learning_rate": 9.987407080864017e-06, "loss": 0.4998, "step": 1113 }, { "epoch": 0.602270679401694, "grad_norm": 0.4261493384838104, "learning_rate": 9.987272814357579e-06, "loss": 0.5025, "step": 1114 }, { "epoch": 0.6028113173544782, "grad_norm": 0.4489496648311615, "learning_rate": 9.987137836774827e-06, "loss": 0.4764, "step": 1115 }, { "epoch": 0.6033519553072626, "grad_norm": 0.3959837257862091, "learning_rate": 9.987002148135004e-06, "loss": 0.488, "step": 1116 }, { "epoch": 0.6038925932600469, "grad_norm": 0.4422343969345093, "learning_rate": 9.986865748457457e-06, "loss": 0.4584, "step": 1117 }, { "epoch": 0.6044332312128311, "grad_norm": 0.4124646484851837, "learning_rate": 9.986728637761632e-06, "loss": 0.4802, "step": 1118 }, { "epoch": 0.6049738691656155, "grad_norm": 0.4822395443916321, "learning_rate": 9.98659081606708e-06, "loss": 0.456, "step": 1119 }, { "epoch": 0.6055145071183997, "grad_norm": 0.3804273307323456, "learning_rate": 9.986452283393452e-06, "loss": 0.4689, "step": 1120 }, { "epoch": 0.606055145071184, "grad_norm": 0.4936402142047882, "learning_rate": 9.986313039760497e-06, "loss": 0.4651, "step": 1121 }, { "epoch": 0.6065957830239683, "grad_norm": 0.40174707770347595, "learning_rate": 9.98617308518807e-06, "loss": 0.5118, "step": 1122 }, { "epoch": 0.6071364209767526, "grad_norm": 0.4455929398536682, "learning_rate": 9.986032419696126e-06, "loss": 0.4786, "step": 1123 }, { "epoch": 0.6076770589295368, "grad_norm": 0.4902845323085785, "learning_rate": 9.985891043304718e-06, "loss": 0.4959, "step": 1124 }, { "epoch": 0.6082176968823212, "grad_norm": 0.4242980480194092, "learning_rate": 9.985748956034007e-06, "loss": 0.4847, "step": 1125 }, { "epoch": 0.6087583348351054, "grad_norm": 0.4486798942089081, "learning_rate": 9.985606157904249e-06, "loss": 0.5114, "step": 1126 }, { "epoch": 0.6092989727878897, "grad_norm": 0.46665695309638977, "learning_rate": 9.985462648935802e-06, "loss": 0.4665, "step": 1127 }, { "epoch": 0.6098396107406739, "grad_norm": 0.3827839195728302, "learning_rate": 9.985318429149133e-06, "loss": 0.4718, "step": 1128 }, { "epoch": 0.6103802486934583, "grad_norm": 0.5129957795143127, "learning_rate": 9.985173498564799e-06, "loss": 0.5015, "step": 1129 }, { "epoch": 0.6109208866462426, "grad_norm": 0.44158732891082764, "learning_rate": 9.985027857203469e-06, "loss": 0.5041, "step": 1130 }, { "epoch": 0.6114615245990268, "grad_norm": 0.4281488060951233, "learning_rate": 9.984881505085904e-06, "loss": 0.4996, "step": 1131 }, { "epoch": 0.6120021625518112, "grad_norm": 0.4399912655353546, "learning_rate": 9.984734442232972e-06, "loss": 0.5111, "step": 1132 }, { "epoch": 0.6125428005045954, "grad_norm": 0.4394729733467102, "learning_rate": 9.984586668665641e-06, "loss": 0.4912, "step": 1133 }, { "epoch": 0.6130834384573797, "grad_norm": 0.4478180408477783, "learning_rate": 9.984438184404981e-06, "loss": 0.4768, "step": 1134 }, { "epoch": 0.613624076410164, "grad_norm": 0.41149696707725525, "learning_rate": 9.984288989472162e-06, "loss": 0.5033, "step": 1135 }, { "epoch": 0.6141647143629483, "grad_norm": 0.4182077646255493, "learning_rate": 9.984139083888454e-06, "loss": 0.4516, "step": 1136 }, { "epoch": 0.6147053523157325, "grad_norm": 0.4762760400772095, "learning_rate": 9.983988467675234e-06, "loss": 0.5038, "step": 1137 }, { "epoch": 0.6152459902685169, "grad_norm": 0.456305593252182, "learning_rate": 9.983837140853977e-06, "loss": 0.4756, "step": 1138 }, { "epoch": 0.6157866282213011, "grad_norm": 0.4571511745452881, "learning_rate": 9.983685103446253e-06, "loss": 0.4954, "step": 1139 }, { "epoch": 0.6163272661740854, "grad_norm": 0.45587003231048584, "learning_rate": 9.983532355473744e-06, "loss": 0.4619, "step": 1140 }, { "epoch": 0.6168679041268698, "grad_norm": 0.426291286945343, "learning_rate": 9.983378896958228e-06, "loss": 0.499, "step": 1141 }, { "epoch": 0.617408542079654, "grad_norm": 0.4334041178226471, "learning_rate": 9.983224727921584e-06, "loss": 0.4913, "step": 1142 }, { "epoch": 0.6179491800324383, "grad_norm": 0.38864368200302124, "learning_rate": 9.983069848385794e-06, "loss": 0.4726, "step": 1143 }, { "epoch": 0.6184898179852225, "grad_norm": 0.43272289633750916, "learning_rate": 9.982914258372939e-06, "loss": 0.4992, "step": 1144 }, { "epoch": 0.6190304559380069, "grad_norm": 0.47826868295669556, "learning_rate": 9.982757957905204e-06, "loss": 0.4701, "step": 1145 }, { "epoch": 0.6195710938907911, "grad_norm": 0.40173253417015076, "learning_rate": 9.982600947004875e-06, "loss": 0.5083, "step": 1146 }, { "epoch": 0.6201117318435754, "grad_norm": 0.42841464281082153, "learning_rate": 9.982443225694335e-06, "loss": 0.4963, "step": 1147 }, { "epoch": 0.6206523697963597, "grad_norm": 0.48060914874076843, "learning_rate": 9.982284793996075e-06, "loss": 0.4913, "step": 1148 }, { "epoch": 0.621193007749144, "grad_norm": 0.37836718559265137, "learning_rate": 9.982125651932681e-06, "loss": 0.4872, "step": 1149 }, { "epoch": 0.6217336457019282, "grad_norm": 0.39764174818992615, "learning_rate": 9.981965799526846e-06, "loss": 0.4615, "step": 1150 }, { "epoch": 0.6222742836547126, "grad_norm": 0.4169751703739166, "learning_rate": 9.981805236801359e-06, "loss": 0.4836, "step": 1151 }, { "epoch": 0.6228149216074969, "grad_norm": 0.38069164752960205, "learning_rate": 9.981643963779116e-06, "loss": 0.489, "step": 1152 }, { "epoch": 0.6233555595602811, "grad_norm": 0.41259583830833435, "learning_rate": 9.981481980483107e-06, "loss": 0.5223, "step": 1153 }, { "epoch": 0.6238961975130655, "grad_norm": 0.44381216168403625, "learning_rate": 9.98131928693643e-06, "loss": 0.4799, "step": 1154 }, { "epoch": 0.6244368354658497, "grad_norm": 0.37238988280296326, "learning_rate": 9.981155883162281e-06, "loss": 0.4953, "step": 1155 }, { "epoch": 0.624977473418634, "grad_norm": 0.4142807424068451, "learning_rate": 9.980991769183957e-06, "loss": 0.454, "step": 1156 }, { "epoch": 0.6255181113714183, "grad_norm": 0.38495945930480957, "learning_rate": 9.980826945024858e-06, "loss": 0.478, "step": 1157 }, { "epoch": 0.6260587493242026, "grad_norm": 0.4185667335987091, "learning_rate": 9.980661410708484e-06, "loss": 0.4915, "step": 1158 }, { "epoch": 0.6265993872769868, "grad_norm": 0.3721354305744171, "learning_rate": 9.980495166258437e-06, "loss": 0.4879, "step": 1159 }, { "epoch": 0.6271400252297711, "grad_norm": 0.3885502219200134, "learning_rate": 9.980328211698418e-06, "loss": 0.4821, "step": 1160 }, { "epoch": 0.6276806631825554, "grad_norm": 0.38209933042526245, "learning_rate": 9.980160547052233e-06, "loss": 0.4753, "step": 1161 }, { "epoch": 0.6282213011353397, "grad_norm": 0.3807179033756256, "learning_rate": 9.979992172343789e-06, "loss": 0.4965, "step": 1162 }, { "epoch": 0.628761939088124, "grad_norm": 0.4110669493675232, "learning_rate": 9.979823087597088e-06, "loss": 0.5032, "step": 1163 }, { "epoch": 0.6293025770409083, "grad_norm": 0.3786408007144928, "learning_rate": 9.97965329283624e-06, "loss": 0.4762, "step": 1164 }, { "epoch": 0.6298432149936926, "grad_norm": 0.3834894597530365, "learning_rate": 9.979482788085455e-06, "loss": 0.4709, "step": 1165 }, { "epoch": 0.6303838529464768, "grad_norm": 0.39199164509773254, "learning_rate": 9.979311573369044e-06, "loss": 0.4881, "step": 1166 }, { "epoch": 0.6309244908992612, "grad_norm": 0.40243616700172424, "learning_rate": 9.979139648711415e-06, "loss": 0.4584, "step": 1167 }, { "epoch": 0.6314651288520454, "grad_norm": 0.42804813385009766, "learning_rate": 9.978967014137082e-06, "loss": 0.4939, "step": 1168 }, { "epoch": 0.6320057668048297, "grad_norm": 0.3774058222770691, "learning_rate": 9.978793669670661e-06, "loss": 0.4811, "step": 1169 }, { "epoch": 0.632546404757614, "grad_norm": 0.36529508233070374, "learning_rate": 9.978619615336866e-06, "loss": 0.4537, "step": 1170 }, { "epoch": 0.6330870427103983, "grad_norm": 0.3665902018547058, "learning_rate": 9.978444851160511e-06, "loss": 0.4753, "step": 1171 }, { "epoch": 0.6336276806631825, "grad_norm": 0.3722003400325775, "learning_rate": 9.978269377166517e-06, "loss": 0.4888, "step": 1172 }, { "epoch": 0.6341683186159668, "grad_norm": 0.3868032693862915, "learning_rate": 9.978093193379901e-06, "loss": 0.4977, "step": 1173 }, { "epoch": 0.6347089565687511, "grad_norm": 0.3496233820915222, "learning_rate": 9.977916299825786e-06, "loss": 0.4864, "step": 1174 }, { "epoch": 0.6352495945215354, "grad_norm": 0.3908425569534302, "learning_rate": 9.977738696529387e-06, "loss": 0.5026, "step": 1175 }, { "epoch": 0.6357902324743196, "grad_norm": 0.39688825607299805, "learning_rate": 9.97756038351603e-06, "loss": 0.4768, "step": 1176 }, { "epoch": 0.636330870427104, "grad_norm": 0.5034068822860718, "learning_rate": 9.97738136081114e-06, "loss": 0.5099, "step": 1177 }, { "epoch": 0.6368715083798883, "grad_norm": 0.3846215307712555, "learning_rate": 9.97720162844024e-06, "loss": 0.4817, "step": 1178 }, { "epoch": 0.6374121463326725, "grad_norm": 0.39605143666267395, "learning_rate": 9.977021186428955e-06, "loss": 0.4495, "step": 1179 }, { "epoch": 0.6379527842854569, "grad_norm": 0.4012170732021332, "learning_rate": 9.976840034803014e-06, "loss": 0.4809, "step": 1180 }, { "epoch": 0.6384934222382411, "grad_norm": 0.3802087604999542, "learning_rate": 9.976658173588244e-06, "loss": 0.4671, "step": 1181 }, { "epoch": 0.6390340601910254, "grad_norm": 0.36879563331604004, "learning_rate": 9.976475602810575e-06, "loss": 0.4917, "step": 1182 }, { "epoch": 0.6395746981438097, "grad_norm": 0.40868595242500305, "learning_rate": 9.976292322496037e-06, "loss": 0.4921, "step": 1183 }, { "epoch": 0.640115336096594, "grad_norm": 0.39740028977394104, "learning_rate": 9.976108332670763e-06, "loss": 0.4939, "step": 1184 }, { "epoch": 0.6406559740493782, "grad_norm": 0.386565625667572, "learning_rate": 9.975923633360985e-06, "loss": 0.4817, "step": 1185 }, { "epoch": 0.6411966120021626, "grad_norm": 0.43585434556007385, "learning_rate": 9.975738224593036e-06, "loss": 0.4609, "step": 1186 }, { "epoch": 0.6417372499549469, "grad_norm": 0.43665891885757446, "learning_rate": 9.975552106393356e-06, "loss": 0.4849, "step": 1187 }, { "epoch": 0.6422778879077311, "grad_norm": 0.35513120889663696, "learning_rate": 9.975365278788474e-06, "loss": 0.4719, "step": 1188 }, { "epoch": 0.6428185258605154, "grad_norm": 0.3776432275772095, "learning_rate": 9.975177741805034e-06, "loss": 0.48, "step": 1189 }, { "epoch": 0.6433591638132997, "grad_norm": 0.4380965530872345, "learning_rate": 9.974989495469771e-06, "loss": 0.4909, "step": 1190 }, { "epoch": 0.643899801766084, "grad_norm": 0.4393802583217621, "learning_rate": 9.974800539809526e-06, "loss": 0.5087, "step": 1191 }, { "epoch": 0.6444404397188682, "grad_norm": 0.3763464689254761, "learning_rate": 9.97461087485124e-06, "loss": 0.4719, "step": 1192 }, { "epoch": 0.6449810776716526, "grad_norm": 0.3885161578655243, "learning_rate": 9.974420500621956e-06, "loss": 0.457, "step": 1193 }, { "epoch": 0.6455217156244368, "grad_norm": 0.4224901795387268, "learning_rate": 9.974229417148815e-06, "loss": 0.4776, "step": 1194 }, { "epoch": 0.6460623535772211, "grad_norm": 0.45251429080963135, "learning_rate": 9.974037624459063e-06, "loss": 0.4674, "step": 1195 }, { "epoch": 0.6466029915300054, "grad_norm": 0.34927940368652344, "learning_rate": 9.973845122580046e-06, "loss": 0.4562, "step": 1196 }, { "epoch": 0.6471436294827897, "grad_norm": 0.4282715916633606, "learning_rate": 9.973651911539209e-06, "loss": 0.4931, "step": 1197 }, { "epoch": 0.647684267435574, "grad_norm": 0.42950549721717834, "learning_rate": 9.973457991364098e-06, "loss": 0.4701, "step": 1198 }, { "epoch": 0.6482249053883583, "grad_norm": 0.35387134552001953, "learning_rate": 9.973263362082365e-06, "loss": 0.4715, "step": 1199 }, { "epoch": 0.6487655433411426, "grad_norm": 0.42833399772644043, "learning_rate": 9.973068023721761e-06, "loss": 0.4997, "step": 1200 }, { "epoch": 0.6493061812939268, "grad_norm": 0.4325339198112488, "learning_rate": 9.972871976310134e-06, "loss": 0.4868, "step": 1201 }, { "epoch": 0.6498468192467112, "grad_norm": 0.4114973843097687, "learning_rate": 9.972675219875437e-06, "loss": 0.505, "step": 1202 }, { "epoch": 0.6503874571994954, "grad_norm": 0.40939682722091675, "learning_rate": 9.972477754445723e-06, "loss": 0.5068, "step": 1203 }, { "epoch": 0.6509280951522797, "grad_norm": 0.4267498552799225, "learning_rate": 9.972279580049145e-06, "loss": 0.4905, "step": 1204 }, { "epoch": 0.6514687331050639, "grad_norm": 0.44173136353492737, "learning_rate": 9.972080696713962e-06, "loss": 0.4844, "step": 1205 }, { "epoch": 0.6520093710578483, "grad_norm": 0.3955075442790985, "learning_rate": 9.971881104468527e-06, "loss": 0.482, "step": 1206 }, { "epoch": 0.6525500090106325, "grad_norm": 0.46882182359695435, "learning_rate": 9.971680803341299e-06, "loss": 0.465, "step": 1207 }, { "epoch": 0.6530906469634168, "grad_norm": 0.4290851950645447, "learning_rate": 9.971479793360837e-06, "loss": 0.4676, "step": 1208 }, { "epoch": 0.6536312849162011, "grad_norm": 0.3703151345252991, "learning_rate": 9.9712780745558e-06, "loss": 0.4683, "step": 1209 }, { "epoch": 0.6541719228689854, "grad_norm": 0.43469929695129395, "learning_rate": 9.971075646954946e-06, "loss": 0.4984, "step": 1210 }, { "epoch": 0.6547125608217697, "grad_norm": 0.4252399802207947, "learning_rate": 9.970872510587142e-06, "loss": 0.4777, "step": 1211 }, { "epoch": 0.655253198774554, "grad_norm": 0.418330579996109, "learning_rate": 9.970668665481347e-06, "loss": 0.4934, "step": 1212 }, { "epoch": 0.6557938367273383, "grad_norm": 0.4227907657623291, "learning_rate": 9.970464111666627e-06, "loss": 0.4675, "step": 1213 }, { "epoch": 0.6563344746801225, "grad_norm": 0.42185381054878235, "learning_rate": 9.970258849172146e-06, "loss": 0.4926, "step": 1214 }, { "epoch": 0.6568751126329069, "grad_norm": 0.4280652403831482, "learning_rate": 9.970052878027169e-06, "loss": 0.4808, "step": 1215 }, { "epoch": 0.6574157505856911, "grad_norm": 0.36444810032844543, "learning_rate": 9.969846198261063e-06, "loss": 0.4805, "step": 1216 }, { "epoch": 0.6579563885384754, "grad_norm": 0.40645283460617065, "learning_rate": 9.9696388099033e-06, "loss": 0.4702, "step": 1217 }, { "epoch": 0.6584970264912597, "grad_norm": 0.3740103840827942, "learning_rate": 9.969430712983443e-06, "loss": 0.5026, "step": 1218 }, { "epoch": 0.659037664444044, "grad_norm": 0.3753998875617981, "learning_rate": 9.969221907531168e-06, "loss": 0.455, "step": 1219 }, { "epoch": 0.6595783023968282, "grad_norm": 0.3687969744205475, "learning_rate": 9.969012393576241e-06, "loss": 0.4831, "step": 1220 }, { "epoch": 0.6601189403496125, "grad_norm": 0.42063668370246887, "learning_rate": 9.968802171148537e-06, "loss": 0.5077, "step": 1221 }, { "epoch": 0.6606595783023969, "grad_norm": 0.41339847445487976, "learning_rate": 9.968591240278028e-06, "loss": 0.4857, "step": 1222 }, { "epoch": 0.6612002162551811, "grad_norm": 0.3697742819786072, "learning_rate": 9.96837960099479e-06, "loss": 0.4841, "step": 1223 }, { "epoch": 0.6617408542079654, "grad_norm": 0.40105369687080383, "learning_rate": 9.968167253328995e-06, "loss": 0.4776, "step": 1224 }, { "epoch": 0.6622814921607497, "grad_norm": 0.3760954737663269, "learning_rate": 9.967954197310922e-06, "loss": 0.4687, "step": 1225 }, { "epoch": 0.662822130113534, "grad_norm": 0.42928802967071533, "learning_rate": 9.967740432970948e-06, "loss": 0.4781, "step": 1226 }, { "epoch": 0.6633627680663182, "grad_norm": 0.43400323390960693, "learning_rate": 9.967525960339548e-06, "loss": 0.4772, "step": 1227 }, { "epoch": 0.6639034060191026, "grad_norm": 0.36896318197250366, "learning_rate": 9.967310779447303e-06, "loss": 0.4509, "step": 1228 }, { "epoch": 0.6644440439718868, "grad_norm": 0.380281537771225, "learning_rate": 9.967094890324894e-06, "loss": 0.4915, "step": 1229 }, { "epoch": 0.6649846819246711, "grad_norm": 0.46708944439888, "learning_rate": 9.966878293003102e-06, "loss": 0.491, "step": 1230 }, { "epoch": 0.6655253198774554, "grad_norm": 0.397403746843338, "learning_rate": 9.966660987512809e-06, "loss": 0.4518, "step": 1231 }, { "epoch": 0.6660659578302397, "grad_norm": 0.45788633823394775, "learning_rate": 9.966442973884996e-06, "loss": 0.4717, "step": 1232 }, { "epoch": 0.666606595783024, "grad_norm": 0.3888041079044342, "learning_rate": 9.96622425215075e-06, "loss": 0.4547, "step": 1233 }, { "epoch": 0.6671472337358082, "grad_norm": 0.46574699878692627, "learning_rate": 9.966004822341254e-06, "loss": 0.4882, "step": 1234 }, { "epoch": 0.6676878716885926, "grad_norm": 0.40038323402404785, "learning_rate": 9.965784684487794e-06, "loss": 0.4799, "step": 1235 }, { "epoch": 0.6682285096413768, "grad_norm": 0.3611610531806946, "learning_rate": 9.965563838621758e-06, "loss": 0.4567, "step": 1236 }, { "epoch": 0.6687691475941611, "grad_norm": 0.4081619083881378, "learning_rate": 9.965342284774633e-06, "loss": 0.4668, "step": 1237 }, { "epoch": 0.6693097855469454, "grad_norm": 0.44255727529525757, "learning_rate": 9.965120022978007e-06, "loss": 0.4829, "step": 1238 }, { "epoch": 0.6698504234997297, "grad_norm": 0.38775256276130676, "learning_rate": 9.96489705326357e-06, "loss": 0.4827, "step": 1239 }, { "epoch": 0.6703910614525139, "grad_norm": 0.45577195286750793, "learning_rate": 9.964673375663114e-06, "loss": 0.4854, "step": 1240 }, { "epoch": 0.6709316994052983, "grad_norm": 0.4752182066440582, "learning_rate": 9.96444899020853e-06, "loss": 0.4689, "step": 1241 }, { "epoch": 0.6714723373580825, "grad_norm": 0.40431809425354004, "learning_rate": 9.964223896931809e-06, "loss": 0.482, "step": 1242 }, { "epoch": 0.6720129753108668, "grad_norm": 0.4463716149330139, "learning_rate": 9.963998095865047e-06, "loss": 0.4692, "step": 1243 }, { "epoch": 0.6725536132636512, "grad_norm": 0.4271736443042755, "learning_rate": 9.963771587040435e-06, "loss": 0.4754, "step": 1244 }, { "epoch": 0.6730942512164354, "grad_norm": 0.36027437448501587, "learning_rate": 9.96354437049027e-06, "loss": 0.4458, "step": 1245 }, { "epoch": 0.6736348891692197, "grad_norm": 0.43132495880126953, "learning_rate": 9.963316446246949e-06, "loss": 0.4931, "step": 1246 }, { "epoch": 0.674175527122004, "grad_norm": 0.39921796321868896, "learning_rate": 9.963087814342968e-06, "loss": 0.4604, "step": 1247 }, { "epoch": 0.6747161650747883, "grad_norm": 0.4080028235912323, "learning_rate": 9.962858474810926e-06, "loss": 0.4781, "step": 1248 }, { "epoch": 0.6752568030275725, "grad_norm": 0.48804527521133423, "learning_rate": 9.96262842768352e-06, "loss": 0.4637, "step": 1249 }, { "epoch": 0.6757974409803568, "grad_norm": 0.37016865611076355, "learning_rate": 9.962397672993552e-06, "loss": 0.4768, "step": 1250 }, { "epoch": 0.6763380789331411, "grad_norm": 0.42169275879859924, "learning_rate": 9.962166210773918e-06, "loss": 0.489, "step": 1251 }, { "epoch": 0.6768787168859254, "grad_norm": 0.4805634915828705, "learning_rate": 9.961934041057627e-06, "loss": 0.5032, "step": 1252 }, { "epoch": 0.6774193548387096, "grad_norm": 0.37036967277526855, "learning_rate": 9.961701163877773e-06, "loss": 0.4523, "step": 1253 }, { "epoch": 0.677959992791494, "grad_norm": 0.37964698672294617, "learning_rate": 9.961467579267565e-06, "loss": 0.4996, "step": 1254 }, { "epoch": 0.6785006307442782, "grad_norm": 0.41072869300842285, "learning_rate": 9.961233287260305e-06, "loss": 0.461, "step": 1255 }, { "epoch": 0.6790412686970625, "grad_norm": 0.3730875253677368, "learning_rate": 9.960998287889397e-06, "loss": 0.4772, "step": 1256 }, { "epoch": 0.6795819066498469, "grad_norm": 0.4048408567905426, "learning_rate": 9.96076258118835e-06, "loss": 0.4691, "step": 1257 }, { "epoch": 0.6801225446026311, "grad_norm": 0.4101283848285675, "learning_rate": 9.960526167190767e-06, "loss": 0.4616, "step": 1258 }, { "epoch": 0.6806631825554154, "grad_norm": 0.3689689636230469, "learning_rate": 9.960289045930358e-06, "loss": 0.4529, "step": 1259 }, { "epoch": 0.6812038205081997, "grad_norm": 0.4019097089767456, "learning_rate": 9.96005121744093e-06, "loss": 0.4804, "step": 1260 }, { "epoch": 0.681744458460984, "grad_norm": 0.4565180242061615, "learning_rate": 9.959812681756394e-06, "loss": 0.4852, "step": 1261 }, { "epoch": 0.6822850964137682, "grad_norm": 0.4157910645008087, "learning_rate": 9.959573438910757e-06, "loss": 0.48, "step": 1262 }, { "epoch": 0.6828257343665526, "grad_norm": 0.4228622317314148, "learning_rate": 9.95933348893813e-06, "loss": 0.4938, "step": 1263 }, { "epoch": 0.6833663723193368, "grad_norm": 0.41255107522010803, "learning_rate": 9.959092831872729e-06, "loss": 0.4863, "step": 1264 }, { "epoch": 0.6839070102721211, "grad_norm": 0.3899734318256378, "learning_rate": 9.958851467748863e-06, "loss": 0.457, "step": 1265 }, { "epoch": 0.6844476482249053, "grad_norm": 0.4378751814365387, "learning_rate": 9.958609396600944e-06, "loss": 0.467, "step": 1266 }, { "epoch": 0.6849882861776897, "grad_norm": 0.4468434154987335, "learning_rate": 9.958366618463488e-06, "loss": 0.4727, "step": 1267 }, { "epoch": 0.685528924130474, "grad_norm": 0.45046213269233704, "learning_rate": 9.958123133371111e-06, "loss": 0.4862, "step": 1268 }, { "epoch": 0.6860695620832582, "grad_norm": 0.41851988434791565, "learning_rate": 9.957878941358526e-06, "loss": 0.4733, "step": 1269 }, { "epoch": 0.6866102000360426, "grad_norm": 0.43167203664779663, "learning_rate": 9.957634042460551e-06, "loss": 0.4898, "step": 1270 }, { "epoch": 0.6871508379888268, "grad_norm": 0.4302287995815277, "learning_rate": 9.957388436712103e-06, "loss": 0.5006, "step": 1271 }, { "epoch": 0.6876914759416111, "grad_norm": 0.43457236886024475, "learning_rate": 9.957142124148201e-06, "loss": 0.4946, "step": 1272 }, { "epoch": 0.6882321138943954, "grad_norm": 0.3867625594139099, "learning_rate": 9.95689510480396e-06, "loss": 0.4711, "step": 1273 }, { "epoch": 0.6887727518471797, "grad_norm": 0.38470458984375, "learning_rate": 9.956647378714606e-06, "loss": 0.4362, "step": 1274 }, { "epoch": 0.6893133897999639, "grad_norm": 0.489411324262619, "learning_rate": 9.956398945915455e-06, "loss": 0.4919, "step": 1275 }, { "epoch": 0.6898540277527483, "grad_norm": 0.3836153447628021, "learning_rate": 9.956149806441927e-06, "loss": 0.4708, "step": 1276 }, { "epoch": 0.6903946657055325, "grad_norm": 0.46178779006004333, "learning_rate": 9.955899960329546e-06, "loss": 0.4884, "step": 1277 }, { "epoch": 0.6909353036583168, "grad_norm": 0.4728642702102661, "learning_rate": 9.955649407613936e-06, "loss": 0.4491, "step": 1278 }, { "epoch": 0.6914759416111012, "grad_norm": 0.4373341202735901, "learning_rate": 9.955398148330816e-06, "loss": 0.4728, "step": 1279 }, { "epoch": 0.6920165795638854, "grad_norm": 0.44000309705734253, "learning_rate": 9.955146182516015e-06, "loss": 0.4747, "step": 1280 }, { "epoch": 0.6925572175166697, "grad_norm": 0.4856782853603363, "learning_rate": 9.954893510205455e-06, "loss": 0.4607, "step": 1281 }, { "epoch": 0.6930978554694539, "grad_norm": 0.4034997522830963, "learning_rate": 9.954640131435162e-06, "loss": 0.4984, "step": 1282 }, { "epoch": 0.6936384934222383, "grad_norm": 0.4275628924369812, "learning_rate": 9.954386046241262e-06, "loss": 0.4602, "step": 1283 }, { "epoch": 0.6941791313750225, "grad_norm": 0.44787707924842834, "learning_rate": 9.954131254659983e-06, "loss": 0.4518, "step": 1284 }, { "epoch": 0.6947197693278068, "grad_norm": 0.4457874894142151, "learning_rate": 9.95387575672765e-06, "loss": 0.4858, "step": 1285 }, { "epoch": 0.6952604072805911, "grad_norm": 0.4127665162086487, "learning_rate": 9.953619552480697e-06, "loss": 0.483, "step": 1286 }, { "epoch": 0.6958010452333754, "grad_norm": 0.4638175964355469, "learning_rate": 9.953362641955649e-06, "loss": 0.4869, "step": 1287 }, { "epoch": 0.6963416831861596, "grad_norm": 0.4228275418281555, "learning_rate": 9.953105025189134e-06, "loss": 0.486, "step": 1288 }, { "epoch": 0.696882321138944, "grad_norm": 0.41768065094947815, "learning_rate": 9.952846702217886e-06, "loss": 0.4561, "step": 1289 }, { "epoch": 0.6974229590917282, "grad_norm": 0.4437529146671295, "learning_rate": 9.952587673078738e-06, "loss": 0.4815, "step": 1290 }, { "epoch": 0.6979635970445125, "grad_norm": 0.42724665999412537, "learning_rate": 9.952327937808616e-06, "loss": 0.4735, "step": 1291 }, { "epoch": 0.6985042349972969, "grad_norm": 0.43202629685401917, "learning_rate": 9.952067496444557e-06, "loss": 0.5053, "step": 1292 }, { "epoch": 0.6990448729500811, "grad_norm": 0.3635312020778656, "learning_rate": 9.951806349023693e-06, "loss": 0.4426, "step": 1293 }, { "epoch": 0.6995855109028654, "grad_norm": 0.3907013535499573, "learning_rate": 9.951544495583258e-06, "loss": 0.471, "step": 1294 }, { "epoch": 0.7001261488556497, "grad_norm": 0.4162873327732086, "learning_rate": 9.951281936160587e-06, "loss": 0.4902, "step": 1295 }, { "epoch": 0.700666786808434, "grad_norm": 0.37522828578948975, "learning_rate": 9.951018670793114e-06, "loss": 0.4931, "step": 1296 }, { "epoch": 0.7012074247612182, "grad_norm": 0.39658185839653015, "learning_rate": 9.950754699518374e-06, "loss": 0.4784, "step": 1297 }, { "epoch": 0.7017480627140025, "grad_norm": 0.40695467591285706, "learning_rate": 9.95049002237401e-06, "loss": 0.4808, "step": 1298 }, { "epoch": 0.7022887006667868, "grad_norm": 0.43931230902671814, "learning_rate": 9.950224639397749e-06, "loss": 0.4709, "step": 1299 }, { "epoch": 0.7028293386195711, "grad_norm": 0.44696277379989624, "learning_rate": 9.949958550627436e-06, "loss": 0.5105, "step": 1300 }, { "epoch": 0.7033699765723553, "grad_norm": 0.4636048972606659, "learning_rate": 9.94969175610101e-06, "loss": 0.477, "step": 1301 }, { "epoch": 0.7039106145251397, "grad_norm": 0.4922928810119629, "learning_rate": 9.949424255856506e-06, "loss": 0.4618, "step": 1302 }, { "epoch": 0.704451252477924, "grad_norm": 0.39951130747795105, "learning_rate": 9.949156049932065e-06, "loss": 0.4568, "step": 1303 }, { "epoch": 0.7049918904307082, "grad_norm": 0.4968952536582947, "learning_rate": 9.948887138365929e-06, "loss": 0.4889, "step": 1304 }, { "epoch": 0.7055325283834926, "grad_norm": 0.4298841953277588, "learning_rate": 9.948617521196438e-06, "loss": 0.4794, "step": 1305 }, { "epoch": 0.7060731663362768, "grad_norm": 0.3872496783733368, "learning_rate": 9.948347198462031e-06, "loss": 0.4762, "step": 1306 }, { "epoch": 0.7066138042890611, "grad_norm": 0.518255889415741, "learning_rate": 9.948076170201254e-06, "loss": 0.5167, "step": 1307 }, { "epoch": 0.7071544422418454, "grad_norm": 0.37470534443855286, "learning_rate": 9.947804436452748e-06, "loss": 0.4737, "step": 1308 }, { "epoch": 0.7076950801946297, "grad_norm": 0.39828360080718994, "learning_rate": 9.947531997255256e-06, "loss": 0.4744, "step": 1309 }, { "epoch": 0.7082357181474139, "grad_norm": 0.41359883546829224, "learning_rate": 9.947258852647623e-06, "loss": 0.5009, "step": 1310 }, { "epoch": 0.7087763561001982, "grad_norm": 0.41263335943222046, "learning_rate": 9.946985002668791e-06, "loss": 0.4463, "step": 1311 }, { "epoch": 0.7093169940529825, "grad_norm": 0.42026588320732117, "learning_rate": 9.94671044735781e-06, "loss": 0.4446, "step": 1312 }, { "epoch": 0.7098576320057668, "grad_norm": 0.451593816280365, "learning_rate": 9.94643518675382e-06, "loss": 0.4787, "step": 1313 }, { "epoch": 0.710398269958551, "grad_norm": 0.39087212085723877, "learning_rate": 9.94615922089607e-06, "loss": 0.4456, "step": 1314 }, { "epoch": 0.7109389079113354, "grad_norm": 0.38521644473075867, "learning_rate": 9.945882549823906e-06, "loss": 0.4822, "step": 1315 }, { "epoch": 0.7114795458641197, "grad_norm": 0.35561037063598633, "learning_rate": 9.945605173576775e-06, "loss": 0.4748, "step": 1316 }, { "epoch": 0.7120201838169039, "grad_norm": 0.46212202310562134, "learning_rate": 9.945327092194225e-06, "loss": 0.4913, "step": 1317 }, { "epoch": 0.7125608217696883, "grad_norm": 0.3639814853668213, "learning_rate": 9.945048305715907e-06, "loss": 0.5147, "step": 1318 }, { "epoch": 0.7131014597224725, "grad_norm": 0.4061166048049927, "learning_rate": 9.944768814181566e-06, "loss": 0.4557, "step": 1319 }, { "epoch": 0.7136420976752568, "grad_norm": 0.41976243257522583, "learning_rate": 9.944488617631053e-06, "loss": 0.4714, "step": 1320 }, { "epoch": 0.7141827356280411, "grad_norm": 0.4113599359989166, "learning_rate": 9.944207716104318e-06, "loss": 0.5008, "step": 1321 }, { "epoch": 0.7147233735808254, "grad_norm": 0.3758300840854645, "learning_rate": 9.943926109641411e-06, "loss": 0.4904, "step": 1322 }, { "epoch": 0.7152640115336096, "grad_norm": 0.3485066294670105, "learning_rate": 9.943643798282483e-06, "loss": 0.4385, "step": 1323 }, { "epoch": 0.715804649486394, "grad_norm": 0.3554293215274811, "learning_rate": 9.943360782067786e-06, "loss": 0.447, "step": 1324 }, { "epoch": 0.7163452874391782, "grad_norm": 0.40508759021759033, "learning_rate": 9.943077061037672e-06, "loss": 0.4715, "step": 1325 }, { "epoch": 0.7168859253919625, "grad_norm": 0.37202188372612, "learning_rate": 9.942792635232591e-06, "loss": 0.461, "step": 1326 }, { "epoch": 0.7174265633447467, "grad_norm": 0.422195166349411, "learning_rate": 9.9425075046931e-06, "loss": 0.4944, "step": 1327 }, { "epoch": 0.7179672012975311, "grad_norm": 0.38294172286987305, "learning_rate": 9.94222166945985e-06, "loss": 0.4745, "step": 1328 }, { "epoch": 0.7185078392503154, "grad_norm": 0.4370891749858856, "learning_rate": 9.941935129573596e-06, "loss": 0.4918, "step": 1329 }, { "epoch": 0.7190484772030996, "grad_norm": 0.41351190209388733, "learning_rate": 9.94164788507519e-06, "loss": 0.4768, "step": 1330 }, { "epoch": 0.719589115155884, "grad_norm": 0.40625694394111633, "learning_rate": 9.941359936005588e-06, "loss": 0.4781, "step": 1331 }, { "epoch": 0.7201297531086682, "grad_norm": 0.42381858825683594, "learning_rate": 9.941071282405848e-06, "loss": 0.4455, "step": 1332 }, { "epoch": 0.7206703910614525, "grad_norm": 0.39242449402809143, "learning_rate": 9.94078192431712e-06, "loss": 0.4503, "step": 1333 }, { "epoch": 0.7212110290142368, "grad_norm": 0.36869487166404724, "learning_rate": 9.940491861780666e-06, "loss": 0.4376, "step": 1334 }, { "epoch": 0.7217516669670211, "grad_norm": 0.44459354877471924, "learning_rate": 9.940201094837838e-06, "loss": 0.4842, "step": 1335 }, { "epoch": 0.7222923049198053, "grad_norm": 0.4525735676288605, "learning_rate": 9.939909623530098e-06, "loss": 0.4697, "step": 1336 }, { "epoch": 0.7228329428725897, "grad_norm": 0.3791995048522949, "learning_rate": 9.939617447898998e-06, "loss": 0.4845, "step": 1337 }, { "epoch": 0.723373580825374, "grad_norm": 0.4672689437866211, "learning_rate": 9.9393245679862e-06, "loss": 0.4522, "step": 1338 }, { "epoch": 0.7239142187781582, "grad_norm": 0.5381559133529663, "learning_rate": 9.939030983833459e-06, "loss": 0.4785, "step": 1339 }, { "epoch": 0.7244548567309426, "grad_norm": 0.40444672107696533, "learning_rate": 9.938736695482636e-06, "loss": 0.4875, "step": 1340 }, { "epoch": 0.7249954946837268, "grad_norm": 0.44594690203666687, "learning_rate": 9.938441702975689e-06, "loss": 0.4863, "step": 1341 }, { "epoch": 0.7255361326365111, "grad_norm": 0.39273694157600403, "learning_rate": 9.938146006354678e-06, "loss": 0.4682, "step": 1342 }, { "epoch": 0.7260767705892953, "grad_norm": 0.4593668580055237, "learning_rate": 9.937849605661765e-06, "loss": 0.502, "step": 1343 }, { "epoch": 0.7266174085420797, "grad_norm": 0.38885119557380676, "learning_rate": 9.937552500939205e-06, "loss": 0.4496, "step": 1344 }, { "epoch": 0.7271580464948639, "grad_norm": 0.43517187237739563, "learning_rate": 9.937254692229363e-06, "loss": 0.4791, "step": 1345 }, { "epoch": 0.7276986844476482, "grad_norm": 0.4335128664970398, "learning_rate": 9.9369561795747e-06, "loss": 0.4662, "step": 1346 }, { "epoch": 0.7282393224004325, "grad_norm": 0.387592613697052, "learning_rate": 9.936656963017774e-06, "loss": 0.4814, "step": 1347 }, { "epoch": 0.7287799603532168, "grad_norm": 0.43556758761405945, "learning_rate": 9.936357042601252e-06, "loss": 0.4942, "step": 1348 }, { "epoch": 0.729320598306001, "grad_norm": 0.3670481741428375, "learning_rate": 9.93605641836789e-06, "loss": 0.4919, "step": 1349 }, { "epoch": 0.7298612362587854, "grad_norm": 0.3413862884044647, "learning_rate": 9.935755090360554e-06, "loss": 0.4627, "step": 1350 }, { "epoch": 0.7304018742115697, "grad_norm": 0.3749856650829315, "learning_rate": 9.935453058622208e-06, "loss": 0.4553, "step": 1351 }, { "epoch": 0.7309425121643539, "grad_norm": 0.36184626817703247, "learning_rate": 9.935150323195912e-06, "loss": 0.4648, "step": 1352 }, { "epoch": 0.7314831501171383, "grad_norm": 0.33902183175086975, "learning_rate": 9.934846884124831e-06, "loss": 0.4704, "step": 1353 }, { "epoch": 0.7320237880699225, "grad_norm": 0.37856411933898926, "learning_rate": 9.93454274145223e-06, "loss": 0.4719, "step": 1354 }, { "epoch": 0.7325644260227068, "grad_norm": 0.42122843861579895, "learning_rate": 9.93423789522147e-06, "loss": 0.4759, "step": 1355 }, { "epoch": 0.7331050639754911, "grad_norm": 0.3885326683521271, "learning_rate": 9.933932345476019e-06, "loss": 0.4794, "step": 1356 }, { "epoch": 0.7336457019282754, "grad_norm": 0.4524173438549042, "learning_rate": 9.933626092259439e-06, "loss": 0.4581, "step": 1357 }, { "epoch": 0.7341863398810596, "grad_norm": 0.4196130037307739, "learning_rate": 9.933319135615396e-06, "loss": 0.4661, "step": 1358 }, { "epoch": 0.7347269778338439, "grad_norm": 0.4285523295402527, "learning_rate": 9.933011475587654e-06, "loss": 0.4712, "step": 1359 }, { "epoch": 0.7352676157866282, "grad_norm": 0.39787888526916504, "learning_rate": 9.932703112220084e-06, "loss": 0.4795, "step": 1360 }, { "epoch": 0.7358082537394125, "grad_norm": 0.40517759323120117, "learning_rate": 9.932394045556644e-06, "loss": 0.5015, "step": 1361 }, { "epoch": 0.7363488916921967, "grad_norm": 0.401108980178833, "learning_rate": 9.932084275641405e-06, "loss": 0.4867, "step": 1362 }, { "epoch": 0.7368895296449811, "grad_norm": 0.4435082972049713, "learning_rate": 9.931773802518532e-06, "loss": 0.4919, "step": 1363 }, { "epoch": 0.7374301675977654, "grad_norm": 0.35231056809425354, "learning_rate": 9.931462626232294e-06, "loss": 0.4706, "step": 1364 }, { "epoch": 0.7379708055505496, "grad_norm": 0.3894501328468323, "learning_rate": 9.931150746827055e-06, "loss": 0.4825, "step": 1365 }, { "epoch": 0.738511443503334, "grad_norm": 0.42613503336906433, "learning_rate": 9.930838164347282e-06, "loss": 0.4625, "step": 1366 }, { "epoch": 0.7390520814561182, "grad_norm": 0.35955050587654114, "learning_rate": 9.930524878837544e-06, "loss": 0.4622, "step": 1367 }, { "epoch": 0.7395927194089025, "grad_norm": 0.4098225235939026, "learning_rate": 9.93021089034251e-06, "loss": 0.4604, "step": 1368 }, { "epoch": 0.7401333573616868, "grad_norm": 0.40953758358955383, "learning_rate": 9.929896198906945e-06, "loss": 0.4694, "step": 1369 }, { "epoch": 0.7406739953144711, "grad_norm": 0.4058552384376526, "learning_rate": 9.929580804575718e-06, "loss": 0.4743, "step": 1370 }, { "epoch": 0.7412146332672553, "grad_norm": 0.38848456740379333, "learning_rate": 9.929264707393799e-06, "loss": 0.4914, "step": 1371 }, { "epoch": 0.7417552712200396, "grad_norm": 0.4356609880924225, "learning_rate": 9.928947907406255e-06, "loss": 0.4753, "step": 1372 }, { "epoch": 0.742295909172824, "grad_norm": 0.40148693323135376, "learning_rate": 9.928630404658255e-06, "loss": 0.4745, "step": 1373 }, { "epoch": 0.7428365471256082, "grad_norm": 0.34132710099220276, "learning_rate": 9.928312199195068e-06, "loss": 0.4791, "step": 1374 }, { "epoch": 0.7433771850783925, "grad_norm": 0.42795494198799133, "learning_rate": 9.927993291062064e-06, "loss": 0.4778, "step": 1375 }, { "epoch": 0.7439178230311768, "grad_norm": 0.3633674681186676, "learning_rate": 9.927673680304711e-06, "loss": 0.4842, "step": 1376 }, { "epoch": 0.7444584609839611, "grad_norm": 0.39830613136291504, "learning_rate": 9.927353366968578e-06, "loss": 0.4737, "step": 1377 }, { "epoch": 0.7449990989367453, "grad_norm": 0.38402634859085083, "learning_rate": 9.927032351099337e-06, "loss": 0.5031, "step": 1378 }, { "epoch": 0.7455397368895297, "grad_norm": 0.3705873489379883, "learning_rate": 9.926710632742758e-06, "loss": 0.4559, "step": 1379 }, { "epoch": 0.7460803748423139, "grad_norm": 0.41539448499679565, "learning_rate": 9.926388211944707e-06, "loss": 0.4836, "step": 1380 }, { "epoch": 0.7466210127950982, "grad_norm": 0.4214273989200592, "learning_rate": 9.926065088751157e-06, "loss": 0.479, "step": 1381 }, { "epoch": 0.7471616507478825, "grad_norm": 0.3808612823486328, "learning_rate": 9.92574126320818e-06, "loss": 0.4864, "step": 1382 }, { "epoch": 0.7477022887006668, "grad_norm": 0.37214893102645874, "learning_rate": 9.925416735361943e-06, "loss": 0.4502, "step": 1383 }, { "epoch": 0.748242926653451, "grad_norm": 0.39607030153274536, "learning_rate": 9.925091505258719e-06, "loss": 0.4415, "step": 1384 }, { "epoch": 0.7487835646062354, "grad_norm": 0.3850662410259247, "learning_rate": 9.924765572944879e-06, "loss": 0.5051, "step": 1385 }, { "epoch": 0.7493242025590197, "grad_norm": 0.35740000009536743, "learning_rate": 9.924438938466891e-06, "loss": 0.4403, "step": 1386 }, { "epoch": 0.7498648405118039, "grad_norm": 0.39912787079811096, "learning_rate": 9.92411160187133e-06, "loss": 0.472, "step": 1387 }, { "epoch": 0.7504054784645882, "grad_norm": 0.39975598454475403, "learning_rate": 9.923783563204863e-06, "loss": 0.4767, "step": 1388 }, { "epoch": 0.7509461164173725, "grad_norm": 0.4057595729827881, "learning_rate": 9.923454822514262e-06, "loss": 0.4497, "step": 1389 }, { "epoch": 0.7514867543701568, "grad_norm": 0.423401802778244, "learning_rate": 9.9231253798464e-06, "loss": 0.4804, "step": 1390 }, { "epoch": 0.752027392322941, "grad_norm": 0.4875843822956085, "learning_rate": 9.922795235248248e-06, "loss": 0.4802, "step": 1391 }, { "epoch": 0.7525680302757254, "grad_norm": 0.43842393159866333, "learning_rate": 9.922464388766878e-06, "loss": 0.4887, "step": 1392 }, { "epoch": 0.7531086682285096, "grad_norm": 0.4424212574958801, "learning_rate": 9.922132840449459e-06, "loss": 0.497, "step": 1393 }, { "epoch": 0.7536493061812939, "grad_norm": 0.5426601767539978, "learning_rate": 9.921800590343264e-06, "loss": 0.5274, "step": 1394 }, { "epoch": 0.7541899441340782, "grad_norm": 0.5073828101158142, "learning_rate": 9.921467638495666e-06, "loss": 0.4597, "step": 1395 }, { "epoch": 0.7547305820868625, "grad_norm": 0.4607311189174652, "learning_rate": 9.921133984954134e-06, "loss": 0.4833, "step": 1396 }, { "epoch": 0.7552712200396468, "grad_norm": 0.43839946389198303, "learning_rate": 9.92079962976624e-06, "loss": 0.4606, "step": 1397 }, { "epoch": 0.7558118579924311, "grad_norm": 0.5186724662780762, "learning_rate": 9.92046457297966e-06, "loss": 0.5192, "step": 1398 }, { "epoch": 0.7563524959452154, "grad_norm": 0.43365365266799927, "learning_rate": 9.92012881464216e-06, "loss": 0.5014, "step": 1399 }, { "epoch": 0.7568931338979996, "grad_norm": 0.44823896884918213, "learning_rate": 9.919792354801614e-06, "loss": 0.4609, "step": 1400 }, { "epoch": 0.757433771850784, "grad_norm": 0.44195356965065, "learning_rate": 9.919455193505996e-06, "loss": 0.4758, "step": 1401 }, { "epoch": 0.7579744098035682, "grad_norm": 0.4172375500202179, "learning_rate": 9.919117330803374e-06, "loss": 0.4824, "step": 1402 }, { "epoch": 0.7585150477563525, "grad_norm": 0.4180469214916229, "learning_rate": 9.918778766741924e-06, "loss": 0.4607, "step": 1403 }, { "epoch": 0.7590556857091367, "grad_norm": 0.48548153042793274, "learning_rate": 9.918439501369914e-06, "loss": 0.4904, "step": 1404 }, { "epoch": 0.7595963236619211, "grad_norm": 0.3765825927257538, "learning_rate": 9.91809953473572e-06, "loss": 0.5043, "step": 1405 }, { "epoch": 0.7601369616147053, "grad_norm": 0.5639870762825012, "learning_rate": 9.917758866887808e-06, "loss": 0.4973, "step": 1406 }, { "epoch": 0.7606775995674896, "grad_norm": 0.3645475208759308, "learning_rate": 9.917417497874756e-06, "loss": 0.4755, "step": 1407 }, { "epoch": 0.761218237520274, "grad_norm": 0.419842928647995, "learning_rate": 9.917075427745232e-06, "loss": 0.4727, "step": 1408 }, { "epoch": 0.7617588754730582, "grad_norm": 0.3567120432853699, "learning_rate": 9.916732656548008e-06, "loss": 0.4612, "step": 1409 }, { "epoch": 0.7622995134258425, "grad_norm": 0.4124714136123657, "learning_rate": 9.916389184331957e-06, "loss": 0.465, "step": 1410 }, { "epoch": 0.7628401513786268, "grad_norm": 0.3979954719543457, "learning_rate": 9.916045011146052e-06, "loss": 0.4812, "step": 1411 }, { "epoch": 0.7633807893314111, "grad_norm": 0.38222837448120117, "learning_rate": 9.915700137039359e-06, "loss": 0.4742, "step": 1412 }, { "epoch": 0.7639214272841953, "grad_norm": 0.38148653507232666, "learning_rate": 9.915354562061056e-06, "loss": 0.4833, "step": 1413 }, { "epoch": 0.7644620652369797, "grad_norm": 0.4023478329181671, "learning_rate": 9.91500828626041e-06, "loss": 0.4591, "step": 1414 }, { "epoch": 0.7650027031897639, "grad_norm": 0.3840067386627197, "learning_rate": 9.914661309686796e-06, "loss": 0.4685, "step": 1415 }, { "epoch": 0.7655433411425482, "grad_norm": 0.3622734844684601, "learning_rate": 9.914313632389682e-06, "loss": 0.4836, "step": 1416 }, { "epoch": 0.7660839790953325, "grad_norm": 0.3705524504184723, "learning_rate": 9.91396525441864e-06, "loss": 0.4529, "step": 1417 }, { "epoch": 0.7666246170481168, "grad_norm": 0.35576826333999634, "learning_rate": 9.913616175823343e-06, "loss": 0.4771, "step": 1418 }, { "epoch": 0.767165255000901, "grad_norm": 0.42125403881073, "learning_rate": 9.91326639665356e-06, "loss": 0.4655, "step": 1419 }, { "epoch": 0.7677058929536853, "grad_norm": 0.3521760404109955, "learning_rate": 9.912915916959162e-06, "loss": 0.4439, "step": 1420 }, { "epoch": 0.7682465309064697, "grad_norm": 0.4294596314430237, "learning_rate": 9.91256473679012e-06, "loss": 0.5037, "step": 1421 }, { "epoch": 0.7687871688592539, "grad_norm": 0.3854302763938904, "learning_rate": 9.912212856196506e-06, "loss": 0.4947, "step": 1422 }, { "epoch": 0.7693278068120382, "grad_norm": 0.4472251534461975, "learning_rate": 9.911860275228489e-06, "loss": 0.4737, "step": 1423 }, { "epoch": 0.7698684447648225, "grad_norm": 0.4280487298965454, "learning_rate": 9.91150699393634e-06, "loss": 0.4991, "step": 1424 }, { "epoch": 0.7704090827176068, "grad_norm": 0.4406580328941345, "learning_rate": 9.911153012370427e-06, "loss": 0.4579, "step": 1425 }, { "epoch": 0.770949720670391, "grad_norm": 0.42495012283325195, "learning_rate": 9.910798330581224e-06, "loss": 0.466, "step": 1426 }, { "epoch": 0.7714903586231754, "grad_norm": 0.40381088852882385, "learning_rate": 9.910442948619298e-06, "loss": 0.4617, "step": 1427 }, { "epoch": 0.7720309965759596, "grad_norm": 0.46315282583236694, "learning_rate": 9.91008686653532e-06, "loss": 0.4639, "step": 1428 }, { "epoch": 0.7725716345287439, "grad_norm": 0.3891032040119171, "learning_rate": 9.90973008438006e-06, "loss": 0.4897, "step": 1429 }, { "epoch": 0.7731122724815283, "grad_norm": 0.3929683268070221, "learning_rate": 9.909372602204385e-06, "loss": 0.4713, "step": 1430 }, { "epoch": 0.7736529104343125, "grad_norm": 0.40358665585517883, "learning_rate": 9.909014420059266e-06, "loss": 0.4846, "step": 1431 }, { "epoch": 0.7741935483870968, "grad_norm": 0.43662330508232117, "learning_rate": 9.908655537995772e-06, "loss": 0.4898, "step": 1432 }, { "epoch": 0.7747341863398811, "grad_norm": 0.3911401927471161, "learning_rate": 9.90829595606507e-06, "loss": 0.4864, "step": 1433 }, { "epoch": 0.7752748242926654, "grad_norm": 0.3795257806777954, "learning_rate": 9.907935674318431e-06, "loss": 0.509, "step": 1434 }, { "epoch": 0.7758154622454496, "grad_norm": 0.41082119941711426, "learning_rate": 9.907574692807223e-06, "loss": 0.5005, "step": 1435 }, { "epoch": 0.7763561001982339, "grad_norm": 0.3686942458152771, "learning_rate": 9.907213011582912e-06, "loss": 0.474, "step": 1436 }, { "epoch": 0.7768967381510182, "grad_norm": 0.3893868327140808, "learning_rate": 9.906850630697068e-06, "loss": 0.4844, "step": 1437 }, { "epoch": 0.7774373761038025, "grad_norm": 0.4082487225532532, "learning_rate": 9.906487550201357e-06, "loss": 0.4775, "step": 1438 }, { "epoch": 0.7779780140565867, "grad_norm": 0.3809872567653656, "learning_rate": 9.906123770147548e-06, "loss": 0.479, "step": 1439 }, { "epoch": 0.7785186520093711, "grad_norm": 0.38394907116889954, "learning_rate": 9.905759290587506e-06, "loss": 0.4658, "step": 1440 }, { "epoch": 0.7790592899621553, "grad_norm": 0.35896220803260803, "learning_rate": 9.905394111573201e-06, "loss": 0.4584, "step": 1441 }, { "epoch": 0.7795999279149396, "grad_norm": 0.41028234362602234, "learning_rate": 9.905028233156695e-06, "loss": 0.4857, "step": 1442 }, { "epoch": 0.780140565867724, "grad_norm": 0.3853236138820648, "learning_rate": 9.90466165539016e-06, "loss": 0.5003, "step": 1443 }, { "epoch": 0.7806812038205082, "grad_norm": 0.40787339210510254, "learning_rate": 9.904294378325856e-06, "loss": 0.4851, "step": 1444 }, { "epoch": 0.7812218417732925, "grad_norm": 0.38476669788360596, "learning_rate": 9.903926402016153e-06, "loss": 0.4824, "step": 1445 }, { "epoch": 0.7817624797260768, "grad_norm": 0.40921327471733093, "learning_rate": 9.903557726513515e-06, "loss": 0.4718, "step": 1446 }, { "epoch": 0.7823031176788611, "grad_norm": 0.37435829639434814, "learning_rate": 9.903188351870508e-06, "loss": 0.4491, "step": 1447 }, { "epoch": 0.7828437556316453, "grad_norm": 0.41100597381591797, "learning_rate": 9.902818278139794e-06, "loss": 0.4665, "step": 1448 }, { "epoch": 0.7833843935844296, "grad_norm": 0.36656615138053894, "learning_rate": 9.90244750537414e-06, "loss": 0.4828, "step": 1449 }, { "epoch": 0.7839250315372139, "grad_norm": 0.37309545278549194, "learning_rate": 9.902076033626409e-06, "loss": 0.4722, "step": 1450 }, { "epoch": 0.7844656694899982, "grad_norm": 0.38068512082099915, "learning_rate": 9.901703862949566e-06, "loss": 0.4807, "step": 1451 }, { "epoch": 0.7850063074427824, "grad_norm": 0.35660654306411743, "learning_rate": 9.901330993396671e-06, "loss": 0.4819, "step": 1452 }, { "epoch": 0.7855469453955668, "grad_norm": 0.3687169551849365, "learning_rate": 9.900957425020894e-06, "loss": 0.4913, "step": 1453 }, { "epoch": 0.786087583348351, "grad_norm": 0.3449983596801758, "learning_rate": 9.90058315787549e-06, "loss": 0.4555, "step": 1454 }, { "epoch": 0.7866282213011353, "grad_norm": 0.4075060188770294, "learning_rate": 9.900208192013825e-06, "loss": 0.4736, "step": 1455 }, { "epoch": 0.7871688592539197, "grad_norm": 0.3767589330673218, "learning_rate": 9.899832527489362e-06, "loss": 0.4451, "step": 1456 }, { "epoch": 0.7877094972067039, "grad_norm": 0.4201042354106903, "learning_rate": 9.899456164355661e-06, "loss": 0.4325, "step": 1457 }, { "epoch": 0.7882501351594882, "grad_norm": 0.3488657772541046, "learning_rate": 9.899079102666382e-06, "loss": 0.4328, "step": 1458 }, { "epoch": 0.7887907731122725, "grad_norm": 0.43522709608078003, "learning_rate": 9.898701342475287e-06, "loss": 0.4739, "step": 1459 }, { "epoch": 0.7893314110650568, "grad_norm": 0.4166751205921173, "learning_rate": 9.898322883836239e-06, "loss": 0.4805, "step": 1460 }, { "epoch": 0.789872049017841, "grad_norm": 0.40781643986701965, "learning_rate": 9.897943726803195e-06, "loss": 0.4888, "step": 1461 }, { "epoch": 0.7904126869706254, "grad_norm": 0.41623154282569885, "learning_rate": 9.897563871430212e-06, "loss": 0.4716, "step": 1462 }, { "epoch": 0.7909533249234096, "grad_norm": 0.3918064832687378, "learning_rate": 9.897183317771455e-06, "loss": 0.4712, "step": 1463 }, { "epoch": 0.7914939628761939, "grad_norm": 0.3671140968799591, "learning_rate": 9.896802065881178e-06, "loss": 0.461, "step": 1464 }, { "epoch": 0.7920346008289781, "grad_norm": 0.3947750926017761, "learning_rate": 9.896420115813741e-06, "loss": 0.4964, "step": 1465 }, { "epoch": 0.7925752387817625, "grad_norm": 0.4626021683216095, "learning_rate": 9.896037467623603e-06, "loss": 0.52, "step": 1466 }, { "epoch": 0.7931158767345468, "grad_norm": 0.3657527267932892, "learning_rate": 9.895654121365318e-06, "loss": 0.492, "step": 1467 }, { "epoch": 0.793656514687331, "grad_norm": 0.4211752414703369, "learning_rate": 9.895270077093547e-06, "loss": 0.4833, "step": 1468 }, { "epoch": 0.7941971526401154, "grad_norm": 0.4107241630554199, "learning_rate": 9.894885334863044e-06, "loss": 0.4552, "step": 1469 }, { "epoch": 0.7947377905928996, "grad_norm": 0.35872262716293335, "learning_rate": 9.894499894728665e-06, "loss": 0.4925, "step": 1470 }, { "epoch": 0.7952784285456839, "grad_norm": 0.36545562744140625, "learning_rate": 9.894113756745362e-06, "loss": 0.4484, "step": 1471 }, { "epoch": 0.7958190664984682, "grad_norm": 0.41451945900917053, "learning_rate": 9.893726920968196e-06, "loss": 0.4796, "step": 1472 }, { "epoch": 0.7963597044512525, "grad_norm": 0.4425036907196045, "learning_rate": 9.893339387452319e-06, "loss": 0.504, "step": 1473 }, { "epoch": 0.7969003424040367, "grad_norm": 0.397030770778656, "learning_rate": 9.892951156252982e-06, "loss": 0.4888, "step": 1474 }, { "epoch": 0.7974409803568211, "grad_norm": 0.37259095907211304, "learning_rate": 9.892562227425541e-06, "loss": 0.4654, "step": 1475 }, { "epoch": 0.7979816183096053, "grad_norm": 0.4276980757713318, "learning_rate": 9.89217260102545e-06, "loss": 0.4685, "step": 1476 }, { "epoch": 0.7985222562623896, "grad_norm": 0.35599982738494873, "learning_rate": 9.89178227710826e-06, "loss": 0.4595, "step": 1477 }, { "epoch": 0.799062894215174, "grad_norm": 0.3910425901412964, "learning_rate": 9.891391255729621e-06, "loss": 0.4874, "step": 1478 }, { "epoch": 0.7996035321679582, "grad_norm": 0.42644253373146057, "learning_rate": 9.890999536945284e-06, "loss": 0.4894, "step": 1479 }, { "epoch": 0.8001441701207425, "grad_norm": 0.4408269226551056, "learning_rate": 9.890607120811104e-06, "loss": 0.4637, "step": 1480 }, { "epoch": 0.8006848080735267, "grad_norm": 0.39104950428009033, "learning_rate": 9.890214007383026e-06, "loss": 0.4785, "step": 1481 }, { "epoch": 0.8012254460263111, "grad_norm": 0.3987670838832855, "learning_rate": 9.889820196717103e-06, "loss": 0.473, "step": 1482 }, { "epoch": 0.8017660839790953, "grad_norm": 0.4160068929195404, "learning_rate": 9.88942568886948e-06, "loss": 0.4616, "step": 1483 }, { "epoch": 0.8023067219318796, "grad_norm": 0.4000867009162903, "learning_rate": 9.88903048389641e-06, "loss": 0.5004, "step": 1484 }, { "epoch": 0.8028473598846639, "grad_norm": 0.36520716547966003, "learning_rate": 9.888634581854235e-06, "loss": 0.4564, "step": 1485 }, { "epoch": 0.8033879978374482, "grad_norm": 0.3652910590171814, "learning_rate": 9.888237982799407e-06, "loss": 0.4861, "step": 1486 }, { "epoch": 0.8039286357902324, "grad_norm": 0.3691157400608063, "learning_rate": 9.88784068678847e-06, "loss": 0.4612, "step": 1487 }, { "epoch": 0.8044692737430168, "grad_norm": 0.40664616227149963, "learning_rate": 9.887442693878068e-06, "loss": 0.4854, "step": 1488 }, { "epoch": 0.805009911695801, "grad_norm": 0.3441387414932251, "learning_rate": 9.887044004124951e-06, "loss": 0.4863, "step": 1489 }, { "epoch": 0.8055505496485853, "grad_norm": 0.3915308117866516, "learning_rate": 9.88664461758596e-06, "loss": 0.4784, "step": 1490 }, { "epoch": 0.8060911876013697, "grad_norm": 0.38703295588493347, "learning_rate": 9.886244534318038e-06, "loss": 0.4666, "step": 1491 }, { "epoch": 0.8066318255541539, "grad_norm": 0.3933779299259186, "learning_rate": 9.885843754378233e-06, "loss": 0.4811, "step": 1492 }, { "epoch": 0.8071724635069382, "grad_norm": 0.3390825390815735, "learning_rate": 9.88544227782368e-06, "loss": 0.4719, "step": 1493 }, { "epoch": 0.8077131014597225, "grad_norm": 0.38970884680747986, "learning_rate": 9.885040104711628e-06, "loss": 0.4446, "step": 1494 }, { "epoch": 0.8082537394125068, "grad_norm": 0.3798392713069916, "learning_rate": 9.884637235099414e-06, "loss": 0.4508, "step": 1495 }, { "epoch": 0.808794377365291, "grad_norm": 0.3552364706993103, "learning_rate": 9.884233669044479e-06, "loss": 0.4485, "step": 1496 }, { "epoch": 0.8093350153180753, "grad_norm": 0.3914445638656616, "learning_rate": 9.883829406604363e-06, "loss": 0.4576, "step": 1497 }, { "epoch": 0.8098756532708596, "grad_norm": 0.35896924138069153, "learning_rate": 9.883424447836705e-06, "loss": 0.4664, "step": 1498 }, { "epoch": 0.8104162912236439, "grad_norm": 0.3930225372314453, "learning_rate": 9.883018792799243e-06, "loss": 0.4897, "step": 1499 }, { "epoch": 0.8109569291764281, "grad_norm": 0.3645854890346527, "learning_rate": 9.882612441549817e-06, "loss": 0.458, "step": 1500 }, { "epoch": 0.8114975671292125, "grad_norm": 0.4014187455177307, "learning_rate": 9.882205394146362e-06, "loss": 0.4635, "step": 1501 }, { "epoch": 0.8120382050819968, "grad_norm": 0.3631240427494049, "learning_rate": 9.881797650646911e-06, "loss": 0.473, "step": 1502 }, { "epoch": 0.812578843034781, "grad_norm": 0.37682297825813293, "learning_rate": 9.881389211109604e-06, "loss": 0.4612, "step": 1503 }, { "epoch": 0.8131194809875654, "grad_norm": 0.4124244153499603, "learning_rate": 9.880980075592674e-06, "loss": 0.4795, "step": 1504 }, { "epoch": 0.8136601189403496, "grad_norm": 0.3777317702770233, "learning_rate": 9.880570244154455e-06, "loss": 0.4741, "step": 1505 }, { "epoch": 0.8142007568931339, "grad_norm": 0.3755948841571808, "learning_rate": 9.880159716853379e-06, "loss": 0.504, "step": 1506 }, { "epoch": 0.8147413948459182, "grad_norm": 0.3681623637676239, "learning_rate": 9.879748493747978e-06, "loss": 0.4705, "step": 1507 }, { "epoch": 0.8152820327987025, "grad_norm": 0.3670575022697449, "learning_rate": 9.879336574896885e-06, "loss": 0.4829, "step": 1508 }, { "epoch": 0.8158226707514867, "grad_norm": 0.40238499641418457, "learning_rate": 9.878923960358831e-06, "loss": 0.4778, "step": 1509 }, { "epoch": 0.816363308704271, "grad_norm": 0.4063103199005127, "learning_rate": 9.878510650192644e-06, "loss": 0.4859, "step": 1510 }, { "epoch": 0.8169039466570553, "grad_norm": 0.3617895841598511, "learning_rate": 9.878096644457254e-06, "loss": 0.4689, "step": 1511 }, { "epoch": 0.8174445846098396, "grad_norm": 0.3812806010246277, "learning_rate": 9.877681943211688e-06, "loss": 0.4841, "step": 1512 }, { "epoch": 0.8179852225626238, "grad_norm": 0.45879244804382324, "learning_rate": 9.877266546515075e-06, "loss": 0.4704, "step": 1513 }, { "epoch": 0.8185258605154082, "grad_norm": 0.3604988753795624, "learning_rate": 9.87685045442664e-06, "loss": 0.4815, "step": 1514 }, { "epoch": 0.8190664984681925, "grad_norm": 0.4087460935115814, "learning_rate": 9.876433667005711e-06, "loss": 0.4805, "step": 1515 }, { "epoch": 0.8196071364209767, "grad_norm": 0.3688940107822418, "learning_rate": 9.87601618431171e-06, "loss": 0.4603, "step": 1516 }, { "epoch": 0.8201477743737611, "grad_norm": 0.3499005138874054, "learning_rate": 9.875598006404164e-06, "loss": 0.49, "step": 1517 }, { "epoch": 0.8206884123265453, "grad_norm": 0.38962242007255554, "learning_rate": 9.875179133342692e-06, "loss": 0.4882, "step": 1518 }, { "epoch": 0.8212290502793296, "grad_norm": 0.38138699531555176, "learning_rate": 9.87475956518702e-06, "loss": 0.4711, "step": 1519 }, { "epoch": 0.8217696882321139, "grad_norm": 0.37326300144195557, "learning_rate": 9.874339301996968e-06, "loss": 0.4894, "step": 1520 }, { "epoch": 0.8223103261848982, "grad_norm": 0.3874557912349701, "learning_rate": 9.873918343832454e-06, "loss": 0.4858, "step": 1521 }, { "epoch": 0.8228509641376824, "grad_norm": 0.371981143951416, "learning_rate": 9.873496690753502e-06, "loss": 0.4413, "step": 1522 }, { "epoch": 0.8233916020904668, "grad_norm": 0.370301753282547, "learning_rate": 9.873074342820225e-06, "loss": 0.4494, "step": 1523 }, { "epoch": 0.823932240043251, "grad_norm": 0.3950522243976593, "learning_rate": 9.872651300092845e-06, "loss": 0.4624, "step": 1524 }, { "epoch": 0.8244728779960353, "grad_norm": 0.4005773365497589, "learning_rate": 9.87222756263168e-06, "loss": 0.4758, "step": 1525 }, { "epoch": 0.8250135159488196, "grad_norm": 0.326057106256485, "learning_rate": 9.871803130497139e-06, "loss": 0.4696, "step": 1526 }, { "epoch": 0.8255541539016039, "grad_norm": 0.39111393690109253, "learning_rate": 9.871378003749744e-06, "loss": 0.4595, "step": 1527 }, { "epoch": 0.8260947918543882, "grad_norm": 0.34531497955322266, "learning_rate": 9.870952182450104e-06, "loss": 0.4675, "step": 1528 }, { "epoch": 0.8266354298071724, "grad_norm": 0.37008053064346313, "learning_rate": 9.870525666658933e-06, "loss": 0.4645, "step": 1529 }, { "epoch": 0.8271760677599568, "grad_norm": 0.3724628984928131, "learning_rate": 9.870098456437045e-06, "loss": 0.4595, "step": 1530 }, { "epoch": 0.827716705712741, "grad_norm": 0.3725714385509491, "learning_rate": 9.869670551845347e-06, "loss": 0.4588, "step": 1531 }, { "epoch": 0.8282573436655253, "grad_norm": 0.4514995515346527, "learning_rate": 9.869241952944852e-06, "loss": 0.4459, "step": 1532 }, { "epoch": 0.8287979816183096, "grad_norm": 0.42052534222602844, "learning_rate": 9.868812659796669e-06, "loss": 0.4677, "step": 1533 }, { "epoch": 0.8293386195710939, "grad_norm": 0.40265753865242004, "learning_rate": 9.868382672462002e-06, "loss": 0.4826, "step": 1534 }, { "epoch": 0.8298792575238781, "grad_norm": 0.3773770034313202, "learning_rate": 9.867951991002162e-06, "loss": 0.4521, "step": 1535 }, { "epoch": 0.8304198954766625, "grad_norm": 0.40366092324256897, "learning_rate": 9.867520615478554e-06, "loss": 0.4692, "step": 1536 }, { "epoch": 0.8309605334294468, "grad_norm": 0.3797312080860138, "learning_rate": 9.867088545952682e-06, "loss": 0.4673, "step": 1537 }, { "epoch": 0.831501171382231, "grad_norm": 0.41201868653297424, "learning_rate": 9.866655782486147e-06, "loss": 0.4709, "step": 1538 }, { "epoch": 0.8320418093350154, "grad_norm": 0.35594871640205383, "learning_rate": 9.866222325140657e-06, "loss": 0.4818, "step": 1539 }, { "epoch": 0.8325824472877996, "grad_norm": 0.41564980149269104, "learning_rate": 9.865788173978011e-06, "loss": 0.4866, "step": 1540 }, { "epoch": 0.8331230852405839, "grad_norm": 0.37932664155960083, "learning_rate": 9.865353329060108e-06, "loss": 0.4673, "step": 1541 }, { "epoch": 0.8336637231933681, "grad_norm": 0.37759143114089966, "learning_rate": 9.86491779044895e-06, "loss": 0.4855, "step": 1542 }, { "epoch": 0.8342043611461525, "grad_norm": 0.3636254668235779, "learning_rate": 9.864481558206633e-06, "loss": 0.4706, "step": 1543 }, { "epoch": 0.8347449990989367, "grad_norm": 0.35987749695777893, "learning_rate": 9.864044632395357e-06, "loss": 0.4674, "step": 1544 }, { "epoch": 0.835285637051721, "grad_norm": 0.39580675959587097, "learning_rate": 9.863607013077414e-06, "loss": 0.4847, "step": 1545 }, { "epoch": 0.8358262750045053, "grad_norm": 0.38225269317626953, "learning_rate": 9.863168700315204e-06, "loss": 0.4625, "step": 1546 }, { "epoch": 0.8363669129572896, "grad_norm": 0.3854118883609772, "learning_rate": 9.862729694171216e-06, "loss": 0.456, "step": 1547 }, { "epoch": 0.8369075509100738, "grad_norm": 0.4238852262496948, "learning_rate": 9.862289994708044e-06, "loss": 0.4848, "step": 1548 }, { "epoch": 0.8374481888628582, "grad_norm": 0.40586042404174805, "learning_rate": 9.861849601988384e-06, "loss": 0.4621, "step": 1549 }, { "epoch": 0.8379888268156425, "grad_norm": 0.43681904673576355, "learning_rate": 9.86140851607502e-06, "loss": 0.494, "step": 1550 }, { "epoch": 0.8385294647684267, "grad_norm": 0.38671132922172546, "learning_rate": 9.860966737030846e-06, "loss": 0.4984, "step": 1551 }, { "epoch": 0.8390701027212111, "grad_norm": 0.40254631638526917, "learning_rate": 9.860524264918847e-06, "loss": 0.4929, "step": 1552 }, { "epoch": 0.8396107406739953, "grad_norm": 0.38910743594169617, "learning_rate": 9.860081099802111e-06, "loss": 0.4692, "step": 1553 }, { "epoch": 0.8401513786267796, "grad_norm": 0.35398274660110474, "learning_rate": 9.859637241743824e-06, "loss": 0.4821, "step": 1554 }, { "epoch": 0.8406920165795639, "grad_norm": 0.4559416174888611, "learning_rate": 9.85919269080727e-06, "loss": 0.4785, "step": 1555 }, { "epoch": 0.8412326545323482, "grad_norm": 0.38288402557373047, "learning_rate": 9.858747447055832e-06, "loss": 0.4379, "step": 1556 }, { "epoch": 0.8417732924851324, "grad_norm": 0.3954237699508667, "learning_rate": 9.858301510552993e-06, "loss": 0.4988, "step": 1557 }, { "epoch": 0.8423139304379167, "grad_norm": 0.37373679876327515, "learning_rate": 9.857854881362334e-06, "loss": 0.4434, "step": 1558 }, { "epoch": 0.842854568390701, "grad_norm": 0.3693135678768158, "learning_rate": 9.857407559547531e-06, "loss": 0.4698, "step": 1559 }, { "epoch": 0.8433952063434853, "grad_norm": 0.3623071014881134, "learning_rate": 9.856959545172369e-06, "loss": 0.4677, "step": 1560 }, { "epoch": 0.8439358442962696, "grad_norm": 0.35580453276634216, "learning_rate": 9.856510838300719e-06, "loss": 0.4777, "step": 1561 }, { "epoch": 0.8444764822490539, "grad_norm": 0.35490602254867554, "learning_rate": 9.85606143899656e-06, "loss": 0.4522, "step": 1562 }, { "epoch": 0.8450171202018382, "grad_norm": 0.35406145453453064, "learning_rate": 9.855611347323965e-06, "loss": 0.4664, "step": 1563 }, { "epoch": 0.8455577581546224, "grad_norm": 0.3772537410259247, "learning_rate": 9.855160563347108e-06, "loss": 0.4514, "step": 1564 }, { "epoch": 0.8460983961074068, "grad_norm": 0.40558236837387085, "learning_rate": 9.854709087130261e-06, "loss": 0.4579, "step": 1565 }, { "epoch": 0.846639034060191, "grad_norm": 0.4021514654159546, "learning_rate": 9.854256918737794e-06, "loss": 0.4742, "step": 1566 }, { "epoch": 0.8471796720129753, "grad_norm": 0.3992575705051422, "learning_rate": 9.853804058234177e-06, "loss": 0.4879, "step": 1567 }, { "epoch": 0.8477203099657596, "grad_norm": 0.37950408458709717, "learning_rate": 9.853350505683978e-06, "loss": 0.4649, "step": 1568 }, { "epoch": 0.8482609479185439, "grad_norm": 0.37125805020332336, "learning_rate": 9.852896261151865e-06, "loss": 0.4818, "step": 1569 }, { "epoch": 0.8488015858713281, "grad_norm": 0.366233229637146, "learning_rate": 9.852441324702599e-06, "loss": 0.4941, "step": 1570 }, { "epoch": 0.8493422238241125, "grad_norm": 0.34432452917099, "learning_rate": 9.85198569640105e-06, "loss": 0.4836, "step": 1571 }, { "epoch": 0.8498828617768968, "grad_norm": 0.38173070549964905, "learning_rate": 9.851529376312176e-06, "loss": 0.4613, "step": 1572 }, { "epoch": 0.850423499729681, "grad_norm": 0.35302814841270447, "learning_rate": 9.85107236450104e-06, "loss": 0.4597, "step": 1573 }, { "epoch": 0.8509641376824653, "grad_norm": 0.3876490592956543, "learning_rate": 9.850614661032803e-06, "loss": 0.4761, "step": 1574 }, { "epoch": 0.8515047756352496, "grad_norm": 0.3852789103984833, "learning_rate": 9.850156265972722e-06, "loss": 0.4698, "step": 1575 }, { "epoch": 0.8520454135880339, "grad_norm": 0.4346942603588104, "learning_rate": 9.849697179386152e-06, "loss": 0.4583, "step": 1576 }, { "epoch": 0.8525860515408181, "grad_norm": 0.36047878861427307, "learning_rate": 9.849237401338554e-06, "loss": 0.4565, "step": 1577 }, { "epoch": 0.8531266894936025, "grad_norm": 0.38957422971725464, "learning_rate": 9.848776931895478e-06, "loss": 0.4873, "step": 1578 }, { "epoch": 0.8536673274463867, "grad_norm": 0.35288092494010925, "learning_rate": 9.84831577112258e-06, "loss": 0.4777, "step": 1579 }, { "epoch": 0.854207965399171, "grad_norm": 0.4233868420124054, "learning_rate": 9.847853919085608e-06, "loss": 0.481, "step": 1580 }, { "epoch": 0.8547486033519553, "grad_norm": 0.38051849603652954, "learning_rate": 9.847391375850415e-06, "loss": 0.4453, "step": 1581 }, { "epoch": 0.8552892413047396, "grad_norm": 0.35708117485046387, "learning_rate": 9.84692814148295e-06, "loss": 0.4701, "step": 1582 }, { "epoch": 0.8558298792575239, "grad_norm": 0.38620758056640625, "learning_rate": 9.846464216049256e-06, "loss": 0.4564, "step": 1583 }, { "epoch": 0.8563705172103082, "grad_norm": 0.39909282326698303, "learning_rate": 9.845999599615481e-06, "loss": 0.5078, "step": 1584 }, { "epoch": 0.8569111551630925, "grad_norm": 0.341009259223938, "learning_rate": 9.845534292247872e-06, "loss": 0.4972, "step": 1585 }, { "epoch": 0.8574517931158767, "grad_norm": 0.4746313989162445, "learning_rate": 9.845068294012767e-06, "loss": 0.4795, "step": 1586 }, { "epoch": 0.857992431068661, "grad_norm": 0.47726917266845703, "learning_rate": 9.844601604976611e-06, "loss": 0.4888, "step": 1587 }, { "epoch": 0.8585330690214453, "grad_norm": 0.33320072293281555, "learning_rate": 9.844134225205941e-06, "loss": 0.4423, "step": 1588 }, { "epoch": 0.8590737069742296, "grad_norm": 0.44583311676979065, "learning_rate": 9.843666154767396e-06, "loss": 0.4602, "step": 1589 }, { "epoch": 0.8596143449270138, "grad_norm": 0.4997808337211609, "learning_rate": 9.843197393727713e-06, "loss": 0.4743, "step": 1590 }, { "epoch": 0.8601549828797982, "grad_norm": 0.32006052136421204, "learning_rate": 9.842727942153728e-06, "loss": 0.4549, "step": 1591 }, { "epoch": 0.8606956208325824, "grad_norm": 0.4787975549697876, "learning_rate": 9.842257800112372e-06, "loss": 0.4767, "step": 1592 }, { "epoch": 0.8612362587853667, "grad_norm": 0.3997071087360382, "learning_rate": 9.84178696767068e-06, "loss": 0.473, "step": 1593 }, { "epoch": 0.861776896738151, "grad_norm": 0.3409503996372223, "learning_rate": 9.841315444895778e-06, "loss": 0.4823, "step": 1594 }, { "epoch": 0.8623175346909353, "grad_norm": 0.35580211877822876, "learning_rate": 9.8408432318549e-06, "loss": 0.4511, "step": 1595 }, { "epoch": 0.8628581726437196, "grad_norm": 0.40508773922920227, "learning_rate": 9.84037032861537e-06, "loss": 0.4593, "step": 1596 }, { "epoch": 0.8633988105965039, "grad_norm": 0.3585672378540039, "learning_rate": 9.839896735244615e-06, "loss": 0.4548, "step": 1597 }, { "epoch": 0.8639394485492882, "grad_norm": 0.37689289450645447, "learning_rate": 9.839422451810159e-06, "loss": 0.4784, "step": 1598 }, { "epoch": 0.8644800865020724, "grad_norm": 0.42086952924728394, "learning_rate": 9.838947478379623e-06, "loss": 0.4825, "step": 1599 }, { "epoch": 0.8650207244548568, "grad_norm": 0.3835284113883972, "learning_rate": 9.838471815020731e-06, "loss": 0.4775, "step": 1600 }, { "epoch": 0.865561362407641, "grad_norm": 0.38955605030059814, "learning_rate": 9.8379954618013e-06, "loss": 0.4632, "step": 1601 }, { "epoch": 0.8661020003604253, "grad_norm": 0.43403568863868713, "learning_rate": 9.837518418789247e-06, "loss": 0.4784, "step": 1602 }, { "epoch": 0.8666426383132095, "grad_norm": 0.40000101923942566, "learning_rate": 9.837040686052591e-06, "loss": 0.4623, "step": 1603 }, { "epoch": 0.8671832762659939, "grad_norm": 0.4432052671909332, "learning_rate": 9.836562263659441e-06, "loss": 0.4765, "step": 1604 }, { "epoch": 0.8677239142187781, "grad_norm": 0.3897626996040344, "learning_rate": 9.836083151678014e-06, "loss": 0.5006, "step": 1605 }, { "epoch": 0.8682645521715624, "grad_norm": 0.4313247501850128, "learning_rate": 9.835603350176618e-06, "loss": 0.4695, "step": 1606 }, { "epoch": 0.8688051901243468, "grad_norm": 0.3997137248516083, "learning_rate": 9.835122859223668e-06, "loss": 0.465, "step": 1607 }, { "epoch": 0.869345828077131, "grad_norm": 0.4124264419078827, "learning_rate": 9.834641678887664e-06, "loss": 0.4776, "step": 1608 }, { "epoch": 0.8698864660299153, "grad_norm": 0.3896905779838562, "learning_rate": 9.834159809237217e-06, "loss": 0.4554, "step": 1609 }, { "epoch": 0.8704271039826996, "grad_norm": 0.46274518966674805, "learning_rate": 9.833677250341027e-06, "loss": 0.4832, "step": 1610 }, { "epoch": 0.8709677419354839, "grad_norm": 0.3725559413433075, "learning_rate": 9.833194002267901e-06, "loss": 0.4704, "step": 1611 }, { "epoch": 0.8715083798882681, "grad_norm": 0.3861299753189087, "learning_rate": 9.832710065086736e-06, "loss": 0.487, "step": 1612 }, { "epoch": 0.8720490178410525, "grad_norm": 0.42216816544532776, "learning_rate": 9.832225438866532e-06, "loss": 0.5083, "step": 1613 }, { "epoch": 0.8725896557938367, "grad_norm": 0.3609369695186615, "learning_rate": 9.831740123676387e-06, "loss": 0.4579, "step": 1614 }, { "epoch": 0.873130293746621, "grad_norm": 0.410856693983078, "learning_rate": 9.831254119585497e-06, "loss": 0.4679, "step": 1615 }, { "epoch": 0.8736709316994054, "grad_norm": 0.418223112821579, "learning_rate": 9.83076742666315e-06, "loss": 0.4571, "step": 1616 }, { "epoch": 0.8742115696521896, "grad_norm": 0.37163031101226807, "learning_rate": 9.830280044978745e-06, "loss": 0.4606, "step": 1617 }, { "epoch": 0.8747522076049739, "grad_norm": 0.3737943768501282, "learning_rate": 9.82979197460177e-06, "loss": 0.463, "step": 1618 }, { "epoch": 0.8752928455577581, "grad_norm": 0.37015417218208313, "learning_rate": 9.82930321560181e-06, "loss": 0.4651, "step": 1619 }, { "epoch": 0.8758334835105425, "grad_norm": 0.3578423857688904, "learning_rate": 9.828813768048555e-06, "loss": 0.4543, "step": 1620 }, { "epoch": 0.8763741214633267, "grad_norm": 0.35040757060050964, "learning_rate": 9.828323632011789e-06, "loss": 0.4631, "step": 1621 }, { "epoch": 0.876914759416111, "grad_norm": 0.44472888112068176, "learning_rate": 9.827832807561392e-06, "loss": 0.4635, "step": 1622 }, { "epoch": 0.8774553973688953, "grad_norm": 0.3634223937988281, "learning_rate": 9.827341294767347e-06, "loss": 0.4762, "step": 1623 }, { "epoch": 0.8779960353216796, "grad_norm": 0.43389296531677246, "learning_rate": 9.826849093699733e-06, "loss": 0.4297, "step": 1624 }, { "epoch": 0.8785366732744638, "grad_norm": 0.43347981572151184, "learning_rate": 9.826356204428726e-06, "loss": 0.4498, "step": 1625 }, { "epoch": 0.8790773112272482, "grad_norm": 0.40552183985710144, "learning_rate": 9.825862627024606e-06, "loss": 0.4644, "step": 1626 }, { "epoch": 0.8796179491800324, "grad_norm": 0.41467517614364624, "learning_rate": 9.825368361557738e-06, "loss": 0.4461, "step": 1627 }, { "epoch": 0.8801585871328167, "grad_norm": 0.44060826301574707, "learning_rate": 9.824873408098598e-06, "loss": 0.4671, "step": 1628 }, { "epoch": 0.8806992250856011, "grad_norm": 0.3913240134716034, "learning_rate": 9.824377766717758e-06, "loss": 0.4754, "step": 1629 }, { "epoch": 0.8812398630383853, "grad_norm": 0.4083966314792633, "learning_rate": 9.823881437485882e-06, "loss": 0.4769, "step": 1630 }, { "epoch": 0.8817805009911696, "grad_norm": 0.4295070171356201, "learning_rate": 9.823384420473738e-06, "loss": 0.4634, "step": 1631 }, { "epoch": 0.8823211389439539, "grad_norm": 0.41920551657676697, "learning_rate": 9.822886715752187e-06, "loss": 0.4609, "step": 1632 }, { "epoch": 0.8828617768967382, "grad_norm": 0.39997240900993347, "learning_rate": 9.822388323392193e-06, "loss": 0.4757, "step": 1633 }, { "epoch": 0.8834024148495224, "grad_norm": 0.38902947306632996, "learning_rate": 9.821889243464816e-06, "loss": 0.4798, "step": 1634 }, { "epoch": 0.8839430528023067, "grad_norm": 0.41250500082969666, "learning_rate": 9.821389476041212e-06, "loss": 0.4552, "step": 1635 }, { "epoch": 0.884483690755091, "grad_norm": 0.45538103580474854, "learning_rate": 9.82088902119264e-06, "loss": 0.4787, "step": 1636 }, { "epoch": 0.8850243287078753, "grad_norm": 0.4461895823478699, "learning_rate": 9.820387878990451e-06, "loss": 0.4538, "step": 1637 }, { "epoch": 0.8855649666606595, "grad_norm": 0.47144433856010437, "learning_rate": 9.819886049506098e-06, "loss": 0.4438, "step": 1638 }, { "epoch": 0.8861056046134439, "grad_norm": 0.43563005328178406, "learning_rate": 9.819383532811134e-06, "loss": 0.4701, "step": 1639 }, { "epoch": 0.8866462425662281, "grad_norm": 0.43610212206840515, "learning_rate": 9.8188803289772e-06, "loss": 0.4917, "step": 1640 }, { "epoch": 0.8871868805190124, "grad_norm": 0.39954859018325806, "learning_rate": 9.818376438076047e-06, "loss": 0.4816, "step": 1641 }, { "epoch": 0.8877275184717968, "grad_norm": 0.3857881426811218, "learning_rate": 9.817871860179519e-06, "loss": 0.4716, "step": 1642 }, { "epoch": 0.888268156424581, "grad_norm": 0.4059131443500519, "learning_rate": 9.817366595359556e-06, "loss": 0.4572, "step": 1643 }, { "epoch": 0.8888087943773653, "grad_norm": 0.4096532464027405, "learning_rate": 9.816860643688197e-06, "loss": 0.4901, "step": 1644 }, { "epoch": 0.8893494323301496, "grad_norm": 0.45331189036369324, "learning_rate": 9.816354005237583e-06, "loss": 0.4495, "step": 1645 }, { "epoch": 0.8898900702829339, "grad_norm": 0.429908812046051, "learning_rate": 9.815846680079946e-06, "loss": 0.4694, "step": 1646 }, { "epoch": 0.8904307082357181, "grad_norm": 0.37314942479133606, "learning_rate": 9.815338668287621e-06, "loss": 0.4854, "step": 1647 }, { "epoch": 0.8909713461885024, "grad_norm": 0.4905018210411072, "learning_rate": 9.81482996993304e-06, "loss": 0.4551, "step": 1648 }, { "epoch": 0.8915119841412867, "grad_norm": 0.3743281960487366, "learning_rate": 9.814320585088732e-06, "loss": 0.4557, "step": 1649 }, { "epoch": 0.892052622094071, "grad_norm": 0.42336776852607727, "learning_rate": 9.813810513827324e-06, "loss": 0.4594, "step": 1650 }, { "epoch": 0.8925932600468552, "grad_norm": 0.4253554046154022, "learning_rate": 9.813299756221539e-06, "loss": 0.4816, "step": 1651 }, { "epoch": 0.8931338979996396, "grad_norm": 0.45747578144073486, "learning_rate": 9.812788312344203e-06, "loss": 0.4935, "step": 1652 }, { "epoch": 0.8936745359524239, "grad_norm": 0.4536707401275635, "learning_rate": 9.812276182268236e-06, "loss": 0.4865, "step": 1653 }, { "epoch": 0.8942151739052081, "grad_norm": 0.43989959359169006, "learning_rate": 9.811763366066657e-06, "loss": 0.4677, "step": 1654 }, { "epoch": 0.8947558118579925, "grad_norm": 0.45789846777915955, "learning_rate": 9.811249863812581e-06, "loss": 0.4643, "step": 1655 }, { "epoch": 0.8952964498107767, "grad_norm": 0.3918055295944214, "learning_rate": 9.810735675579221e-06, "loss": 0.4734, "step": 1656 }, { "epoch": 0.895837087763561, "grad_norm": 0.4731237590312958, "learning_rate": 9.810220801439894e-06, "loss": 0.4887, "step": 1657 }, { "epoch": 0.8963777257163453, "grad_norm": 0.39390793442726135, "learning_rate": 9.809705241468004e-06, "loss": 0.4299, "step": 1658 }, { "epoch": 0.8969183636691296, "grad_norm": 0.4457356035709381, "learning_rate": 9.809188995737062e-06, "loss": 0.467, "step": 1659 }, { "epoch": 0.8974590016219138, "grad_norm": 0.43071913719177246, "learning_rate": 9.808672064320672e-06, "loss": 0.4533, "step": 1660 }, { "epoch": 0.8979996395746982, "grad_norm": 0.505720317363739, "learning_rate": 9.808154447292539e-06, "loss": 0.4819, "step": 1661 }, { "epoch": 0.8985402775274824, "grad_norm": 0.3969588875770569, "learning_rate": 9.807636144726463e-06, "loss": 0.4719, "step": 1662 }, { "epoch": 0.8990809154802667, "grad_norm": 0.4451639950275421, "learning_rate": 9.80711715669634e-06, "loss": 0.4385, "step": 1663 }, { "epoch": 0.899621553433051, "grad_norm": 0.40204963088035583, "learning_rate": 9.80659748327617e-06, "loss": 0.4589, "step": 1664 }, { "epoch": 0.9001621913858353, "grad_norm": 0.37509143352508545, "learning_rate": 9.806077124540045e-06, "loss": 0.449, "step": 1665 }, { "epoch": 0.9007028293386196, "grad_norm": 0.4153278172016144, "learning_rate": 9.80555608056216e-06, "loss": 0.473, "step": 1666 }, { "epoch": 0.9012434672914038, "grad_norm": 0.35227838158607483, "learning_rate": 9.805034351416799e-06, "loss": 0.4741, "step": 1667 }, { "epoch": 0.9017841052441882, "grad_norm": 0.39335453510284424, "learning_rate": 9.804511937178353e-06, "loss": 0.4675, "step": 1668 }, { "epoch": 0.9023247431969724, "grad_norm": 0.38329482078552246, "learning_rate": 9.803988837921307e-06, "loss": 0.4883, "step": 1669 }, { "epoch": 0.9028653811497567, "grad_norm": 0.3808650076389313, "learning_rate": 9.803465053720242e-06, "loss": 0.4607, "step": 1670 }, { "epoch": 0.903406019102541, "grad_norm": 0.37646231055259705, "learning_rate": 9.80294058464984e-06, "loss": 0.4577, "step": 1671 }, { "epoch": 0.9039466570553253, "grad_norm": 0.36820298433303833, "learning_rate": 9.802415430784877e-06, "loss": 0.4797, "step": 1672 }, { "epoch": 0.9044872950081095, "grad_norm": 0.37384963035583496, "learning_rate": 9.801889592200229e-06, "loss": 0.4568, "step": 1673 }, { "epoch": 0.9050279329608939, "grad_norm": 0.3755015730857849, "learning_rate": 9.80136306897087e-06, "loss": 0.4753, "step": 1674 }, { "epoch": 0.9055685709136782, "grad_norm": 0.4412163197994232, "learning_rate": 9.800835861171869e-06, "loss": 0.4805, "step": 1675 }, { "epoch": 0.9061092088664624, "grad_norm": 0.4151507318019867, "learning_rate": 9.800307968878395e-06, "loss": 0.4629, "step": 1676 }, { "epoch": 0.9066498468192468, "grad_norm": 0.4136570990085602, "learning_rate": 9.799779392165716e-06, "loss": 0.4265, "step": 1677 }, { "epoch": 0.907190484772031, "grad_norm": 0.40677595138549805, "learning_rate": 9.799250131109192e-06, "loss": 0.4717, "step": 1678 }, { "epoch": 0.9077311227248153, "grad_norm": 0.4381828308105469, "learning_rate": 9.798720185784288e-06, "loss": 0.4893, "step": 1679 }, { "epoch": 0.9082717606775995, "grad_norm": 0.4856142997741699, "learning_rate": 9.798189556266559e-06, "loss": 0.4538, "step": 1680 }, { "epoch": 0.9088123986303839, "grad_norm": 0.40494972467422485, "learning_rate": 9.797658242631664e-06, "loss": 0.4772, "step": 1681 }, { "epoch": 0.9093530365831681, "grad_norm": 0.4658674895763397, "learning_rate": 9.797126244955355e-06, "loss": 0.4778, "step": 1682 }, { "epoch": 0.9098936745359524, "grad_norm": 0.3895665407180786, "learning_rate": 9.796593563313483e-06, "loss": 0.4633, "step": 1683 }, { "epoch": 0.9104343124887367, "grad_norm": 0.46936503052711487, "learning_rate": 9.796060197781998e-06, "loss": 0.4864, "step": 1684 }, { "epoch": 0.910974950441521, "grad_norm": 0.3557020425796509, "learning_rate": 9.795526148436945e-06, "loss": 0.4757, "step": 1685 }, { "epoch": 0.9115155883943052, "grad_norm": 0.3669896423816681, "learning_rate": 9.794991415354468e-06, "loss": 0.4801, "step": 1686 }, { "epoch": 0.9120562263470896, "grad_norm": 0.35865387320518494, "learning_rate": 9.794455998610812e-06, "loss": 0.4782, "step": 1687 }, { "epoch": 0.9125968642998739, "grad_norm": 0.3865331709384918, "learning_rate": 9.79391989828231e-06, "loss": 0.4783, "step": 1688 }, { "epoch": 0.9131375022526581, "grad_norm": 0.37714827060699463, "learning_rate": 9.793383114445403e-06, "loss": 0.4599, "step": 1689 }, { "epoch": 0.9136781402054425, "grad_norm": 0.37188130617141724, "learning_rate": 9.792845647176621e-06, "loss": 0.4952, "step": 1690 }, { "epoch": 0.9142187781582267, "grad_norm": 0.3693977892398834, "learning_rate": 9.792307496552596e-06, "loss": 0.4661, "step": 1691 }, { "epoch": 0.914759416111011, "grad_norm": 0.3558450937271118, "learning_rate": 9.791768662650059e-06, "loss": 0.47, "step": 1692 }, { "epoch": 0.9153000540637953, "grad_norm": 0.3815310299396515, "learning_rate": 9.791229145545832e-06, "loss": 0.4877, "step": 1693 }, { "epoch": 0.9158406920165796, "grad_norm": 0.3444439768791199, "learning_rate": 9.790688945316841e-06, "loss": 0.4563, "step": 1694 }, { "epoch": 0.9163813299693638, "grad_norm": 0.43752437829971313, "learning_rate": 9.790148062040108e-06, "loss": 0.4387, "step": 1695 }, { "epoch": 0.9169219679221481, "grad_norm": 0.40530988574028015, "learning_rate": 9.789606495792748e-06, "loss": 0.5028, "step": 1696 }, { "epoch": 0.9174626058749324, "grad_norm": 0.40507200360298157, "learning_rate": 9.789064246651978e-06, "loss": 0.4567, "step": 1697 }, { "epoch": 0.9180032438277167, "grad_norm": 0.40165984630584717, "learning_rate": 9.78852131469511e-06, "loss": 0.4753, "step": 1698 }, { "epoch": 0.918543881780501, "grad_norm": 0.3914419114589691, "learning_rate": 9.787977699999556e-06, "loss": 0.4578, "step": 1699 }, { "epoch": 0.9190845197332853, "grad_norm": 0.3427618443965912, "learning_rate": 9.787433402642823e-06, "loss": 0.4669, "step": 1700 }, { "epoch": 0.9196251576860696, "grad_norm": 0.3847512900829315, "learning_rate": 9.786888422702516e-06, "loss": 0.4994, "step": 1701 }, { "epoch": 0.9201657956388538, "grad_norm": 0.4131098985671997, "learning_rate": 9.786342760256336e-06, "loss": 0.4595, "step": 1702 }, { "epoch": 0.9207064335916382, "grad_norm": 0.3641406297683716, "learning_rate": 9.785796415382084e-06, "loss": 0.4453, "step": 1703 }, { "epoch": 0.9212470715444224, "grad_norm": 0.4470204710960388, "learning_rate": 9.785249388157656e-06, "loss": 0.4708, "step": 1704 }, { "epoch": 0.9217877094972067, "grad_norm": 0.3770560026168823, "learning_rate": 9.784701678661045e-06, "loss": 0.4624, "step": 1705 }, { "epoch": 0.922328347449991, "grad_norm": 0.4491824209690094, "learning_rate": 9.784153286970346e-06, "loss": 0.4868, "step": 1706 }, { "epoch": 0.9228689854027753, "grad_norm": 0.3728610873222351, "learning_rate": 9.783604213163744e-06, "loss": 0.4444, "step": 1707 }, { "epoch": 0.9234096233555595, "grad_norm": 0.3844902813434601, "learning_rate": 9.783054457319528e-06, "loss": 0.4467, "step": 1708 }, { "epoch": 0.9239502613083439, "grad_norm": 0.3831673264503479, "learning_rate": 9.782504019516079e-06, "loss": 0.4898, "step": 1709 }, { "epoch": 0.9244908992611282, "grad_norm": 0.35555362701416016, "learning_rate": 9.781952899831876e-06, "loss": 0.481, "step": 1710 }, { "epoch": 0.9250315372139124, "grad_norm": 0.39292532205581665, "learning_rate": 9.781401098345503e-06, "loss": 0.4751, "step": 1711 }, { "epoch": 0.9255721751666967, "grad_norm": 0.3829486072063446, "learning_rate": 9.780848615135627e-06, "loss": 0.48, "step": 1712 }, { "epoch": 0.926112813119481, "grad_norm": 0.4107460379600525, "learning_rate": 9.780295450281026e-06, "loss": 0.4799, "step": 1713 }, { "epoch": 0.9266534510722653, "grad_norm": 0.379866361618042, "learning_rate": 9.779741603860567e-06, "loss": 0.4582, "step": 1714 }, { "epoch": 0.9271940890250495, "grad_norm": 0.37752336263656616, "learning_rate": 9.779187075953215e-06, "loss": 0.4599, "step": 1715 }, { "epoch": 0.9277347269778339, "grad_norm": 0.4197329580783844, "learning_rate": 9.778631866638036e-06, "loss": 0.4722, "step": 1716 }, { "epoch": 0.9282753649306181, "grad_norm": 0.38856256008148193, "learning_rate": 9.778075975994188e-06, "loss": 0.4679, "step": 1717 }, { "epoch": 0.9288160028834024, "grad_norm": 0.44080206751823425, "learning_rate": 9.777519404100933e-06, "loss": 0.4653, "step": 1718 }, { "epoch": 0.9293566408361867, "grad_norm": 0.39855122566223145, "learning_rate": 9.77696215103762e-06, "loss": 0.4706, "step": 1719 }, { "epoch": 0.929897278788971, "grad_norm": 0.41832059621810913, "learning_rate": 9.776404216883709e-06, "loss": 0.5104, "step": 1720 }, { "epoch": 0.9304379167417552, "grad_norm": 0.45665568113327026, "learning_rate": 9.775845601718742e-06, "loss": 0.4891, "step": 1721 }, { "epoch": 0.9309785546945396, "grad_norm": 0.44155561923980713, "learning_rate": 9.775286305622368e-06, "loss": 0.4684, "step": 1722 }, { "epoch": 0.9315191926473239, "grad_norm": 0.4479288160800934, "learning_rate": 9.774726328674333e-06, "loss": 0.4584, "step": 1723 }, { "epoch": 0.9320598306001081, "grad_norm": 0.40802037715911865, "learning_rate": 9.774165670954474e-06, "loss": 0.4648, "step": 1724 }, { "epoch": 0.9326004685528924, "grad_norm": 0.39233508706092834, "learning_rate": 9.77360433254273e-06, "loss": 0.4705, "step": 1725 }, { "epoch": 0.9331411065056767, "grad_norm": 0.49127158522605896, "learning_rate": 9.773042313519135e-06, "loss": 0.4733, "step": 1726 }, { "epoch": 0.933681744458461, "grad_norm": 0.36547306180000305, "learning_rate": 9.77247961396382e-06, "loss": 0.4773, "step": 1727 }, { "epoch": 0.9342223824112452, "grad_norm": 0.45778974890708923, "learning_rate": 9.771916233957015e-06, "loss": 0.4535, "step": 1728 }, { "epoch": 0.9347630203640296, "grad_norm": 0.4647793471813202, "learning_rate": 9.771352173579048e-06, "loss": 0.4809, "step": 1729 }, { "epoch": 0.9353036583168138, "grad_norm": 0.3391762375831604, "learning_rate": 9.770787432910336e-06, "loss": 0.4735, "step": 1730 }, { "epoch": 0.9358442962695981, "grad_norm": 0.428021103143692, "learning_rate": 9.770222012031404e-06, "loss": 0.4604, "step": 1731 }, { "epoch": 0.9363849342223824, "grad_norm": 0.41578346490859985, "learning_rate": 9.769655911022864e-06, "loss": 0.4928, "step": 1732 }, { "epoch": 0.9369255721751667, "grad_norm": 0.39558741450309753, "learning_rate": 9.769089129965435e-06, "loss": 0.4755, "step": 1733 }, { "epoch": 0.937466210127951, "grad_norm": 0.3967934846878052, "learning_rate": 9.768521668939924e-06, "loss": 0.4611, "step": 1734 }, { "epoch": 0.9380068480807353, "grad_norm": 0.34875285625457764, "learning_rate": 9.767953528027238e-06, "loss": 0.4419, "step": 1735 }, { "epoch": 0.9385474860335196, "grad_norm": 0.36679312586784363, "learning_rate": 9.767384707308383e-06, "loss": 0.4631, "step": 1736 }, { "epoch": 0.9390881239863038, "grad_norm": 0.3865496516227722, "learning_rate": 9.76681520686446e-06, "loss": 0.4864, "step": 1737 }, { "epoch": 0.9396287619390882, "grad_norm": 0.362447053194046, "learning_rate": 9.766245026776668e-06, "loss": 0.4684, "step": 1738 }, { "epoch": 0.9401693998918724, "grad_norm": 0.358012855052948, "learning_rate": 9.765674167126303e-06, "loss": 0.4952, "step": 1739 }, { "epoch": 0.9407100378446567, "grad_norm": 0.349895715713501, "learning_rate": 9.765102627994757e-06, "loss": 0.4928, "step": 1740 }, { "epoch": 0.9412506757974409, "grad_norm": 0.38196805119514465, "learning_rate": 9.764530409463516e-06, "loss": 0.4672, "step": 1741 }, { "epoch": 0.9417913137502253, "grad_norm": 0.3335559070110321, "learning_rate": 9.763957511614166e-06, "loss": 0.4631, "step": 1742 }, { "epoch": 0.9423319517030095, "grad_norm": 0.35727524757385254, "learning_rate": 9.763383934528393e-06, "loss": 0.4684, "step": 1743 }, { "epoch": 0.9428725896557938, "grad_norm": 0.3617507517337799, "learning_rate": 9.762809678287977e-06, "loss": 0.4639, "step": 1744 }, { "epoch": 0.9434132276085782, "grad_norm": 0.35392940044403076, "learning_rate": 9.762234742974793e-06, "loss": 0.4491, "step": 1745 }, { "epoch": 0.9439538655613624, "grad_norm": 0.3508428633213043, "learning_rate": 9.761659128670811e-06, "loss": 0.4596, "step": 1746 }, { "epoch": 0.9444945035141467, "grad_norm": 0.35308611392974854, "learning_rate": 9.761082835458104e-06, "loss": 0.4254, "step": 1747 }, { "epoch": 0.945035141466931, "grad_norm": 0.3707454800605774, "learning_rate": 9.760505863418841e-06, "loss": 0.4768, "step": 1748 }, { "epoch": 0.9455757794197153, "grad_norm": 0.32324856519699097, "learning_rate": 9.759928212635281e-06, "loss": 0.4895, "step": 1749 }, { "epoch": 0.9461164173724995, "grad_norm": 0.3526584506034851, "learning_rate": 9.759349883189788e-06, "loss": 0.47, "step": 1750 }, { "epoch": 0.9466570553252839, "grad_norm": 0.3857341408729553, "learning_rate": 9.758770875164817e-06, "loss": 0.4596, "step": 1751 }, { "epoch": 0.9471976932780681, "grad_norm": 0.3677152395248413, "learning_rate": 9.758191188642924e-06, "loss": 0.4959, "step": 1752 }, { "epoch": 0.9477383312308524, "grad_norm": 0.3096745014190674, "learning_rate": 9.75761082370676e-06, "loss": 0.4465, "step": 1753 }, { "epoch": 0.9482789691836367, "grad_norm": 0.3877503275871277, "learning_rate": 9.757029780439069e-06, "loss": 0.4668, "step": 1754 }, { "epoch": 0.948819607136421, "grad_norm": 0.3441794216632843, "learning_rate": 9.756448058922697e-06, "loss": 0.4602, "step": 1755 }, { "epoch": 0.9493602450892052, "grad_norm": 0.3353310227394104, "learning_rate": 9.755865659240585e-06, "loss": 0.4535, "step": 1756 }, { "epoch": 0.9499008830419895, "grad_norm": 0.361272931098938, "learning_rate": 9.755282581475769e-06, "loss": 0.4874, "step": 1757 }, { "epoch": 0.9504415209947739, "grad_norm": 0.37511539459228516, "learning_rate": 9.754698825711384e-06, "loss": 0.4829, "step": 1758 }, { "epoch": 0.9509821589475581, "grad_norm": 0.36202606558799744, "learning_rate": 9.754114392030663e-06, "loss": 0.4436, "step": 1759 }, { "epoch": 0.9515227969003424, "grad_norm": 0.36360007524490356, "learning_rate": 9.753529280516931e-06, "loss": 0.4749, "step": 1760 }, { "epoch": 0.9520634348531267, "grad_norm": 0.3650102913379669, "learning_rate": 9.752943491253614e-06, "loss": 0.4808, "step": 1761 }, { "epoch": 0.952604072805911, "grad_norm": 0.4095380902290344, "learning_rate": 9.75235702432423e-06, "loss": 0.489, "step": 1762 }, { "epoch": 0.9531447107586952, "grad_norm": 0.3650003969669342, "learning_rate": 9.7517698798124e-06, "loss": 0.4751, "step": 1763 }, { "epoch": 0.9536853487114796, "grad_norm": 4.295322895050049, "learning_rate": 9.751182057801835e-06, "loss": 0.4924, "step": 1764 }, { "epoch": 0.9542259866642638, "grad_norm": 0.4038078486919403, "learning_rate": 9.750593558376347e-06, "loss": 0.4769, "step": 1765 }, { "epoch": 0.9547666246170481, "grad_norm": 0.352078378200531, "learning_rate": 9.750004381619841e-06, "loss": 0.4449, "step": 1766 }, { "epoch": 0.9553072625698324, "grad_norm": 0.3834569454193115, "learning_rate": 9.749414527616325e-06, "loss": 0.4789, "step": 1767 }, { "epoch": 0.9558479005226167, "grad_norm": 0.40125179290771484, "learning_rate": 9.748823996449895e-06, "loss": 0.4755, "step": 1768 }, { "epoch": 0.956388538475401, "grad_norm": 0.3302454352378845, "learning_rate": 9.74823278820475e-06, "loss": 0.4495, "step": 1769 }, { "epoch": 0.9569291764281853, "grad_norm": 0.4053800404071808, "learning_rate": 9.747640902965185e-06, "loss": 0.4675, "step": 1770 }, { "epoch": 0.9574698143809696, "grad_norm": 0.3767763674259186, "learning_rate": 9.747048340815586e-06, "loss": 0.4757, "step": 1771 }, { "epoch": 0.9580104523337538, "grad_norm": 0.3748930096626282, "learning_rate": 9.746455101840442e-06, "loss": 0.458, "step": 1772 }, { "epoch": 0.9585510902865381, "grad_norm": 0.34963858127593994, "learning_rate": 9.745861186124336e-06, "loss": 0.4685, "step": 1773 }, { "epoch": 0.9590917282393224, "grad_norm": 0.374912291765213, "learning_rate": 9.745266593751946e-06, "loss": 0.4811, "step": 1774 }, { "epoch": 0.9596323661921067, "grad_norm": 0.38071802258491516, "learning_rate": 9.74467132480805e-06, "loss": 0.4724, "step": 1775 }, { "epoch": 0.9601730041448909, "grad_norm": 0.34633877873420715, "learning_rate": 9.744075379377518e-06, "loss": 0.4496, "step": 1776 }, { "epoch": 0.9607136420976753, "grad_norm": 0.39294639229774475, "learning_rate": 9.743478757545321e-06, "loss": 0.4599, "step": 1777 }, { "epoch": 0.9612542800504595, "grad_norm": 0.42019978165626526, "learning_rate": 9.742881459396522e-06, "loss": 0.4521, "step": 1778 }, { "epoch": 0.9617949180032438, "grad_norm": 0.4065021574497223, "learning_rate": 9.742283485016285e-06, "loss": 0.4968, "step": 1779 }, { "epoch": 0.9623355559560282, "grad_norm": 0.3769640624523163, "learning_rate": 9.741684834489866e-06, "loss": 0.4609, "step": 1780 }, { "epoch": 0.9628761939088124, "grad_norm": 0.34647080302238464, "learning_rate": 9.741085507902622e-06, "loss": 0.4587, "step": 1781 }, { "epoch": 0.9634168318615967, "grad_norm": 0.36830389499664307, "learning_rate": 9.740485505340002e-06, "loss": 0.4353, "step": 1782 }, { "epoch": 0.963957469814381, "grad_norm": 0.40501725673675537, "learning_rate": 9.739884826887554e-06, "loss": 0.434, "step": 1783 }, { "epoch": 0.9644981077671653, "grad_norm": 0.4197520315647125, "learning_rate": 9.739283472630919e-06, "loss": 0.4318, "step": 1784 }, { "epoch": 0.9650387457199495, "grad_norm": 0.3837212324142456, "learning_rate": 9.738681442655842e-06, "loss": 0.4664, "step": 1785 }, { "epoch": 0.9655793836727338, "grad_norm": 0.3517027795314789, "learning_rate": 9.738078737048156e-06, "loss": 0.459, "step": 1786 }, { "epoch": 0.9661200216255181, "grad_norm": 0.43149450421333313, "learning_rate": 9.737475355893793e-06, "loss": 0.4861, "step": 1787 }, { "epoch": 0.9666606595783024, "grad_norm": 0.36714836955070496, "learning_rate": 9.736871299278786e-06, "loss": 0.4603, "step": 1788 }, { "epoch": 0.9672012975310866, "grad_norm": 0.3596876263618469, "learning_rate": 9.736266567289255e-06, "loss": 0.4898, "step": 1789 }, { "epoch": 0.967741935483871, "grad_norm": 0.45363107323646545, "learning_rate": 9.735661160011424e-06, "loss": 0.4787, "step": 1790 }, { "epoch": 0.9682825734366552, "grad_norm": 0.36468809843063354, "learning_rate": 9.735055077531612e-06, "loss": 0.4616, "step": 1791 }, { "epoch": 0.9688232113894395, "grad_norm": 0.3992007076740265, "learning_rate": 9.734448319936234e-06, "loss": 0.4556, "step": 1792 }, { "epoch": 0.9693638493422239, "grad_norm": 0.37230291962623596, "learning_rate": 9.733840887311796e-06, "loss": 0.4585, "step": 1793 }, { "epoch": 0.9699044872950081, "grad_norm": 0.41598498821258545, "learning_rate": 9.733232779744909e-06, "loss": 0.4779, "step": 1794 }, { "epoch": 0.9704451252477924, "grad_norm": 0.3878816068172455, "learning_rate": 9.732623997322274e-06, "loss": 0.4846, "step": 1795 }, { "epoch": 0.9709857632005767, "grad_norm": 0.39695218205451965, "learning_rate": 9.73201454013069e-06, "loss": 0.4606, "step": 1796 }, { "epoch": 0.971526401153361, "grad_norm": 0.42593061923980713, "learning_rate": 9.731404408257052e-06, "loss": 0.4784, "step": 1797 }, { "epoch": 0.9720670391061452, "grad_norm": 0.4588161110877991, "learning_rate": 9.730793601788353e-06, "loss": 0.4844, "step": 1798 }, { "epoch": 0.9726076770589296, "grad_norm": 0.3776327073574066, "learning_rate": 9.730182120811679e-06, "loss": 0.4468, "step": 1799 }, { "epoch": 0.9731483150117138, "grad_norm": 0.3473685085773468, "learning_rate": 9.729569965414214e-06, "loss": 0.4567, "step": 1800 }, { "epoch": 0.9736889529644981, "grad_norm": 0.39597994089126587, "learning_rate": 9.728957135683238e-06, "loss": 0.478, "step": 1801 }, { "epoch": 0.9742295909172823, "grad_norm": 0.3842535614967346, "learning_rate": 9.72834363170613e-06, "loss": 0.4458, "step": 1802 }, { "epoch": 0.9747702288700667, "grad_norm": 0.42436766624450684, "learning_rate": 9.72772945357036e-06, "loss": 0.4971, "step": 1803 }, { "epoch": 0.975310866822851, "grad_norm": 0.4073885381221771, "learning_rate": 9.727114601363496e-06, "loss": 0.4751, "step": 1804 }, { "epoch": 0.9758515047756352, "grad_norm": 0.35591429471969604, "learning_rate": 9.726499075173201e-06, "loss": 0.4507, "step": 1805 }, { "epoch": 0.9763921427284196, "grad_norm": 0.34828415513038635, "learning_rate": 9.72588287508724e-06, "loss": 0.4693, "step": 1806 }, { "epoch": 0.9769327806812038, "grad_norm": 0.3742208778858185, "learning_rate": 9.725266001193466e-06, "loss": 0.4504, "step": 1807 }, { "epoch": 0.9774734186339881, "grad_norm": 0.3611016571521759, "learning_rate": 9.724648453579834e-06, "loss": 0.4732, "step": 1808 }, { "epoch": 0.9780140565867724, "grad_norm": 0.32930028438568115, "learning_rate": 9.72403023233439e-06, "loss": 0.4482, "step": 1809 }, { "epoch": 0.9785546945395567, "grad_norm": 0.3373595178127289, "learning_rate": 9.723411337545283e-06, "loss": 0.4635, "step": 1810 }, { "epoch": 0.9790953324923409, "grad_norm": 0.3561059534549713, "learning_rate": 9.72279176930075e-06, "loss": 0.4581, "step": 1811 }, { "epoch": 0.9796359704451253, "grad_norm": 0.3475349545478821, "learning_rate": 9.722171527689131e-06, "loss": 0.4419, "step": 1812 }, { "epoch": 0.9801766083979095, "grad_norm": 0.3433786928653717, "learning_rate": 9.721550612798856e-06, "loss": 0.4448, "step": 1813 }, { "epoch": 0.9807172463506938, "grad_norm": 0.3646323084831238, "learning_rate": 9.720929024718456e-06, "loss": 0.4638, "step": 1814 }, { "epoch": 0.9812578843034782, "grad_norm": 0.35177263617515564, "learning_rate": 9.720306763536553e-06, "loss": 0.4595, "step": 1815 }, { "epoch": 0.9817985222562624, "grad_norm": 0.37401440739631653, "learning_rate": 9.719683829341873e-06, "loss": 0.452, "step": 1816 }, { "epoch": 0.9823391602090467, "grad_norm": 0.3282455503940582, "learning_rate": 9.71906022222323e-06, "loss": 0.4522, "step": 1817 }, { "epoch": 0.9828797981618309, "grad_norm": 0.34565940499305725, "learning_rate": 9.718435942269534e-06, "loss": 0.45, "step": 1818 }, { "epoch": 0.9834204361146153, "grad_norm": 0.3489386737346649, "learning_rate": 9.717810989569798e-06, "loss": 0.4791, "step": 1819 }, { "epoch": 0.9839610740673995, "grad_norm": 0.3730597198009491, "learning_rate": 9.717185364213127e-06, "loss": 0.4963, "step": 1820 }, { "epoch": 0.9845017120201838, "grad_norm": 0.33707693219184875, "learning_rate": 9.716559066288716e-06, "loss": 0.486, "step": 1821 }, { "epoch": 0.9850423499729681, "grad_norm": 0.34130632877349854, "learning_rate": 9.715932095885867e-06, "loss": 0.4575, "step": 1822 }, { "epoch": 0.9855829879257524, "grad_norm": 0.35243359208106995, "learning_rate": 9.71530445309397e-06, "loss": 0.4883, "step": 1823 }, { "epoch": 0.9861236258785366, "grad_norm": 0.3288569152355194, "learning_rate": 9.714676138002514e-06, "loss": 0.4736, "step": 1824 }, { "epoch": 0.986664263831321, "grad_norm": 0.36741355061531067, "learning_rate": 9.714047150701082e-06, "loss": 0.4668, "step": 1825 }, { "epoch": 0.9872049017841052, "grad_norm": 0.3808061480522156, "learning_rate": 9.713417491279354e-06, "loss": 0.444, "step": 1826 }, { "epoch": 0.9877455397368895, "grad_norm": 0.3768243193626404, "learning_rate": 9.712787159827107e-06, "loss": 0.4654, "step": 1827 }, { "epoch": 0.9882861776896739, "grad_norm": 0.3869498074054718, "learning_rate": 9.71215615643421e-06, "loss": 0.4807, "step": 1828 }, { "epoch": 0.9888268156424581, "grad_norm": 0.4101428687572479, "learning_rate": 9.711524481190633e-06, "loss": 0.4621, "step": 1829 }, { "epoch": 0.9893674535952424, "grad_norm": 0.36245039105415344, "learning_rate": 9.710892134186438e-06, "loss": 0.4475, "step": 1830 }, { "epoch": 0.9899080915480267, "grad_norm": 0.40394195914268494, "learning_rate": 9.710259115511784e-06, "loss": 0.4703, "step": 1831 }, { "epoch": 0.990448729500811, "grad_norm": 0.3817879259586334, "learning_rate": 9.709625425256926e-06, "loss": 0.456, "step": 1832 }, { "epoch": 0.9909893674535952, "grad_norm": 0.40098169445991516, "learning_rate": 9.708991063512213e-06, "loss": 0.4658, "step": 1833 }, { "epoch": 0.9915300054063795, "grad_norm": 0.3755854368209839, "learning_rate": 9.708356030368091e-06, "loss": 0.4603, "step": 1834 }, { "epoch": 0.9920706433591638, "grad_norm": 0.4370364844799042, "learning_rate": 9.707720325915105e-06, "loss": 0.4402, "step": 1835 }, { "epoch": 0.9926112813119481, "grad_norm": 0.3481072187423706, "learning_rate": 9.707083950243889e-06, "loss": 0.4588, "step": 1836 }, { "epoch": 0.9931519192647323, "grad_norm": 0.3732248544692993, "learning_rate": 9.706446903445179e-06, "loss": 0.4536, "step": 1837 }, { "epoch": 0.9936925572175167, "grad_norm": 0.3808124363422394, "learning_rate": 9.705809185609802e-06, "loss": 0.4734, "step": 1838 }, { "epoch": 0.994233195170301, "grad_norm": 0.35636451840400696, "learning_rate": 9.705170796828684e-06, "loss": 0.4513, "step": 1839 }, { "epoch": 0.9947738331230852, "grad_norm": 0.37091416120529175, "learning_rate": 9.704531737192847e-06, "loss": 0.4496, "step": 1840 }, { "epoch": 0.9953144710758696, "grad_norm": 0.4094441831111908, "learning_rate": 9.703892006793401e-06, "loss": 0.5067, "step": 1841 }, { "epoch": 0.9958551090286538, "grad_norm": 0.3345203101634979, "learning_rate": 9.703251605721565e-06, "loss": 0.4448, "step": 1842 }, { "epoch": 0.9963957469814381, "grad_norm": 0.44573473930358887, "learning_rate": 9.702610534068639e-06, "loss": 0.4547, "step": 1843 }, { "epoch": 0.9969363849342224, "grad_norm": 0.3749711215496063, "learning_rate": 9.701968791926031e-06, "loss": 0.5019, "step": 1844 }, { "epoch": 0.9974770228870067, "grad_norm": 0.3989967703819275, "learning_rate": 9.701326379385238e-06, "loss": 0.4534, "step": 1845 }, { "epoch": 0.9980176608397909, "grad_norm": 0.44340914487838745, "learning_rate": 9.700683296537855e-06, "loss": 0.4914, "step": 1846 }, { "epoch": 0.9985582987925753, "grad_norm": 0.3767038583755493, "learning_rate": 9.700039543475569e-06, "loss": 0.4627, "step": 1847 }, { "epoch": 0.9990989367453595, "grad_norm": 0.43750816583633423, "learning_rate": 9.699395120290166e-06, "loss": 0.4709, "step": 1848 }, { "epoch": 0.9996395746981438, "grad_norm": 0.3936137557029724, "learning_rate": 9.698750027073529e-06, "loss": 0.4575, "step": 1849 }, { "epoch": 1.000180212650928, "grad_norm": 0.521910548210144, "learning_rate": 9.698104263917632e-06, "loss": 0.6287, "step": 1850 }, { "epoch": 1.0007208506037124, "grad_norm": 0.353114515542984, "learning_rate": 9.697457830914546e-06, "loss": 0.4359, "step": 1851 }, { "epoch": 1.0012614885564968, "grad_norm": 0.3728412985801697, "learning_rate": 9.696810728156441e-06, "loss": 0.4542, "step": 1852 }, { "epoch": 1.001802126509281, "grad_norm": 0.3588818907737732, "learning_rate": 9.696162955735577e-06, "loss": 0.4268, "step": 1853 }, { "epoch": 1.0023427644620653, "grad_norm": 0.37867024540901184, "learning_rate": 9.695514513744314e-06, "loss": 0.4717, "step": 1854 }, { "epoch": 1.0028834024148494, "grad_norm": 0.386565625667572, "learning_rate": 9.694865402275105e-06, "loss": 0.4342, "step": 1855 }, { "epoch": 1.0034240403676338, "grad_norm": 0.3667563796043396, "learning_rate": 9.6942156214205e-06, "loss": 0.4577, "step": 1856 }, { "epoch": 1.0039646783204181, "grad_norm": 0.3618047535419464, "learning_rate": 9.693565171273143e-06, "loss": 0.4196, "step": 1857 }, { "epoch": 1.0045053162732023, "grad_norm": 0.3565126955509186, "learning_rate": 9.692914051925773e-06, "loss": 0.465, "step": 1858 }, { "epoch": 1.0050459542259866, "grad_norm": 0.3447374105453491, "learning_rate": 9.692262263471226e-06, "loss": 0.4333, "step": 1859 }, { "epoch": 1.005586592178771, "grad_norm": 0.35998278856277466, "learning_rate": 9.691609806002433e-06, "loss": 0.4527, "step": 1860 }, { "epoch": 1.0061272301315551, "grad_norm": 0.3468131721019745, "learning_rate": 9.690956679612422e-06, "loss": 0.4629, "step": 1861 }, { "epoch": 1.0066678680843395, "grad_norm": 0.36353087425231934, "learning_rate": 9.690302884394312e-06, "loss": 0.4308, "step": 1862 }, { "epoch": 1.0072085060371239, "grad_norm": 0.35647186636924744, "learning_rate": 9.68964842044132e-06, "loss": 0.4528, "step": 1863 }, { "epoch": 1.007749143989908, "grad_norm": 0.3625536561012268, "learning_rate": 9.68899328784676e-06, "loss": 0.4397, "step": 1864 }, { "epoch": 1.0082897819426924, "grad_norm": 0.3450867831707001, "learning_rate": 9.688337486704038e-06, "loss": 0.454, "step": 1865 }, { "epoch": 1.0088304198954767, "grad_norm": 0.33882075548171997, "learning_rate": 9.687681017106659e-06, "loss": 0.429, "step": 1866 }, { "epoch": 1.0093710578482609, "grad_norm": 0.35694748163223267, "learning_rate": 9.687023879148217e-06, "loss": 0.4633, "step": 1867 }, { "epoch": 1.0099116958010452, "grad_norm": 0.3318149447441101, "learning_rate": 9.686366072922411e-06, "loss": 0.4278, "step": 1868 }, { "epoch": 1.0104523337538296, "grad_norm": 0.38252225518226624, "learning_rate": 9.685707598523027e-06, "loss": 0.4348, "step": 1869 }, { "epoch": 1.0109929717066137, "grad_norm": 0.3737165629863739, "learning_rate": 9.68504845604395e-06, "loss": 0.4475, "step": 1870 }, { "epoch": 1.011533609659398, "grad_norm": 0.35093238949775696, "learning_rate": 9.684388645579156e-06, "loss": 0.4219, "step": 1871 }, { "epoch": 1.0120742476121825, "grad_norm": 0.42347413301467896, "learning_rate": 9.683728167222723e-06, "loss": 0.4769, "step": 1872 }, { "epoch": 1.0126148855649666, "grad_norm": 0.36415350437164307, "learning_rate": 9.683067021068822e-06, "loss": 0.4377, "step": 1873 }, { "epoch": 1.013155523517751, "grad_norm": 0.43035557866096497, "learning_rate": 9.682405207211714e-06, "loss": 0.4721, "step": 1874 }, { "epoch": 1.0136961614705353, "grad_norm": 0.35217908024787903, "learning_rate": 9.681742725745762e-06, "loss": 0.4266, "step": 1875 }, { "epoch": 1.0142367994233195, "grad_norm": 0.36210188269615173, "learning_rate": 9.681079576765422e-06, "loss": 0.4207, "step": 1876 }, { "epoch": 1.0147774373761038, "grad_norm": 0.35266774892807007, "learning_rate": 9.680415760365242e-06, "loss": 0.4016, "step": 1877 }, { "epoch": 1.0153180753288882, "grad_norm": 0.3950832486152649, "learning_rate": 9.679751276639869e-06, "loss": 0.4759, "step": 1878 }, { "epoch": 1.0158587132816723, "grad_norm": 0.39965641498565674, "learning_rate": 9.679086125684043e-06, "loss": 0.5019, "step": 1879 }, { "epoch": 1.0163993512344567, "grad_norm": 0.36587411165237427, "learning_rate": 9.678420307592602e-06, "loss": 0.4369, "step": 1880 }, { "epoch": 1.016939989187241, "grad_norm": 0.3512573540210724, "learning_rate": 9.677753822460478e-06, "loss": 0.4442, "step": 1881 }, { "epoch": 1.0174806271400252, "grad_norm": 0.36345434188842773, "learning_rate": 9.677086670382692e-06, "loss": 0.4394, "step": 1882 }, { "epoch": 1.0180212650928095, "grad_norm": 0.3650181293487549, "learning_rate": 9.67641885145437e-06, "loss": 0.4639, "step": 1883 }, { "epoch": 1.018561903045594, "grad_norm": 0.40704137086868286, "learning_rate": 9.675750365770727e-06, "loss": 0.4686, "step": 1884 }, { "epoch": 1.019102540998378, "grad_norm": 0.34051263332366943, "learning_rate": 9.675081213427076e-06, "loss": 0.412, "step": 1885 }, { "epoch": 1.0196431789511624, "grad_norm": 0.3666491210460663, "learning_rate": 9.67441139451882e-06, "loss": 0.4777, "step": 1886 }, { "epoch": 1.0201838169039465, "grad_norm": 0.35916945338249207, "learning_rate": 9.673740909141463e-06, "loss": 0.445, "step": 1887 }, { "epoch": 1.020724454856731, "grad_norm": 0.3610200881958008, "learning_rate": 9.6730697573906e-06, "loss": 0.4602, "step": 1888 }, { "epoch": 1.0212650928095153, "grad_norm": 0.36404091119766235, "learning_rate": 9.672397939361926e-06, "loss": 0.454, "step": 1889 }, { "epoch": 1.0218057307622994, "grad_norm": 0.39531588554382324, "learning_rate": 9.671725455151226e-06, "loss": 0.4119, "step": 1890 }, { "epoch": 1.0223463687150838, "grad_norm": 0.39778226613998413, "learning_rate": 9.671052304854378e-06, "loss": 0.4638, "step": 1891 }, { "epoch": 1.0228870066678681, "grad_norm": 0.40537434816360474, "learning_rate": 9.670378488567365e-06, "loss": 0.4265, "step": 1892 }, { "epoch": 1.0234276446206523, "grad_norm": 0.3681070804595947, "learning_rate": 9.669704006386252e-06, "loss": 0.4466, "step": 1893 }, { "epoch": 1.0239682825734366, "grad_norm": 0.37263795733451843, "learning_rate": 9.66902885840721e-06, "loss": 0.4359, "step": 1894 }, { "epoch": 1.024508920526221, "grad_norm": 0.3602556884288788, "learning_rate": 9.668353044726498e-06, "loss": 0.4045, "step": 1895 }, { "epoch": 1.0250495584790051, "grad_norm": 0.3647526502609253, "learning_rate": 9.667676565440474e-06, "loss": 0.4777, "step": 1896 }, { "epoch": 1.0255901964317895, "grad_norm": 0.3482561707496643, "learning_rate": 9.666999420645589e-06, "loss": 0.4224, "step": 1897 }, { "epoch": 1.0261308343845739, "grad_norm": 0.363437682390213, "learning_rate": 9.666321610438386e-06, "loss": 0.4548, "step": 1898 }, { "epoch": 1.026671472337358, "grad_norm": 0.3769209682941437, "learning_rate": 9.66564313491551e-06, "loss": 0.4224, "step": 1899 }, { "epoch": 1.0272121102901424, "grad_norm": 0.38231518864631653, "learning_rate": 9.664963994173695e-06, "loss": 0.4719, "step": 1900 }, { "epoch": 1.0277527482429267, "grad_norm": 0.3820781409740448, "learning_rate": 9.66428418830977e-06, "loss": 0.4256, "step": 1901 }, { "epoch": 1.0282933861957109, "grad_norm": 0.40323346853256226, "learning_rate": 9.663603717420667e-06, "loss": 0.4791, "step": 1902 }, { "epoch": 1.0288340241484952, "grad_norm": 0.34871387481689453, "learning_rate": 9.662922581603398e-06, "loss": 0.4104, "step": 1903 }, { "epoch": 1.0293746621012796, "grad_norm": 0.47941431403160095, "learning_rate": 9.662240780955082e-06, "loss": 0.4853, "step": 1904 }, { "epoch": 1.0299153000540637, "grad_norm": 0.3837866485118866, "learning_rate": 9.66155831557293e-06, "loss": 0.4254, "step": 1905 }, { "epoch": 1.030455938006848, "grad_norm": 0.43927279114723206, "learning_rate": 9.660875185554244e-06, "loss": 0.4251, "step": 1906 }, { "epoch": 1.0309965759596325, "grad_norm": 0.37006092071533203, "learning_rate": 9.660191390996426e-06, "loss": 0.4738, "step": 1907 }, { "epoch": 1.0315372139124166, "grad_norm": 0.48216795921325684, "learning_rate": 9.659506931996967e-06, "loss": 0.452, "step": 1908 }, { "epoch": 1.032077851865201, "grad_norm": 0.32206961512565613, "learning_rate": 9.65882180865346e-06, "loss": 0.4309, "step": 1909 }, { "epoch": 1.0326184898179853, "grad_norm": 0.4409984350204468, "learning_rate": 9.658136021063585e-06, "loss": 0.4706, "step": 1910 }, { "epoch": 1.0331591277707695, "grad_norm": 0.4066472053527832, "learning_rate": 9.65744956932512e-06, "loss": 0.4532, "step": 1911 }, { "epoch": 1.0336997657235538, "grad_norm": 0.36346372961997986, "learning_rate": 9.656762453535945e-06, "loss": 0.4866, "step": 1912 }, { "epoch": 1.0342404036763382, "grad_norm": 0.3586696982383728, "learning_rate": 9.656074673794018e-06, "loss": 0.4415, "step": 1913 }, { "epoch": 1.0347810416291223, "grad_norm": 0.3591172397136688, "learning_rate": 9.655386230197408e-06, "loss": 0.4435, "step": 1914 }, { "epoch": 1.0353216795819067, "grad_norm": 0.4151376485824585, "learning_rate": 9.65469712284427e-06, "loss": 0.4366, "step": 1915 }, { "epoch": 1.0358623175346908, "grad_norm": 0.4572712481021881, "learning_rate": 9.654007351832856e-06, "loss": 0.494, "step": 1916 }, { "epoch": 1.0364029554874752, "grad_norm": 0.40585753321647644, "learning_rate": 9.653316917261511e-06, "loss": 0.4527, "step": 1917 }, { "epoch": 1.0369435934402595, "grad_norm": 0.3337477147579193, "learning_rate": 9.652625819228679e-06, "loss": 0.4078, "step": 1918 }, { "epoch": 1.0374842313930437, "grad_norm": 0.5288644433021545, "learning_rate": 9.65193405783289e-06, "loss": 0.518, "step": 1919 }, { "epoch": 1.038024869345828, "grad_norm": 0.38844338059425354, "learning_rate": 9.651241633172782e-06, "loss": 0.4227, "step": 1920 }, { "epoch": 1.0385655072986124, "grad_norm": 0.45273417234420776, "learning_rate": 9.650548545347072e-06, "loss": 0.4283, "step": 1921 }, { "epoch": 1.0391061452513966, "grad_norm": 0.3740539848804474, "learning_rate": 9.649854794454583e-06, "loss": 0.4156, "step": 1922 }, { "epoch": 1.039646783204181, "grad_norm": 0.3912931978702545, "learning_rate": 9.649160380594227e-06, "loss": 0.4163, "step": 1923 }, { "epoch": 1.0401874211569653, "grad_norm": 0.3708667457103729, "learning_rate": 9.648465303865016e-06, "loss": 0.4039, "step": 1924 }, { "epoch": 1.0407280591097494, "grad_norm": 0.4059867560863495, "learning_rate": 9.647769564366048e-06, "loss": 0.4632, "step": 1925 }, { "epoch": 1.0412686970625338, "grad_norm": 0.37832167744636536, "learning_rate": 9.647073162196524e-06, "loss": 0.4548, "step": 1926 }, { "epoch": 1.0418093350153181, "grad_norm": 0.40500980615615845, "learning_rate": 9.646376097455732e-06, "loss": 0.4753, "step": 1927 }, { "epoch": 1.0423499729681023, "grad_norm": 0.38375428318977356, "learning_rate": 9.645678370243063e-06, "loss": 0.4413, "step": 1928 }, { "epoch": 1.0428906109208866, "grad_norm": 0.3782906234264374, "learning_rate": 9.644979980657993e-06, "loss": 0.4726, "step": 1929 }, { "epoch": 1.043431248873671, "grad_norm": 0.4120505452156067, "learning_rate": 9.644280928800101e-06, "loss": 0.4907, "step": 1930 }, { "epoch": 1.0439718868264551, "grad_norm": 0.35727861523628235, "learning_rate": 9.643581214769053e-06, "loss": 0.3948, "step": 1931 }, { "epoch": 1.0445125247792395, "grad_norm": 0.42740848660469055, "learning_rate": 9.642880838664617e-06, "loss": 0.4586, "step": 1932 }, { "epoch": 1.0450531627320239, "grad_norm": 0.3294697105884552, "learning_rate": 9.642179800586648e-06, "loss": 0.4003, "step": 1933 }, { "epoch": 1.045593800684808, "grad_norm": 0.40418773889541626, "learning_rate": 9.6414781006351e-06, "loss": 0.4738, "step": 1934 }, { "epoch": 1.0461344386375924, "grad_norm": 0.35638585686683655, "learning_rate": 9.640775738910019e-06, "loss": 0.4416, "step": 1935 }, { "epoch": 1.0466750765903767, "grad_norm": 0.37873512506484985, "learning_rate": 9.640072715511547e-06, "loss": 0.4497, "step": 1936 }, { "epoch": 1.0472157145431609, "grad_norm": 0.40080496668815613, "learning_rate": 9.639369030539922e-06, "loss": 0.4547, "step": 1937 }, { "epoch": 1.0477563524959452, "grad_norm": 0.3935638964176178, "learning_rate": 9.638664684095472e-06, "loss": 0.4574, "step": 1938 }, { "epoch": 1.0482969904487296, "grad_norm": 0.3729800283908844, "learning_rate": 9.637959676278621e-06, "loss": 0.4383, "step": 1939 }, { "epoch": 1.0488376284015137, "grad_norm": 0.3998558819293976, "learning_rate": 9.63725400718989e-06, "loss": 0.4737, "step": 1940 }, { "epoch": 1.049378266354298, "grad_norm": 0.3633527159690857, "learning_rate": 9.636547676929889e-06, "loss": 0.3989, "step": 1941 }, { "epoch": 1.0499189043070825, "grad_norm": 0.37377429008483887, "learning_rate": 9.635840685599328e-06, "loss": 0.4225, "step": 1942 }, { "epoch": 1.0504595422598666, "grad_norm": 0.3647961914539337, "learning_rate": 9.635133033299005e-06, "loss": 0.451, "step": 1943 }, { "epoch": 1.051000180212651, "grad_norm": 0.4128117263317108, "learning_rate": 9.63442472012982e-06, "loss": 0.4678, "step": 1944 }, { "epoch": 1.0515408181654353, "grad_norm": 0.35981616377830505, "learning_rate": 9.633715746192762e-06, "loss": 0.439, "step": 1945 }, { "epoch": 1.0520814561182195, "grad_norm": 0.3871247470378876, "learning_rate": 9.633006111588912e-06, "loss": 0.4444, "step": 1946 }, { "epoch": 1.0526220940710038, "grad_norm": 0.35950541496276855, "learning_rate": 9.632295816419453e-06, "loss": 0.4392, "step": 1947 }, { "epoch": 1.0531627320237882, "grad_norm": 0.4200381338596344, "learning_rate": 9.631584860785654e-06, "loss": 0.4601, "step": 1948 }, { "epoch": 1.0537033699765723, "grad_norm": 0.3932918608188629, "learning_rate": 9.630873244788884e-06, "loss": 0.4588, "step": 1949 }, { "epoch": 1.0542440079293567, "grad_norm": 0.3635420799255371, "learning_rate": 9.630160968530601e-06, "loss": 0.4535, "step": 1950 }, { "epoch": 1.0547846458821408, "grad_norm": 0.35260698199272156, "learning_rate": 9.629448032112365e-06, "loss": 0.4062, "step": 1951 }, { "epoch": 1.0553252838349252, "grad_norm": 0.41558098793029785, "learning_rate": 9.62873443563582e-06, "loss": 0.4876, "step": 1952 }, { "epoch": 1.0558659217877095, "grad_norm": 0.31702765822410583, "learning_rate": 9.628020179202713e-06, "loss": 0.4188, "step": 1953 }, { "epoch": 1.0564065597404937, "grad_norm": 0.407604843378067, "learning_rate": 9.62730526291488e-06, "loss": 0.4382, "step": 1954 }, { "epoch": 1.056947197693278, "grad_norm": 0.358194500207901, "learning_rate": 9.626589686874252e-06, "loss": 0.4139, "step": 1955 }, { "epoch": 1.0574878356460624, "grad_norm": 0.4273225963115692, "learning_rate": 9.625873451182855e-06, "loss": 0.4718, "step": 1956 }, { "epoch": 1.0580284735988466, "grad_norm": 0.3338474929332733, "learning_rate": 9.62515655594281e-06, "loss": 0.3965, "step": 1957 }, { "epoch": 1.058569111551631, "grad_norm": 0.45933595299720764, "learning_rate": 9.624439001256327e-06, "loss": 0.4446, "step": 1958 }, { "epoch": 1.0591097495044153, "grad_norm": 0.35661551356315613, "learning_rate": 9.623720787225716e-06, "loss": 0.4321, "step": 1959 }, { "epoch": 1.0596503874571994, "grad_norm": 0.422316312789917, "learning_rate": 9.62300191395338e-06, "loss": 0.428, "step": 1960 }, { "epoch": 1.0601910254099838, "grad_norm": 0.39070868492126465, "learning_rate": 9.622282381541812e-06, "loss": 0.4235, "step": 1961 }, { "epoch": 1.0607316633627681, "grad_norm": 0.46358242630958557, "learning_rate": 9.621562190093603e-06, "loss": 0.4778, "step": 1962 }, { "epoch": 1.0612723013155523, "grad_norm": 0.3455776274204254, "learning_rate": 9.620841339711437e-06, "loss": 0.4426, "step": 1963 }, { "epoch": 1.0618129392683366, "grad_norm": 0.3895058333873749, "learning_rate": 9.62011983049809e-06, "loss": 0.4379, "step": 1964 }, { "epoch": 1.062353577221121, "grad_norm": 0.42753368616104126, "learning_rate": 9.619397662556434e-06, "loss": 0.4499, "step": 1965 }, { "epoch": 1.0628942151739051, "grad_norm": 0.3436278998851776, "learning_rate": 9.618674835989437e-06, "loss": 0.4534, "step": 1966 }, { "epoch": 1.0634348531266895, "grad_norm": 0.4359297454357147, "learning_rate": 9.617951350900154e-06, "loss": 0.4324, "step": 1967 }, { "epoch": 1.0639754910794739, "grad_norm": 0.43395212292671204, "learning_rate": 9.61722720739174e-06, "loss": 0.5134, "step": 1968 }, { "epoch": 1.064516129032258, "grad_norm": 0.35746920108795166, "learning_rate": 9.616502405567445e-06, "loss": 0.4204, "step": 1969 }, { "epoch": 1.0650567669850424, "grad_norm": 0.4134131968021393, "learning_rate": 9.615776945530603e-06, "loss": 0.4691, "step": 1970 }, { "epoch": 1.0655974049378267, "grad_norm": 0.37661606073379517, "learning_rate": 9.615050827384656e-06, "loss": 0.4183, "step": 1971 }, { "epoch": 1.0661380428906109, "grad_norm": 0.3690008521080017, "learning_rate": 9.614324051233131e-06, "loss": 0.4714, "step": 1972 }, { "epoch": 1.0666786808433952, "grad_norm": 0.4408411681652069, "learning_rate": 9.613596617179645e-06, "loss": 0.4368, "step": 1973 }, { "epoch": 1.0672193187961796, "grad_norm": 0.3526779115200043, "learning_rate": 9.612868525327921e-06, "loss": 0.4193, "step": 1974 }, { "epoch": 1.0677599567489637, "grad_norm": 0.40113815665245056, "learning_rate": 9.612139775781766e-06, "loss": 0.4518, "step": 1975 }, { "epoch": 1.068300594701748, "grad_norm": 0.4572330117225647, "learning_rate": 9.611410368645085e-06, "loss": 0.4592, "step": 1976 }, { "epoch": 1.0688412326545325, "grad_norm": 0.3182274401187897, "learning_rate": 9.610680304021873e-06, "loss": 0.4426, "step": 1977 }, { "epoch": 1.0693818706073166, "grad_norm": 0.37616831064224243, "learning_rate": 9.609949582016223e-06, "loss": 0.415, "step": 1978 }, { "epoch": 1.069922508560101, "grad_norm": 0.40873757004737854, "learning_rate": 9.609218202732322e-06, "loss": 0.4509, "step": 1979 }, { "epoch": 1.070463146512885, "grad_norm": 0.3298252522945404, "learning_rate": 9.608486166274444e-06, "loss": 0.4476, "step": 1980 }, { "epoch": 1.0710037844656695, "grad_norm": 0.4793800115585327, "learning_rate": 9.607753472746967e-06, "loss": 0.4329, "step": 1981 }, { "epoch": 1.0715444224184538, "grad_norm": 0.3937591016292572, "learning_rate": 9.607020122254352e-06, "loss": 0.423, "step": 1982 }, { "epoch": 1.072085060371238, "grad_norm": 0.3705812692642212, "learning_rate": 9.60628611490116e-06, "loss": 0.4036, "step": 1983 }, { "epoch": 1.0726256983240223, "grad_norm": 0.4920952618122101, "learning_rate": 9.605551450792048e-06, "loss": 0.4605, "step": 1984 }, { "epoch": 1.0731663362768067, "grad_norm": 0.35678839683532715, "learning_rate": 9.60481613003176e-06, "loss": 0.4243, "step": 1985 }, { "epoch": 1.0737069742295908, "grad_norm": 0.41355428099632263, "learning_rate": 9.604080152725137e-06, "loss": 0.4419, "step": 1986 }, { "epoch": 1.0742476121823752, "grad_norm": 0.40846797823905945, "learning_rate": 9.603343518977113e-06, "loss": 0.4522, "step": 1987 }, { "epoch": 1.0747882501351596, "grad_norm": 0.4189375042915344, "learning_rate": 9.602606228892717e-06, "loss": 0.5046, "step": 1988 }, { "epoch": 1.0753288880879437, "grad_norm": 0.37765973806381226, "learning_rate": 9.601868282577069e-06, "loss": 0.4458, "step": 1989 }, { "epoch": 1.075869526040728, "grad_norm": 0.37527140974998474, "learning_rate": 9.601129680135386e-06, "loss": 0.4313, "step": 1990 }, { "epoch": 1.0764101639935124, "grad_norm": 0.34690606594085693, "learning_rate": 9.600390421672976e-06, "loss": 0.4276, "step": 1991 }, { "epoch": 1.0769508019462966, "grad_norm": 0.42187508940696716, "learning_rate": 9.59965050729524e-06, "loss": 0.4851, "step": 1992 }, { "epoch": 1.077491439899081, "grad_norm": 0.3796841502189636, "learning_rate": 9.598909937107674e-06, "loss": 0.38, "step": 1993 }, { "epoch": 1.0780320778518653, "grad_norm": 0.386795312166214, "learning_rate": 9.598168711215868e-06, "loss": 0.4132, "step": 1994 }, { "epoch": 1.0785727158046494, "grad_norm": 0.3887866139411926, "learning_rate": 9.597426829725504e-06, "loss": 0.4518, "step": 1995 }, { "epoch": 1.0791133537574338, "grad_norm": 0.43749120831489563, "learning_rate": 9.59668429274236e-06, "loss": 0.4726, "step": 1996 }, { "epoch": 1.0796539917102181, "grad_norm": 0.40146055817604065, "learning_rate": 9.595941100372301e-06, "loss": 0.454, "step": 1997 }, { "epoch": 1.0801946296630023, "grad_norm": 0.35357674956321716, "learning_rate": 9.595197252721293e-06, "loss": 0.4288, "step": 1998 }, { "epoch": 1.0807352676157866, "grad_norm": 0.34494102001190186, "learning_rate": 9.594452749895395e-06, "loss": 0.4223, "step": 1999 }, { "epoch": 1.081275905568571, "grad_norm": 0.42233067750930786, "learning_rate": 9.593707592000751e-06, "loss": 0.4356, "step": 2000 }, { "epoch": 1.0818165435213551, "grad_norm": 0.4014831483364105, "learning_rate": 9.59296177914361e-06, "loss": 0.45, "step": 2001 }, { "epoch": 1.0823571814741395, "grad_norm": 0.36923670768737793, "learning_rate": 9.592215311430305e-06, "loss": 0.4123, "step": 2002 }, { "epoch": 1.0828978194269239, "grad_norm": 0.4248311519622803, "learning_rate": 9.591468188967267e-06, "loss": 0.4579, "step": 2003 }, { "epoch": 1.083438457379708, "grad_norm": 0.39018672704696655, "learning_rate": 9.590720411861022e-06, "loss": 0.4405, "step": 2004 }, { "epoch": 1.0839790953324924, "grad_norm": 0.3719469904899597, "learning_rate": 9.58997198021818e-06, "loss": 0.4133, "step": 2005 }, { "epoch": 1.0845197332852767, "grad_norm": 0.49077853560447693, "learning_rate": 9.589222894145458e-06, "loss": 0.4758, "step": 2006 }, { "epoch": 1.0850603712380609, "grad_norm": 0.3614523708820343, "learning_rate": 9.588473153749656e-06, "loss": 0.4529, "step": 2007 }, { "epoch": 1.0856010091908452, "grad_norm": 0.36509764194488525, "learning_rate": 9.58772275913767e-06, "loss": 0.3923, "step": 2008 }, { "epoch": 1.0861416471436294, "grad_norm": 0.48173028230667114, "learning_rate": 9.586971710416493e-06, "loss": 0.458, "step": 2009 }, { "epoch": 1.0866822850964137, "grad_norm": 0.4093618094921112, "learning_rate": 9.586220007693205e-06, "loss": 0.4655, "step": 2010 }, { "epoch": 1.087222923049198, "grad_norm": 0.4406038224697113, "learning_rate": 9.585467651074983e-06, "loss": 0.4169, "step": 2011 }, { "epoch": 1.0877635610019822, "grad_norm": 0.5025647282600403, "learning_rate": 9.584714640669099e-06, "loss": 0.4674, "step": 2012 }, { "epoch": 1.0883041989547666, "grad_norm": 0.4379287660121918, "learning_rate": 9.583960976582914e-06, "loss": 0.4344, "step": 2013 }, { "epoch": 1.088844836907551, "grad_norm": 0.49229374527931213, "learning_rate": 9.583206658923882e-06, "loss": 0.4062, "step": 2014 }, { "epoch": 1.089385474860335, "grad_norm": 0.44954460859298706, "learning_rate": 9.582451687799557e-06, "loss": 0.4786, "step": 2015 }, { "epoch": 1.0899261128131195, "grad_norm": 0.4374828040599823, "learning_rate": 9.581696063317577e-06, "loss": 0.4025, "step": 2016 }, { "epoch": 1.0904667507659038, "grad_norm": 0.5120025873184204, "learning_rate": 9.58093978558568e-06, "loss": 0.4771, "step": 2017 }, { "epoch": 1.091007388718688, "grad_norm": 0.4352531433105469, "learning_rate": 9.580182854711695e-06, "loss": 0.4597, "step": 2018 }, { "epoch": 1.0915480266714723, "grad_norm": 0.4131707549095154, "learning_rate": 9.579425270803542e-06, "loss": 0.4323, "step": 2019 }, { "epoch": 1.0920886646242567, "grad_norm": 0.39147377014160156, "learning_rate": 9.578667033969238e-06, "loss": 0.4271, "step": 2020 }, { "epoch": 1.0926293025770408, "grad_norm": 0.40212157368659973, "learning_rate": 9.577908144316888e-06, "loss": 0.4159, "step": 2021 }, { "epoch": 1.0931699405298252, "grad_norm": 0.3936586380004883, "learning_rate": 9.577148601954697e-06, "loss": 0.4443, "step": 2022 }, { "epoch": 1.0937105784826096, "grad_norm": 0.4311821758747101, "learning_rate": 9.576388406990957e-06, "loss": 0.4309, "step": 2023 }, { "epoch": 1.0942512164353937, "grad_norm": 0.38196295499801636, "learning_rate": 9.575627559534055e-06, "loss": 0.4731, "step": 2024 }, { "epoch": 1.094791854388178, "grad_norm": 0.41277268528938293, "learning_rate": 9.574866059692471e-06, "loss": 0.4391, "step": 2025 }, { "epoch": 1.0953324923409624, "grad_norm": 0.4242735207080841, "learning_rate": 9.57410390757478e-06, "loss": 0.4473, "step": 2026 }, { "epoch": 1.0958731302937466, "grad_norm": 0.4308188259601593, "learning_rate": 9.573341103289646e-06, "loss": 0.5019, "step": 2027 }, { "epoch": 1.096413768246531, "grad_norm": 0.35142946243286133, "learning_rate": 9.572577646945831e-06, "loss": 0.4185, "step": 2028 }, { "epoch": 1.0969544061993153, "grad_norm": 0.3706936240196228, "learning_rate": 9.571813538652184e-06, "loss": 0.4477, "step": 2029 }, { "epoch": 1.0974950441520994, "grad_norm": 0.4424436092376709, "learning_rate": 9.571048778517655e-06, "loss": 0.465, "step": 2030 }, { "epoch": 1.0980356821048838, "grad_norm": 0.3273000121116638, "learning_rate": 9.570283366651277e-06, "loss": 0.4133, "step": 2031 }, { "epoch": 1.0985763200576681, "grad_norm": 0.41131043434143066, "learning_rate": 9.56951730316218e-06, "loss": 0.4554, "step": 2032 }, { "epoch": 1.0991169580104523, "grad_norm": 0.3751986622810364, "learning_rate": 9.568750588159596e-06, "loss": 0.4196, "step": 2033 }, { "epoch": 1.0996575959632366, "grad_norm": 0.4607866704463959, "learning_rate": 9.567983221752835e-06, "loss": 0.4272, "step": 2034 }, { "epoch": 1.100198233916021, "grad_norm": 0.3737804889678955, "learning_rate": 9.567215204051307e-06, "loss": 0.4948, "step": 2035 }, { "epoch": 1.1007388718688051, "grad_norm": 0.4024915099143982, "learning_rate": 9.566446535164518e-06, "loss": 0.4065, "step": 2036 }, { "epoch": 1.1012795098215895, "grad_norm": 0.42979228496551514, "learning_rate": 9.565677215202062e-06, "loss": 0.4041, "step": 2037 }, { "epoch": 1.1018201477743736, "grad_norm": 0.3591923117637634, "learning_rate": 9.564907244273624e-06, "loss": 0.4536, "step": 2038 }, { "epoch": 1.102360785727158, "grad_norm": 0.4698256254196167, "learning_rate": 9.564136622488991e-06, "loss": 0.4662, "step": 2039 }, { "epoch": 1.1029014236799424, "grad_norm": 0.36560025811195374, "learning_rate": 9.563365349958032e-06, "loss": 0.4361, "step": 2040 }, { "epoch": 1.1034420616327267, "grad_norm": 0.4091884195804596, "learning_rate": 9.562593426790715e-06, "loss": 0.4253, "step": 2041 }, { "epoch": 1.1039826995855109, "grad_norm": 0.3399011492729187, "learning_rate": 9.5618208530971e-06, "loss": 0.4345, "step": 2042 }, { "epoch": 1.1045233375382952, "grad_norm": 0.4654679596424103, "learning_rate": 9.561047628987338e-06, "loss": 0.489, "step": 2043 }, { "epoch": 1.1050639754910794, "grad_norm": 0.36112046241760254, "learning_rate": 9.560273754571678e-06, "loss": 0.481, "step": 2044 }, { "epoch": 1.1056046134438637, "grad_norm": 0.36224114894866943, "learning_rate": 9.55949922996045e-06, "loss": 0.4154, "step": 2045 }, { "epoch": 1.106145251396648, "grad_norm": 0.38548043370246887, "learning_rate": 9.558724055264093e-06, "loss": 0.4901, "step": 2046 }, { "epoch": 1.1066858893494322, "grad_norm": 0.3203273117542267, "learning_rate": 9.557948230593122e-06, "loss": 0.4238, "step": 2047 }, { "epoch": 1.1072265273022166, "grad_norm": 0.40722090005874634, "learning_rate": 9.55717175605816e-06, "loss": 0.4541, "step": 2048 }, { "epoch": 1.107767165255001, "grad_norm": 0.4180580973625183, "learning_rate": 9.556394631769907e-06, "loss": 0.4814, "step": 2049 }, { "epoch": 1.108307803207785, "grad_norm": 0.41281113028526306, "learning_rate": 9.555616857839171e-06, "loss": 0.4413, "step": 2050 }, { "epoch": 1.1088484411605695, "grad_norm": 0.3930680453777313, "learning_rate": 9.554838434376845e-06, "loss": 0.4443, "step": 2051 }, { "epoch": 1.1093890791133538, "grad_norm": 0.4233580231666565, "learning_rate": 9.554059361493913e-06, "loss": 0.4212, "step": 2052 }, { "epoch": 1.109929717066138, "grad_norm": 0.3436315953731537, "learning_rate": 9.553279639301452e-06, "loss": 0.4226, "step": 2053 }, { "epoch": 1.1104703550189223, "grad_norm": 0.381905734539032, "learning_rate": 9.552499267910637e-06, "loss": 0.4105, "step": 2054 }, { "epoch": 1.1110109929717067, "grad_norm": 0.4006160497665405, "learning_rate": 9.551718247432732e-06, "loss": 0.4805, "step": 2055 }, { "epoch": 1.1115516309244908, "grad_norm": 0.3256142735481262, "learning_rate": 9.55093657797909e-06, "loss": 0.4155, "step": 2056 }, { "epoch": 1.1120922688772752, "grad_norm": 0.36843231320381165, "learning_rate": 9.550154259661162e-06, "loss": 0.4562, "step": 2057 }, { "epoch": 1.1126329068300596, "grad_norm": 0.34505757689476013, "learning_rate": 9.54937129259049e-06, "loss": 0.4096, "step": 2058 }, { "epoch": 1.1131735447828437, "grad_norm": 0.3614414632320404, "learning_rate": 9.548587676878709e-06, "loss": 0.452, "step": 2059 }, { "epoch": 1.113714182735628, "grad_norm": 0.34557512402534485, "learning_rate": 9.547803412637542e-06, "loss": 0.4648, "step": 2060 }, { "epoch": 1.1142548206884124, "grad_norm": 0.35479235649108887, "learning_rate": 9.547018499978811e-06, "loss": 0.4242, "step": 2061 }, { "epoch": 1.1147954586411966, "grad_norm": 0.37343519926071167, "learning_rate": 9.546232939014428e-06, "loss": 0.4435, "step": 2062 }, { "epoch": 1.115336096593981, "grad_norm": 0.3476622998714447, "learning_rate": 9.545446729856394e-06, "loss": 0.4179, "step": 2063 }, { "epoch": 1.1158767345467653, "grad_norm": 0.3708413243293762, "learning_rate": 9.544659872616806e-06, "loss": 0.4859, "step": 2064 }, { "epoch": 1.1164173724995494, "grad_norm": 0.31397050619125366, "learning_rate": 9.543872367407854e-06, "loss": 0.4047, "step": 2065 }, { "epoch": 1.1169580104523338, "grad_norm": 0.3721030354499817, "learning_rate": 9.54308421434182e-06, "loss": 0.4644, "step": 2066 }, { "epoch": 1.117498648405118, "grad_norm": 0.2971539795398712, "learning_rate": 9.542295413531073e-06, "loss": 0.3858, "step": 2067 }, { "epoch": 1.1180392863579023, "grad_norm": 0.418592244386673, "learning_rate": 9.541505965088083e-06, "loss": 0.4695, "step": 2068 }, { "epoch": 1.1185799243106866, "grad_norm": 0.36569511890411377, "learning_rate": 9.540715869125407e-06, "loss": 0.4404, "step": 2069 }, { "epoch": 1.119120562263471, "grad_norm": 0.3991091251373291, "learning_rate": 9.539925125755695e-06, "loss": 0.4525, "step": 2070 }, { "epoch": 1.1196612002162551, "grad_norm": 0.33858662843704224, "learning_rate": 9.53913373509169e-06, "loss": 0.386, "step": 2071 }, { "epoch": 1.1202018381690395, "grad_norm": 0.4000351130962372, "learning_rate": 9.538341697246228e-06, "loss": 0.4707, "step": 2072 }, { "epoch": 1.1207424761218236, "grad_norm": 0.34329837560653687, "learning_rate": 9.537549012332234e-06, "loss": 0.4293, "step": 2073 }, { "epoch": 1.121283114074608, "grad_norm": 0.392547607421875, "learning_rate": 9.536755680462729e-06, "loss": 0.4555, "step": 2074 }, { "epoch": 1.1218237520273924, "grad_norm": 0.38397929072380066, "learning_rate": 9.535961701750825e-06, "loss": 0.439, "step": 2075 }, { "epoch": 1.1223643899801765, "grad_norm": 0.3610386848449707, "learning_rate": 9.535167076309726e-06, "loss": 0.4423, "step": 2076 }, { "epoch": 1.1229050279329609, "grad_norm": 0.3509500324726105, "learning_rate": 9.534371804252727e-06, "loss": 0.4486, "step": 2077 }, { "epoch": 1.1234456658857452, "grad_norm": 0.342852383852005, "learning_rate": 9.53357588569322e-06, "loss": 0.3888, "step": 2078 }, { "epoch": 1.1239863038385294, "grad_norm": 0.44941166043281555, "learning_rate": 9.53277932074468e-06, "loss": 0.4635, "step": 2079 }, { "epoch": 1.1245269417913137, "grad_norm": 0.3370332717895508, "learning_rate": 9.531982109520686e-06, "loss": 0.4314, "step": 2080 }, { "epoch": 1.125067579744098, "grad_norm": 0.36963319778442383, "learning_rate": 9.531184252134897e-06, "loss": 0.449, "step": 2081 }, { "epoch": 1.1256082176968822, "grad_norm": 0.3691161274909973, "learning_rate": 9.530385748701074e-06, "loss": 0.4106, "step": 2082 }, { "epoch": 1.1261488556496666, "grad_norm": 0.3494841158390045, "learning_rate": 9.529586599333066e-06, "loss": 0.4389, "step": 2083 }, { "epoch": 1.126689493602451, "grad_norm": 0.3670741319656372, "learning_rate": 9.528786804144812e-06, "loss": 0.4453, "step": 2084 }, { "epoch": 1.127230131555235, "grad_norm": 0.3459216058254242, "learning_rate": 9.527986363250348e-06, "loss": 0.4509, "step": 2085 }, { "epoch": 1.1277707695080195, "grad_norm": 0.38125917315483093, "learning_rate": 9.527185276763797e-06, "loss": 0.4244, "step": 2086 }, { "epoch": 1.1283114074608038, "grad_norm": 0.3307487666606903, "learning_rate": 9.526383544799378e-06, "loss": 0.441, "step": 2087 }, { "epoch": 1.128852045413588, "grad_norm": 0.38201895356178284, "learning_rate": 9.525581167471399e-06, "loss": 0.4615, "step": 2088 }, { "epoch": 1.1293926833663723, "grad_norm": 0.3586467206478119, "learning_rate": 9.524778144894265e-06, "loss": 0.4586, "step": 2089 }, { "epoch": 1.1299333213191567, "grad_norm": 0.35074567794799805, "learning_rate": 9.523974477182465e-06, "loss": 0.4258, "step": 2090 }, { "epoch": 1.1304739592719408, "grad_norm": 0.41212746500968933, "learning_rate": 9.523170164450586e-06, "loss": 0.4729, "step": 2091 }, { "epoch": 1.1310145972247252, "grad_norm": 0.34268930554389954, "learning_rate": 9.522365206813307e-06, "loss": 0.4335, "step": 2092 }, { "epoch": 1.1315552351775096, "grad_norm": 0.40464693307876587, "learning_rate": 9.521559604385396e-06, "loss": 0.4127, "step": 2093 }, { "epoch": 1.1320958731302937, "grad_norm": 0.40215039253234863, "learning_rate": 9.520753357281716e-06, "loss": 0.4875, "step": 2094 }, { "epoch": 1.132636511083078, "grad_norm": 0.3894921541213989, "learning_rate": 9.519946465617217e-06, "loss": 0.4091, "step": 2095 }, { "epoch": 1.1331771490358622, "grad_norm": 0.35323572158813477, "learning_rate": 9.519138929506949e-06, "loss": 0.4147, "step": 2096 }, { "epoch": 1.1337177869886466, "grad_norm": 0.3723592758178711, "learning_rate": 9.518330749066042e-06, "loss": 0.4824, "step": 2097 }, { "epoch": 1.134258424941431, "grad_norm": 0.3743079602718353, "learning_rate": 9.517521924409731e-06, "loss": 0.4413, "step": 2098 }, { "epoch": 1.1347990628942153, "grad_norm": 0.39084959030151367, "learning_rate": 9.516712455653337e-06, "loss": 0.466, "step": 2099 }, { "epoch": 1.1353397008469994, "grad_norm": 0.3701314926147461, "learning_rate": 9.515902342912268e-06, "loss": 0.4594, "step": 2100 }, { "epoch": 1.1358803387997838, "grad_norm": 0.3685029149055481, "learning_rate": 9.51509158630203e-06, "loss": 0.4459, "step": 2101 }, { "epoch": 1.136420976752568, "grad_norm": 0.38407519459724426, "learning_rate": 9.514280185938223e-06, "loss": 0.466, "step": 2102 }, { "epoch": 1.1369616147053523, "grad_norm": 0.3733687102794647, "learning_rate": 9.51346814193653e-06, "loss": 0.4422, "step": 2103 }, { "epoch": 1.1375022526581366, "grad_norm": 0.4305700957775116, "learning_rate": 9.512655454412734e-06, "loss": 0.4432, "step": 2104 }, { "epoch": 1.138042890610921, "grad_norm": 0.3547512888908386, "learning_rate": 9.511842123482703e-06, "loss": 0.4748, "step": 2105 }, { "epoch": 1.1385835285637051, "grad_norm": 0.3857305347919464, "learning_rate": 9.511028149262405e-06, "loss": 0.4531, "step": 2106 }, { "epoch": 1.1391241665164895, "grad_norm": 0.3905250132083893, "learning_rate": 9.510213531867891e-06, "loss": 0.4121, "step": 2107 }, { "epoch": 1.1396648044692737, "grad_norm": 0.4033034145832062, "learning_rate": 9.509398271415308e-06, "loss": 0.4839, "step": 2108 }, { "epoch": 1.140205442422058, "grad_norm": 0.3537909984588623, "learning_rate": 9.508582368020897e-06, "loss": 0.4396, "step": 2109 }, { "epoch": 1.1407460803748424, "grad_norm": 0.352022260427475, "learning_rate": 9.507765821800988e-06, "loss": 0.4231, "step": 2110 }, { "epoch": 1.1412867183276265, "grad_norm": 0.4184352457523346, "learning_rate": 9.506948632872e-06, "loss": 0.4879, "step": 2111 }, { "epoch": 1.1418273562804109, "grad_norm": 0.38211655616760254, "learning_rate": 9.506130801350447e-06, "loss": 0.4507, "step": 2112 }, { "epoch": 1.1423679942331952, "grad_norm": 0.4082963168621063, "learning_rate": 9.505312327352935e-06, "loss": 0.431, "step": 2113 }, { "epoch": 1.1429086321859794, "grad_norm": 0.32051509618759155, "learning_rate": 9.504493210996159e-06, "loss": 0.3991, "step": 2114 }, { "epoch": 1.1434492701387637, "grad_norm": 0.3416892886161804, "learning_rate": 9.503673452396909e-06, "loss": 0.4656, "step": 2115 }, { "epoch": 1.143989908091548, "grad_norm": 0.35561808943748474, "learning_rate": 9.502853051672066e-06, "loss": 0.4353, "step": 2116 }, { "epoch": 1.1445305460443322, "grad_norm": 0.4583134949207306, "learning_rate": 9.502032008938595e-06, "loss": 0.4664, "step": 2117 }, { "epoch": 1.1450711839971166, "grad_norm": 0.3298965394496918, "learning_rate": 9.501210324313566e-06, "loss": 0.4249, "step": 2118 }, { "epoch": 1.145611821949901, "grad_norm": 0.42214271426200867, "learning_rate": 9.500387997914127e-06, "loss": 0.4299, "step": 2119 }, { "epoch": 1.146152459902685, "grad_norm": 0.3864177465438843, "learning_rate": 9.499565029857529e-06, "loss": 0.4365, "step": 2120 }, { "epoch": 1.1466930978554695, "grad_norm": 0.3692407011985779, "learning_rate": 9.498741420261109e-06, "loss": 0.4334, "step": 2121 }, { "epoch": 1.1472337358082538, "grad_norm": 0.45088258385658264, "learning_rate": 9.497917169242293e-06, "loss": 0.4399, "step": 2122 }, { "epoch": 1.147774373761038, "grad_norm": 0.42142733931541443, "learning_rate": 9.4970922769186e-06, "loss": 0.4337, "step": 2123 }, { "epoch": 1.1483150117138223, "grad_norm": 0.37217411398887634, "learning_rate": 9.496266743407646e-06, "loss": 0.4719, "step": 2124 }, { "epoch": 1.1488556496666065, "grad_norm": 0.3911516070365906, "learning_rate": 9.49544056882713e-06, "loss": 0.4479, "step": 2125 }, { "epoch": 1.1493962876193908, "grad_norm": 0.44513747096061707, "learning_rate": 9.49461375329485e-06, "loss": 0.4667, "step": 2126 }, { "epoch": 1.1499369255721752, "grad_norm": 0.33743032813072205, "learning_rate": 9.493786296928691e-06, "loss": 0.4108, "step": 2127 }, { "epoch": 1.1504775635249596, "grad_norm": 0.36376333236694336, "learning_rate": 9.492958199846628e-06, "loss": 0.418, "step": 2128 }, { "epoch": 1.1510182014777437, "grad_norm": 0.4417121410369873, "learning_rate": 9.492129462166732e-06, "loss": 0.4843, "step": 2129 }, { "epoch": 1.151558839430528, "grad_norm": 0.3500215709209442, "learning_rate": 9.491300084007162e-06, "loss": 0.4525, "step": 2130 }, { "epoch": 1.1520994773833122, "grad_norm": 0.40554940700531006, "learning_rate": 9.490470065486168e-06, "loss": 0.4391, "step": 2131 }, { "epoch": 1.1526401153360966, "grad_norm": 0.4034478962421417, "learning_rate": 9.489639406722095e-06, "loss": 0.4953, "step": 2132 }, { "epoch": 1.153180753288881, "grad_norm": 0.35386618971824646, "learning_rate": 9.488808107833376e-06, "loss": 0.4658, "step": 2133 }, { "epoch": 1.1537213912416653, "grad_norm": 0.35145196318626404, "learning_rate": 9.487976168938535e-06, "loss": 0.4166, "step": 2134 }, { "epoch": 1.1542620291944494, "grad_norm": 0.3624265789985657, "learning_rate": 9.48714359015619e-06, "loss": 0.4281, "step": 2135 }, { "epoch": 1.1548026671472338, "grad_norm": 0.3582821190357208, "learning_rate": 9.486310371605046e-06, "loss": 0.4896, "step": 2136 }, { "epoch": 1.155343305100018, "grad_norm": 0.36306634545326233, "learning_rate": 9.485476513403905e-06, "loss": 0.4282, "step": 2137 }, { "epoch": 1.1558839430528023, "grad_norm": 0.39350152015686035, "learning_rate": 9.484642015671655e-06, "loss": 0.4531, "step": 2138 }, { "epoch": 1.1564245810055866, "grad_norm": 0.34382736682891846, "learning_rate": 9.483806878527277e-06, "loss": 0.4247, "step": 2139 }, { "epoch": 1.1569652189583708, "grad_norm": 0.3771136701107025, "learning_rate": 9.482971102089845e-06, "loss": 0.4273, "step": 2140 }, { "epoch": 1.1575058569111552, "grad_norm": 0.430793434381485, "learning_rate": 9.48213468647852e-06, "loss": 0.4638, "step": 2141 }, { "epoch": 1.1580464948639395, "grad_norm": 0.37223073840141296, "learning_rate": 9.481297631812558e-06, "loss": 0.4602, "step": 2142 }, { "epoch": 1.1585871328167237, "grad_norm": 0.41819655895233154, "learning_rate": 9.480459938211305e-06, "loss": 0.4695, "step": 2143 }, { "epoch": 1.159127770769508, "grad_norm": 0.35040050745010376, "learning_rate": 9.479621605794199e-06, "loss": 0.4223, "step": 2144 }, { "epoch": 1.1596684087222924, "grad_norm": 0.3786729574203491, "learning_rate": 9.478782634680765e-06, "loss": 0.4101, "step": 2145 }, { "epoch": 1.1602090466750765, "grad_norm": 0.445541650056839, "learning_rate": 9.477943024990623e-06, "loss": 0.4645, "step": 2146 }, { "epoch": 1.1607496846278609, "grad_norm": 0.3482339382171631, "learning_rate": 9.477102776843486e-06, "loss": 0.4434, "step": 2147 }, { "epoch": 1.1612903225806452, "grad_norm": 0.4120996594429016, "learning_rate": 9.476261890359151e-06, "loss": 0.4368, "step": 2148 }, { "epoch": 1.1618309605334294, "grad_norm": 0.40635013580322266, "learning_rate": 9.475420365657512e-06, "loss": 0.4764, "step": 2149 }, { "epoch": 1.1623715984862137, "grad_norm": 0.37310487031936646, "learning_rate": 9.47457820285855e-06, "loss": 0.3937, "step": 2150 }, { "epoch": 1.162912236438998, "grad_norm": 0.35417458415031433, "learning_rate": 9.473735402082342e-06, "loss": 0.3948, "step": 2151 }, { "epoch": 1.1634528743917822, "grad_norm": 0.3947520852088928, "learning_rate": 9.472891963449053e-06, "loss": 0.4356, "step": 2152 }, { "epoch": 1.1639935123445666, "grad_norm": 0.4242643117904663, "learning_rate": 9.472047887078937e-06, "loss": 0.4677, "step": 2153 }, { "epoch": 1.164534150297351, "grad_norm": 0.34379175305366516, "learning_rate": 9.471203173092341e-06, "loss": 0.4273, "step": 2154 }, { "epoch": 1.165074788250135, "grad_norm": 0.38708722591400146, "learning_rate": 9.470357821609703e-06, "loss": 0.4671, "step": 2155 }, { "epoch": 1.1656154262029195, "grad_norm": 0.3811572194099426, "learning_rate": 9.469511832751555e-06, "loss": 0.4166, "step": 2156 }, { "epoch": 1.1661560641557038, "grad_norm": 0.3911553919315338, "learning_rate": 9.46866520663851e-06, "loss": 0.4457, "step": 2157 }, { "epoch": 1.166696702108488, "grad_norm": 0.43305057287216187, "learning_rate": 9.467817943391284e-06, "loss": 0.5152, "step": 2158 }, { "epoch": 1.1672373400612723, "grad_norm": 0.39823833107948303, "learning_rate": 9.466970043130676e-06, "loss": 0.3998, "step": 2159 }, { "epoch": 1.1677779780140565, "grad_norm": 0.3863241970539093, "learning_rate": 9.466121505977577e-06, "loss": 0.4543, "step": 2160 }, { "epoch": 1.1683186159668408, "grad_norm": 0.45706862211227417, "learning_rate": 9.465272332052972e-06, "loss": 0.3999, "step": 2161 }, { "epoch": 1.1688592539196252, "grad_norm": 0.3852021396160126, "learning_rate": 9.464422521477935e-06, "loss": 0.4424, "step": 2162 }, { "epoch": 1.1693998918724096, "grad_norm": 0.45317286252975464, "learning_rate": 9.463572074373628e-06, "loss": 0.4563, "step": 2163 }, { "epoch": 1.1699405298251937, "grad_norm": 0.4411197900772095, "learning_rate": 9.46272099086131e-06, "loss": 0.4417, "step": 2164 }, { "epoch": 1.170481167777978, "grad_norm": 0.344078004360199, "learning_rate": 9.461869271062322e-06, "loss": 0.4278, "step": 2165 }, { "epoch": 1.1710218057307622, "grad_norm": 0.40181687474250793, "learning_rate": 9.461016915098104e-06, "loss": 0.4331, "step": 2166 }, { "epoch": 1.1715624436835466, "grad_norm": 0.42046919465065, "learning_rate": 9.460163923090184e-06, "loss": 0.4202, "step": 2167 }, { "epoch": 1.172103081636331, "grad_norm": 0.36066851019859314, "learning_rate": 9.459310295160176e-06, "loss": 0.4571, "step": 2168 }, { "epoch": 1.172643719589115, "grad_norm": 0.38671255111694336, "learning_rate": 9.458456031429792e-06, "loss": 0.4384, "step": 2169 }, { "epoch": 1.1731843575418994, "grad_norm": 0.36749428510665894, "learning_rate": 9.457601132020832e-06, "loss": 0.446, "step": 2170 }, { "epoch": 1.1737249954946838, "grad_norm": 0.3736770749092102, "learning_rate": 9.456745597055185e-06, "loss": 0.44, "step": 2171 }, { "epoch": 1.174265633447468, "grad_norm": 0.39715027809143066, "learning_rate": 9.45588942665483e-06, "loss": 0.4351, "step": 2172 }, { "epoch": 1.1748062714002523, "grad_norm": 0.34449928998947144, "learning_rate": 9.45503262094184e-06, "loss": 0.451, "step": 2173 }, { "epoch": 1.1753469093530367, "grad_norm": 0.3449814021587372, "learning_rate": 9.454175180038376e-06, "loss": 0.4125, "step": 2174 }, { "epoch": 1.1758875473058208, "grad_norm": 0.33945605158805847, "learning_rate": 9.453317104066693e-06, "loss": 0.3961, "step": 2175 }, { "epoch": 1.1764281852586052, "grad_norm": 0.39236050844192505, "learning_rate": 9.45245839314913e-06, "loss": 0.4637, "step": 2176 }, { "epoch": 1.1769688232113895, "grad_norm": 0.3773477077484131, "learning_rate": 9.45159904740812e-06, "loss": 0.4501, "step": 2177 }, { "epoch": 1.1775094611641737, "grad_norm": 0.34886983036994934, "learning_rate": 9.450739066966192e-06, "loss": 0.458, "step": 2178 }, { "epoch": 1.178050099116958, "grad_norm": 0.40819504857063293, "learning_rate": 9.449878451945958e-06, "loss": 0.4038, "step": 2179 }, { "epoch": 1.1785907370697424, "grad_norm": 0.3563839793205261, "learning_rate": 9.44901720247012e-06, "loss": 0.4686, "step": 2180 }, { "epoch": 1.1791313750225265, "grad_norm": 0.3313881754875183, "learning_rate": 9.448155318661476e-06, "loss": 0.4203, "step": 2181 }, { "epoch": 1.1796720129753109, "grad_norm": 0.3622712790966034, "learning_rate": 9.447292800642913e-06, "loss": 0.4537, "step": 2182 }, { "epoch": 1.1802126509280952, "grad_norm": 0.29673299193382263, "learning_rate": 9.446429648537406e-06, "loss": 0.4283, "step": 2183 }, { "epoch": 1.1807532888808794, "grad_norm": 0.3906388580799103, "learning_rate": 9.445565862468021e-06, "loss": 0.4324, "step": 2184 }, { "epoch": 1.1812939268336637, "grad_norm": 0.35276076197624207, "learning_rate": 9.444701442557917e-06, "loss": 0.4522, "step": 2185 }, { "epoch": 1.181834564786448, "grad_norm": 0.3876296877861023, "learning_rate": 9.443836388930339e-06, "loss": 0.4133, "step": 2186 }, { "epoch": 1.1823752027392322, "grad_norm": 0.35179904103279114, "learning_rate": 9.442970701708625e-06, "loss": 0.437, "step": 2187 }, { "epoch": 1.1829158406920166, "grad_norm": 0.40207886695861816, "learning_rate": 9.442104381016206e-06, "loss": 0.4446, "step": 2188 }, { "epoch": 1.1834564786448007, "grad_norm": 0.35293862223625183, "learning_rate": 9.441237426976596e-06, "loss": 0.4451, "step": 2189 }, { "epoch": 1.183997116597585, "grad_norm": 0.38812461495399475, "learning_rate": 9.440369839713407e-06, "loss": 0.4249, "step": 2190 }, { "epoch": 1.1845377545503695, "grad_norm": 0.3562657833099365, "learning_rate": 9.439501619350338e-06, "loss": 0.4276, "step": 2191 }, { "epoch": 1.1850783925031538, "grad_norm": 0.40622127056121826, "learning_rate": 9.438632766011177e-06, "loss": 0.4296, "step": 2192 }, { "epoch": 1.185619030455938, "grad_norm": 0.37603306770324707, "learning_rate": 9.437763279819803e-06, "loss": 0.4983, "step": 2193 }, { "epoch": 1.1861596684087223, "grad_norm": 0.32640597224235535, "learning_rate": 9.436893160900188e-06, "loss": 0.4332, "step": 2194 }, { "epoch": 1.1867003063615065, "grad_norm": 0.4022713601589203, "learning_rate": 9.436022409376391e-06, "loss": 0.4563, "step": 2195 }, { "epoch": 1.1872409443142908, "grad_norm": 0.3431012034416199, "learning_rate": 9.43515102537256e-06, "loss": 0.3772, "step": 2196 }, { "epoch": 1.1877815822670752, "grad_norm": 0.3568943440914154, "learning_rate": 9.434279009012938e-06, "loss": 0.4523, "step": 2197 }, { "epoch": 1.1883222202198593, "grad_norm": 0.4028702974319458, "learning_rate": 9.433406360421857e-06, "loss": 0.4223, "step": 2198 }, { "epoch": 1.1888628581726437, "grad_norm": 0.3365423381328583, "learning_rate": 9.432533079723734e-06, "loss": 0.4347, "step": 2199 }, { "epoch": 1.189403496125428, "grad_norm": 0.395528107881546, "learning_rate": 9.431659167043079e-06, "loss": 0.4713, "step": 2200 }, { "epoch": 1.1899441340782122, "grad_norm": 0.37539371848106384, "learning_rate": 9.430784622504497e-06, "loss": 0.4018, "step": 2201 }, { "epoch": 1.1904847720309966, "grad_norm": 0.34760650992393494, "learning_rate": 9.429909446232676e-06, "loss": 0.4432, "step": 2202 }, { "epoch": 1.191025409983781, "grad_norm": 0.3822764754295349, "learning_rate": 9.4290336383524e-06, "loss": 0.472, "step": 2203 }, { "epoch": 1.191566047936565, "grad_norm": 0.3600846827030182, "learning_rate": 9.428157198988537e-06, "loss": 0.4183, "step": 2204 }, { "epoch": 1.1921066858893494, "grad_norm": 0.37059861421585083, "learning_rate": 9.427280128266049e-06, "loss": 0.461, "step": 2205 }, { "epoch": 1.1926473238421338, "grad_norm": 0.3323056697845459, "learning_rate": 9.426402426309989e-06, "loss": 0.4355, "step": 2206 }, { "epoch": 1.193187961794918, "grad_norm": 0.4206660985946655, "learning_rate": 9.425524093245495e-06, "loss": 0.4678, "step": 2207 }, { "epoch": 1.1937285997477023, "grad_norm": 0.33384454250335693, "learning_rate": 9.424645129197801e-06, "loss": 0.4143, "step": 2208 }, { "epoch": 1.1942692377004867, "grad_norm": 0.3933919072151184, "learning_rate": 9.423765534292226e-06, "loss": 0.462, "step": 2209 }, { "epoch": 1.1948098756532708, "grad_norm": 0.3982803225517273, "learning_rate": 9.422885308654183e-06, "loss": 0.435, "step": 2210 }, { "epoch": 1.1953505136060552, "grad_norm": 0.34469297528266907, "learning_rate": 9.42200445240917e-06, "loss": 0.4267, "step": 2211 }, { "epoch": 1.1958911515588395, "grad_norm": 0.37136533856391907, "learning_rate": 9.421122965682782e-06, "loss": 0.4386, "step": 2212 }, { "epoch": 1.1964317895116237, "grad_norm": 0.40113547444343567, "learning_rate": 9.420240848600696e-06, "loss": 0.4275, "step": 2213 }, { "epoch": 1.196972427464408, "grad_norm": 0.3625503480434418, "learning_rate": 9.419358101288684e-06, "loss": 0.438, "step": 2214 }, { "epoch": 1.1975130654171924, "grad_norm": 0.4281715154647827, "learning_rate": 9.418474723872609e-06, "loss": 0.4306, "step": 2215 }, { "epoch": 1.1980537033699765, "grad_norm": 0.36719268560409546, "learning_rate": 9.417590716478416e-06, "loss": 0.4596, "step": 2216 }, { "epoch": 1.1985943413227609, "grad_norm": 0.3527308702468872, "learning_rate": 9.41670607923215e-06, "loss": 0.48, "step": 2217 }, { "epoch": 1.199134979275545, "grad_norm": 0.3650606572628021, "learning_rate": 9.41582081225994e-06, "loss": 0.393, "step": 2218 }, { "epoch": 1.1996756172283294, "grad_norm": 0.38191086053848267, "learning_rate": 9.414934915688003e-06, "loss": 0.4144, "step": 2219 }, { "epoch": 1.2002162551811137, "grad_norm": 0.35002824664115906, "learning_rate": 9.414048389642652e-06, "loss": 0.4261, "step": 2220 }, { "epoch": 1.200756893133898, "grad_norm": 0.3725591003894806, "learning_rate": 9.413161234250284e-06, "loss": 0.4481, "step": 2221 }, { "epoch": 1.2012975310866822, "grad_norm": 0.39697352051734924, "learning_rate": 9.412273449637388e-06, "loss": 0.4509, "step": 2222 }, { "epoch": 1.2018381690394666, "grad_norm": 0.323587566614151, "learning_rate": 9.411385035930545e-06, "loss": 0.4621, "step": 2223 }, { "epoch": 1.2023788069922507, "grad_norm": 0.3549892008304596, "learning_rate": 9.410495993256422e-06, "loss": 0.4807, "step": 2224 }, { "epoch": 1.202919444945035, "grad_norm": 0.3448998034000397, "learning_rate": 9.409606321741776e-06, "loss": 0.4487, "step": 2225 }, { "epoch": 1.2034600828978195, "grad_norm": 0.33572226762771606, "learning_rate": 9.408716021513455e-06, "loss": 0.4429, "step": 2226 }, { "epoch": 1.2040007208506038, "grad_norm": 0.3907988965511322, "learning_rate": 9.4078250926984e-06, "loss": 0.4633, "step": 2227 }, { "epoch": 1.204541358803388, "grad_norm": 0.35494303703308105, "learning_rate": 9.406933535423632e-06, "loss": 0.4233, "step": 2228 }, { "epoch": 1.2050819967561723, "grad_norm": 0.4350510239601135, "learning_rate": 9.406041349816272e-06, "loss": 0.4327, "step": 2229 }, { "epoch": 1.2056226347089565, "grad_norm": 0.3504043221473694, "learning_rate": 9.405148536003527e-06, "loss": 0.4136, "step": 2230 }, { "epoch": 1.2061632726617408, "grad_norm": 0.383758008480072, "learning_rate": 9.404255094112688e-06, "loss": 0.4472, "step": 2231 }, { "epoch": 1.2067039106145252, "grad_norm": 0.4049503207206726, "learning_rate": 9.403361024271145e-06, "loss": 0.4226, "step": 2232 }, { "epoch": 1.2072445485673093, "grad_norm": 0.3764093220233917, "learning_rate": 9.402466326606369e-06, "loss": 0.4626, "step": 2233 }, { "epoch": 1.2077851865200937, "grad_norm": 0.3602176308631897, "learning_rate": 9.401571001245928e-06, "loss": 0.4518, "step": 2234 }, { "epoch": 1.208325824472878, "grad_norm": 0.3844587206840515, "learning_rate": 9.400675048317473e-06, "loss": 0.4539, "step": 2235 }, { "epoch": 1.2088664624256622, "grad_norm": 0.385905921459198, "learning_rate": 9.39977846794875e-06, "loss": 0.4654, "step": 2236 }, { "epoch": 1.2094071003784466, "grad_norm": 0.3673568367958069, "learning_rate": 9.398881260267589e-06, "loss": 0.4718, "step": 2237 }, { "epoch": 1.209947738331231, "grad_norm": 0.3255668878555298, "learning_rate": 9.397983425401915e-06, "loss": 0.3788, "step": 2238 }, { "epoch": 1.210488376284015, "grad_norm": 0.3674350082874298, "learning_rate": 9.397084963479738e-06, "loss": 0.4678, "step": 2239 }, { "epoch": 1.2110290142367994, "grad_norm": 0.34120893478393555, "learning_rate": 9.396185874629158e-06, "loss": 0.4141, "step": 2240 }, { "epoch": 1.2115696521895838, "grad_norm": 0.39488181471824646, "learning_rate": 9.395286158978367e-06, "loss": 0.4583, "step": 2241 }, { "epoch": 1.212110290142368, "grad_norm": 0.31320297718048096, "learning_rate": 9.394385816655647e-06, "loss": 0.4305, "step": 2242 }, { "epoch": 1.2126509280951523, "grad_norm": 0.39865848422050476, "learning_rate": 9.393484847789363e-06, "loss": 0.463, "step": 2243 }, { "epoch": 1.2131915660479367, "grad_norm": 0.38404038548469543, "learning_rate": 9.392583252507974e-06, "loss": 0.4668, "step": 2244 }, { "epoch": 1.2137322040007208, "grad_norm": 0.3422393500804901, "learning_rate": 9.391681030940031e-06, "loss": 0.4188, "step": 2245 }, { "epoch": 1.2142728419535052, "grad_norm": 0.3838838040828705, "learning_rate": 9.390778183214168e-06, "loss": 0.4607, "step": 2246 }, { "epoch": 1.2148134799062895, "grad_norm": 0.35177937150001526, "learning_rate": 9.389874709459113e-06, "loss": 0.4314, "step": 2247 }, { "epoch": 1.2153541178590737, "grad_norm": 0.3254885971546173, "learning_rate": 9.388970609803683e-06, "loss": 0.4369, "step": 2248 }, { "epoch": 1.215894755811858, "grad_norm": 0.35308679938316345, "learning_rate": 9.388065884376778e-06, "loss": 0.4392, "step": 2249 }, { "epoch": 1.2164353937646424, "grad_norm": 0.3629399538040161, "learning_rate": 9.387160533307398e-06, "loss": 0.4443, "step": 2250 }, { "epoch": 1.2169760317174265, "grad_norm": 0.29690372943878174, "learning_rate": 9.386254556724622e-06, "loss": 0.4125, "step": 2251 }, { "epoch": 1.2175166696702109, "grad_norm": 0.3469884395599365, "learning_rate": 9.385347954757625e-06, "loss": 0.4298, "step": 2252 }, { "epoch": 1.218057307622995, "grad_norm": 0.350612610578537, "learning_rate": 9.384440727535666e-06, "loss": 0.4291, "step": 2253 }, { "epoch": 1.2185979455757794, "grad_norm": 0.36138424277305603, "learning_rate": 9.383532875188099e-06, "loss": 0.4649, "step": 2254 }, { "epoch": 1.2191385835285637, "grad_norm": 0.3611285090446472, "learning_rate": 9.382624397844363e-06, "loss": 0.4186, "step": 2255 }, { "epoch": 1.219679221481348, "grad_norm": 0.4227101504802704, "learning_rate": 9.381715295633987e-06, "loss": 0.4508, "step": 2256 }, { "epoch": 1.2202198594341322, "grad_norm": 0.34860163927078247, "learning_rate": 9.380805568686586e-06, "loss": 0.4743, "step": 2257 }, { "epoch": 1.2207604973869166, "grad_norm": 0.34503501653671265, "learning_rate": 9.379895217131873e-06, "loss": 0.3927, "step": 2258 }, { "epoch": 1.2213011353397007, "grad_norm": 0.4622492492198944, "learning_rate": 9.378984241099638e-06, "loss": 0.4687, "step": 2259 }, { "epoch": 1.2218417732924851, "grad_norm": 0.3430006504058838, "learning_rate": 9.378072640719773e-06, "loss": 0.4237, "step": 2260 }, { "epoch": 1.2223824112452695, "grad_norm": 0.42140287160873413, "learning_rate": 9.377160416122247e-06, "loss": 0.4448, "step": 2261 }, { "epoch": 1.2229230491980536, "grad_norm": 0.3985527455806732, "learning_rate": 9.376247567437124e-06, "loss": 0.4172, "step": 2262 }, { "epoch": 1.223463687150838, "grad_norm": 0.38346680998802185, "learning_rate": 9.375334094794558e-06, "loss": 0.4948, "step": 2263 }, { "epoch": 1.2240043251036223, "grad_norm": 0.39689841866493225, "learning_rate": 9.374419998324792e-06, "loss": 0.4087, "step": 2264 }, { "epoch": 1.2245449630564065, "grad_norm": 0.4045921266078949, "learning_rate": 9.373505278158152e-06, "loss": 0.4642, "step": 2265 }, { "epoch": 1.2250856010091908, "grad_norm": 0.3763102889060974, "learning_rate": 9.37258993442506e-06, "loss": 0.4534, "step": 2266 }, { "epoch": 1.2256262389619752, "grad_norm": 0.4438874423503876, "learning_rate": 9.371673967256023e-06, "loss": 0.4202, "step": 2267 }, { "epoch": 1.2261668769147593, "grad_norm": 0.3829502463340759, "learning_rate": 9.370757376781638e-06, "loss": 0.4208, "step": 2268 }, { "epoch": 1.2267075148675437, "grad_norm": 0.41881024837493896, "learning_rate": 9.36984016313259e-06, "loss": 0.4552, "step": 2269 }, { "epoch": 1.227248152820328, "grad_norm": 0.4431045651435852, "learning_rate": 9.368922326439655e-06, "loss": 0.4628, "step": 2270 }, { "epoch": 1.2277887907731122, "grad_norm": 0.370781272649765, "learning_rate": 9.368003866833697e-06, "loss": 0.4015, "step": 2271 }, { "epoch": 1.2283294287258966, "grad_norm": 0.331416517496109, "learning_rate": 9.367084784445668e-06, "loss": 0.4552, "step": 2272 }, { "epoch": 1.228870066678681, "grad_norm": 0.39645448327064514, "learning_rate": 9.366165079406606e-06, "loss": 0.4651, "step": 2273 }, { "epoch": 1.229410704631465, "grad_norm": 0.31915992498397827, "learning_rate": 9.365244751847644e-06, "loss": 0.4231, "step": 2274 }, { "epoch": 1.2299513425842494, "grad_norm": 0.3478055000305176, "learning_rate": 9.364323801900002e-06, "loss": 0.482, "step": 2275 }, { "epoch": 1.2304919805370338, "grad_norm": 0.3081183135509491, "learning_rate": 9.363402229694982e-06, "loss": 0.4057, "step": 2276 }, { "epoch": 1.231032618489818, "grad_norm": 0.39849919080734253, "learning_rate": 9.362480035363987e-06, "loss": 0.483, "step": 2277 }, { "epoch": 1.2315732564426023, "grad_norm": 0.35171177983283997, "learning_rate": 9.361557219038494e-06, "loss": 0.4569, "step": 2278 }, { "epoch": 1.2321138943953867, "grad_norm": 0.36089226603507996, "learning_rate": 9.360633780850086e-06, "loss": 0.4671, "step": 2279 }, { "epoch": 1.2326545323481708, "grad_norm": 0.3670012056827545, "learning_rate": 9.359709720930417e-06, "loss": 0.4142, "step": 2280 }, { "epoch": 1.2331951703009552, "grad_norm": 0.3502783477306366, "learning_rate": 9.35878503941124e-06, "loss": 0.4344, "step": 2281 }, { "epoch": 1.2337358082537393, "grad_norm": 0.389818400144577, "learning_rate": 9.357859736424395e-06, "loss": 0.4934, "step": 2282 }, { "epoch": 1.2342764462065237, "grad_norm": 0.40615352988243103, "learning_rate": 9.356933812101812e-06, "loss": 0.3921, "step": 2283 }, { "epoch": 1.234817084159308, "grad_norm": 0.4099143147468567, "learning_rate": 9.356007266575504e-06, "loss": 0.4831, "step": 2284 }, { "epoch": 1.2353577221120924, "grad_norm": 0.4174445867538452, "learning_rate": 9.355080099977579e-06, "loss": 0.4462, "step": 2285 }, { "epoch": 1.2358983600648765, "grad_norm": 0.4430469274520874, "learning_rate": 9.354152312440228e-06, "loss": 0.4472, "step": 2286 }, { "epoch": 1.2364389980176609, "grad_norm": 0.39883628487586975, "learning_rate": 9.353223904095736e-06, "loss": 0.4447, "step": 2287 }, { "epoch": 1.236979635970445, "grad_norm": 0.3867606818675995, "learning_rate": 9.352294875076472e-06, "loss": 0.427, "step": 2288 }, { "epoch": 1.2375202739232294, "grad_norm": 0.45105767250061035, "learning_rate": 9.351365225514898e-06, "loss": 0.4467, "step": 2289 }, { "epoch": 1.2380609118760137, "grad_norm": 0.39790135622024536, "learning_rate": 9.350434955543557e-06, "loss": 0.4362, "step": 2290 }, { "epoch": 1.2386015498287979, "grad_norm": 0.389241099357605, "learning_rate": 9.34950406529509e-06, "loss": 0.4269, "step": 2291 }, { "epoch": 1.2391421877815822, "grad_norm": 0.46503782272338867, "learning_rate": 9.34857255490222e-06, "loss": 0.4359, "step": 2292 }, { "epoch": 1.2396828257343666, "grad_norm": 0.3403927981853485, "learning_rate": 9.347640424497757e-06, "loss": 0.4373, "step": 2293 }, { "epoch": 1.2402234636871508, "grad_norm": 0.39097025990486145, "learning_rate": 9.346707674214606e-06, "loss": 0.4222, "step": 2294 }, { "epoch": 1.2407641016399351, "grad_norm": 0.42722076177597046, "learning_rate": 9.345774304185756e-06, "loss": 0.4445, "step": 2295 }, { "epoch": 1.2413047395927195, "grad_norm": 0.40439140796661377, "learning_rate": 9.344840314544286e-06, "loss": 0.4665, "step": 2296 }, { "epoch": 1.2418453775455036, "grad_norm": 0.37643495202064514, "learning_rate": 9.34390570542336e-06, "loss": 0.3981, "step": 2297 }, { "epoch": 1.242386015498288, "grad_norm": 0.341826468706131, "learning_rate": 9.342970476956234e-06, "loss": 0.3975, "step": 2298 }, { "epoch": 1.2429266534510723, "grad_norm": 0.4274151027202606, "learning_rate": 9.342034629276255e-06, "loss": 0.4438, "step": 2299 }, { "epoch": 1.2434672914038565, "grad_norm": 0.3313400149345398, "learning_rate": 9.341098162516848e-06, "loss": 0.4179, "step": 2300 }, { "epoch": 1.2440079293566408, "grad_norm": 0.39104312658309937, "learning_rate": 9.340161076811539e-06, "loss": 0.4657, "step": 2301 }, { "epoch": 1.2445485673094252, "grad_norm": 0.3932245671749115, "learning_rate": 9.33922337229393e-06, "loss": 0.4612, "step": 2302 }, { "epoch": 1.2450892052622093, "grad_norm": 0.37315264344215393, "learning_rate": 9.338285049097722e-06, "loss": 0.4403, "step": 2303 }, { "epoch": 1.2456298432149937, "grad_norm": 0.3951815366744995, "learning_rate": 9.337346107356695e-06, "loss": 0.4602, "step": 2304 }, { "epoch": 1.246170481167778, "grad_norm": 0.37928786873817444, "learning_rate": 9.336406547204726e-06, "loss": 0.4451, "step": 2305 }, { "epoch": 1.2467111191205622, "grad_norm": 0.3693735599517822, "learning_rate": 9.335466368775774e-06, "loss": 0.4122, "step": 2306 }, { "epoch": 1.2472517570733466, "grad_norm": 0.4383481740951538, "learning_rate": 9.334525572203887e-06, "loss": 0.4892, "step": 2307 }, { "epoch": 1.247792395026131, "grad_norm": 0.33080145716667175, "learning_rate": 9.333584157623204e-06, "loss": 0.4127, "step": 2308 }, { "epoch": 1.248333032978915, "grad_norm": 0.36075839400291443, "learning_rate": 9.332642125167948e-06, "loss": 0.4394, "step": 2309 }, { "epoch": 1.2488736709316994, "grad_norm": 0.35682231187820435, "learning_rate": 9.331699474972434e-06, "loss": 0.4496, "step": 2310 }, { "epoch": 1.2494143088844836, "grad_norm": 0.3295380473136902, "learning_rate": 9.330756207171064e-06, "loss": 0.4384, "step": 2311 }, { "epoch": 1.249954946837268, "grad_norm": 0.36135971546173096, "learning_rate": 9.329812321898323e-06, "loss": 0.3627, "step": 2312 }, { "epoch": 1.2504955847900523, "grad_norm": 0.43534931540489197, "learning_rate": 9.328867819288794e-06, "loss": 0.465, "step": 2313 }, { "epoch": 1.2510362227428367, "grad_norm": 0.3011641204357147, "learning_rate": 9.327922699477139e-06, "loss": 0.4207, "step": 2314 }, { "epoch": 1.2515768606956208, "grad_norm": 0.43106555938720703, "learning_rate": 9.326976962598113e-06, "loss": 0.4948, "step": 2315 }, { "epoch": 1.2521174986484052, "grad_norm": 0.3331192135810852, "learning_rate": 9.326030608786558e-06, "loss": 0.367, "step": 2316 }, { "epoch": 1.2526581366011893, "grad_norm": 0.34783822298049927, "learning_rate": 9.325083638177401e-06, "loss": 0.4393, "step": 2317 }, { "epoch": 1.2531987745539737, "grad_norm": 0.3120574355125427, "learning_rate": 9.32413605090566e-06, "loss": 0.4223, "step": 2318 }, { "epoch": 1.253739412506758, "grad_norm": 0.3396937847137451, "learning_rate": 9.323187847106441e-06, "loss": 0.4543, "step": 2319 }, { "epoch": 1.2542800504595424, "grad_norm": 0.3857153654098511, "learning_rate": 9.322239026914938e-06, "loss": 0.4591, "step": 2320 }, { "epoch": 1.2548206884123265, "grad_norm": 0.3564053177833557, "learning_rate": 9.321289590466434e-06, "loss": 0.4577, "step": 2321 }, { "epoch": 1.2553613263651109, "grad_norm": 0.34253233671188354, "learning_rate": 9.32033953789629e-06, "loss": 0.4183, "step": 2322 }, { "epoch": 1.255901964317895, "grad_norm": 0.383951336145401, "learning_rate": 9.319388869339971e-06, "loss": 0.4299, "step": 2323 }, { "epoch": 1.2564426022706794, "grad_norm": 0.33586639165878296, "learning_rate": 9.318437584933018e-06, "loss": 0.4426, "step": 2324 }, { "epoch": 1.2569832402234637, "grad_norm": 0.3765944242477417, "learning_rate": 9.317485684811065e-06, "loss": 0.498, "step": 2325 }, { "epoch": 1.2575238781762481, "grad_norm": 0.35276341438293457, "learning_rate": 9.31653316910983e-06, "loss": 0.4591, "step": 2326 }, { "epoch": 1.2580645161290323, "grad_norm": 0.3316299319267273, "learning_rate": 9.315580037965123e-06, "loss": 0.4082, "step": 2327 }, { "epoch": 1.2586051540818166, "grad_norm": 0.3217739760875702, "learning_rate": 9.314626291512838e-06, "loss": 0.4168, "step": 2328 }, { "epoch": 1.2591457920346008, "grad_norm": 0.31653720140457153, "learning_rate": 9.31367192988896e-06, "loss": 0.4382, "step": 2329 }, { "epoch": 1.2596864299873851, "grad_norm": 0.3542955815792084, "learning_rate": 9.31271695322956e-06, "loss": 0.3904, "step": 2330 }, { "epoch": 1.2602270679401695, "grad_norm": 0.3201338052749634, "learning_rate": 9.311761361670794e-06, "loss": 0.4461, "step": 2331 }, { "epoch": 1.2607677058929536, "grad_norm": 0.3405888080596924, "learning_rate": 9.310805155348912e-06, "loss": 0.4609, "step": 2332 }, { "epoch": 1.261308343845738, "grad_norm": 0.3953564465045929, "learning_rate": 9.309848334400247e-06, "loss": 0.4452, "step": 2333 }, { "epoch": 1.2618489817985221, "grad_norm": 0.3923310935497284, "learning_rate": 9.30889089896122e-06, "loss": 0.4389, "step": 2334 }, { "epoch": 1.2623896197513065, "grad_norm": 0.2811641991138458, "learning_rate": 9.307932849168341e-06, "loss": 0.4077, "step": 2335 }, { "epoch": 1.2629302577040908, "grad_norm": 0.4195081293582916, "learning_rate": 9.306974185158209e-06, "loss": 0.4563, "step": 2336 }, { "epoch": 1.2634708956568752, "grad_norm": 0.37984922528266907, "learning_rate": 9.306014907067507e-06, "loss": 0.4557, "step": 2337 }, { "epoch": 1.2640115336096593, "grad_norm": 0.3626854419708252, "learning_rate": 9.305055015033004e-06, "loss": 0.4338, "step": 2338 }, { "epoch": 1.2645521715624437, "grad_norm": 0.3649787902832031, "learning_rate": 9.304094509191564e-06, "loss": 0.4677, "step": 2339 }, { "epoch": 1.2650928095152278, "grad_norm": 0.35816383361816406, "learning_rate": 9.303133389680134e-06, "loss": 0.4417, "step": 2340 }, { "epoch": 1.2656334474680122, "grad_norm": 0.34932854771614075, "learning_rate": 9.302171656635746e-06, "loss": 0.4226, "step": 2341 }, { "epoch": 1.2661740854207966, "grad_norm": 0.3992402255535126, "learning_rate": 9.301209310195523e-06, "loss": 0.4574, "step": 2342 }, { "epoch": 1.266714723373581, "grad_norm": 0.35466626286506653, "learning_rate": 9.300246350496676e-06, "loss": 0.4379, "step": 2343 }, { "epoch": 1.267255361326365, "grad_norm": 0.3635440468788147, "learning_rate": 9.2992827776765e-06, "loss": 0.425, "step": 2344 }, { "epoch": 1.2677959992791494, "grad_norm": 0.3766593933105469, "learning_rate": 9.298318591872381e-06, "loss": 0.4503, "step": 2345 }, { "epoch": 1.2683366372319336, "grad_norm": 0.40644508600234985, "learning_rate": 9.297353793221793e-06, "loss": 0.434, "step": 2346 }, { "epoch": 1.268877275184718, "grad_norm": 0.33753782510757446, "learning_rate": 9.29638838186229e-06, "loss": 0.4649, "step": 2347 }, { "epoch": 1.2694179131375023, "grad_norm": 0.35078006982803345, "learning_rate": 9.295422357931523e-06, "loss": 0.4276, "step": 2348 }, { "epoch": 1.2699585510902867, "grad_norm": 0.41513198614120483, "learning_rate": 9.294455721567224e-06, "loss": 0.4546, "step": 2349 }, { "epoch": 1.2704991890430708, "grad_norm": 0.337698757648468, "learning_rate": 9.293488472907213e-06, "loss": 0.4209, "step": 2350 }, { "epoch": 1.2710398269958552, "grad_norm": 0.37815606594085693, "learning_rate": 9.292520612089402e-06, "loss": 0.4791, "step": 2351 }, { "epoch": 1.2715804649486393, "grad_norm": 0.33892422914505005, "learning_rate": 9.291552139251784e-06, "loss": 0.4178, "step": 2352 }, { "epoch": 1.2721211029014237, "grad_norm": 0.35459035634994507, "learning_rate": 9.290583054532443e-06, "loss": 0.446, "step": 2353 }, { "epoch": 1.272661740854208, "grad_norm": 0.3055948317050934, "learning_rate": 9.289613358069549e-06, "loss": 0.423, "step": 2354 }, { "epoch": 1.2732023788069924, "grad_norm": 0.305288165807724, "learning_rate": 9.288643050001362e-06, "loss": 0.4593, "step": 2355 }, { "epoch": 1.2737430167597765, "grad_norm": 0.36002859473228455, "learning_rate": 9.287672130466223e-06, "loss": 0.4326, "step": 2356 }, { "epoch": 1.2742836547125609, "grad_norm": 0.3907860517501831, "learning_rate": 9.286700599602565e-06, "loss": 0.4918, "step": 2357 }, { "epoch": 1.274824292665345, "grad_norm": 0.3049376308917999, "learning_rate": 9.285728457548909e-06, "loss": 0.3754, "step": 2358 }, { "epoch": 1.2753649306181294, "grad_norm": 0.39047765731811523, "learning_rate": 9.28475570444386e-06, "loss": 0.4517, "step": 2359 }, { "epoch": 1.2759055685709138, "grad_norm": 0.38072285056114197, "learning_rate": 9.283782340426112e-06, "loss": 0.4627, "step": 2360 }, { "epoch": 1.276446206523698, "grad_norm": 0.41873809695243835, "learning_rate": 9.282808365634444e-06, "loss": 0.4289, "step": 2361 }, { "epoch": 1.2769868444764823, "grad_norm": 0.3254391849040985, "learning_rate": 9.281833780207725e-06, "loss": 0.4131, "step": 2362 }, { "epoch": 1.2775274824292666, "grad_norm": 0.519358217716217, "learning_rate": 9.280858584284909e-06, "loss": 0.4975, "step": 2363 }, { "epoch": 1.2780681203820508, "grad_norm": 0.3705024719238281, "learning_rate": 9.279882778005035e-06, "loss": 0.4235, "step": 2364 }, { "epoch": 1.2786087583348351, "grad_norm": 0.45141515135765076, "learning_rate": 9.278906361507238e-06, "loss": 0.4301, "step": 2365 }, { "epoch": 1.2791493962876195, "grad_norm": 0.4273265600204468, "learning_rate": 9.27792933493073e-06, "loss": 0.4363, "step": 2366 }, { "epoch": 1.2796900342404036, "grad_norm": 0.414331316947937, "learning_rate": 9.276951698414812e-06, "loss": 0.4153, "step": 2367 }, { "epoch": 1.280230672193188, "grad_norm": 0.4092567265033722, "learning_rate": 9.275973452098877e-06, "loss": 0.4542, "step": 2368 }, { "epoch": 1.2807713101459721, "grad_norm": 0.4097854495048523, "learning_rate": 9.2749945961224e-06, "loss": 0.4499, "step": 2369 }, { "epoch": 1.2813119480987565, "grad_norm": 0.40357089042663574, "learning_rate": 9.274015130624943e-06, "loss": 0.4272, "step": 2370 }, { "epoch": 1.2818525860515408, "grad_norm": 0.3889763057231903, "learning_rate": 9.273035055746159e-06, "loss": 0.4346, "step": 2371 }, { "epoch": 1.2823932240043252, "grad_norm": 0.3984520733356476, "learning_rate": 9.272054371625783e-06, "loss": 0.4514, "step": 2372 }, { "epoch": 1.2829338619571093, "grad_norm": 0.35287633538246155, "learning_rate": 9.271073078403643e-06, "loss": 0.4235, "step": 2373 }, { "epoch": 1.2834744999098937, "grad_norm": 0.4218217432498932, "learning_rate": 9.270091176219645e-06, "loss": 0.474, "step": 2374 }, { "epoch": 1.2840151378626778, "grad_norm": 0.3276956081390381, "learning_rate": 9.26910866521379e-06, "loss": 0.4135, "step": 2375 }, { "epoch": 1.2845557758154622, "grad_norm": 0.3250516653060913, "learning_rate": 9.268125545526163e-06, "loss": 0.4082, "step": 2376 }, { "epoch": 1.2850964137682466, "grad_norm": 0.44134101271629333, "learning_rate": 9.267141817296933e-06, "loss": 0.4576, "step": 2377 }, { "epoch": 1.285637051721031, "grad_norm": 0.3511035442352295, "learning_rate": 9.26615748066636e-06, "loss": 0.4313, "step": 2378 }, { "epoch": 1.286177689673815, "grad_norm": 0.40824609994888306, "learning_rate": 9.265172535774788e-06, "loss": 0.4029, "step": 2379 }, { "epoch": 1.2867183276265994, "grad_norm": 0.4215698540210724, "learning_rate": 9.264186982762649e-06, "loss": 0.4939, "step": 2380 }, { "epoch": 1.2872589655793836, "grad_norm": 0.3959537446498871, "learning_rate": 9.263200821770462e-06, "loss": 0.4302, "step": 2381 }, { "epoch": 1.287799603532168, "grad_norm": 0.3674355447292328, "learning_rate": 9.262214052938832e-06, "loss": 0.4367, "step": 2382 }, { "epoch": 1.2883402414849523, "grad_norm": 0.4184582829475403, "learning_rate": 9.26122667640845e-06, "loss": 0.4503, "step": 2383 }, { "epoch": 1.2888808794377367, "grad_norm": 0.3745483458042145, "learning_rate": 9.260238692320093e-06, "loss": 0.3753, "step": 2384 }, { "epoch": 1.2894215173905208, "grad_norm": 0.35480156540870667, "learning_rate": 9.25925010081463e-06, "loss": 0.5004, "step": 2385 }, { "epoch": 1.2899621553433052, "grad_norm": 0.3390335738658905, "learning_rate": 9.258260902033007e-06, "loss": 0.4054, "step": 2386 }, { "epoch": 1.2905027932960893, "grad_norm": 0.3922213315963745, "learning_rate": 9.257271096116268e-06, "loss": 0.4065, "step": 2387 }, { "epoch": 1.2910434312488737, "grad_norm": 0.3549047112464905, "learning_rate": 9.256280683205534e-06, "loss": 0.4525, "step": 2388 }, { "epoch": 1.291584069201658, "grad_norm": 0.36089131236076355, "learning_rate": 9.255289663442018e-06, "loss": 0.4325, "step": 2389 }, { "epoch": 1.2921247071544422, "grad_norm": 0.33818209171295166, "learning_rate": 9.254298036967015e-06, "loss": 0.4321, "step": 2390 }, { "epoch": 1.2926653451072265, "grad_norm": 0.367330402135849, "learning_rate": 9.253305803921915e-06, "loss": 0.4429, "step": 2391 }, { "epoch": 1.2932059830600109, "grad_norm": 0.32385024428367615, "learning_rate": 9.252312964448182e-06, "loss": 0.4145, "step": 2392 }, { "epoch": 1.293746621012795, "grad_norm": 0.3290650546550751, "learning_rate": 9.251319518687379e-06, "loss": 0.4268, "step": 2393 }, { "epoch": 1.2942872589655794, "grad_norm": 0.3983169496059418, "learning_rate": 9.250325466781145e-06, "loss": 0.4725, "step": 2394 }, { "epoch": 1.2948278969183638, "grad_norm": 0.3585212528705597, "learning_rate": 9.249330808871213e-06, "loss": 0.429, "step": 2395 }, { "epoch": 1.295368534871148, "grad_norm": 0.3783895969390869, "learning_rate": 9.248335545099398e-06, "loss": 0.4486, "step": 2396 }, { "epoch": 1.2959091728239323, "grad_norm": 0.34847337007522583, "learning_rate": 9.247339675607606e-06, "loss": 0.4414, "step": 2397 }, { "epoch": 1.2964498107767164, "grad_norm": 0.3564877212047577, "learning_rate": 9.246343200537823e-06, "loss": 0.458, "step": 2398 }, { "epoch": 1.2969904487295008, "grad_norm": 0.34090301394462585, "learning_rate": 9.245346120032124e-06, "loss": 0.4475, "step": 2399 }, { "epoch": 1.2975310866822851, "grad_norm": 0.41178473830223083, "learning_rate": 9.244348434232676e-06, "loss": 0.4478, "step": 2400 }, { "epoch": 1.2980717246350695, "grad_norm": 0.3171329200267792, "learning_rate": 9.24335014328172e-06, "loss": 0.4077, "step": 2401 }, { "epoch": 1.2986123625878536, "grad_norm": 0.36641278862953186, "learning_rate": 9.242351247321595e-06, "loss": 0.4764, "step": 2402 }, { "epoch": 1.299153000540638, "grad_norm": 0.34632667899131775, "learning_rate": 9.241351746494723e-06, "loss": 0.4268, "step": 2403 }, { "epoch": 1.2996936384934221, "grad_norm": 0.3175496757030487, "learning_rate": 9.240351640943607e-06, "loss": 0.4124, "step": 2404 }, { "epoch": 1.3002342764462065, "grad_norm": 0.40579769015312195, "learning_rate": 9.239350930810843e-06, "loss": 0.4939, "step": 2405 }, { "epoch": 1.3007749143989908, "grad_norm": 0.30617377161979675, "learning_rate": 9.23834961623911e-06, "loss": 0.4298, "step": 2406 }, { "epoch": 1.3013155523517752, "grad_norm": 0.3338660001754761, "learning_rate": 9.237347697371173e-06, "loss": 0.4309, "step": 2407 }, { "epoch": 1.3018561903045593, "grad_norm": 0.3111022412776947, "learning_rate": 9.236345174349884e-06, "loss": 0.4361, "step": 2408 }, { "epoch": 1.3023968282573437, "grad_norm": 0.35824936628341675, "learning_rate": 9.23534204731818e-06, "loss": 0.4738, "step": 2409 }, { "epoch": 1.3029374662101278, "grad_norm": 0.31303870677948, "learning_rate": 9.23433831641909e-06, "loss": 0.4224, "step": 2410 }, { "epoch": 1.3034781041629122, "grad_norm": 0.30870863795280457, "learning_rate": 9.233333981795715e-06, "loss": 0.3822, "step": 2411 }, { "epoch": 1.3040187421156966, "grad_norm": 0.41559818387031555, "learning_rate": 9.23232904359126e-06, "loss": 0.4988, "step": 2412 }, { "epoch": 1.304559380068481, "grad_norm": 0.3499990701675415, "learning_rate": 9.231323501949003e-06, "loss": 0.4313, "step": 2413 }, { "epoch": 1.305100018021265, "grad_norm": 0.3727319538593292, "learning_rate": 9.230317357012312e-06, "loss": 0.459, "step": 2414 }, { "epoch": 1.3056406559740494, "grad_norm": 0.34095412492752075, "learning_rate": 9.229310608924643e-06, "loss": 0.3827, "step": 2415 }, { "epoch": 1.3061812939268336, "grad_norm": 0.4113728106021881, "learning_rate": 9.228303257829535e-06, "loss": 0.423, "step": 2416 }, { "epoch": 1.306721931879618, "grad_norm": 0.38452550768852234, "learning_rate": 9.227295303870615e-06, "loss": 0.4199, "step": 2417 }, { "epoch": 1.3072625698324023, "grad_norm": 0.3986125886440277, "learning_rate": 9.226286747191597e-06, "loss": 0.4625, "step": 2418 }, { "epoch": 1.3078032077851864, "grad_norm": 0.43863949179649353, "learning_rate": 9.225277587936275e-06, "loss": 0.4138, "step": 2419 }, { "epoch": 1.3083438457379708, "grad_norm": 0.3248741626739502, "learning_rate": 9.224267826248536e-06, "loss": 0.423, "step": 2420 }, { "epoch": 1.3088844836907552, "grad_norm": 0.41215142607688904, "learning_rate": 9.22325746227235e-06, "loss": 0.4702, "step": 2421 }, { "epoch": 1.3094251216435393, "grad_norm": 0.37813472747802734, "learning_rate": 9.222246496151772e-06, "loss": 0.447, "step": 2422 }, { "epoch": 1.3099657595963237, "grad_norm": 0.3352581262588501, "learning_rate": 9.221234928030944e-06, "loss": 0.4113, "step": 2423 }, { "epoch": 1.310506397549108, "grad_norm": 0.3993983864784241, "learning_rate": 9.220222758054093e-06, "loss": 0.4622, "step": 2424 }, { "epoch": 1.3110470355018922, "grad_norm": 0.34393075108528137, "learning_rate": 9.219209986365533e-06, "loss": 0.4406, "step": 2425 }, { "epoch": 1.3115876734546765, "grad_norm": 0.41056370735168457, "learning_rate": 9.218196613109664e-06, "loss": 0.4707, "step": 2426 }, { "epoch": 1.3121283114074607, "grad_norm": 0.3283563554286957, "learning_rate": 9.21718263843097e-06, "loss": 0.4335, "step": 2427 }, { "epoch": 1.312668949360245, "grad_norm": 0.36037319898605347, "learning_rate": 9.21616806247402e-06, "loss": 0.4211, "step": 2428 }, { "epoch": 1.3132095873130294, "grad_norm": 0.3254454433917999, "learning_rate": 9.215152885383473e-06, "loss": 0.4663, "step": 2429 }, { "epoch": 1.3137502252658138, "grad_norm": 0.3504960238933563, "learning_rate": 9.21413710730407e-06, "loss": 0.4154, "step": 2430 }, { "epoch": 1.314290863218598, "grad_norm": 0.3567231595516205, "learning_rate": 9.21312072838064e-06, "loss": 0.4223, "step": 2431 }, { "epoch": 1.3148315011713823, "grad_norm": 0.3479120433330536, "learning_rate": 9.212103748758095e-06, "loss": 0.453, "step": 2432 }, { "epoch": 1.3153721391241664, "grad_norm": 0.31994810700416565, "learning_rate": 9.211086168581433e-06, "loss": 0.438, "step": 2433 }, { "epoch": 1.3159127770769508, "grad_norm": 0.3787417411804199, "learning_rate": 9.210067987995742e-06, "loss": 0.4615, "step": 2434 }, { "epoch": 1.3164534150297351, "grad_norm": 0.30681583285331726, "learning_rate": 9.20904920714619e-06, "loss": 0.3856, "step": 2435 }, { "epoch": 1.3169940529825195, "grad_norm": 0.35557037591934204, "learning_rate": 9.208029826178034e-06, "loss": 0.45, "step": 2436 }, { "epoch": 1.3175346909353036, "grad_norm": 0.382725328207016, "learning_rate": 9.207009845236614e-06, "loss": 0.4463, "step": 2437 }, { "epoch": 1.318075328888088, "grad_norm": 0.2951371967792511, "learning_rate": 9.205989264467359e-06, "loss": 0.3897, "step": 2438 }, { "epoch": 1.3186159668408721, "grad_norm": 0.33292078971862793, "learning_rate": 9.20496808401578e-06, "loss": 0.4581, "step": 2439 }, { "epoch": 1.3191566047936565, "grad_norm": 0.3527860939502716, "learning_rate": 9.203946304027476e-06, "loss": 0.4135, "step": 2440 }, { "epoch": 1.3196972427464408, "grad_norm": 0.3484804928302765, "learning_rate": 9.20292392464813e-06, "loss": 0.4608, "step": 2441 }, { "epoch": 1.3202378806992252, "grad_norm": 0.32849547266960144, "learning_rate": 9.201900946023512e-06, "loss": 0.4735, "step": 2442 }, { "epoch": 1.3207785186520093, "grad_norm": 0.345099538564682, "learning_rate": 9.200877368299474e-06, "loss": 0.4386, "step": 2443 }, { "epoch": 1.3213191566047937, "grad_norm": 0.34676334261894226, "learning_rate": 9.19985319162196e-06, "loss": 0.4486, "step": 2444 }, { "epoch": 1.3218597945575778, "grad_norm": 0.31078365445137024, "learning_rate": 9.198828416136991e-06, "loss": 0.3761, "step": 2445 }, { "epoch": 1.3224004325103622, "grad_norm": 0.39399051666259766, "learning_rate": 9.19780304199068e-06, "loss": 0.4963, "step": 2446 }, { "epoch": 1.3229410704631466, "grad_norm": 0.3411425054073334, "learning_rate": 9.196777069329222e-06, "loss": 0.4087, "step": 2447 }, { "epoch": 1.323481708415931, "grad_norm": 0.34350520372390747, "learning_rate": 9.195750498298898e-06, "loss": 0.4536, "step": 2448 }, { "epoch": 1.324022346368715, "grad_norm": 0.3334350883960724, "learning_rate": 9.194723329046076e-06, "loss": 0.425, "step": 2449 }, { "epoch": 1.3245629843214994, "grad_norm": 0.34668123722076416, "learning_rate": 9.193695561717207e-06, "loss": 0.4197, "step": 2450 }, { "epoch": 1.3251036222742836, "grad_norm": 0.3792758584022522, "learning_rate": 9.192667196458829e-06, "loss": 0.4212, "step": 2451 }, { "epoch": 1.325644260227068, "grad_norm": 0.3879375457763672, "learning_rate": 9.191638233417563e-06, "loss": 0.4541, "step": 2452 }, { "epoch": 1.3261848981798523, "grad_norm": 0.38033583760261536, "learning_rate": 9.190608672740118e-06, "loss": 0.4258, "step": 2453 }, { "epoch": 1.3267255361326364, "grad_norm": 0.31433993577957153, "learning_rate": 9.189578514573287e-06, "loss": 0.3939, "step": 2454 }, { "epoch": 1.3272661740854208, "grad_norm": 0.40909436345100403, "learning_rate": 9.188547759063948e-06, "loss": 0.4741, "step": 2455 }, { "epoch": 1.3278068120382052, "grad_norm": 0.40054723620414734, "learning_rate": 9.187516406359062e-06, "loss": 0.4371, "step": 2456 }, { "epoch": 1.3283474499909893, "grad_norm": 0.38349390029907227, "learning_rate": 9.186484456605682e-06, "loss": 0.4281, "step": 2457 }, { "epoch": 1.3288880879437737, "grad_norm": 0.4046049416065216, "learning_rate": 9.185451909950937e-06, "loss": 0.4476, "step": 2458 }, { "epoch": 1.329428725896558, "grad_norm": 0.33745673298835754, "learning_rate": 9.184418766542046e-06, "loss": 0.4072, "step": 2459 }, { "epoch": 1.3299693638493422, "grad_norm": 0.3759952187538147, "learning_rate": 9.183385026526317e-06, "loss": 0.453, "step": 2460 }, { "epoch": 1.3305100018021265, "grad_norm": 0.37083351612091064, "learning_rate": 9.182350690051134e-06, "loss": 0.455, "step": 2461 }, { "epoch": 1.3310506397549107, "grad_norm": 0.35298165678977966, "learning_rate": 9.181315757263973e-06, "loss": 0.4801, "step": 2462 }, { "epoch": 1.331591277707695, "grad_norm": 0.30566030740737915, "learning_rate": 9.180280228312394e-06, "loss": 0.3776, "step": 2463 }, { "epoch": 1.3321319156604794, "grad_norm": 0.33040979504585266, "learning_rate": 9.179244103344039e-06, "loss": 0.4015, "step": 2464 }, { "epoch": 1.3326725536132638, "grad_norm": 0.3727269172668457, "learning_rate": 9.178207382506634e-06, "loss": 0.5066, "step": 2465 }, { "epoch": 1.333213191566048, "grad_norm": 0.36058521270751953, "learning_rate": 9.177170065948e-06, "loss": 0.4391, "step": 2466 }, { "epoch": 1.3337538295188323, "grad_norm": 0.3290637731552124, "learning_rate": 9.17613215381603e-06, "loss": 0.466, "step": 2467 }, { "epoch": 1.3342944674716164, "grad_norm": 0.3557436466217041, "learning_rate": 9.175093646258709e-06, "loss": 0.4387, "step": 2468 }, { "epoch": 1.3348351054244008, "grad_norm": 0.39177340269088745, "learning_rate": 9.174054543424106e-06, "loss": 0.4607, "step": 2469 }, { "epoch": 1.3353757433771851, "grad_norm": 0.32272791862487793, "learning_rate": 9.173014845460375e-06, "loss": 0.4258, "step": 2470 }, { "epoch": 1.3359163813299695, "grad_norm": 0.42237669229507446, "learning_rate": 9.171974552515753e-06, "loss": 0.4199, "step": 2471 }, { "epoch": 1.3364570192827536, "grad_norm": 0.3549451529979706, "learning_rate": 9.170933664738563e-06, "loss": 0.4044, "step": 2472 }, { "epoch": 1.336997657235538, "grad_norm": 0.3337773382663727, "learning_rate": 9.169892182277214e-06, "loss": 0.4506, "step": 2473 }, { "epoch": 1.3375382951883221, "grad_norm": 0.3925420939922333, "learning_rate": 9.168850105280198e-06, "loss": 0.4141, "step": 2474 }, { "epoch": 1.3380789331411065, "grad_norm": 0.37819182872772217, "learning_rate": 9.167807433896091e-06, "loss": 0.5166, "step": 2475 }, { "epoch": 1.3386195710938908, "grad_norm": 0.38570624589920044, "learning_rate": 9.166764168273559e-06, "loss": 0.4778, "step": 2476 }, { "epoch": 1.3391602090466752, "grad_norm": 0.31898075342178345, "learning_rate": 9.165720308561347e-06, "loss": 0.3757, "step": 2477 }, { "epoch": 1.3397008469994593, "grad_norm": 0.3864937424659729, "learning_rate": 9.164675854908284e-06, "loss": 0.494, "step": 2478 }, { "epoch": 1.3402414849522437, "grad_norm": 0.3443322479724884, "learning_rate": 9.163630807463292e-06, "loss": 0.4216, "step": 2479 }, { "epoch": 1.3407821229050279, "grad_norm": 0.43592381477355957, "learning_rate": 9.162585166375367e-06, "loss": 0.4449, "step": 2480 }, { "epoch": 1.3413227608578122, "grad_norm": 0.36814263463020325, "learning_rate": 9.161538931793595e-06, "loss": 0.448, "step": 2481 }, { "epoch": 1.3418633988105966, "grad_norm": 0.325903058052063, "learning_rate": 9.160492103867149e-06, "loss": 0.4119, "step": 2482 }, { "epoch": 1.3424040367633807, "grad_norm": 0.4102088212966919, "learning_rate": 9.159444682745282e-06, "loss": 0.4866, "step": 2483 }, { "epoch": 1.342944674716165, "grad_norm": 0.3582179844379425, "learning_rate": 9.158396668577333e-06, "loss": 0.4608, "step": 2484 }, { "epoch": 1.3434853126689494, "grad_norm": 0.3896026313304901, "learning_rate": 9.157348061512728e-06, "loss": 0.4008, "step": 2485 }, { "epoch": 1.3440259506217336, "grad_norm": 0.3490865230560303, "learning_rate": 9.156298861700971e-06, "loss": 0.4442, "step": 2486 }, { "epoch": 1.344566588574518, "grad_norm": 0.39922162890434265, "learning_rate": 9.155249069291661e-06, "loss": 0.4744, "step": 2487 }, { "epoch": 1.3451072265273023, "grad_norm": 0.30951571464538574, "learning_rate": 9.154198684434472e-06, "loss": 0.3839, "step": 2488 }, { "epoch": 1.3456478644800864, "grad_norm": 0.3632635176181793, "learning_rate": 9.153147707279168e-06, "loss": 0.4455, "step": 2489 }, { "epoch": 1.3461885024328708, "grad_norm": 0.3484949469566345, "learning_rate": 9.152096137975593e-06, "loss": 0.4681, "step": 2490 }, { "epoch": 1.346729140385655, "grad_norm": 0.33641254901885986, "learning_rate": 9.151043976673676e-06, "loss": 0.4173, "step": 2491 }, { "epoch": 1.3472697783384393, "grad_norm": 0.34935262799263, "learning_rate": 9.149991223523439e-06, "loss": 0.4061, "step": 2492 }, { "epoch": 1.3478104162912237, "grad_norm": 0.37855634093284607, "learning_rate": 9.148937878674975e-06, "loss": 0.4633, "step": 2493 }, { "epoch": 1.348351054244008, "grad_norm": 0.3477320969104767, "learning_rate": 9.147883942278474e-06, "loss": 0.4195, "step": 2494 }, { "epoch": 1.3488916921967922, "grad_norm": 0.39148709177970886, "learning_rate": 9.146829414484198e-06, "loss": 0.4698, "step": 2495 }, { "epoch": 1.3494323301495765, "grad_norm": 0.40924400091171265, "learning_rate": 9.145774295442504e-06, "loss": 0.4393, "step": 2496 }, { "epoch": 1.3499729681023607, "grad_norm": 0.49655890464782715, "learning_rate": 9.144718585303829e-06, "loss": 0.4513, "step": 2497 }, { "epoch": 1.350513606055145, "grad_norm": 0.36215028166770935, "learning_rate": 9.143662284218691e-06, "loss": 0.451, "step": 2498 }, { "epoch": 1.3510542440079294, "grad_norm": 0.37285175919532776, "learning_rate": 9.142605392337697e-06, "loss": 0.4216, "step": 2499 }, { "epoch": 1.3515948819607138, "grad_norm": 0.4430399239063263, "learning_rate": 9.14154790981154e-06, "loss": 0.4176, "step": 2500 }, { "epoch": 1.352135519913498, "grad_norm": 0.3936831057071686, "learning_rate": 9.140489836790989e-06, "loss": 0.4633, "step": 2501 }, { "epoch": 1.3526761578662823, "grad_norm": 0.37488722801208496, "learning_rate": 9.139431173426905e-06, "loss": 0.4296, "step": 2502 }, { "epoch": 1.3532167958190664, "grad_norm": 0.38206738233566284, "learning_rate": 9.13837191987023e-06, "loss": 0.4323, "step": 2503 }, { "epoch": 1.3537574337718508, "grad_norm": 0.3599168062210083, "learning_rate": 9.137312076271989e-06, "loss": 0.4255, "step": 2504 }, { "epoch": 1.3542980717246351, "grad_norm": 0.4199734032154083, "learning_rate": 9.136251642783294e-06, "loss": 0.4803, "step": 2505 }, { "epoch": 1.3548387096774195, "grad_norm": 0.4077081084251404, "learning_rate": 9.135190619555339e-06, "loss": 0.4305, "step": 2506 }, { "epoch": 1.3553793476302036, "grad_norm": 0.4488022029399872, "learning_rate": 9.134129006739403e-06, "loss": 0.4646, "step": 2507 }, { "epoch": 1.355919985582988, "grad_norm": 0.3567434847354889, "learning_rate": 9.13306680448685e-06, "loss": 0.3833, "step": 2508 }, { "epoch": 1.3564606235357721, "grad_norm": 0.46651482582092285, "learning_rate": 9.132004012949124e-06, "loss": 0.4489, "step": 2509 }, { "epoch": 1.3570012614885565, "grad_norm": 0.3879334330558777, "learning_rate": 9.130940632277757e-06, "loss": 0.4286, "step": 2510 }, { "epoch": 1.3575418994413408, "grad_norm": 0.37734895944595337, "learning_rate": 9.129876662624366e-06, "loss": 0.4454, "step": 2511 }, { "epoch": 1.358082537394125, "grad_norm": 0.3508733808994293, "learning_rate": 9.12881210414065e-06, "loss": 0.4183, "step": 2512 }, { "epoch": 1.3586231753469094, "grad_norm": 0.36155420541763306, "learning_rate": 9.127746956978388e-06, "loss": 0.4631, "step": 2513 }, { "epoch": 1.3591638132996937, "grad_norm": 0.3453098237514496, "learning_rate": 9.126681221289448e-06, "loss": 0.4324, "step": 2514 }, { "epoch": 1.3597044512524779, "grad_norm": 0.38498273491859436, "learning_rate": 9.125614897225785e-06, "loss": 0.4327, "step": 2515 }, { "epoch": 1.3602450892052622, "grad_norm": 0.39644426107406616, "learning_rate": 9.124547984939427e-06, "loss": 0.4537, "step": 2516 }, { "epoch": 1.3607857271580466, "grad_norm": 0.34871289134025574, "learning_rate": 9.123480484582498e-06, "loss": 0.43, "step": 2517 }, { "epoch": 1.3613263651108307, "grad_norm": 0.3775876760482788, "learning_rate": 9.122412396307196e-06, "loss": 0.4601, "step": 2518 }, { "epoch": 1.361867003063615, "grad_norm": 0.3927980959415436, "learning_rate": 9.12134372026581e-06, "loss": 0.4253, "step": 2519 }, { "epoch": 1.3624076410163992, "grad_norm": 0.3621505796909332, "learning_rate": 9.120274456610708e-06, "loss": 0.4899, "step": 2520 }, { "epoch": 1.3629482789691836, "grad_norm": 0.30624163150787354, "learning_rate": 9.119204605494345e-06, "loss": 0.4045, "step": 2521 }, { "epoch": 1.363488916921968, "grad_norm": 0.2986149489879608, "learning_rate": 9.118134167069258e-06, "loss": 0.418, "step": 2522 }, { "epoch": 1.3640295548747523, "grad_norm": 0.3559814989566803, "learning_rate": 9.117063141488067e-06, "loss": 0.4618, "step": 2523 }, { "epoch": 1.3645701928275364, "grad_norm": 0.3439945578575134, "learning_rate": 9.11599152890348e-06, "loss": 0.4326, "step": 2524 }, { "epoch": 1.3651108307803208, "grad_norm": 0.31808650493621826, "learning_rate": 9.114919329468283e-06, "loss": 0.4275, "step": 2525 }, { "epoch": 1.365651468733105, "grad_norm": 0.3407554030418396, "learning_rate": 9.113846543335349e-06, "loss": 0.4439, "step": 2526 }, { "epoch": 1.3661921066858893, "grad_norm": 0.3182757794857025, "learning_rate": 9.112773170657631e-06, "loss": 0.4177, "step": 2527 }, { "epoch": 1.3667327446386737, "grad_norm": 0.34512054920196533, "learning_rate": 9.111699211588175e-06, "loss": 0.4515, "step": 2528 }, { "epoch": 1.367273382591458, "grad_norm": 0.3278456926345825, "learning_rate": 9.110624666280099e-06, "loss": 0.439, "step": 2529 }, { "epoch": 1.3678140205442422, "grad_norm": 0.30506500601768494, "learning_rate": 9.10954953488661e-06, "loss": 0.4169, "step": 2530 }, { "epoch": 1.3683546584970265, "grad_norm": 0.3284112811088562, "learning_rate": 9.108473817561e-06, "loss": 0.4181, "step": 2531 }, { "epoch": 1.3688952964498107, "grad_norm": 0.37319162487983704, "learning_rate": 9.107397514456643e-06, "loss": 0.4607, "step": 2532 }, { "epoch": 1.369435934402595, "grad_norm": 0.31024980545043945, "learning_rate": 9.106320625726995e-06, "loss": 0.4059, "step": 2533 }, { "epoch": 1.3699765723553794, "grad_norm": 0.3992244601249695, "learning_rate": 9.105243151525598e-06, "loss": 0.4932, "step": 2534 }, { "epoch": 1.3705172103081638, "grad_norm": 0.3026205897331238, "learning_rate": 9.104165092006075e-06, "loss": 0.4204, "step": 2535 }, { "epoch": 1.371057848260948, "grad_norm": 0.3684896230697632, "learning_rate": 9.103086447322136e-06, "loss": 0.4454, "step": 2536 }, { "epoch": 1.3715984862137323, "grad_norm": 0.38126397132873535, "learning_rate": 9.102007217627568e-06, "loss": 0.4732, "step": 2537 }, { "epoch": 1.3721391241665164, "grad_norm": 0.3135012686252594, "learning_rate": 9.10092740307625e-06, "loss": 0.4374, "step": 2538 }, { "epoch": 1.3726797621193008, "grad_norm": 0.3915354311466217, "learning_rate": 9.099847003822139e-06, "loss": 0.4375, "step": 2539 }, { "epoch": 1.3732204000720851, "grad_norm": 0.3551939129829407, "learning_rate": 9.098766020019273e-06, "loss": 0.4527, "step": 2540 }, { "epoch": 1.3737610380248693, "grad_norm": 0.3653704524040222, "learning_rate": 9.097684451821783e-06, "loss": 0.3931, "step": 2541 }, { "epoch": 1.3743016759776536, "grad_norm": 0.41091975569725037, "learning_rate": 9.096602299383872e-06, "loss": 0.4849, "step": 2542 }, { "epoch": 1.374842313930438, "grad_norm": 0.35535484552383423, "learning_rate": 9.09551956285983e-06, "loss": 0.4221, "step": 2543 }, { "epoch": 1.3753829518832221, "grad_norm": 0.4004269540309906, "learning_rate": 9.094436242404039e-06, "loss": 0.4366, "step": 2544 }, { "epoch": 1.3759235898360065, "grad_norm": 0.3528960049152374, "learning_rate": 9.09335233817095e-06, "loss": 0.3928, "step": 2545 }, { "epoch": 1.3764642277887909, "grad_norm": 0.510593056678772, "learning_rate": 9.092267850315106e-06, "loss": 0.4862, "step": 2546 }, { "epoch": 1.377004865741575, "grad_norm": 0.3259771168231964, "learning_rate": 9.091182778991132e-06, "loss": 0.4215, "step": 2547 }, { "epoch": 1.3775455036943594, "grad_norm": 0.451362669467926, "learning_rate": 9.090097124353737e-06, "loss": 0.4199, "step": 2548 }, { "epoch": 1.3780861416471435, "grad_norm": 0.3616625964641571, "learning_rate": 9.089010886557706e-06, "loss": 0.4328, "step": 2549 }, { "epoch": 1.3786267795999279, "grad_norm": 0.37003451585769653, "learning_rate": 9.08792406575792e-06, "loss": 0.4332, "step": 2550 }, { "epoch": 1.3791674175527122, "grad_norm": 0.3810839354991913, "learning_rate": 9.08683666210933e-06, "loss": 0.4253, "step": 2551 }, { "epoch": 1.3797080555054966, "grad_norm": 0.4266107380390167, "learning_rate": 9.085748675766981e-06, "loss": 0.4508, "step": 2552 }, { "epoch": 1.3802486934582807, "grad_norm": 0.39522749185562134, "learning_rate": 9.084660106885992e-06, "loss": 0.4043, "step": 2553 }, { "epoch": 1.380789331411065, "grad_norm": 0.4028782546520233, "learning_rate": 9.083570955621572e-06, "loss": 0.4656, "step": 2554 }, { "epoch": 1.3813299693638492, "grad_norm": 0.34329450130462646, "learning_rate": 9.082481222129008e-06, "loss": 0.4385, "step": 2555 }, { "epoch": 1.3818706073166336, "grad_norm": 0.4008656144142151, "learning_rate": 9.081390906563675e-06, "loss": 0.4881, "step": 2556 }, { "epoch": 1.382411245269418, "grad_norm": 0.3827539384365082, "learning_rate": 9.080300009081025e-06, "loss": 0.4496, "step": 2557 }, { "epoch": 1.3829518832222023, "grad_norm": 0.35076501965522766, "learning_rate": 9.079208529836598e-06, "loss": 0.4231, "step": 2558 }, { "epoch": 1.3834925211749864, "grad_norm": 0.32431963086128235, "learning_rate": 9.078116468986016e-06, "loss": 0.4315, "step": 2559 }, { "epoch": 1.3840331591277708, "grad_norm": 0.354658305644989, "learning_rate": 9.07702382668498e-06, "loss": 0.4423, "step": 2560 }, { "epoch": 1.384573797080555, "grad_norm": 0.3761045038700104, "learning_rate": 9.07593060308928e-06, "loss": 0.4651, "step": 2561 }, { "epoch": 1.3851144350333393, "grad_norm": 0.3039899468421936, "learning_rate": 9.074836798354785e-06, "loss": 0.3852, "step": 2562 }, { "epoch": 1.3856550729861237, "grad_norm": 0.42946872115135193, "learning_rate": 9.073742412637448e-06, "loss": 0.4575, "step": 2563 }, { "epoch": 1.386195710938908, "grad_norm": 0.3606507182121277, "learning_rate": 9.072647446093304e-06, "loss": 0.479, "step": 2564 }, { "epoch": 1.3867363488916922, "grad_norm": 0.3425506353378296, "learning_rate": 9.071551898878471e-06, "loss": 0.4277, "step": 2565 }, { "epoch": 1.3872769868444765, "grad_norm": 0.35136839747428894, "learning_rate": 9.070455771149149e-06, "loss": 0.4338, "step": 2566 }, { "epoch": 1.3878176247972607, "grad_norm": 0.3452429473400116, "learning_rate": 9.069359063061624e-06, "loss": 0.4693, "step": 2567 }, { "epoch": 1.388358262750045, "grad_norm": 0.338131308555603, "learning_rate": 9.068261774772262e-06, "loss": 0.4126, "step": 2568 }, { "epoch": 1.3888989007028294, "grad_norm": 0.3617740571498871, "learning_rate": 9.067163906437513e-06, "loss": 0.4393, "step": 2569 }, { "epoch": 1.3894395386556138, "grad_norm": 0.3760451674461365, "learning_rate": 9.066065458213908e-06, "loss": 0.4787, "step": 2570 }, { "epoch": 1.389980176608398, "grad_norm": 0.35575684905052185, "learning_rate": 9.064966430258064e-06, "loss": 0.4009, "step": 2571 }, { "epoch": 1.3905208145611823, "grad_norm": 0.4492211639881134, "learning_rate": 9.063866822726675e-06, "loss": 0.4452, "step": 2572 }, { "epoch": 1.3910614525139664, "grad_norm": 0.31847888231277466, "learning_rate": 9.062766635776523e-06, "loss": 0.4393, "step": 2573 }, { "epoch": 1.3916020904667508, "grad_norm": 0.38668292760849, "learning_rate": 9.061665869564468e-06, "loss": 0.4882, "step": 2574 }, { "epoch": 1.3921427284195351, "grad_norm": 0.31565096974372864, "learning_rate": 9.06056452424746e-06, "loss": 0.3894, "step": 2575 }, { "epoch": 1.3926833663723193, "grad_norm": 0.37927380204200745, "learning_rate": 9.059462599982525e-06, "loss": 0.4499, "step": 2576 }, { "epoch": 1.3932240043251036, "grad_norm": 0.3331967890262604, "learning_rate": 9.058360096926771e-06, "loss": 0.4102, "step": 2577 }, { "epoch": 1.393764642277888, "grad_norm": 0.3800790011882782, "learning_rate": 9.057257015237394e-06, "loss": 0.4605, "step": 2578 }, { "epoch": 1.3943052802306721, "grad_norm": 0.3449414074420929, "learning_rate": 9.056153355071668e-06, "loss": 0.4421, "step": 2579 }, { "epoch": 1.3948459181834565, "grad_norm": 0.3734491765499115, "learning_rate": 9.055049116586951e-06, "loss": 0.4663, "step": 2580 }, { "epoch": 1.3953865561362409, "grad_norm": 0.4147307276725769, "learning_rate": 9.05394429994068e-06, "loss": 0.4177, "step": 2581 }, { "epoch": 1.395927194089025, "grad_norm": 0.3757738173007965, "learning_rate": 9.052838905290386e-06, "loss": 0.4443, "step": 2582 }, { "epoch": 1.3964678320418094, "grad_norm": 0.39692026376724243, "learning_rate": 9.051732932793667e-06, "loss": 0.4309, "step": 2583 }, { "epoch": 1.3970084699945935, "grad_norm": 0.41125962138175964, "learning_rate": 9.050626382608212e-06, "loss": 0.4539, "step": 2584 }, { "epoch": 1.3975491079473779, "grad_norm": 0.34175458550453186, "learning_rate": 9.049519254891793e-06, "loss": 0.4387, "step": 2585 }, { "epoch": 1.3980897459001622, "grad_norm": 0.38721558451652527, "learning_rate": 9.048411549802259e-06, "loss": 0.4768, "step": 2586 }, { "epoch": 1.3986303838529466, "grad_norm": 0.3092331886291504, "learning_rate": 9.047303267497547e-06, "loss": 0.397, "step": 2587 }, { "epoch": 1.3991710218057307, "grad_norm": 0.3423914611339569, "learning_rate": 9.046194408135673e-06, "loss": 0.4816, "step": 2588 }, { "epoch": 1.399711659758515, "grad_norm": 0.3521196246147156, "learning_rate": 9.045084971874738e-06, "loss": 0.4182, "step": 2589 }, { "epoch": 1.4002522977112992, "grad_norm": 0.3921590745449066, "learning_rate": 9.04397495887292e-06, "loss": 0.4861, "step": 2590 }, { "epoch": 1.4007929356640836, "grad_norm": 0.32359740138053894, "learning_rate": 9.042864369288487e-06, "loss": 0.438, "step": 2591 }, { "epoch": 1.401333573616868, "grad_norm": 0.3559328317642212, "learning_rate": 9.041753203279781e-06, "loss": 0.4198, "step": 2592 }, { "epoch": 1.4018742115696523, "grad_norm": 0.38676345348358154, "learning_rate": 9.040641461005232e-06, "loss": 0.4698, "step": 2593 }, { "epoch": 1.4024148495224364, "grad_norm": 0.3294946849346161, "learning_rate": 9.039529142623348e-06, "loss": 0.4192, "step": 2594 }, { "epoch": 1.4029554874752208, "grad_norm": 0.35132449865341187, "learning_rate": 9.038416248292725e-06, "loss": 0.425, "step": 2595 }, { "epoch": 1.403496125428005, "grad_norm": 0.3458995521068573, "learning_rate": 9.037302778172034e-06, "loss": 0.4588, "step": 2596 }, { "epoch": 1.4040367633807893, "grad_norm": 0.3966250419616699, "learning_rate": 9.036188732420035e-06, "loss": 0.4995, "step": 2597 }, { "epoch": 1.4045774013335737, "grad_norm": 0.32241538166999817, "learning_rate": 9.035074111195563e-06, "loss": 0.3888, "step": 2598 }, { "epoch": 1.405118039286358, "grad_norm": 0.3781411051750183, "learning_rate": 9.03395891465754e-06, "loss": 0.4392, "step": 2599 }, { "epoch": 1.4056586772391422, "grad_norm": 0.3311634957790375, "learning_rate": 9.03284314296497e-06, "loss": 0.4485, "step": 2600 }, { "epoch": 1.4061993151919265, "grad_norm": 0.3563523590564728, "learning_rate": 9.031726796276935e-06, "loss": 0.4227, "step": 2601 }, { "epoch": 1.4067399531447107, "grad_norm": 0.3249850869178772, "learning_rate": 9.030609874752604e-06, "loss": 0.4468, "step": 2602 }, { "epoch": 1.407280591097495, "grad_norm": 0.30806684494018555, "learning_rate": 9.029492378551228e-06, "loss": 0.4329, "step": 2603 }, { "epoch": 1.4078212290502794, "grad_norm": 0.38034096360206604, "learning_rate": 9.028374307832131e-06, "loss": 0.4332, "step": 2604 }, { "epoch": 1.4083618670030635, "grad_norm": 0.33460333943367004, "learning_rate": 9.02725566275473e-06, "loss": 0.4521, "step": 2605 }, { "epoch": 1.408902504955848, "grad_norm": 0.35097768902778625, "learning_rate": 9.02613644347852e-06, "loss": 0.4261, "step": 2606 }, { "epoch": 1.4094431429086323, "grad_norm": 0.3345774710178375, "learning_rate": 9.025016650163074e-06, "loss": 0.4488, "step": 2607 }, { "epoch": 1.4099837808614164, "grad_norm": 0.3553728759288788, "learning_rate": 9.023896282968052e-06, "loss": 0.4551, "step": 2608 }, { "epoch": 1.4105244188142008, "grad_norm": 0.34653764963150024, "learning_rate": 9.022775342053194e-06, "loss": 0.4303, "step": 2609 }, { "epoch": 1.4110650567669851, "grad_norm": 0.3573172688484192, "learning_rate": 9.021653827578322e-06, "loss": 0.4306, "step": 2610 }, { "epoch": 1.4116056947197693, "grad_norm": 0.43176814913749695, "learning_rate": 9.020531739703338e-06, "loss": 0.4513, "step": 2611 }, { "epoch": 1.4121463326725536, "grad_norm": 0.3422747850418091, "learning_rate": 9.01940907858823e-06, "loss": 0.4489, "step": 2612 }, { "epoch": 1.4126869706253378, "grad_norm": 0.37848156690597534, "learning_rate": 9.018285844393061e-06, "loss": 0.3838, "step": 2613 }, { "epoch": 1.4132276085781221, "grad_norm": 0.44722023606300354, "learning_rate": 9.017162037277983e-06, "loss": 0.4741, "step": 2614 }, { "epoch": 1.4137682465309065, "grad_norm": 0.38165798783302307, "learning_rate": 9.016037657403225e-06, "loss": 0.4159, "step": 2615 }, { "epoch": 1.4143088844836909, "grad_norm": 0.37461984157562256, "learning_rate": 9.0149127049291e-06, "loss": 0.4323, "step": 2616 }, { "epoch": 1.414849522436475, "grad_norm": 0.3761579692363739, "learning_rate": 9.013787180016e-06, "loss": 0.4729, "step": 2617 }, { "epoch": 1.4153901603892594, "grad_norm": 0.3706890046596527, "learning_rate": 9.012661082824404e-06, "loss": 0.4611, "step": 2618 }, { "epoch": 1.4159307983420435, "grad_norm": 0.33320996165275574, "learning_rate": 9.011534413514862e-06, "loss": 0.4077, "step": 2619 }, { "epoch": 1.4164714362948279, "grad_norm": 0.37726783752441406, "learning_rate": 9.01040717224802e-06, "loss": 0.408, "step": 2620 }, { "epoch": 1.4170120742476122, "grad_norm": 0.4023379683494568, "learning_rate": 9.009279359184594e-06, "loss": 0.4518, "step": 2621 }, { "epoch": 1.4175527122003966, "grad_norm": 0.3516872525215149, "learning_rate": 9.008150974485386e-06, "loss": 0.4082, "step": 2622 }, { "epoch": 1.4180933501531807, "grad_norm": 0.4246536195278168, "learning_rate": 9.007022018311277e-06, "loss": 0.4818, "step": 2623 }, { "epoch": 1.418633988105965, "grad_norm": 0.3128785490989685, "learning_rate": 9.005892490823237e-06, "loss": 0.4043, "step": 2624 }, { "epoch": 1.4191746260587492, "grad_norm": 0.3647494316101074, "learning_rate": 9.004762392182307e-06, "loss": 0.4162, "step": 2625 }, { "epoch": 1.4197152640115336, "grad_norm": 0.3745572865009308, "learning_rate": 9.003631722549617e-06, "loss": 0.4648, "step": 2626 }, { "epoch": 1.420255901964318, "grad_norm": 0.3839242458343506, "learning_rate": 9.002500482086377e-06, "loss": 0.42, "step": 2627 }, { "epoch": 1.4207965399171023, "grad_norm": 0.354693740606308, "learning_rate": 9.001368670953872e-06, "loss": 0.4153, "step": 2628 }, { "epoch": 1.4213371778698864, "grad_norm": 0.3716014325618744, "learning_rate": 9.000236289313479e-06, "loss": 0.4531, "step": 2629 }, { "epoch": 1.4218778158226708, "grad_norm": 0.3571845293045044, "learning_rate": 8.999103337326646e-06, "loss": 0.4221, "step": 2630 }, { "epoch": 1.422418453775455, "grad_norm": 0.3787524998188019, "learning_rate": 8.997969815154913e-06, "loss": 0.4424, "step": 2631 }, { "epoch": 1.4229590917282393, "grad_norm": 0.37358084321022034, "learning_rate": 8.99683572295989e-06, "loss": 0.436, "step": 2632 }, { "epoch": 1.4234997296810237, "grad_norm": 0.37591075897216797, "learning_rate": 8.995701060903279e-06, "loss": 0.4121, "step": 2633 }, { "epoch": 1.4240403676338078, "grad_norm": 0.3382313847541809, "learning_rate": 8.994565829146855e-06, "loss": 0.4449, "step": 2634 }, { "epoch": 1.4245810055865922, "grad_norm": 0.3377707600593567, "learning_rate": 8.993430027852476e-06, "loss": 0.4205, "step": 2635 }, { "epoch": 1.4251216435393765, "grad_norm": 0.34145236015319824, "learning_rate": 8.992293657182085e-06, "loss": 0.4755, "step": 2636 }, { "epoch": 1.4256622814921607, "grad_norm": 0.36096832156181335, "learning_rate": 8.991156717297702e-06, "loss": 0.4393, "step": 2637 }, { "epoch": 1.426202919444945, "grad_norm": 0.33001676201820374, "learning_rate": 8.990019208361432e-06, "loss": 0.4076, "step": 2638 }, { "epoch": 1.4267435573977294, "grad_norm": 0.357586145401001, "learning_rate": 8.988881130535459e-06, "loss": 0.4401, "step": 2639 }, { "epoch": 1.4272841953505135, "grad_norm": 0.37538856267929077, "learning_rate": 8.987742483982044e-06, "loss": 0.4397, "step": 2640 }, { "epoch": 1.427824833303298, "grad_norm": 0.4129542410373688, "learning_rate": 8.986603268863536e-06, "loss": 0.48, "step": 2641 }, { "epoch": 1.428365471256082, "grad_norm": 0.36001160740852356, "learning_rate": 8.985463485342363e-06, "loss": 0.4259, "step": 2642 }, { "epoch": 1.4289061092088664, "grad_norm": 0.3682215213775635, "learning_rate": 8.984323133581032e-06, "loss": 0.4293, "step": 2643 }, { "epoch": 1.4294467471616508, "grad_norm": 0.4558807909488678, "learning_rate": 8.983182213742135e-06, "loss": 0.4569, "step": 2644 }, { "epoch": 1.4299873851144351, "grad_norm": 0.34552398324012756, "learning_rate": 8.982040725988337e-06, "loss": 0.4642, "step": 2645 }, { "epoch": 1.4305280230672193, "grad_norm": 0.42189887166023254, "learning_rate": 8.980898670482392e-06, "loss": 0.441, "step": 2646 }, { "epoch": 1.4310686610200036, "grad_norm": 0.3556559085845947, "learning_rate": 8.979756047387134e-06, "loss": 0.3828, "step": 2647 }, { "epoch": 1.4316092989727878, "grad_norm": 0.36019167304039, "learning_rate": 8.978612856865474e-06, "loss": 0.4522, "step": 2648 }, { "epoch": 1.4321499369255721, "grad_norm": 0.34309664368629456, "learning_rate": 8.977469099080405e-06, "loss": 0.4102, "step": 2649 }, { "epoch": 1.4326905748783565, "grad_norm": 0.3519548773765564, "learning_rate": 8.976324774195005e-06, "loss": 0.4334, "step": 2650 }, { "epoch": 1.4332312128311409, "grad_norm": 0.3870348632335663, "learning_rate": 8.975179882372428e-06, "loss": 0.4601, "step": 2651 }, { "epoch": 1.433771850783925, "grad_norm": 0.32236963510513306, "learning_rate": 8.974034423775912e-06, "loss": 0.4295, "step": 2652 }, { "epoch": 1.4343124887367094, "grad_norm": 0.3496725559234619, "learning_rate": 8.972888398568772e-06, "loss": 0.405, "step": 2653 }, { "epoch": 1.4348531266894935, "grad_norm": 0.28755730390548706, "learning_rate": 8.971741806914409e-06, "loss": 0.3739, "step": 2654 }, { "epoch": 1.4353937646422779, "grad_norm": 0.34134241938591003, "learning_rate": 8.970594648976299e-06, "loss": 0.4223, "step": 2655 }, { "epoch": 1.4359344025950622, "grad_norm": 0.38335177302360535, "learning_rate": 8.969446924918001e-06, "loss": 0.4435, "step": 2656 }, { "epoch": 1.4364750405478466, "grad_norm": 0.3327115476131439, "learning_rate": 8.96829863490316e-06, "loss": 0.4378, "step": 2657 }, { "epoch": 1.4370156785006307, "grad_norm": 0.333930641412735, "learning_rate": 8.967149779095494e-06, "loss": 0.4356, "step": 2658 }, { "epoch": 1.437556316453415, "grad_norm": 0.34585675597190857, "learning_rate": 8.966000357658807e-06, "loss": 0.4388, "step": 2659 }, { "epoch": 1.4380969544061992, "grad_norm": 0.3636191487312317, "learning_rate": 8.964850370756978e-06, "loss": 0.406, "step": 2660 }, { "epoch": 1.4386375923589836, "grad_norm": 0.3512509763240814, "learning_rate": 8.963699818553972e-06, "loss": 0.4522, "step": 2661 }, { "epoch": 1.439178230311768, "grad_norm": 0.35486963391304016, "learning_rate": 8.962548701213834e-06, "loss": 0.4345, "step": 2662 }, { "epoch": 1.4397188682645523, "grad_norm": 0.3926379680633545, "learning_rate": 8.961397018900685e-06, "loss": 0.4312, "step": 2663 }, { "epoch": 1.4402595062173364, "grad_norm": 0.3730280101299286, "learning_rate": 8.960244771778732e-06, "loss": 0.4754, "step": 2664 }, { "epoch": 1.4408001441701208, "grad_norm": 0.34494009613990784, "learning_rate": 8.95909196001226e-06, "loss": 0.4289, "step": 2665 }, { "epoch": 1.441340782122905, "grad_norm": 0.373043030500412, "learning_rate": 8.957938583765636e-06, "loss": 0.4349, "step": 2666 }, { "epoch": 1.4418814200756893, "grad_norm": 0.35607317090034485, "learning_rate": 8.956784643203303e-06, "loss": 0.454, "step": 2667 }, { "epoch": 1.4424220580284737, "grad_norm": 0.33781003952026367, "learning_rate": 8.955630138489788e-06, "loss": 0.4377, "step": 2668 }, { "epoch": 1.4429626959812578, "grad_norm": 0.3684767186641693, "learning_rate": 8.954475069789703e-06, "loss": 0.4445, "step": 2669 }, { "epoch": 1.4435033339340422, "grad_norm": 0.34141626954078674, "learning_rate": 8.953319437267731e-06, "loss": 0.3888, "step": 2670 }, { "epoch": 1.4440439718868265, "grad_norm": 0.3129776418209076, "learning_rate": 8.952163241088642e-06, "loss": 0.4355, "step": 2671 }, { "epoch": 1.4445846098396107, "grad_norm": 0.36519676446914673, "learning_rate": 8.951006481417284e-06, "loss": 0.4312, "step": 2672 }, { "epoch": 1.445125247792395, "grad_norm": 0.40581244230270386, "learning_rate": 8.949849158418586e-06, "loss": 0.4849, "step": 2673 }, { "epoch": 1.4456658857451794, "grad_norm": 0.3111916184425354, "learning_rate": 8.948691272257555e-06, "loss": 0.3988, "step": 2674 }, { "epoch": 1.4462065236979635, "grad_norm": 0.4079495370388031, "learning_rate": 8.947532823099284e-06, "loss": 0.4706, "step": 2675 }, { "epoch": 1.446747161650748, "grad_norm": 0.3530767261981964, "learning_rate": 8.946373811108939e-06, "loss": 0.4219, "step": 2676 }, { "epoch": 1.447287799603532, "grad_norm": 0.3372650742530823, "learning_rate": 8.94521423645177e-06, "loss": 0.3823, "step": 2677 }, { "epoch": 1.4478284375563164, "grad_norm": 0.383865088224411, "learning_rate": 8.944054099293109e-06, "loss": 0.4292, "step": 2678 }, { "epoch": 1.4483690755091008, "grad_norm": 0.3813159167766571, "learning_rate": 8.942893399798367e-06, "loss": 0.4788, "step": 2679 }, { "epoch": 1.4489097134618851, "grad_norm": 0.3461150825023651, "learning_rate": 8.941732138133032e-06, "loss": 0.3961, "step": 2680 }, { "epoch": 1.4494503514146693, "grad_norm": 0.42276474833488464, "learning_rate": 8.940570314462676e-06, "loss": 0.4753, "step": 2681 }, { "epoch": 1.4499909893674536, "grad_norm": 0.3687373101711273, "learning_rate": 8.93940792895295e-06, "loss": 0.4402, "step": 2682 }, { "epoch": 1.4505316273202378, "grad_norm": 0.3514117896556854, "learning_rate": 8.938244981769581e-06, "loss": 0.4164, "step": 2683 }, { "epoch": 1.4510722652730221, "grad_norm": 0.3533627688884735, "learning_rate": 8.937081473078387e-06, "loss": 0.4901, "step": 2684 }, { "epoch": 1.4516129032258065, "grad_norm": 0.4426608383655548, "learning_rate": 8.935917403045251e-06, "loss": 0.466, "step": 2685 }, { "epoch": 1.4521535411785909, "grad_norm": 0.34184348583221436, "learning_rate": 8.93475277183615e-06, "loss": 0.4276, "step": 2686 }, { "epoch": 1.452694179131375, "grad_norm": 0.4027365446090698, "learning_rate": 8.933587579617134e-06, "loss": 0.4524, "step": 2687 }, { "epoch": 1.4532348170841594, "grad_norm": 0.34122905135154724, "learning_rate": 8.932421826554332e-06, "loss": 0.4221, "step": 2688 }, { "epoch": 1.4537754550369435, "grad_norm": 0.36274346709251404, "learning_rate": 8.931255512813954e-06, "loss": 0.4649, "step": 2689 }, { "epoch": 1.4543160929897279, "grad_norm": 0.3560095727443695, "learning_rate": 8.930088638562296e-06, "loss": 0.435, "step": 2690 }, { "epoch": 1.4548567309425122, "grad_norm": 0.31499168276786804, "learning_rate": 8.928921203965724e-06, "loss": 0.4129, "step": 2691 }, { "epoch": 1.4553973688952966, "grad_norm": 0.3829094469547272, "learning_rate": 8.927753209190691e-06, "loss": 0.4623, "step": 2692 }, { "epoch": 1.4559380068480807, "grad_norm": 0.32538989186286926, "learning_rate": 8.926584654403725e-06, "loss": 0.4188, "step": 2693 }, { "epoch": 1.456478644800865, "grad_norm": 0.3670380711555481, "learning_rate": 8.925415539771441e-06, "loss": 0.4482, "step": 2694 }, { "epoch": 1.4570192827536492, "grad_norm": 0.3919623792171478, "learning_rate": 8.924245865460523e-06, "loss": 0.4852, "step": 2695 }, { "epoch": 1.4575599207064336, "grad_norm": 0.33094435930252075, "learning_rate": 8.923075631637748e-06, "loss": 0.3942, "step": 2696 }, { "epoch": 1.458100558659218, "grad_norm": 0.3392255902290344, "learning_rate": 8.921904838469962e-06, "loss": 0.4479, "step": 2697 }, { "epoch": 1.458641196612002, "grad_norm": 0.3547809422016144, "learning_rate": 8.920733486124093e-06, "loss": 0.4303, "step": 2698 }, { "epoch": 1.4591818345647865, "grad_norm": 0.38857102394104004, "learning_rate": 8.919561574767154e-06, "loss": 0.4945, "step": 2699 }, { "epoch": 1.4597224725175708, "grad_norm": 0.2873074412345886, "learning_rate": 8.918389104566232e-06, "loss": 0.3641, "step": 2700 }, { "epoch": 1.460263110470355, "grad_norm": 0.40835830569267273, "learning_rate": 8.917216075688496e-06, "loss": 0.4893, "step": 2701 }, { "epoch": 1.4608037484231393, "grad_norm": 0.29825371503829956, "learning_rate": 8.916042488301195e-06, "loss": 0.427, "step": 2702 }, { "epoch": 1.4613443863759237, "grad_norm": 0.38785210251808167, "learning_rate": 8.914868342571655e-06, "loss": 0.4327, "step": 2703 }, { "epoch": 1.4618850243287078, "grad_norm": 0.32820212841033936, "learning_rate": 8.913693638667284e-06, "loss": 0.4663, "step": 2704 }, { "epoch": 1.4624256622814922, "grad_norm": 0.32327958941459656, "learning_rate": 8.912518376755572e-06, "loss": 0.4259, "step": 2705 }, { "epoch": 1.4629663002342763, "grad_norm": 0.3118366003036499, "learning_rate": 8.911342557004084e-06, "loss": 0.411, "step": 2706 }, { "epoch": 1.4635069381870607, "grad_norm": 0.32654812932014465, "learning_rate": 8.910166179580463e-06, "loss": 0.4306, "step": 2707 }, { "epoch": 1.464047576139845, "grad_norm": 0.3658846318721771, "learning_rate": 8.90898924465244e-06, "loss": 0.462, "step": 2708 }, { "epoch": 1.4645882140926294, "grad_norm": 0.3317575752735138, "learning_rate": 8.907811752387818e-06, "loss": 0.4846, "step": 2709 }, { "epoch": 1.4651288520454135, "grad_norm": 0.38094812631607056, "learning_rate": 8.906633702954482e-06, "loss": 0.4601, "step": 2710 }, { "epoch": 1.465669489998198, "grad_norm": 0.35576844215393066, "learning_rate": 8.905455096520394e-06, "loss": 0.4251, "step": 2711 }, { "epoch": 1.466210127950982, "grad_norm": 0.31583791971206665, "learning_rate": 8.9042759332536e-06, "loss": 0.4207, "step": 2712 }, { "epoch": 1.4667507659037664, "grad_norm": 0.4097664952278137, "learning_rate": 8.903096213322222e-06, "loss": 0.4022, "step": 2713 }, { "epoch": 1.4672914038565508, "grad_norm": 0.34725257754325867, "learning_rate": 8.901915936894462e-06, "loss": 0.4665, "step": 2714 }, { "epoch": 1.4678320418093351, "grad_norm": 0.389870285987854, "learning_rate": 8.900735104138605e-06, "loss": 0.4675, "step": 2715 }, { "epoch": 1.4683726797621193, "grad_norm": 0.3505401909351349, "learning_rate": 8.899553715223008e-06, "loss": 0.4101, "step": 2716 }, { "epoch": 1.4689133177149036, "grad_norm": 0.44681039452552795, "learning_rate": 8.898371770316113e-06, "loss": 0.4471, "step": 2717 }, { "epoch": 1.4694539556676878, "grad_norm": 0.4092337191104889, "learning_rate": 8.89718926958644e-06, "loss": 0.4758, "step": 2718 }, { "epoch": 1.4699945936204721, "grad_norm": 0.3586428761482239, "learning_rate": 8.896006213202584e-06, "loss": 0.4015, "step": 2719 }, { "epoch": 1.4705352315732565, "grad_norm": 0.40717989206314087, "learning_rate": 8.894822601333228e-06, "loss": 0.4515, "step": 2720 }, { "epoch": 1.4710758695260409, "grad_norm": 0.3298538327217102, "learning_rate": 8.893638434147126e-06, "loss": 0.4324, "step": 2721 }, { "epoch": 1.471616507478825, "grad_norm": 0.3751235902309418, "learning_rate": 8.892453711813119e-06, "loss": 0.4405, "step": 2722 }, { "epoch": 1.4721571454316094, "grad_norm": 0.4075682759284973, "learning_rate": 8.891268434500116e-06, "loss": 0.4411, "step": 2723 }, { "epoch": 1.4726977833843935, "grad_norm": 0.35277068614959717, "learning_rate": 8.890082602377115e-06, "loss": 0.4457, "step": 2724 }, { "epoch": 1.4732384213371779, "grad_norm": 0.36235764622688293, "learning_rate": 8.888896215613192e-06, "loss": 0.4229, "step": 2725 }, { "epoch": 1.4737790592899622, "grad_norm": 0.36335426568984985, "learning_rate": 8.887709274377496e-06, "loss": 0.4636, "step": 2726 }, { "epoch": 1.4743196972427464, "grad_norm": 0.340276300907135, "learning_rate": 8.88652177883926e-06, "loss": 0.4184, "step": 2727 }, { "epoch": 1.4748603351955307, "grad_norm": 0.3393535017967224, "learning_rate": 8.885333729167797e-06, "loss": 0.4512, "step": 2728 }, { "epoch": 1.475400973148315, "grad_norm": 0.38643527030944824, "learning_rate": 8.884145125532494e-06, "loss": 0.3916, "step": 2729 }, { "epoch": 1.4759416111010992, "grad_norm": 0.3693040609359741, "learning_rate": 8.882955968102822e-06, "loss": 0.4452, "step": 2730 }, { "epoch": 1.4764822490538836, "grad_norm": 0.35152468085289, "learning_rate": 8.881766257048328e-06, "loss": 0.45, "step": 2731 }, { "epoch": 1.477022887006668, "grad_norm": 0.35957586765289307, "learning_rate": 8.88057599253864e-06, "loss": 0.4246, "step": 2732 }, { "epoch": 1.477563524959452, "grad_norm": 0.3535710871219635, "learning_rate": 8.879385174743462e-06, "loss": 0.4292, "step": 2733 }, { "epoch": 1.4781041629122365, "grad_norm": 0.34068745374679565, "learning_rate": 8.87819380383258e-06, "loss": 0.4507, "step": 2734 }, { "epoch": 1.4786448008650206, "grad_norm": 0.37489134073257446, "learning_rate": 8.877001879975857e-06, "loss": 0.4613, "step": 2735 }, { "epoch": 1.479185438817805, "grad_norm": 0.29304268956184387, "learning_rate": 8.875809403343236e-06, "loss": 0.3901, "step": 2736 }, { "epoch": 1.4797260767705893, "grad_norm": 0.3183952569961548, "learning_rate": 8.874616374104736e-06, "loss": 0.4281, "step": 2737 }, { "epoch": 1.4802667147233737, "grad_norm": 0.33998361229896545, "learning_rate": 8.87342279243046e-06, "loss": 0.4294, "step": 2738 }, { "epoch": 1.4808073526761578, "grad_norm": 0.34192410111427307, "learning_rate": 8.872228658490585e-06, "loss": 0.4864, "step": 2739 }, { "epoch": 1.4813479906289422, "grad_norm": 0.3485790491104126, "learning_rate": 8.87103397245537e-06, "loss": 0.4256, "step": 2740 }, { "epoch": 1.4818886285817263, "grad_norm": 0.33720722794532776, "learning_rate": 8.869838734495147e-06, "loss": 0.4147, "step": 2741 }, { "epoch": 1.4824292665345107, "grad_norm": 0.35417404770851135, "learning_rate": 8.868642944780334e-06, "loss": 0.4734, "step": 2742 }, { "epoch": 1.482969904487295, "grad_norm": 0.38683000206947327, "learning_rate": 8.867446603481427e-06, "loss": 0.4333, "step": 2743 }, { "epoch": 1.4835105424400794, "grad_norm": 0.37127575278282166, "learning_rate": 8.866249710768992e-06, "loss": 0.4378, "step": 2744 }, { "epoch": 1.4840511803928635, "grad_norm": 0.3404901921749115, "learning_rate": 8.865052266813686e-06, "loss": 0.4425, "step": 2745 }, { "epoch": 1.484591818345648, "grad_norm": 0.3562512695789337, "learning_rate": 8.863854271786234e-06, "loss": 0.4009, "step": 2746 }, { "epoch": 1.485132456298432, "grad_norm": 0.3635586202144623, "learning_rate": 8.862655725857445e-06, "loss": 0.4643, "step": 2747 }, { "epoch": 1.4856730942512164, "grad_norm": 0.3443708121776581, "learning_rate": 8.861456629198209e-06, "loss": 0.4301, "step": 2748 }, { "epoch": 1.4862137322040008, "grad_norm": 0.34321269392967224, "learning_rate": 8.860256981979485e-06, "loss": 0.418, "step": 2749 }, { "epoch": 1.4867543701567851, "grad_norm": 0.38231542706489563, "learning_rate": 8.85905678437232e-06, "loss": 0.4622, "step": 2750 }, { "epoch": 1.4872950081095693, "grad_norm": 0.3586867153644562, "learning_rate": 8.857856036547837e-06, "loss": 0.4336, "step": 2751 }, { "epoch": 1.4878356460623536, "grad_norm": 0.41343262791633606, "learning_rate": 8.856654738677234e-06, "loss": 0.4435, "step": 2752 }, { "epoch": 1.4883762840151378, "grad_norm": 0.37144145369529724, "learning_rate": 8.85545289093179e-06, "loss": 0.4096, "step": 2753 }, { "epoch": 1.4889169219679221, "grad_norm": 0.4296935796737671, "learning_rate": 8.854250493482865e-06, "loss": 0.4716, "step": 2754 }, { "epoch": 1.4894575599207065, "grad_norm": 0.3688115179538727, "learning_rate": 8.853047546501893e-06, "loss": 0.4137, "step": 2755 }, { "epoch": 1.4899981978734906, "grad_norm": 0.3428502082824707, "learning_rate": 8.851844050160387e-06, "loss": 0.4605, "step": 2756 }, { "epoch": 1.490538835826275, "grad_norm": 0.3452523350715637, "learning_rate": 8.85064000462994e-06, "loss": 0.4294, "step": 2757 }, { "epoch": 1.4910794737790594, "grad_norm": 0.3571106791496277, "learning_rate": 8.849435410082224e-06, "loss": 0.4651, "step": 2758 }, { "epoch": 1.4916201117318435, "grad_norm": 0.40978145599365234, "learning_rate": 8.848230266688984e-06, "loss": 0.5045, "step": 2759 }, { "epoch": 1.4921607496846279, "grad_norm": 0.33386868238449097, "learning_rate": 8.847024574622051e-06, "loss": 0.3977, "step": 2760 }, { "epoch": 1.4927013876374122, "grad_norm": 0.32085907459259033, "learning_rate": 8.845818334053332e-06, "loss": 0.4055, "step": 2761 }, { "epoch": 1.4932420255901964, "grad_norm": 0.35079190135002136, "learning_rate": 8.844611545154804e-06, "loss": 0.3969, "step": 2762 }, { "epoch": 1.4937826635429807, "grad_norm": 0.3813706934452057, "learning_rate": 8.843404208098536e-06, "loss": 0.4684, "step": 2763 }, { "epoch": 1.4943233014957649, "grad_norm": 0.3383070230484009, "learning_rate": 8.842196323056662e-06, "loss": 0.4306, "step": 2764 }, { "epoch": 1.4948639394485492, "grad_norm": 0.37092912197113037, "learning_rate": 8.840987890201404e-06, "loss": 0.4309, "step": 2765 }, { "epoch": 1.4954045774013336, "grad_norm": 0.3601703941822052, "learning_rate": 8.839778909705055e-06, "loss": 0.484, "step": 2766 }, { "epoch": 1.495945215354118, "grad_norm": 0.36565494537353516, "learning_rate": 8.838569381739993e-06, "loss": 0.4247, "step": 2767 }, { "epoch": 1.496485853306902, "grad_norm": 0.37783923745155334, "learning_rate": 8.837359306478667e-06, "loss": 0.4507, "step": 2768 }, { "epoch": 1.4970264912596865, "grad_norm": 0.3881077170372009, "learning_rate": 8.83614868409361e-06, "loss": 0.4566, "step": 2769 }, { "epoch": 1.4975671292124706, "grad_norm": 0.33153507113456726, "learning_rate": 8.834937514757428e-06, "loss": 0.4205, "step": 2770 }, { "epoch": 1.498107767165255, "grad_norm": 0.3849857449531555, "learning_rate": 8.833725798642809e-06, "loss": 0.4349, "step": 2771 }, { "epoch": 1.4986484051180393, "grad_norm": 0.37943586707115173, "learning_rate": 8.832513535922516e-06, "loss": 0.4195, "step": 2772 }, { "epoch": 1.4991890430708237, "grad_norm": 0.3738144338130951, "learning_rate": 8.831300726769391e-06, "loss": 0.4469, "step": 2773 }, { "epoch": 1.4997296810236078, "grad_norm": 0.3525858521461487, "learning_rate": 8.830087371356356e-06, "loss": 0.3896, "step": 2774 }, { "epoch": 1.5002703189763922, "grad_norm": 0.33611002564430237, "learning_rate": 8.828873469856408e-06, "loss": 0.4355, "step": 2775 }, { "epoch": 1.5008109569291763, "grad_norm": 0.36726000905036926, "learning_rate": 8.827659022442622e-06, "loss": 0.466, "step": 2776 }, { "epoch": 1.5013515948819607, "grad_norm": 0.40099841356277466, "learning_rate": 8.826444029288154e-06, "loss": 0.4297, "step": 2777 }, { "epoch": 1.501892232834745, "grad_norm": 0.32601767778396606, "learning_rate": 8.825228490566233e-06, "loss": 0.4192, "step": 2778 }, { "epoch": 1.5024328707875294, "grad_norm": 0.3288547098636627, "learning_rate": 8.824012406450171e-06, "loss": 0.4107, "step": 2779 }, { "epoch": 1.5029735087403135, "grad_norm": 0.38603484630584717, "learning_rate": 8.822795777113352e-06, "loss": 0.4437, "step": 2780 }, { "epoch": 1.503514146693098, "grad_norm": 0.32590451836586, "learning_rate": 8.821578602729242e-06, "loss": 0.4262, "step": 2781 }, { "epoch": 1.504054784645882, "grad_norm": 0.3509955108165741, "learning_rate": 8.820360883471383e-06, "loss": 0.4291, "step": 2782 }, { "epoch": 1.5045954225986664, "grad_norm": 0.3051590919494629, "learning_rate": 8.819142619513399e-06, "loss": 0.4262, "step": 2783 }, { "epoch": 1.5051360605514508, "grad_norm": 0.3273349404335022, "learning_rate": 8.817923811028984e-06, "loss": 0.4559, "step": 2784 }, { "epoch": 1.5056766985042351, "grad_norm": 0.3385399878025055, "learning_rate": 8.816704458191913e-06, "loss": 0.4628, "step": 2785 }, { "epoch": 1.5062173364570193, "grad_norm": 0.341138631105423, "learning_rate": 8.815484561176041e-06, "loss": 0.4523, "step": 2786 }, { "epoch": 1.5067579744098034, "grad_norm": 0.2973570227622986, "learning_rate": 8.814264120155297e-06, "loss": 0.4419, "step": 2787 }, { "epoch": 1.5072986123625878, "grad_norm": 0.3023761212825775, "learning_rate": 8.813043135303692e-06, "loss": 0.4082, "step": 2788 }, { "epoch": 1.5078392503153721, "grad_norm": 0.35281112790107727, "learning_rate": 8.81182160679531e-06, "loss": 0.4583, "step": 2789 }, { "epoch": 1.5083798882681565, "grad_norm": 0.3194134533405304, "learning_rate": 8.810599534804315e-06, "loss": 0.4434, "step": 2790 }, { "epoch": 1.5089205262209409, "grad_norm": 0.353096067905426, "learning_rate": 8.809376919504946e-06, "loss": 0.4103, "step": 2791 }, { "epoch": 1.509461164173725, "grad_norm": 0.3596133887767792, "learning_rate": 8.808153761071525e-06, "loss": 0.468, "step": 2792 }, { "epoch": 1.5100018021265091, "grad_norm": 0.3381783664226532, "learning_rate": 8.806930059678442e-06, "loss": 0.454, "step": 2793 }, { "epoch": 1.5105424400792935, "grad_norm": 0.37229710817337036, "learning_rate": 8.805705815500177e-06, "loss": 0.4548, "step": 2794 }, { "epoch": 1.5110830780320779, "grad_norm": 0.35773658752441406, "learning_rate": 8.804481028711274e-06, "loss": 0.4905, "step": 2795 }, { "epoch": 1.5116237159848622, "grad_norm": 0.3648654818534851, "learning_rate": 8.803255699486367e-06, "loss": 0.4349, "step": 2796 }, { "epoch": 1.5121643539376466, "grad_norm": 0.3511752188205719, "learning_rate": 8.802029828000157e-06, "loss": 0.4158, "step": 2797 }, { "epoch": 1.5127049918904307, "grad_norm": 0.3634643852710724, "learning_rate": 8.800803414427426e-06, "loss": 0.4287, "step": 2798 }, { "epoch": 1.5132456298432149, "grad_norm": 0.364849716424942, "learning_rate": 8.799576458943036e-06, "loss": 0.4453, "step": 2799 }, { "epoch": 1.5137862677959992, "grad_norm": 0.3459816873073578, "learning_rate": 8.798348961721925e-06, "loss": 0.4362, "step": 2800 }, { "epoch": 1.5143269057487836, "grad_norm": 0.37838080525398254, "learning_rate": 8.797120922939104e-06, "loss": 0.4421, "step": 2801 }, { "epoch": 1.514867543701568, "grad_norm": 0.36956077814102173, "learning_rate": 8.795892342769668e-06, "loss": 0.4374, "step": 2802 }, { "epoch": 1.515408181654352, "grad_norm": 0.3102215528488159, "learning_rate": 8.794663221388782e-06, "loss": 0.4251, "step": 2803 }, { "epoch": 1.5159488196071365, "grad_norm": 0.3910534381866455, "learning_rate": 8.793433558971695e-06, "loss": 0.4706, "step": 2804 }, { "epoch": 1.5164894575599206, "grad_norm": 0.3742954730987549, "learning_rate": 8.792203355693731e-06, "loss": 0.4495, "step": 2805 }, { "epoch": 1.517030095512705, "grad_norm": 0.32030007243156433, "learning_rate": 8.790972611730286e-06, "loss": 0.4461, "step": 2806 }, { "epoch": 1.5175707334654893, "grad_norm": 0.4075528383255005, "learning_rate": 8.789741327256841e-06, "loss": 0.4306, "step": 2807 }, { "epoch": 1.5181113714182737, "grad_norm": 0.38579171895980835, "learning_rate": 8.788509502448948e-06, "loss": 0.4444, "step": 2808 }, { "epoch": 1.5186520093710578, "grad_norm": 0.3903457820415497, "learning_rate": 8.78727713748224e-06, "loss": 0.4682, "step": 2809 }, { "epoch": 1.5191926473238422, "grad_norm": 0.44318467378616333, "learning_rate": 8.786044232532423e-06, "loss": 0.4599, "step": 2810 }, { "epoch": 1.5197332852766263, "grad_norm": 0.3772420287132263, "learning_rate": 8.784810787775285e-06, "loss": 0.4243, "step": 2811 }, { "epoch": 1.5202739232294107, "grad_norm": 0.37346014380455017, "learning_rate": 8.783576803386687e-06, "loss": 0.4687, "step": 2812 }, { "epoch": 1.520814561182195, "grad_norm": 0.3519136607646942, "learning_rate": 8.782342279542569e-06, "loss": 0.4046, "step": 2813 }, { "epoch": 1.5213551991349794, "grad_norm": 0.40191009640693665, "learning_rate": 8.781107216418945e-06, "loss": 0.4479, "step": 2814 }, { "epoch": 1.5218958370877635, "grad_norm": 0.36206626892089844, "learning_rate": 8.77987161419191e-06, "loss": 0.4261, "step": 2815 }, { "epoch": 1.5224364750405477, "grad_norm": 0.36779820919036865, "learning_rate": 8.778635473037635e-06, "loss": 0.4459, "step": 2816 }, { "epoch": 1.522977112993332, "grad_norm": 0.3682043254375458, "learning_rate": 8.777398793132364e-06, "loss": 0.4385, "step": 2817 }, { "epoch": 1.5235177509461164, "grad_norm": 0.3300313949584961, "learning_rate": 8.776161574652423e-06, "loss": 0.4134, "step": 2818 }, { "epoch": 1.5240583888989008, "grad_norm": 0.36796510219573975, "learning_rate": 8.774923817774211e-06, "loss": 0.4872, "step": 2819 }, { "epoch": 1.5245990268516851, "grad_norm": 0.37004774808883667, "learning_rate": 8.773685522674205e-06, "loss": 0.4239, "step": 2820 }, { "epoch": 1.5251396648044693, "grad_norm": 0.3955453932285309, "learning_rate": 8.77244668952896e-06, "loss": 0.4465, "step": 2821 }, { "epoch": 1.5256803027572534, "grad_norm": 0.364418089389801, "learning_rate": 8.771207318515104e-06, "loss": 0.4375, "step": 2822 }, { "epoch": 1.5262209407100378, "grad_norm": 0.46262267231941223, "learning_rate": 8.769967409809348e-06, "loss": 0.4684, "step": 2823 }, { "epoch": 1.5267615786628221, "grad_norm": 0.36952295899391174, "learning_rate": 8.768726963588475e-06, "loss": 0.4226, "step": 2824 }, { "epoch": 1.5273022166156065, "grad_norm": 0.38018321990966797, "learning_rate": 8.767485980029342e-06, "loss": 0.4038, "step": 2825 }, { "epoch": 1.5278428545683909, "grad_norm": 0.37219712138175964, "learning_rate": 8.76624445930889e-06, "loss": 0.4761, "step": 2826 }, { "epoch": 1.528383492521175, "grad_norm": 0.38064977526664734, "learning_rate": 8.765002401604133e-06, "loss": 0.4146, "step": 2827 }, { "epoch": 1.5289241304739591, "grad_norm": 0.40907156467437744, "learning_rate": 8.763759807092157e-06, "loss": 0.4225, "step": 2828 }, { "epoch": 1.5294647684267435, "grad_norm": 0.31351208686828613, "learning_rate": 8.762516675950134e-06, "loss": 0.4277, "step": 2829 }, { "epoch": 1.5300054063795279, "grad_norm": 0.3757653832435608, "learning_rate": 8.761273008355306e-06, "loss": 0.4729, "step": 2830 }, { "epoch": 1.5305460443323122, "grad_norm": 0.38561713695526123, "learning_rate": 8.76002880448499e-06, "loss": 0.4109, "step": 2831 }, { "epoch": 1.5310866822850964, "grad_norm": 0.32167428731918335, "learning_rate": 8.758784064516585e-06, "loss": 0.4125, "step": 2832 }, { "epoch": 1.5316273202378807, "grad_norm": 0.3748205006122589, "learning_rate": 8.757538788627563e-06, "loss": 0.4633, "step": 2833 }, { "epoch": 1.5321679581906649, "grad_norm": 0.3118850588798523, "learning_rate": 8.756292976995475e-06, "loss": 0.3965, "step": 2834 }, { "epoch": 1.5327085961434492, "grad_norm": 0.3302631378173828, "learning_rate": 8.755046629797944e-06, "loss": 0.4082, "step": 2835 }, { "epoch": 1.5332492340962336, "grad_norm": 0.34935998916625977, "learning_rate": 8.753799747212672e-06, "loss": 0.4552, "step": 2836 }, { "epoch": 1.533789872049018, "grad_norm": 0.3248444199562073, "learning_rate": 8.752552329417439e-06, "loss": 0.4346, "step": 2837 }, { "epoch": 1.534330510001802, "grad_norm": 0.33430078625679016, "learning_rate": 8.7513043765901e-06, "loss": 0.4498, "step": 2838 }, { "epoch": 1.5348711479545865, "grad_norm": 0.31254705786705017, "learning_rate": 8.750055888908582e-06, "loss": 0.3941, "step": 2839 }, { "epoch": 1.5354117859073706, "grad_norm": 0.3183569312095642, "learning_rate": 8.748806866550895e-06, "loss": 0.4209, "step": 2840 }, { "epoch": 1.535952423860155, "grad_norm": 0.3188185393810272, "learning_rate": 8.747557309695123e-06, "loss": 0.4231, "step": 2841 }, { "epoch": 1.5364930618129393, "grad_norm": 0.37554019689559937, "learning_rate": 8.746307218519424e-06, "loss": 0.4536, "step": 2842 }, { "epoch": 1.5370336997657237, "grad_norm": 0.3110560178756714, "learning_rate": 8.745056593202033e-06, "loss": 0.4261, "step": 2843 }, { "epoch": 1.5375743377185078, "grad_norm": 0.3565128743648529, "learning_rate": 8.743805433921265e-06, "loss": 0.4492, "step": 2844 }, { "epoch": 1.538114975671292, "grad_norm": 0.32924672961235046, "learning_rate": 8.742553740855507e-06, "loss": 0.437, "step": 2845 }, { "epoch": 1.5386556136240763, "grad_norm": 0.310278058052063, "learning_rate": 8.74130151418322e-06, "loss": 0.3881, "step": 2846 }, { "epoch": 1.5391962515768607, "grad_norm": 0.33028700947761536, "learning_rate": 8.740048754082949e-06, "loss": 0.4407, "step": 2847 }, { "epoch": 1.539736889529645, "grad_norm": 0.32446345686912537, "learning_rate": 8.738795460733305e-06, "loss": 0.4159, "step": 2848 }, { "epoch": 1.5402775274824294, "grad_norm": 0.35544806718826294, "learning_rate": 8.737541634312985e-06, "loss": 0.475, "step": 2849 }, { "epoch": 1.5408181654352135, "grad_norm": 0.3118836283683777, "learning_rate": 8.736287275000755e-06, "loss": 0.3991, "step": 2850 }, { "epoch": 1.5413588033879977, "grad_norm": 0.33479005098342896, "learning_rate": 8.735032382975459e-06, "loss": 0.4306, "step": 2851 }, { "epoch": 1.541899441340782, "grad_norm": 0.3764827251434326, "learning_rate": 8.733776958416018e-06, "loss": 0.4292, "step": 2852 }, { "epoch": 1.5424400792935664, "grad_norm": 0.35498976707458496, "learning_rate": 8.732521001501428e-06, "loss": 0.4456, "step": 2853 }, { "epoch": 1.5429807172463508, "grad_norm": 0.3415829539299011, "learning_rate": 8.731264512410762e-06, "loss": 0.4211, "step": 2854 }, { "epoch": 1.5435213551991351, "grad_norm": 0.36124956607818604, "learning_rate": 8.730007491323167e-06, "loss": 0.4473, "step": 2855 }, { "epoch": 1.5440619931519193, "grad_norm": 0.3353195786476135, "learning_rate": 8.728749938417867e-06, "loss": 0.4127, "step": 2856 }, { "epoch": 1.5446026311047034, "grad_norm": 0.34685778617858887, "learning_rate": 8.727491853874159e-06, "loss": 0.4386, "step": 2857 }, { "epoch": 1.5451432690574878, "grad_norm": 0.3679361343383789, "learning_rate": 8.726233237871424e-06, "loss": 0.4551, "step": 2858 }, { "epoch": 1.5456839070102721, "grad_norm": 0.3218771815299988, "learning_rate": 8.724974090589107e-06, "loss": 0.4331, "step": 2859 }, { "epoch": 1.5462245449630565, "grad_norm": 0.3502300977706909, "learning_rate": 8.723714412206741e-06, "loss": 0.4591, "step": 2860 }, { "epoch": 1.5467651829158406, "grad_norm": 0.3705553710460663, "learning_rate": 8.722454202903923e-06, "loss": 0.4244, "step": 2861 }, { "epoch": 1.547305820868625, "grad_norm": 0.33638572692871094, "learning_rate": 8.721193462860335e-06, "loss": 0.4398, "step": 2862 }, { "epoch": 1.5478464588214091, "grad_norm": 0.3592517375946045, "learning_rate": 8.719932192255732e-06, "loss": 0.4252, "step": 2863 }, { "epoch": 1.5483870967741935, "grad_norm": 0.3698059320449829, "learning_rate": 8.718670391269939e-06, "loss": 0.4756, "step": 2864 }, { "epoch": 1.5489277347269779, "grad_norm": 0.3670055568218231, "learning_rate": 8.717408060082865e-06, "loss": 0.4137, "step": 2865 }, { "epoch": 1.5494683726797622, "grad_norm": 0.36520129442214966, "learning_rate": 8.71614519887449e-06, "loss": 0.4402, "step": 2866 }, { "epoch": 1.5500090106325464, "grad_norm": 0.3617687523365021, "learning_rate": 8.71488180782487e-06, "loss": 0.4098, "step": 2867 }, { "epoch": 1.5505496485853307, "grad_norm": 0.3548413813114166, "learning_rate": 8.713617887114137e-06, "loss": 0.4374, "step": 2868 }, { "epoch": 1.5510902865381149, "grad_norm": 0.3364056348800659, "learning_rate": 8.712353436922501e-06, "loss": 0.4491, "step": 2869 }, { "epoch": 1.5516309244908992, "grad_norm": 0.3399386405944824, "learning_rate": 8.711088457430239e-06, "loss": 0.4152, "step": 2870 }, { "epoch": 1.5521715624436836, "grad_norm": 0.3334507346153259, "learning_rate": 8.709822948817716e-06, "loss": 0.4189, "step": 2871 }, { "epoch": 1.552712200396468, "grad_norm": 0.32251378893852234, "learning_rate": 8.708556911265363e-06, "loss": 0.45, "step": 2872 }, { "epoch": 1.553252838349252, "grad_norm": 0.32779017090797424, "learning_rate": 8.70729034495369e-06, "loss": 0.4086, "step": 2873 }, { "epoch": 1.5537934763020365, "grad_norm": 0.3283478021621704, "learning_rate": 8.70602325006328e-06, "loss": 0.4603, "step": 2874 }, { "epoch": 1.5543341142548206, "grad_norm": 0.29777684807777405, "learning_rate": 8.704755626774796e-06, "loss": 0.4148, "step": 2875 }, { "epoch": 1.554874752207605, "grad_norm": 0.3250730335712433, "learning_rate": 8.703487475268972e-06, "loss": 0.4264, "step": 2876 }, { "epoch": 1.5554153901603893, "grad_norm": 0.3259025514125824, "learning_rate": 8.702218795726619e-06, "loss": 0.4506, "step": 2877 }, { "epoch": 1.5559560281131737, "grad_norm": 0.30676570534706116, "learning_rate": 8.70094958832862e-06, "loss": 0.4403, "step": 2878 }, { "epoch": 1.5564966660659578, "grad_norm": 0.30886489152908325, "learning_rate": 8.69967985325594e-06, "loss": 0.4271, "step": 2879 }, { "epoch": 1.557037304018742, "grad_norm": 0.30409660935401917, "learning_rate": 8.698409590689616e-06, "loss": 0.4394, "step": 2880 }, { "epoch": 1.5575779419715263, "grad_norm": 0.30880311131477356, "learning_rate": 8.697138800810756e-06, "loss": 0.4429, "step": 2881 }, { "epoch": 1.5581185799243107, "grad_norm": 0.3233693540096283, "learning_rate": 8.695867483800551e-06, "loss": 0.4363, "step": 2882 }, { "epoch": 1.558659217877095, "grad_norm": 0.3493873178958893, "learning_rate": 8.69459563984026e-06, "loss": 0.4717, "step": 2883 }, { "epoch": 1.5591998558298794, "grad_norm": 0.3600580096244812, "learning_rate": 8.693323269111222e-06, "loss": 0.4428, "step": 2884 }, { "epoch": 1.5597404937826636, "grad_norm": 0.35579100251197815, "learning_rate": 8.692050371794849e-06, "loss": 0.4299, "step": 2885 }, { "epoch": 1.5602811317354477, "grad_norm": 0.2761000990867615, "learning_rate": 8.690776948072629e-06, "loss": 0.3494, "step": 2886 }, { "epoch": 1.560821769688232, "grad_norm": 0.39794984459877014, "learning_rate": 8.689502998126121e-06, "loss": 0.4744, "step": 2887 }, { "epoch": 1.5613624076410164, "grad_norm": 0.3184354603290558, "learning_rate": 8.688228522136966e-06, "loss": 0.4155, "step": 2888 }, { "epoch": 1.5619030455938008, "grad_norm": 0.33132901787757874, "learning_rate": 8.686953520286876e-06, "loss": 0.4332, "step": 2889 }, { "epoch": 1.562443683546585, "grad_norm": 0.4157227575778961, "learning_rate": 8.685677992757637e-06, "loss": 0.4832, "step": 2890 }, { "epoch": 1.5629843214993693, "grad_norm": 0.3433517813682556, "learning_rate": 8.684401939731114e-06, "loss": 0.4207, "step": 2891 }, { "epoch": 1.5635249594521534, "grad_norm": 0.3725986182689667, "learning_rate": 8.683125361389241e-06, "loss": 0.4232, "step": 2892 }, { "epoch": 1.5640655974049378, "grad_norm": 0.34945711493492126, "learning_rate": 8.681848257914031e-06, "loss": 0.4275, "step": 2893 }, { "epoch": 1.5646062353577221, "grad_norm": 0.35210081934928894, "learning_rate": 8.680570629487575e-06, "loss": 0.4357, "step": 2894 }, { "epoch": 1.5651468733105065, "grad_norm": 0.39097723364830017, "learning_rate": 8.67929247629203e-06, "loss": 0.468, "step": 2895 }, { "epoch": 1.5656875112632906, "grad_norm": 0.4142407774925232, "learning_rate": 8.678013798509636e-06, "loss": 0.4771, "step": 2896 }, { "epoch": 1.566228149216075, "grad_norm": 0.33647021651268005, "learning_rate": 8.676734596322702e-06, "loss": 0.4007, "step": 2897 }, { "epoch": 1.5667687871688591, "grad_norm": 0.3727369010448456, "learning_rate": 8.675454869913616e-06, "loss": 0.4262, "step": 2898 }, { "epoch": 1.5673094251216435, "grad_norm": 0.42712923884391785, "learning_rate": 8.674174619464838e-06, "loss": 0.4331, "step": 2899 }, { "epoch": 1.5678500630744279, "grad_norm": 0.34135958552360535, "learning_rate": 8.672893845158908e-06, "loss": 0.4467, "step": 2900 }, { "epoch": 1.5683907010272122, "grad_norm": 0.3845239579677582, "learning_rate": 8.671612547178428e-06, "loss": 0.4315, "step": 2901 }, { "epoch": 1.5689313389799964, "grad_norm": 0.3987267315387726, "learning_rate": 8.67033072570609e-06, "loss": 0.4823, "step": 2902 }, { "epoch": 1.5694719769327807, "grad_norm": 0.2945195436477661, "learning_rate": 8.669048380924654e-06, "loss": 0.4257, "step": 2903 }, { "epoch": 1.5700126148855649, "grad_norm": 0.38530126214027405, "learning_rate": 8.667765513016949e-06, "loss": 0.4269, "step": 2904 }, { "epoch": 1.5705532528383492, "grad_norm": 0.3750806450843811, "learning_rate": 8.66648212216589e-06, "loss": 0.481, "step": 2905 }, { "epoch": 1.5710938907911336, "grad_norm": 0.3303103446960449, "learning_rate": 8.665198208554456e-06, "loss": 0.4464, "step": 2906 }, { "epoch": 1.571634528743918, "grad_norm": 0.3678765296936035, "learning_rate": 8.663913772365706e-06, "loss": 0.4626, "step": 2907 }, { "epoch": 1.572175166696702, "grad_norm": 0.3794427216053009, "learning_rate": 8.662628813782775e-06, "loss": 0.4398, "step": 2908 }, { "epoch": 1.5727158046494862, "grad_norm": 0.3422366678714752, "learning_rate": 8.661343332988869e-06, "loss": 0.4414, "step": 2909 }, { "epoch": 1.5732564426022706, "grad_norm": 0.3901372253894806, "learning_rate": 8.660057330167267e-06, "loss": 0.4565, "step": 2910 }, { "epoch": 1.573797080555055, "grad_norm": 0.32709166407585144, "learning_rate": 8.658770805501328e-06, "loss": 0.393, "step": 2911 }, { "epoch": 1.5743377185078393, "grad_norm": 0.3676150143146515, "learning_rate": 8.657483759174482e-06, "loss": 0.4566, "step": 2912 }, { "epoch": 1.5748783564606237, "grad_norm": 0.3521651327610016, "learning_rate": 8.656196191370233e-06, "loss": 0.4469, "step": 2913 }, { "epoch": 1.5754189944134078, "grad_norm": 0.33962196111679077, "learning_rate": 8.65490810227216e-06, "loss": 0.3909, "step": 2914 }, { "epoch": 1.575959632366192, "grad_norm": 0.40406396985054016, "learning_rate": 8.653619492063916e-06, "loss": 0.4976, "step": 2915 }, { "epoch": 1.5765002703189763, "grad_norm": 0.32255756855010986, "learning_rate": 8.652330360929228e-06, "loss": 0.4204, "step": 2916 }, { "epoch": 1.5770409082717607, "grad_norm": 0.3527771532535553, "learning_rate": 8.6510407090519e-06, "loss": 0.3998, "step": 2917 }, { "epoch": 1.577581546224545, "grad_norm": 0.4008268117904663, "learning_rate": 8.64975053661581e-06, "loss": 0.5013, "step": 2918 }, { "epoch": 1.5781221841773294, "grad_norm": 0.30029141902923584, "learning_rate": 8.648459843804904e-06, "loss": 0.3843, "step": 2919 }, { "epoch": 1.5786628221301136, "grad_norm": 0.3258407413959503, "learning_rate": 8.647168630803208e-06, "loss": 0.4322, "step": 2920 }, { "epoch": 1.5792034600828977, "grad_norm": 0.37515318393707275, "learning_rate": 8.645876897794823e-06, "loss": 0.4661, "step": 2921 }, { "epoch": 1.579744098035682, "grad_norm": 0.3038284182548523, "learning_rate": 8.644584644963918e-06, "loss": 0.4474, "step": 2922 }, { "epoch": 1.5802847359884664, "grad_norm": 0.3194510042667389, "learning_rate": 8.643291872494745e-06, "loss": 0.4175, "step": 2923 }, { "epoch": 1.5808253739412508, "grad_norm": 0.32912907004356384, "learning_rate": 8.64199858057162e-06, "loss": 0.4195, "step": 2924 }, { "epoch": 1.581366011894035, "grad_norm": 0.39627596735954285, "learning_rate": 8.640704769378943e-06, "loss": 0.4388, "step": 2925 }, { "epoch": 1.5819066498468193, "grad_norm": 0.31934958696365356, "learning_rate": 8.63941043910118e-06, "loss": 0.4587, "step": 2926 }, { "epoch": 1.5824472877996034, "grad_norm": 0.35360094904899597, "learning_rate": 8.638115589922875e-06, "loss": 0.4516, "step": 2927 }, { "epoch": 1.5829879257523878, "grad_norm": 0.336273729801178, "learning_rate": 8.636820222028645e-06, "loss": 0.431, "step": 2928 }, { "epoch": 1.5835285637051721, "grad_norm": 0.31268832087516785, "learning_rate": 8.635524335603183e-06, "loss": 0.4043, "step": 2929 }, { "epoch": 1.5840692016579565, "grad_norm": 0.3598516583442688, "learning_rate": 8.634227930831252e-06, "loss": 0.476, "step": 2930 }, { "epoch": 1.5846098396107406, "grad_norm": 0.28740474581718445, "learning_rate": 8.632931007897693e-06, "loss": 0.4236, "step": 2931 }, { "epoch": 1.585150477563525, "grad_norm": 0.34720805287361145, "learning_rate": 8.631633566987416e-06, "loss": 0.4373, "step": 2932 }, { "epoch": 1.5856911155163091, "grad_norm": 0.31540805101394653, "learning_rate": 8.630335608285412e-06, "loss": 0.4298, "step": 2933 }, { "epoch": 1.5862317534690935, "grad_norm": 0.284653902053833, "learning_rate": 8.629037131976737e-06, "loss": 0.4067, "step": 2934 }, { "epoch": 1.5867723914218779, "grad_norm": 0.3056807219982147, "learning_rate": 8.627738138246529e-06, "loss": 0.4201, "step": 2935 }, { "epoch": 1.5873130293746622, "grad_norm": 0.33723872900009155, "learning_rate": 8.626438627279993e-06, "loss": 0.463, "step": 2936 }, { "epoch": 1.5878536673274464, "grad_norm": 0.3082202672958374, "learning_rate": 8.625138599262416e-06, "loss": 0.4211, "step": 2937 }, { "epoch": 1.5883943052802305, "grad_norm": 0.31902506947517395, "learning_rate": 8.623838054379145e-06, "loss": 0.4535, "step": 2938 }, { "epoch": 1.5889349432330149, "grad_norm": 0.35180023312568665, "learning_rate": 8.62253699281562e-06, "loss": 0.4971, "step": 2939 }, { "epoch": 1.5894755811857992, "grad_norm": 0.33312559127807617, "learning_rate": 8.621235414757337e-06, "loss": 0.4408, "step": 2940 }, { "epoch": 1.5900162191385836, "grad_norm": 0.2971533536911011, "learning_rate": 8.619933320389872e-06, "loss": 0.3947, "step": 2941 }, { "epoch": 1.590556857091368, "grad_norm": 0.3088550865650177, "learning_rate": 8.618630709898878e-06, "loss": 0.4251, "step": 2942 }, { "epoch": 1.591097495044152, "grad_norm": 0.30490434169769287, "learning_rate": 8.61732758347008e-06, "loss": 0.4071, "step": 2943 }, { "epoch": 1.5916381329969362, "grad_norm": 0.3358185291290283, "learning_rate": 8.616023941289274e-06, "loss": 0.4123, "step": 2944 }, { "epoch": 1.5921787709497206, "grad_norm": 0.35692816972732544, "learning_rate": 8.61471978354233e-06, "loss": 0.4366, "step": 2945 }, { "epoch": 1.592719408902505, "grad_norm": 0.3419777750968933, "learning_rate": 8.613415110415194e-06, "loss": 0.4426, "step": 2946 }, { "epoch": 1.5932600468552893, "grad_norm": 0.37838178873062134, "learning_rate": 8.612109922093881e-06, "loss": 0.4715, "step": 2947 }, { "epoch": 1.5938006848080737, "grad_norm": 0.3336822986602783, "learning_rate": 8.610804218764487e-06, "loss": 0.3702, "step": 2948 }, { "epoch": 1.5943413227608578, "grad_norm": 0.33858394622802734, "learning_rate": 8.609498000613173e-06, "loss": 0.457, "step": 2949 }, { "epoch": 1.594881960713642, "grad_norm": 0.3557717502117157, "learning_rate": 8.608191267826179e-06, "loss": 0.4291, "step": 2950 }, { "epoch": 1.5954225986664263, "grad_norm": 0.36976203322410583, "learning_rate": 8.606884020589816e-06, "loss": 0.456, "step": 2951 }, { "epoch": 1.5959632366192107, "grad_norm": 0.33798712491989136, "learning_rate": 8.605576259090467e-06, "loss": 0.4667, "step": 2952 }, { "epoch": 1.596503874571995, "grad_norm": 0.37304526567459106, "learning_rate": 8.604267983514595e-06, "loss": 0.403, "step": 2953 }, { "epoch": 1.5970445125247792, "grad_norm": 0.37876787781715393, "learning_rate": 8.602959194048728e-06, "loss": 0.4562, "step": 2954 }, { "epoch": 1.5975851504775636, "grad_norm": 0.2938414514064789, "learning_rate": 8.60164989087947e-06, "loss": 0.4321, "step": 2955 }, { "epoch": 1.5981257884303477, "grad_norm": 0.3825919032096863, "learning_rate": 8.600340074193504e-06, "loss": 0.4688, "step": 2956 }, { "epoch": 1.598666426383132, "grad_norm": 0.35982459783554077, "learning_rate": 8.599029744177577e-06, "loss": 0.4318, "step": 2957 }, { "epoch": 1.5992070643359164, "grad_norm": 0.3182411193847656, "learning_rate": 8.597718901018512e-06, "loss": 0.4157, "step": 2958 }, { "epoch": 1.5997477022887008, "grad_norm": 0.3472853899002075, "learning_rate": 8.596407544903212e-06, "loss": 0.4471, "step": 2959 }, { "epoch": 1.600288340241485, "grad_norm": 0.3751436471939087, "learning_rate": 8.595095676018645e-06, "loss": 0.4626, "step": 2960 }, { "epoch": 1.6008289781942693, "grad_norm": 0.2762688994407654, "learning_rate": 8.593783294551853e-06, "loss": 0.3576, "step": 2961 }, { "epoch": 1.6013696161470534, "grad_norm": 0.3292232155799866, "learning_rate": 8.592470400689956e-06, "loss": 0.4851, "step": 2962 }, { "epoch": 1.6019102540998378, "grad_norm": 0.3487094044685364, "learning_rate": 8.591156994620142e-06, "loss": 0.4279, "step": 2963 }, { "epoch": 1.6024508920526221, "grad_norm": 0.34718480706214905, "learning_rate": 8.589843076529675e-06, "loss": 0.4374, "step": 2964 }, { "epoch": 1.6029915300054065, "grad_norm": 0.3252781629562378, "learning_rate": 8.588528646605893e-06, "loss": 0.4086, "step": 2965 }, { "epoch": 1.6035321679581906, "grad_norm": 0.35437238216400146, "learning_rate": 8.587213705036202e-06, "loss": 0.4082, "step": 2966 }, { "epoch": 1.6040728059109748, "grad_norm": 0.31416869163513184, "learning_rate": 8.585898252008082e-06, "loss": 0.4597, "step": 2967 }, { "epoch": 1.6046134438637591, "grad_norm": 0.37345847487449646, "learning_rate": 8.584582287709094e-06, "loss": 0.4391, "step": 2968 }, { "epoch": 1.6051540818165435, "grad_norm": 0.39789295196533203, "learning_rate": 8.583265812326862e-06, "loss": 0.4491, "step": 2969 }, { "epoch": 1.6056947197693279, "grad_norm": 0.33515751361846924, "learning_rate": 8.581948826049086e-06, "loss": 0.3954, "step": 2970 }, { "epoch": 1.6062353577221122, "grad_norm": 0.39586490392684937, "learning_rate": 8.580631329063544e-06, "loss": 0.4189, "step": 2971 }, { "epoch": 1.6067759956748964, "grad_norm": 0.3636821508407593, "learning_rate": 8.579313321558076e-06, "loss": 0.4267, "step": 2972 }, { "epoch": 1.6073166336276805, "grad_norm": 0.35685521364212036, "learning_rate": 8.577994803720605e-06, "loss": 0.4306, "step": 2973 }, { "epoch": 1.6078572715804649, "grad_norm": 0.3229580223560333, "learning_rate": 8.576675775739125e-06, "loss": 0.43, "step": 2974 }, { "epoch": 1.6083979095332492, "grad_norm": 0.34237968921661377, "learning_rate": 8.575356237801695e-06, "loss": 0.4498, "step": 2975 }, { "epoch": 1.6089385474860336, "grad_norm": 0.32243970036506653, "learning_rate": 8.574036190096455e-06, "loss": 0.4135, "step": 2976 }, { "epoch": 1.609479185438818, "grad_norm": 0.3718234598636627, "learning_rate": 8.572715632811616e-06, "loss": 0.4753, "step": 2977 }, { "epoch": 1.610019823391602, "grad_norm": 0.3125777244567871, "learning_rate": 8.57139456613546e-06, "loss": 0.4278, "step": 2978 }, { "epoch": 1.6105604613443862, "grad_norm": 0.3171471357345581, "learning_rate": 8.570072990256342e-06, "loss": 0.4292, "step": 2979 }, { "epoch": 1.6111010992971706, "grad_norm": 0.29979678988456726, "learning_rate": 8.56875090536269e-06, "loss": 0.4332, "step": 2980 }, { "epoch": 1.611641737249955, "grad_norm": 0.3105889558792114, "learning_rate": 8.567428311643005e-06, "loss": 0.4319, "step": 2981 }, { "epoch": 1.6121823752027393, "grad_norm": 0.33198606967926025, "learning_rate": 8.566105209285857e-06, "loss": 0.4812, "step": 2982 }, { "epoch": 1.6127230131555235, "grad_norm": 0.30810120701789856, "learning_rate": 8.564781598479897e-06, "loss": 0.405, "step": 2983 }, { "epoch": 1.6132636511083078, "grad_norm": 0.3284092843532562, "learning_rate": 8.563457479413839e-06, "loss": 0.4186, "step": 2984 }, { "epoch": 1.613804289061092, "grad_norm": 0.33219486474990845, "learning_rate": 8.562132852276474e-06, "loss": 0.4559, "step": 2985 }, { "epoch": 1.6143449270138763, "grad_norm": 0.36312806606292725, "learning_rate": 8.560807717256666e-06, "loss": 0.4533, "step": 2986 }, { "epoch": 1.6148855649666607, "grad_norm": 0.32629111409187317, "learning_rate": 8.55948207454335e-06, "loss": 0.4116, "step": 2987 }, { "epoch": 1.615426202919445, "grad_norm": 0.3450940251350403, "learning_rate": 8.558155924325533e-06, "loss": 0.4427, "step": 2988 }, { "epoch": 1.6159668408722292, "grad_norm": 0.3320872187614441, "learning_rate": 8.556829266792297e-06, "loss": 0.4304, "step": 2989 }, { "epoch": 1.6165074788250136, "grad_norm": 0.31070026755332947, "learning_rate": 8.555502102132792e-06, "loss": 0.4164, "step": 2990 }, { "epoch": 1.6170481167777977, "grad_norm": 0.3339788615703583, "learning_rate": 8.554174430536243e-06, "loss": 0.4475, "step": 2991 }, { "epoch": 1.617588754730582, "grad_norm": 0.36194276809692383, "learning_rate": 8.552846252191949e-06, "loss": 0.4633, "step": 2992 }, { "epoch": 1.6181293926833664, "grad_norm": 0.35281410813331604, "learning_rate": 8.551517567289279e-06, "loss": 0.4368, "step": 2993 }, { "epoch": 1.6186700306361508, "grad_norm": 0.34481269121170044, "learning_rate": 8.55018837601767e-06, "loss": 0.4275, "step": 2994 }, { "epoch": 1.619210668588935, "grad_norm": 0.36388522386550903, "learning_rate": 8.548858678566643e-06, "loss": 0.4433, "step": 2995 }, { "epoch": 1.6197513065417193, "grad_norm": 0.3803480863571167, "learning_rate": 8.547528475125778e-06, "loss": 0.3977, "step": 2996 }, { "epoch": 1.6202919444945034, "grad_norm": 0.4043755829334259, "learning_rate": 8.546197765884736e-06, "loss": 0.4438, "step": 2997 }, { "epoch": 1.6208325824472878, "grad_norm": 0.3669350743293762, "learning_rate": 8.544866551033246e-06, "loss": 0.4257, "step": 2998 }, { "epoch": 1.6213732204000721, "grad_norm": 0.3811838924884796, "learning_rate": 8.54353483076111e-06, "loss": 0.432, "step": 2999 }, { "epoch": 1.6219138583528565, "grad_norm": 0.4157685935497284, "learning_rate": 8.542202605258204e-06, "loss": 0.4744, "step": 3000 }, { "epoch": 1.6224544963056406, "grad_norm": 0.3191993832588196, "learning_rate": 8.54086987471447e-06, "loss": 0.4048, "step": 3001 }, { "epoch": 1.6229951342584248, "grad_norm": 0.3500971794128418, "learning_rate": 8.539536639319932e-06, "loss": 0.4533, "step": 3002 }, { "epoch": 1.6235357722112091, "grad_norm": 0.3561091721057892, "learning_rate": 8.538202899264678e-06, "loss": 0.4441, "step": 3003 }, { "epoch": 1.6240764101639935, "grad_norm": 0.32293397188186646, "learning_rate": 8.536868654738867e-06, "loss": 0.4242, "step": 3004 }, { "epoch": 1.6246170481167779, "grad_norm": 0.34320175647735596, "learning_rate": 8.535533905932739e-06, "loss": 0.4134, "step": 3005 }, { "epoch": 1.6251576860695622, "grad_norm": 0.3411339521408081, "learning_rate": 8.534198653036595e-06, "loss": 0.4609, "step": 3006 }, { "epoch": 1.6256983240223464, "grad_norm": 0.31736695766448975, "learning_rate": 8.532862896240815e-06, "loss": 0.4261, "step": 3007 }, { "epoch": 1.6262389619751305, "grad_norm": 0.3072502613067627, "learning_rate": 8.53152663573585e-06, "loss": 0.4099, "step": 3008 }, { "epoch": 1.6267795999279149, "grad_norm": 0.3346844017505646, "learning_rate": 8.53018987171222e-06, "loss": 0.4835, "step": 3009 }, { "epoch": 1.6273202378806992, "grad_norm": 0.31881600618362427, "learning_rate": 8.528852604360518e-06, "loss": 0.4327, "step": 3010 }, { "epoch": 1.6278608758334836, "grad_norm": 0.3123781681060791, "learning_rate": 8.527514833871411e-06, "loss": 0.4201, "step": 3011 }, { "epoch": 1.628401513786268, "grad_norm": 0.33178600668907166, "learning_rate": 8.526176560435634e-06, "loss": 0.4304, "step": 3012 }, { "epoch": 1.628942151739052, "grad_norm": 0.33425503969192505, "learning_rate": 8.524837784243995e-06, "loss": 0.4199, "step": 3013 }, { "epoch": 1.6294827896918362, "grad_norm": 0.3384115695953369, "learning_rate": 8.523498505487377e-06, "loss": 0.4123, "step": 3014 }, { "epoch": 1.6300234276446206, "grad_norm": 0.37247204780578613, "learning_rate": 8.52215872435673e-06, "loss": 0.446, "step": 3015 }, { "epoch": 1.630564065597405, "grad_norm": 0.33755308389663696, "learning_rate": 8.52081844104308e-06, "loss": 0.4209, "step": 3016 }, { "epoch": 1.6311047035501893, "grad_norm": 0.324717253446579, "learning_rate": 8.51947765573752e-06, "loss": 0.4097, "step": 3017 }, { "epoch": 1.6316453415029735, "grad_norm": 0.3523276746273041, "learning_rate": 8.518136368631216e-06, "loss": 0.4053, "step": 3018 }, { "epoch": 1.6321859794557578, "grad_norm": 0.3019939064979553, "learning_rate": 8.516794579915407e-06, "loss": 0.4097, "step": 3019 }, { "epoch": 1.632726617408542, "grad_norm": 0.30737465620040894, "learning_rate": 8.515452289781403e-06, "loss": 0.4222, "step": 3020 }, { "epoch": 1.6332672553613263, "grad_norm": 0.3415035605430603, "learning_rate": 8.514109498420586e-06, "loss": 0.4827, "step": 3021 }, { "epoch": 1.6338078933141107, "grad_norm": 0.30833402276039124, "learning_rate": 8.51276620602441e-06, "loss": 0.4109, "step": 3022 }, { "epoch": 1.634348531266895, "grad_norm": 0.34067338705062866, "learning_rate": 8.511422412784396e-06, "loss": 0.4255, "step": 3023 }, { "epoch": 1.6348891692196792, "grad_norm": 0.31600379943847656, "learning_rate": 8.51007811889214e-06, "loss": 0.4368, "step": 3024 }, { "epoch": 1.6354298071724636, "grad_norm": 0.33969056606292725, "learning_rate": 8.50873332453931e-06, "loss": 0.4142, "step": 3025 }, { "epoch": 1.6359704451252477, "grad_norm": 0.33624815940856934, "learning_rate": 8.507388029917646e-06, "loss": 0.4426, "step": 3026 }, { "epoch": 1.636511083078032, "grad_norm": 0.3214746117591858, "learning_rate": 8.506042235218955e-06, "loss": 0.4558, "step": 3027 }, { "epoch": 1.6370517210308164, "grad_norm": 0.3398401439189911, "learning_rate": 8.504695940635117e-06, "loss": 0.4614, "step": 3028 }, { "epoch": 1.6375923589836008, "grad_norm": 0.3270202875137329, "learning_rate": 8.50334914635809e-06, "loss": 0.4314, "step": 3029 }, { "epoch": 1.638132996936385, "grad_norm": 0.3382250666618347, "learning_rate": 8.50200185257989e-06, "loss": 0.3988, "step": 3030 }, { "epoch": 1.638673634889169, "grad_norm": 0.37231630086898804, "learning_rate": 8.500654059492618e-06, "loss": 0.4684, "step": 3031 }, { "epoch": 1.6392142728419534, "grad_norm": 0.3468717336654663, "learning_rate": 8.499305767288438e-06, "loss": 0.4703, "step": 3032 }, { "epoch": 1.6397549107947378, "grad_norm": 0.33282721042633057, "learning_rate": 8.497956976159585e-06, "loss": 0.4088, "step": 3033 }, { "epoch": 1.6402955487475221, "grad_norm": 0.42642226815223694, "learning_rate": 8.496607686298368e-06, "loss": 0.4518, "step": 3034 }, { "epoch": 1.6408361867003065, "grad_norm": 0.3754686713218689, "learning_rate": 8.495257897897166e-06, "loss": 0.4452, "step": 3035 }, { "epoch": 1.6413768246530906, "grad_norm": 0.31679567694664, "learning_rate": 8.493907611148433e-06, "loss": 0.4025, "step": 3036 }, { "epoch": 1.6419174626058748, "grad_norm": 0.4182370901107788, "learning_rate": 8.492556826244687e-06, "loss": 0.4908, "step": 3037 }, { "epoch": 1.6424581005586592, "grad_norm": 0.3175565302371979, "learning_rate": 8.491205543378518e-06, "loss": 0.4201, "step": 3038 }, { "epoch": 1.6429987385114435, "grad_norm": 0.36803102493286133, "learning_rate": 8.489853762742596e-06, "loss": 0.4387, "step": 3039 }, { "epoch": 1.6435393764642279, "grad_norm": 0.3387838900089264, "learning_rate": 8.48850148452965e-06, "loss": 0.4604, "step": 3040 }, { "epoch": 1.6440800144170122, "grad_norm": 0.3143702447414398, "learning_rate": 8.487148708932489e-06, "loss": 0.4285, "step": 3041 }, { "epoch": 1.6446206523697964, "grad_norm": 0.36707639694213867, "learning_rate": 8.485795436143987e-06, "loss": 0.4527, "step": 3042 }, { "epoch": 1.6451612903225805, "grad_norm": 0.34523433446884155, "learning_rate": 8.484441666357093e-06, "loss": 0.444, "step": 3043 }, { "epoch": 1.6457019282753649, "grad_norm": 0.371557354927063, "learning_rate": 8.48308739976482e-06, "loss": 0.4698, "step": 3044 }, { "epoch": 1.6462425662281492, "grad_norm": 0.33267924189567566, "learning_rate": 8.481732636560266e-06, "loss": 0.41, "step": 3045 }, { "epoch": 1.6467832041809336, "grad_norm": 0.36271023750305176, "learning_rate": 8.480377376936582e-06, "loss": 0.434, "step": 3046 }, { "epoch": 1.6473238421337177, "grad_norm": 0.31835541129112244, "learning_rate": 8.479021621087002e-06, "loss": 0.402, "step": 3047 }, { "epoch": 1.647864480086502, "grad_norm": 0.3511029779911041, "learning_rate": 8.477665369204829e-06, "loss": 0.434, "step": 3048 }, { "epoch": 1.6484051180392862, "grad_norm": 0.3473978638648987, "learning_rate": 8.476308621483433e-06, "loss": 0.4391, "step": 3049 }, { "epoch": 1.6489457559920706, "grad_norm": 0.3333207070827484, "learning_rate": 8.474951378116253e-06, "loss": 0.4286, "step": 3050 }, { "epoch": 1.649486393944855, "grad_norm": 0.3969205617904663, "learning_rate": 8.473593639296811e-06, "loss": 0.4527, "step": 3051 }, { "epoch": 1.6500270318976393, "grad_norm": 0.30005496740341187, "learning_rate": 8.472235405218682e-06, "loss": 0.414, "step": 3052 }, { "epoch": 1.6505676698504235, "grad_norm": 0.41392895579338074, "learning_rate": 8.470876676075528e-06, "loss": 0.4504, "step": 3053 }, { "epoch": 1.6511083078032078, "grad_norm": 0.3443209230899811, "learning_rate": 8.46951745206107e-06, "loss": 0.4172, "step": 3054 }, { "epoch": 1.651648945755992, "grad_norm": 0.3992152512073517, "learning_rate": 8.468157733369102e-06, "loss": 0.4553, "step": 3055 }, { "epoch": 1.6521895837087763, "grad_norm": 0.37463444471359253, "learning_rate": 8.466797520193492e-06, "loss": 0.4543, "step": 3056 }, { "epoch": 1.6527302216615607, "grad_norm": 0.33600836992263794, "learning_rate": 8.465436812728181e-06, "loss": 0.4298, "step": 3057 }, { "epoch": 1.653270859614345, "grad_norm": 0.4305607080459595, "learning_rate": 8.46407561116717e-06, "loss": 0.4148, "step": 3058 }, { "epoch": 1.6538114975671292, "grad_norm": 0.35718268156051636, "learning_rate": 8.46271391570454e-06, "loss": 0.41, "step": 3059 }, { "epoch": 1.6543521355199133, "grad_norm": 0.4128228724002838, "learning_rate": 8.461351726534438e-06, "loss": 0.4067, "step": 3060 }, { "epoch": 1.6548927734726977, "grad_norm": 0.3360671401023865, "learning_rate": 8.459989043851082e-06, "loss": 0.4042, "step": 3061 }, { "epoch": 1.655433411425482, "grad_norm": 0.33517733216285706, "learning_rate": 8.45862586784876e-06, "loss": 0.4024, "step": 3062 }, { "epoch": 1.6559740493782664, "grad_norm": 0.4027295708656311, "learning_rate": 8.457262198721836e-06, "loss": 0.447, "step": 3063 }, { "epoch": 1.6565146873310508, "grad_norm": 0.3583768308162689, "learning_rate": 8.455898036664734e-06, "loss": 0.4516, "step": 3064 }, { "epoch": 1.657055325283835, "grad_norm": 0.39112964272499084, "learning_rate": 8.454533381871957e-06, "loss": 0.4538, "step": 3065 }, { "epoch": 1.657595963236619, "grad_norm": 0.4554952383041382, "learning_rate": 8.453168234538075e-06, "loss": 0.4292, "step": 3066 }, { "epoch": 1.6581366011894034, "grad_norm": 0.42617353796958923, "learning_rate": 8.451802594857725e-06, "loss": 0.4653, "step": 3067 }, { "epoch": 1.6586772391421878, "grad_norm": 0.3546878695487976, "learning_rate": 8.45043646302562e-06, "loss": 0.3308, "step": 3068 }, { "epoch": 1.6592178770949721, "grad_norm": 0.4163733124732971, "learning_rate": 8.44906983923654e-06, "loss": 0.4218, "step": 3069 }, { "epoch": 1.6597585150477565, "grad_norm": 0.42740800976753235, "learning_rate": 8.447702723685335e-06, "loss": 0.4925, "step": 3070 }, { "epoch": 1.6602991530005407, "grad_norm": 0.3994062840938568, "learning_rate": 8.446335116566927e-06, "loss": 0.4142, "step": 3071 }, { "epoch": 1.6608397909533248, "grad_norm": 0.3746875524520874, "learning_rate": 8.44496701807631e-06, "loss": 0.4115, "step": 3072 }, { "epoch": 1.6613804289061092, "grad_norm": 0.41194552183151245, "learning_rate": 8.443598428408537e-06, "loss": 0.4565, "step": 3073 }, { "epoch": 1.6619210668588935, "grad_norm": 0.3965880870819092, "learning_rate": 8.442229347758748e-06, "loss": 0.4606, "step": 3074 }, { "epoch": 1.6624617048116779, "grad_norm": 0.386476069688797, "learning_rate": 8.440859776322137e-06, "loss": 0.3842, "step": 3075 }, { "epoch": 1.663002342764462, "grad_norm": 0.348588228225708, "learning_rate": 8.439489714293978e-06, "loss": 0.4534, "step": 3076 }, { "epoch": 1.6635429807172464, "grad_norm": 0.38109901547431946, "learning_rate": 8.43811916186961e-06, "loss": 0.4271, "step": 3077 }, { "epoch": 1.6640836186700305, "grad_norm": 0.42218753695487976, "learning_rate": 8.43674811924445e-06, "loss": 0.4368, "step": 3078 }, { "epoch": 1.6646242566228149, "grad_norm": 0.34757694602012634, "learning_rate": 8.435376586613972e-06, "loss": 0.4115, "step": 3079 }, { "epoch": 1.6651648945755992, "grad_norm": 0.4021320343017578, "learning_rate": 8.43400456417373e-06, "loss": 0.4767, "step": 3080 }, { "epoch": 1.6657055325283836, "grad_norm": 0.36043521761894226, "learning_rate": 8.432632052119342e-06, "loss": 0.4206, "step": 3081 }, { "epoch": 1.6662461704811677, "grad_norm": 0.37315505743026733, "learning_rate": 8.431259050646502e-06, "loss": 0.4035, "step": 3082 }, { "epoch": 1.666786808433952, "grad_norm": 0.3432157635688782, "learning_rate": 8.429885559950965e-06, "loss": 0.4299, "step": 3083 }, { "epoch": 1.6673274463867362, "grad_norm": 0.3866594731807709, "learning_rate": 8.428511580228564e-06, "loss": 0.4573, "step": 3084 }, { "epoch": 1.6678680843395206, "grad_norm": 0.37218984961509705, "learning_rate": 8.4271371116752e-06, "loss": 0.4442, "step": 3085 }, { "epoch": 1.668408722292305, "grad_norm": 0.405370831489563, "learning_rate": 8.42576215448684e-06, "loss": 0.4491, "step": 3086 }, { "epoch": 1.6689493602450893, "grad_norm": 0.3809480667114258, "learning_rate": 8.424386708859522e-06, "loss": 0.4166, "step": 3087 }, { "epoch": 1.6694899981978735, "grad_norm": 0.36650386452674866, "learning_rate": 8.423010774989357e-06, "loss": 0.4182, "step": 3088 }, { "epoch": 1.6700306361506576, "grad_norm": 0.4135519564151764, "learning_rate": 8.421634353072522e-06, "loss": 0.4545, "step": 3089 }, { "epoch": 1.670571274103442, "grad_norm": 0.33149343729019165, "learning_rate": 8.420257443305264e-06, "loss": 0.4127, "step": 3090 }, { "epoch": 1.6711119120562263, "grad_norm": 0.40432754158973694, "learning_rate": 8.418880045883902e-06, "loss": 0.4292, "step": 3091 }, { "epoch": 1.6716525500090107, "grad_norm": 0.3729030191898346, "learning_rate": 8.41750216100482e-06, "loss": 0.4231, "step": 3092 }, { "epoch": 1.672193187961795, "grad_norm": 0.3287682831287384, "learning_rate": 8.416123788864478e-06, "loss": 0.4342, "step": 3093 }, { "epoch": 1.6727338259145792, "grad_norm": 0.43830937147140503, "learning_rate": 8.4147449296594e-06, "loss": 0.406, "step": 3094 }, { "epoch": 1.6732744638673633, "grad_norm": 0.3575669825077057, "learning_rate": 8.41336558358618e-06, "loss": 0.4064, "step": 3095 }, { "epoch": 1.6738151018201477, "grad_norm": 0.4404871165752411, "learning_rate": 8.411985750841484e-06, "loss": 0.4708, "step": 3096 }, { "epoch": 1.674355739772932, "grad_norm": 0.33472740650177, "learning_rate": 8.410605431622048e-06, "loss": 0.411, "step": 3097 }, { "epoch": 1.6748963777257164, "grad_norm": 0.3318942189216614, "learning_rate": 8.409224626124672e-06, "loss": 0.4288, "step": 3098 }, { "epoch": 1.6754370156785008, "grad_norm": 0.38175883889198303, "learning_rate": 8.40784333454623e-06, "loss": 0.416, "step": 3099 }, { "epoch": 1.675977653631285, "grad_norm": 0.3427804708480835, "learning_rate": 8.406461557083666e-06, "loss": 0.448, "step": 3100 }, { "epoch": 1.676518291584069, "grad_norm": 0.36495766043663025, "learning_rate": 8.405079293933986e-06, "loss": 0.4756, "step": 3101 }, { "epoch": 1.6770589295368534, "grad_norm": 0.3412352204322815, "learning_rate": 8.403696545294276e-06, "loss": 0.4158, "step": 3102 }, { "epoch": 1.6775995674896378, "grad_norm": 0.31895023584365845, "learning_rate": 8.402313311361684e-06, "loss": 0.4354, "step": 3103 }, { "epoch": 1.6781402054424222, "grad_norm": 0.41563037037849426, "learning_rate": 8.400929592333429e-06, "loss": 0.4755, "step": 3104 }, { "epoch": 1.6786808433952063, "grad_norm": 0.3583192825317383, "learning_rate": 8.399545388406798e-06, "loss": 0.446, "step": 3105 }, { "epoch": 1.6792214813479907, "grad_norm": 0.32093483209609985, "learning_rate": 8.39816069977915e-06, "loss": 0.4305, "step": 3106 }, { "epoch": 1.6797621193007748, "grad_norm": 0.4041621685028076, "learning_rate": 8.396775526647911e-06, "loss": 0.4506, "step": 3107 }, { "epoch": 1.6803027572535592, "grad_norm": 0.3379467725753784, "learning_rate": 8.395389869210576e-06, "loss": 0.3782, "step": 3108 }, { "epoch": 1.6808433952063435, "grad_norm": 0.37511587142944336, "learning_rate": 8.39400372766471e-06, "loss": 0.4389, "step": 3109 }, { "epoch": 1.6813840331591279, "grad_norm": 0.29660964012145996, "learning_rate": 8.392617102207945e-06, "loss": 0.3928, "step": 3110 }, { "epoch": 1.681924671111912, "grad_norm": 0.38168632984161377, "learning_rate": 8.391229993037986e-06, "loss": 0.4428, "step": 3111 }, { "epoch": 1.6824653090646964, "grad_norm": 0.36777037382125854, "learning_rate": 8.389842400352603e-06, "loss": 0.4462, "step": 3112 }, { "epoch": 1.6830059470174805, "grad_norm": 0.3301421105861664, "learning_rate": 8.388454324349636e-06, "loss": 0.4568, "step": 3113 }, { "epoch": 1.6835465849702649, "grad_norm": 0.3039977252483368, "learning_rate": 8.387065765226995e-06, "loss": 0.3929, "step": 3114 }, { "epoch": 1.6840872229230492, "grad_norm": 0.37604808807373047, "learning_rate": 8.38567672318266e-06, "loss": 0.4395, "step": 3115 }, { "epoch": 1.6846278608758336, "grad_norm": 0.31758004426956177, "learning_rate": 8.384287198414676e-06, "loss": 0.4309, "step": 3116 }, { "epoch": 1.6851684988286177, "grad_norm": 0.3382377624511719, "learning_rate": 8.382897191121157e-06, "loss": 0.4281, "step": 3117 }, { "epoch": 1.685709136781402, "grad_norm": 0.37232866883277893, "learning_rate": 8.381506701500292e-06, "loss": 0.4368, "step": 3118 }, { "epoch": 1.6862497747341862, "grad_norm": 0.36527279019355774, "learning_rate": 8.380115729750333e-06, "loss": 0.4015, "step": 3119 }, { "epoch": 1.6867904126869706, "grad_norm": 0.3765943646430969, "learning_rate": 8.3787242760696e-06, "loss": 0.4732, "step": 3120 }, { "epoch": 1.687331050639755, "grad_norm": 0.3340546786785126, "learning_rate": 8.377332340656488e-06, "loss": 0.4017, "step": 3121 }, { "epoch": 1.6878716885925393, "grad_norm": 0.412190318107605, "learning_rate": 8.375939923709453e-06, "loss": 0.474, "step": 3122 }, { "epoch": 1.6884123265453235, "grad_norm": 0.3258398175239563, "learning_rate": 8.374547025427024e-06, "loss": 0.4413, "step": 3123 }, { "epoch": 1.6889529644981076, "grad_norm": 0.31814485788345337, "learning_rate": 8.373153646007802e-06, "loss": 0.4178, "step": 3124 }, { "epoch": 1.689493602450892, "grad_norm": 0.39647987484931946, "learning_rate": 8.371759785650444e-06, "loss": 0.456, "step": 3125 }, { "epoch": 1.6900342404036763, "grad_norm": 0.3643586039543152, "learning_rate": 8.370365444553692e-06, "loss": 0.4342, "step": 3126 }, { "epoch": 1.6905748783564607, "grad_norm": 0.33919721841812134, "learning_rate": 8.368970622916346e-06, "loss": 0.4159, "step": 3127 }, { "epoch": 1.691115516309245, "grad_norm": 0.38754889369010925, "learning_rate": 8.367575320937276e-06, "loss": 0.4435, "step": 3128 }, { "epoch": 1.6916561542620292, "grad_norm": 0.3378376364707947, "learning_rate": 8.366179538815424e-06, "loss": 0.4812, "step": 3129 }, { "epoch": 1.6921967922148133, "grad_norm": 0.33486446738243103, "learning_rate": 8.364783276749794e-06, "loss": 0.4108, "step": 3130 }, { "epoch": 1.6927374301675977, "grad_norm": 0.3412155210971832, "learning_rate": 8.363386534939467e-06, "loss": 0.4107, "step": 3131 }, { "epoch": 1.693278068120382, "grad_norm": 0.36446821689605713, "learning_rate": 8.361989313583586e-06, "loss": 0.4241, "step": 3132 }, { "epoch": 1.6938187060731664, "grad_norm": 0.3438349962234497, "learning_rate": 8.360591612881363e-06, "loss": 0.4735, "step": 3133 }, { "epoch": 1.6943593440259508, "grad_norm": 0.3088810443878174, "learning_rate": 8.359193433032083e-06, "loss": 0.4163, "step": 3134 }, { "epoch": 1.694899981978735, "grad_norm": 0.37794172763824463, "learning_rate": 8.357794774235094e-06, "loss": 0.4534, "step": 3135 }, { "epoch": 1.695440619931519, "grad_norm": 0.3165605068206787, "learning_rate": 8.356395636689811e-06, "loss": 0.3943, "step": 3136 }, { "epoch": 1.6959812578843034, "grad_norm": 0.33255523443222046, "learning_rate": 8.354996020595728e-06, "loss": 0.4647, "step": 3137 }, { "epoch": 1.6965218958370878, "grad_norm": 0.30814820528030396, "learning_rate": 8.353595926152391e-06, "loss": 0.4561, "step": 3138 }, { "epoch": 1.6970625337898722, "grad_norm": 0.34673863649368286, "learning_rate": 8.35219535355943e-06, "loss": 0.419, "step": 3139 }, { "epoch": 1.6976031717426563, "grad_norm": 0.32622379064559937, "learning_rate": 8.350794303016533e-06, "loss": 0.4528, "step": 3140 }, { "epoch": 1.6981438096954407, "grad_norm": 0.2980291545391083, "learning_rate": 8.349392774723459e-06, "loss": 0.4075, "step": 3141 }, { "epoch": 1.6986844476482248, "grad_norm": 0.3580862879753113, "learning_rate": 8.347990768880036e-06, "loss": 0.4604, "step": 3142 }, { "epoch": 1.6992250856010092, "grad_norm": 0.3246307969093323, "learning_rate": 8.34658828568616e-06, "loss": 0.3947, "step": 3143 }, { "epoch": 1.6997657235537935, "grad_norm": 0.30707135796546936, "learning_rate": 8.345185325341794e-06, "loss": 0.405, "step": 3144 }, { "epoch": 1.7003063615065779, "grad_norm": 0.311701238155365, "learning_rate": 8.343781888046971e-06, "loss": 0.4258, "step": 3145 }, { "epoch": 1.700846999459362, "grad_norm": 0.2869536280632019, "learning_rate": 8.342377974001787e-06, "loss": 0.3938, "step": 3146 }, { "epoch": 1.7013876374121464, "grad_norm": 0.3633367717266083, "learning_rate": 8.340973583406412e-06, "loss": 0.4839, "step": 3147 }, { "epoch": 1.7019282753649305, "grad_norm": 0.3297747075557709, "learning_rate": 8.339568716461082e-06, "loss": 0.4462, "step": 3148 }, { "epoch": 1.7024689133177149, "grad_norm": 0.28560447692871094, "learning_rate": 8.338163373366099e-06, "loss": 0.4178, "step": 3149 }, { "epoch": 1.7030095512704992, "grad_norm": 0.31055206060409546, "learning_rate": 8.336757554321832e-06, "loss": 0.4093, "step": 3150 }, { "epoch": 1.7035501892232836, "grad_norm": 0.3611248731613159, "learning_rate": 8.335351259528726e-06, "loss": 0.4674, "step": 3151 }, { "epoch": 1.7040908271760677, "grad_norm": 0.31785741448402405, "learning_rate": 8.333944489187284e-06, "loss": 0.4416, "step": 3152 }, { "epoch": 1.7046314651288519, "grad_norm": 0.3182879388332367, "learning_rate": 8.332537243498082e-06, "loss": 0.4145, "step": 3153 }, { "epoch": 1.7051721030816362, "grad_norm": 0.3585261106491089, "learning_rate": 8.331129522661761e-06, "loss": 0.4515, "step": 3154 }, { "epoch": 1.7057127410344206, "grad_norm": 0.3109356760978699, "learning_rate": 8.329721326879032e-06, "loss": 0.4235, "step": 3155 }, { "epoch": 1.706253378987205, "grad_norm": 0.38349929451942444, "learning_rate": 8.328312656350675e-06, "loss": 0.4808, "step": 3156 }, { "epoch": 1.7067940169399893, "grad_norm": 0.32526955008506775, "learning_rate": 8.326903511277535e-06, "loss": 0.4377, "step": 3157 }, { "epoch": 1.7073346548927735, "grad_norm": 0.34731414914131165, "learning_rate": 8.32549389186052e-06, "loss": 0.4497, "step": 3158 }, { "epoch": 1.7078752928455576, "grad_norm": 0.37489038705825806, "learning_rate": 8.32408379830062e-06, "loss": 0.4258, "step": 3159 }, { "epoch": 1.708415930798342, "grad_norm": 0.3703058958053589, "learning_rate": 8.322673230798877e-06, "loss": 0.4201, "step": 3160 }, { "epoch": 1.7089565687511263, "grad_norm": 0.32067111134529114, "learning_rate": 8.32126218955641e-06, "loss": 0.4305, "step": 3161 }, { "epoch": 1.7094972067039107, "grad_norm": 0.3572859466075897, "learning_rate": 8.319850674774401e-06, "loss": 0.4379, "step": 3162 }, { "epoch": 1.710037844656695, "grad_norm": 0.3251093924045563, "learning_rate": 8.318438686654101e-06, "loss": 0.4293, "step": 3163 }, { "epoch": 1.7105784826094792, "grad_norm": 0.2939565181732178, "learning_rate": 8.317026225396832e-06, "loss": 0.42, "step": 3164 }, { "epoch": 1.7111191205622633, "grad_norm": 0.3347533643245697, "learning_rate": 8.315613291203977e-06, "loss": 0.4308, "step": 3165 }, { "epoch": 1.7116597585150477, "grad_norm": 0.30677613615989685, "learning_rate": 8.31419988427699e-06, "loss": 0.3876, "step": 3166 }, { "epoch": 1.712200396467832, "grad_norm": 0.3365405201911926, "learning_rate": 8.312786004817394e-06, "loss": 0.4636, "step": 3167 }, { "epoch": 1.7127410344206164, "grad_norm": 0.3438670337200165, "learning_rate": 8.311371653026775e-06, "loss": 0.4645, "step": 3168 }, { "epoch": 1.7132816723734006, "grad_norm": 0.31338000297546387, "learning_rate": 8.309956829106789e-06, "loss": 0.4463, "step": 3169 }, { "epoch": 1.713822310326185, "grad_norm": 0.32951968908309937, "learning_rate": 8.30854153325916e-06, "loss": 0.4077, "step": 3170 }, { "epoch": 1.714362948278969, "grad_norm": 0.3423302173614502, "learning_rate": 8.307125765685677e-06, "loss": 0.4197, "step": 3171 }, { "epoch": 1.7149035862317534, "grad_norm": 0.30592572689056396, "learning_rate": 8.3057095265882e-06, "loss": 0.4028, "step": 3172 }, { "epoch": 1.7154442241845378, "grad_norm": 0.3736177682876587, "learning_rate": 8.304292816168653e-06, "loss": 0.4519, "step": 3173 }, { "epoch": 1.7159848621373222, "grad_norm": 0.34032773971557617, "learning_rate": 8.302875634629027e-06, "loss": 0.4213, "step": 3174 }, { "epoch": 1.7165255000901063, "grad_norm": 0.33573025465011597, "learning_rate": 8.30145798217138e-06, "loss": 0.4315, "step": 3175 }, { "epoch": 1.7170661380428907, "grad_norm": 0.3742954134941101, "learning_rate": 8.30003985899784e-06, "loss": 0.4146, "step": 3176 }, { "epoch": 1.7176067759956748, "grad_norm": 0.32530078291893005, "learning_rate": 8.298621265310602e-06, "loss": 0.4337, "step": 3177 }, { "epoch": 1.7181474139484592, "grad_norm": 0.33018314838409424, "learning_rate": 8.297202201311923e-06, "loss": 0.4435, "step": 3178 }, { "epoch": 1.7186880519012435, "grad_norm": 0.3356422781944275, "learning_rate": 8.295782667204133e-06, "loss": 0.4602, "step": 3179 }, { "epoch": 1.7192286898540279, "grad_norm": 0.30430036783218384, "learning_rate": 8.294362663189626e-06, "loss": 0.383, "step": 3180 }, { "epoch": 1.719769327806812, "grad_norm": 0.3536764979362488, "learning_rate": 8.292942189470863e-06, "loss": 0.4313, "step": 3181 }, { "epoch": 1.7203099657595962, "grad_norm": 0.30938300490379333, "learning_rate": 8.291521246250373e-06, "loss": 0.3956, "step": 3182 }, { "epoch": 1.7208506037123805, "grad_norm": 0.36522185802459717, "learning_rate": 8.290099833730753e-06, "loss": 0.461, "step": 3183 }, { "epoch": 1.7213912416651649, "grad_norm": 0.3340790569782257, "learning_rate": 8.288677952114663e-06, "loss": 0.4611, "step": 3184 }, { "epoch": 1.7219318796179492, "grad_norm": 0.31987613439559937, "learning_rate": 8.287255601604834e-06, "loss": 0.3968, "step": 3185 }, { "epoch": 1.7224725175707336, "grad_norm": 0.35184627771377563, "learning_rate": 8.285832782404061e-06, "loss": 0.4627, "step": 3186 }, { "epoch": 1.7230131555235177, "grad_norm": 0.29496318101882935, "learning_rate": 8.284409494715208e-06, "loss": 0.4, "step": 3187 }, { "epoch": 1.7235537934763019, "grad_norm": 0.32895147800445557, "learning_rate": 8.282985738741202e-06, "loss": 0.4251, "step": 3188 }, { "epoch": 1.7240944314290862, "grad_norm": 0.3419833481311798, "learning_rate": 8.281561514685046e-06, "loss": 0.4547, "step": 3189 }, { "epoch": 1.7246350693818706, "grad_norm": 0.2946586310863495, "learning_rate": 8.280136822749796e-06, "loss": 0.3943, "step": 3190 }, { "epoch": 1.725175707334655, "grad_norm": 0.3011445105075836, "learning_rate": 8.278711663138585e-06, "loss": 0.4056, "step": 3191 }, { "epoch": 1.7257163452874393, "grad_norm": 0.36560511589050293, "learning_rate": 8.277286036054611e-06, "loss": 0.5145, "step": 3192 }, { "epoch": 1.7262569832402235, "grad_norm": 0.2690065801143646, "learning_rate": 8.275859941701137e-06, "loss": 0.3663, "step": 3193 }, { "epoch": 1.7267976211930076, "grad_norm": 0.347079336643219, "learning_rate": 8.27443338028149e-06, "loss": 0.444, "step": 3194 }, { "epoch": 1.727338259145792, "grad_norm": 0.34917938709259033, "learning_rate": 8.27300635199907e-06, "loss": 0.4253, "step": 3195 }, { "epoch": 1.7278788970985763, "grad_norm": 0.32194095849990845, "learning_rate": 8.271578857057337e-06, "loss": 0.4584, "step": 3196 }, { "epoch": 1.7284195350513607, "grad_norm": 0.3533184826374054, "learning_rate": 8.270150895659824e-06, "loss": 0.4777, "step": 3197 }, { "epoch": 1.7289601730041448, "grad_norm": 0.3372891843318939, "learning_rate": 8.268722468010123e-06, "loss": 0.4002, "step": 3198 }, { "epoch": 1.7295008109569292, "grad_norm": 0.30643367767333984, "learning_rate": 8.267293574311901e-06, "loss": 0.4468, "step": 3199 }, { "epoch": 1.7300414489097133, "grad_norm": 0.3147575855255127, "learning_rate": 8.265864214768883e-06, "loss": 0.3975, "step": 3200 }, { "epoch": 1.7305820868624977, "grad_norm": 0.3653241693973541, "learning_rate": 8.26443438958487e-06, "loss": 0.4848, "step": 3201 }, { "epoch": 1.731122724815282, "grad_norm": 0.3251360058784485, "learning_rate": 8.263004098963719e-06, "loss": 0.424, "step": 3202 }, { "epoch": 1.7316633627680664, "grad_norm": 0.31904032826423645, "learning_rate": 8.261573343109359e-06, "loss": 0.4258, "step": 3203 }, { "epoch": 1.7322040007208506, "grad_norm": 0.32821953296661377, "learning_rate": 8.260142122225788e-06, "loss": 0.4668, "step": 3204 }, { "epoch": 1.732744638673635, "grad_norm": 0.34518858790397644, "learning_rate": 8.25871043651706e-06, "loss": 0.4592, "step": 3205 }, { "epoch": 1.733285276626419, "grad_norm": 0.332118421792984, "learning_rate": 8.25727828618731e-06, "loss": 0.4063, "step": 3206 }, { "epoch": 1.7338259145792034, "grad_norm": 0.34514275193214417, "learning_rate": 8.255845671440726e-06, "loss": 0.5073, "step": 3207 }, { "epoch": 1.7343665525319878, "grad_norm": 0.3004824221134186, "learning_rate": 8.25441259248157e-06, "loss": 0.4085, "step": 3208 }, { "epoch": 1.7349071904847722, "grad_norm": 0.29278916120529175, "learning_rate": 8.252979049514168e-06, "loss": 0.3678, "step": 3209 }, { "epoch": 1.7354478284375563, "grad_norm": 0.36105865240097046, "learning_rate": 8.25154504274291e-06, "loss": 0.4853, "step": 3210 }, { "epoch": 1.7359884663903407, "grad_norm": 0.31009751558303833, "learning_rate": 8.250110572372255e-06, "loss": 0.428, "step": 3211 }, { "epoch": 1.7365291043431248, "grad_norm": 0.5618311762809753, "learning_rate": 8.248675638606729e-06, "loss": 0.4689, "step": 3212 }, { "epoch": 1.7370697422959092, "grad_norm": 0.35816720128059387, "learning_rate": 8.247240241650918e-06, "loss": 0.4614, "step": 3213 }, { "epoch": 1.7376103802486935, "grad_norm": 0.3334486484527588, "learning_rate": 8.245804381709483e-06, "loss": 0.422, "step": 3214 }, { "epoch": 1.7381510182014779, "grad_norm": 0.34021323919296265, "learning_rate": 8.244368058987145e-06, "loss": 0.414, "step": 3215 }, { "epoch": 1.738691656154262, "grad_norm": 0.3346158266067505, "learning_rate": 8.24293127368869e-06, "loss": 0.4688, "step": 3216 }, { "epoch": 1.7392322941070462, "grad_norm": 0.350480318069458, "learning_rate": 8.241494026018974e-06, "loss": 0.4408, "step": 3217 }, { "epoch": 1.7397729320598305, "grad_norm": 0.32346951961517334, "learning_rate": 8.240056316182917e-06, "loss": 0.4241, "step": 3218 }, { "epoch": 1.7403135700126149, "grad_norm": 0.33489251136779785, "learning_rate": 8.238618144385506e-06, "loss": 0.4288, "step": 3219 }, { "epoch": 1.7408542079653992, "grad_norm": 0.3779529929161072, "learning_rate": 8.237179510831792e-06, "loss": 0.4622, "step": 3220 }, { "epoch": 1.7413948459181836, "grad_norm": 0.31131255626678467, "learning_rate": 8.23574041572689e-06, "loss": 0.4228, "step": 3221 }, { "epoch": 1.7419354838709677, "grad_norm": 0.3840460777282715, "learning_rate": 8.234300859275989e-06, "loss": 0.4145, "step": 3222 }, { "epoch": 1.7424761218237519, "grad_norm": 0.3488578200340271, "learning_rate": 8.232860841684333e-06, "loss": 0.4694, "step": 3223 }, { "epoch": 1.7430167597765363, "grad_norm": 0.3081468343734741, "learning_rate": 8.231420363157243e-06, "loss": 0.3967, "step": 3224 }, { "epoch": 1.7435573977293206, "grad_norm": 0.3889080584049225, "learning_rate": 8.229979423900095e-06, "loss": 0.448, "step": 3225 }, { "epoch": 1.744098035682105, "grad_norm": 0.3414023816585541, "learning_rate": 8.228538024118338e-06, "loss": 0.4822, "step": 3226 }, { "epoch": 1.7446386736348891, "grad_norm": 0.30643653869628906, "learning_rate": 8.227096164017482e-06, "loss": 0.4196, "step": 3227 }, { "epoch": 1.7451793115876735, "grad_norm": 0.36826562881469727, "learning_rate": 8.225653843803107e-06, "loss": 0.4308, "step": 3228 }, { "epoch": 1.7457199495404576, "grad_norm": 0.37800219655036926, "learning_rate": 8.224211063680854e-06, "loss": 0.4503, "step": 3229 }, { "epoch": 1.746260587493242, "grad_norm": 0.3215877115726471, "learning_rate": 8.222767823856435e-06, "loss": 0.3991, "step": 3230 }, { "epoch": 1.7468012254460263, "grad_norm": 0.40726178884506226, "learning_rate": 8.221324124535622e-06, "loss": 0.4337, "step": 3231 }, { "epoch": 1.7473418633988107, "grad_norm": 0.3118278682231903, "learning_rate": 8.219879965924255e-06, "loss": 0.4753, "step": 3232 }, { "epoch": 1.7478825013515948, "grad_norm": 0.31255823373794556, "learning_rate": 8.218435348228241e-06, "loss": 0.3601, "step": 3233 }, { "epoch": 1.7484231393043792, "grad_norm": 0.3631909489631653, "learning_rate": 8.216990271653553e-06, "loss": 0.4126, "step": 3234 }, { "epoch": 1.7489637772571633, "grad_norm": 0.34900087118148804, "learning_rate": 8.215544736406223e-06, "loss": 0.4599, "step": 3235 }, { "epoch": 1.7495044152099477, "grad_norm": 0.2817728519439697, "learning_rate": 8.214098742692353e-06, "loss": 0.3794, "step": 3236 }, { "epoch": 1.750045053162732, "grad_norm": 0.4058758616447449, "learning_rate": 8.212652290718113e-06, "loss": 0.4523, "step": 3237 }, { "epoch": 1.7505856911155164, "grad_norm": 0.3100168704986572, "learning_rate": 8.211205380689735e-06, "loss": 0.4086, "step": 3238 }, { "epoch": 1.7511263290683006, "grad_norm": 0.3628126084804535, "learning_rate": 8.209758012813515e-06, "loss": 0.4776, "step": 3239 }, { "epoch": 1.751666967021085, "grad_norm": 0.29844430088996887, "learning_rate": 8.20831018729582e-06, "loss": 0.406, "step": 3240 }, { "epoch": 1.752207604973869, "grad_norm": 0.3558284044265747, "learning_rate": 8.206861904343074e-06, "loss": 0.4451, "step": 3241 }, { "epoch": 1.7527482429266534, "grad_norm": 0.35172179341316223, "learning_rate": 8.20541316416177e-06, "loss": 0.4426, "step": 3242 }, { "epoch": 1.7532888808794378, "grad_norm": 0.36479464173316956, "learning_rate": 8.20396396695847e-06, "loss": 0.4168, "step": 3243 }, { "epoch": 1.7538295188322222, "grad_norm": 0.3374549150466919, "learning_rate": 8.202514312939798e-06, "loss": 0.4347, "step": 3244 }, { "epoch": 1.7543701567850063, "grad_norm": 0.357465535402298, "learning_rate": 8.20106420231244e-06, "loss": 0.4579, "step": 3245 }, { "epoch": 1.7549107947377904, "grad_norm": 0.41111886501312256, "learning_rate": 8.199613635283154e-06, "loss": 0.45, "step": 3246 }, { "epoch": 1.7554514326905748, "grad_norm": 0.33777353167533875, "learning_rate": 8.198162612058755e-06, "loss": 0.3852, "step": 3247 }, { "epoch": 1.7559920706433592, "grad_norm": 0.34722068905830383, "learning_rate": 8.19671113284613e-06, "loss": 0.4177, "step": 3248 }, { "epoch": 1.7565327085961435, "grad_norm": 0.3915463984012604, "learning_rate": 8.19525919785223e-06, "loss": 0.4462, "step": 3249 }, { "epoch": 1.7570733465489279, "grad_norm": 0.3315128982067108, "learning_rate": 8.193806807284064e-06, "loss": 0.4572, "step": 3250 }, { "epoch": 1.757613984501712, "grad_norm": 0.3063032925128937, "learning_rate": 8.192353961348717e-06, "loss": 0.4245, "step": 3251 }, { "epoch": 1.7581546224544962, "grad_norm": 0.3548220992088318, "learning_rate": 8.190900660253327e-06, "loss": 0.4619, "step": 3252 }, { "epoch": 1.7586952604072805, "grad_norm": 0.3348757028579712, "learning_rate": 8.189446904205107e-06, "loss": 0.4173, "step": 3253 }, { "epoch": 1.7592358983600649, "grad_norm": 0.3433801233768463, "learning_rate": 8.187992693411333e-06, "loss": 0.4607, "step": 3254 }, { "epoch": 1.7597765363128492, "grad_norm": 0.30504295229911804, "learning_rate": 8.186538028079338e-06, "loss": 0.396, "step": 3255 }, { "epoch": 1.7603171742656336, "grad_norm": 0.349337100982666, "learning_rate": 8.18508290841653e-06, "loss": 0.4742, "step": 3256 }, { "epoch": 1.7608578122184178, "grad_norm": 0.31097519397735596, "learning_rate": 8.183627334630376e-06, "loss": 0.3798, "step": 3257 }, { "epoch": 1.761398450171202, "grad_norm": 0.36984965205192566, "learning_rate": 8.182171306928407e-06, "loss": 0.4295, "step": 3258 }, { "epoch": 1.7619390881239863, "grad_norm": 0.32913482189178467, "learning_rate": 8.180714825518223e-06, "loss": 0.4201, "step": 3259 }, { "epoch": 1.7624797260767706, "grad_norm": 0.3395659923553467, "learning_rate": 8.179257890607489e-06, "loss": 0.3953, "step": 3260 }, { "epoch": 1.763020364029555, "grad_norm": 0.34365546703338623, "learning_rate": 8.177800502403928e-06, "loss": 0.4944, "step": 3261 }, { "epoch": 1.7635610019823391, "grad_norm": 0.3863326609134674, "learning_rate": 8.176342661115332e-06, "loss": 0.4325, "step": 3262 }, { "epoch": 1.7641016399351235, "grad_norm": 0.3482382297515869, "learning_rate": 8.174884366949558e-06, "loss": 0.4258, "step": 3263 }, { "epoch": 1.7646422778879076, "grad_norm": 0.3736988306045532, "learning_rate": 8.173425620114529e-06, "loss": 0.4245, "step": 3264 }, { "epoch": 1.765182915840692, "grad_norm": 0.40473508834838867, "learning_rate": 8.171966420818227e-06, "loss": 0.4628, "step": 3265 }, { "epoch": 1.7657235537934763, "grad_norm": 0.3368479907512665, "learning_rate": 8.170506769268706e-06, "loss": 0.4379, "step": 3266 }, { "epoch": 1.7662641917462607, "grad_norm": 0.4241042137145996, "learning_rate": 8.16904666567408e-06, "loss": 0.4502, "step": 3267 }, { "epoch": 1.7668048296990448, "grad_norm": 0.354518860578537, "learning_rate": 8.167586110242522e-06, "loss": 0.438, "step": 3268 }, { "epoch": 1.7673454676518292, "grad_norm": 0.3048177659511566, "learning_rate": 8.16612510318228e-06, "loss": 0.4042, "step": 3269 }, { "epoch": 1.7678861056046133, "grad_norm": 0.32324472069740295, "learning_rate": 8.164663644701662e-06, "loss": 0.4426, "step": 3270 }, { "epoch": 1.7684267435573977, "grad_norm": 0.3259657919406891, "learning_rate": 8.163201735009041e-06, "loss": 0.4184, "step": 3271 }, { "epoch": 1.768967381510182, "grad_norm": 0.3852538466453552, "learning_rate": 8.161739374312852e-06, "loss": 0.4176, "step": 3272 }, { "epoch": 1.7695080194629664, "grad_norm": 0.30960458517074585, "learning_rate": 8.160276562821594e-06, "loss": 0.4033, "step": 3273 }, { "epoch": 1.7700486574157506, "grad_norm": 0.3406146466732025, "learning_rate": 8.158813300743835e-06, "loss": 0.3973, "step": 3274 }, { "epoch": 1.7705892953685347, "grad_norm": 0.3403013348579407, "learning_rate": 8.157349588288202e-06, "loss": 0.4278, "step": 3275 }, { "epoch": 1.771129933321319, "grad_norm": 0.3388356864452362, "learning_rate": 8.155885425663389e-06, "loss": 0.4677, "step": 3276 }, { "epoch": 1.7716705712741034, "grad_norm": 0.3182254433631897, "learning_rate": 8.154420813078155e-06, "loss": 0.4176, "step": 3277 }, { "epoch": 1.7722112092268878, "grad_norm": 0.34564125537872314, "learning_rate": 8.15295575074132e-06, "loss": 0.401, "step": 3278 }, { "epoch": 1.7727518471796722, "grad_norm": 0.3367253839969635, "learning_rate": 8.15149023886177e-06, "loss": 0.4193, "step": 3279 }, { "epoch": 1.7732924851324563, "grad_norm": 0.3374026119709015, "learning_rate": 8.150024277648458e-06, "loss": 0.4362, "step": 3280 }, { "epoch": 1.7738331230852404, "grad_norm": 0.3175216615200043, "learning_rate": 8.148557867310393e-06, "loss": 0.426, "step": 3281 }, { "epoch": 1.7743737610380248, "grad_norm": 0.3488418161869049, "learning_rate": 8.147091008056658e-06, "loss": 0.4802, "step": 3282 }, { "epoch": 1.7749143989908092, "grad_norm": 0.34109702706336975, "learning_rate": 8.145623700096394e-06, "loss": 0.4823, "step": 3283 }, { "epoch": 1.7754550369435935, "grad_norm": 0.3144458532333374, "learning_rate": 8.144155943638804e-06, "loss": 0.3939, "step": 3284 }, { "epoch": 1.7759956748963779, "grad_norm": 0.33580702543258667, "learning_rate": 8.142687738893161e-06, "loss": 0.434, "step": 3285 }, { "epoch": 1.776536312849162, "grad_norm": 0.3042031228542328, "learning_rate": 8.1412190860688e-06, "loss": 0.4064, "step": 3286 }, { "epoch": 1.7770769508019462, "grad_norm": 0.30901986360549927, "learning_rate": 8.139749985375113e-06, "loss": 0.4476, "step": 3287 }, { "epoch": 1.7776175887547305, "grad_norm": 0.32003363966941833, "learning_rate": 8.138280437021569e-06, "loss": 0.3802, "step": 3288 }, { "epoch": 1.7781582267075149, "grad_norm": 0.3311749994754791, "learning_rate": 8.13681044121769e-06, "loss": 0.4242, "step": 3289 }, { "epoch": 1.7786988646602993, "grad_norm": 0.3157782256603241, "learning_rate": 8.135339998173064e-06, "loss": 0.4406, "step": 3290 }, { "epoch": 1.7792395026130834, "grad_norm": 0.34921789169311523, "learning_rate": 8.133869108097349e-06, "loss": 0.4257, "step": 3291 }, { "epoch": 1.7797801405658678, "grad_norm": 0.3359015882015228, "learning_rate": 8.132397771200256e-06, "loss": 0.4576, "step": 3292 }, { "epoch": 1.780320778518652, "grad_norm": 0.31737324595451355, "learning_rate": 8.13092598769157e-06, "loss": 0.4271, "step": 3293 }, { "epoch": 1.7808614164714363, "grad_norm": 0.35905441641807556, "learning_rate": 8.129453757781132e-06, "loss": 0.4522, "step": 3294 }, { "epoch": 1.7814020544242206, "grad_norm": 0.30049991607666016, "learning_rate": 8.12798108167885e-06, "loss": 0.3941, "step": 3295 }, { "epoch": 1.781942692377005, "grad_norm": 0.34811657667160034, "learning_rate": 8.1265079595947e-06, "loss": 0.4553, "step": 3296 }, { "epoch": 1.7824833303297891, "grad_norm": 0.31443458795547485, "learning_rate": 8.125034391738712e-06, "loss": 0.4523, "step": 3297 }, { "epoch": 1.7830239682825735, "grad_norm": 0.29668858647346497, "learning_rate": 8.123560378320988e-06, "loss": 0.3684, "step": 3298 }, { "epoch": 1.7835646062353576, "grad_norm": 0.3364712595939636, "learning_rate": 8.122085919551685e-06, "loss": 0.4364, "step": 3299 }, { "epoch": 1.784105244188142, "grad_norm": 0.3368052542209625, "learning_rate": 8.120611015641036e-06, "loss": 0.4778, "step": 3300 }, { "epoch": 1.7846458821409263, "grad_norm": 0.3242607116699219, "learning_rate": 8.119135666799324e-06, "loss": 0.4486, "step": 3301 }, { "epoch": 1.7851865200937107, "grad_norm": 0.30162885785102844, "learning_rate": 8.117659873236906e-06, "loss": 0.4081, "step": 3302 }, { "epoch": 1.7857271580464948, "grad_norm": 0.32313260436058044, "learning_rate": 8.116183635164193e-06, "loss": 0.4478, "step": 3303 }, { "epoch": 1.786267795999279, "grad_norm": 0.34067660570144653, "learning_rate": 8.11470695279167e-06, "loss": 0.4393, "step": 3304 }, { "epoch": 1.7868084339520633, "grad_norm": 0.30169734358787537, "learning_rate": 8.113229826329876e-06, "loss": 0.4139, "step": 3305 }, { "epoch": 1.7873490719048477, "grad_norm": 0.3240569829940796, "learning_rate": 8.11175225598942e-06, "loss": 0.4215, "step": 3306 }, { "epoch": 1.787889709857632, "grad_norm": 0.35240086913108826, "learning_rate": 8.110274241980967e-06, "loss": 0.4272, "step": 3307 }, { "epoch": 1.7884303478104164, "grad_norm": 0.2917160093784332, "learning_rate": 8.108795784515252e-06, "loss": 0.4377, "step": 3308 }, { "epoch": 1.7889709857632006, "grad_norm": 0.3504859507083893, "learning_rate": 8.107316883803071e-06, "loss": 0.4809, "step": 3309 }, { "epoch": 1.7895116237159847, "grad_norm": 0.3004903197288513, "learning_rate": 8.105837540055284e-06, "loss": 0.3998, "step": 3310 }, { "epoch": 1.790052261668769, "grad_norm": 0.3329935371875763, "learning_rate": 8.10435775348281e-06, "loss": 0.4854, "step": 3311 }, { "epoch": 1.7905928996215534, "grad_norm": 0.28653180599212646, "learning_rate": 8.102877524296637e-06, "loss": 0.3824, "step": 3312 }, { "epoch": 1.7911335375743378, "grad_norm": 0.34429025650024414, "learning_rate": 8.101396852707811e-06, "loss": 0.4145, "step": 3313 }, { "epoch": 1.7916741755271222, "grad_norm": 0.33222684264183044, "learning_rate": 8.099915738927446e-06, "loss": 0.458, "step": 3314 }, { "epoch": 1.7922148134799063, "grad_norm": 0.3459024727344513, "learning_rate": 8.098434183166716e-06, "loss": 0.4562, "step": 3315 }, { "epoch": 1.7927554514326904, "grad_norm": 0.3265814781188965, "learning_rate": 8.096952185636856e-06, "loss": 0.4364, "step": 3316 }, { "epoch": 1.7932960893854748, "grad_norm": 0.36054661870002747, "learning_rate": 8.095469746549172e-06, "loss": 0.4636, "step": 3317 }, { "epoch": 1.7938367273382592, "grad_norm": 0.3394835889339447, "learning_rate": 8.09398686611502e-06, "loss": 0.4258, "step": 3318 }, { "epoch": 1.7943773652910435, "grad_norm": 0.3196641802787781, "learning_rate": 8.092503544545834e-06, "loss": 0.3846, "step": 3319 }, { "epoch": 1.7949180032438277, "grad_norm": 0.34875744581222534, "learning_rate": 8.091019782053097e-06, "loss": 0.4571, "step": 3320 }, { "epoch": 1.795458641196612, "grad_norm": 0.34042981266975403, "learning_rate": 8.089535578848364e-06, "loss": 0.4589, "step": 3321 }, { "epoch": 1.7959992791493962, "grad_norm": 0.32495275139808655, "learning_rate": 8.088050935143252e-06, "loss": 0.4319, "step": 3322 }, { "epoch": 1.7965399171021805, "grad_norm": 0.33212369680404663, "learning_rate": 8.086565851149435e-06, "loss": 0.419, "step": 3323 }, { "epoch": 1.797080555054965, "grad_norm": 0.30814874172210693, "learning_rate": 8.085080327078656e-06, "loss": 0.4376, "step": 3324 }, { "epoch": 1.7976211930077493, "grad_norm": 0.35443365573883057, "learning_rate": 8.083594363142717e-06, "loss": 0.4306, "step": 3325 }, { "epoch": 1.7981618309605334, "grad_norm": 0.33060944080352783, "learning_rate": 8.082107959553484e-06, "loss": 0.4332, "step": 3326 }, { "epoch": 1.7987024689133178, "grad_norm": 0.3036220967769623, "learning_rate": 8.080621116522886e-06, "loss": 0.4198, "step": 3327 }, { "epoch": 1.799243106866102, "grad_norm": 0.3334490954875946, "learning_rate": 8.079133834262916e-06, "loss": 0.4342, "step": 3328 }, { "epoch": 1.7997837448188863, "grad_norm": 0.3297540843486786, "learning_rate": 8.077646112985626e-06, "loss": 0.4735, "step": 3329 }, { "epoch": 1.8003243827716706, "grad_norm": 0.32654431462287903, "learning_rate": 8.076157952903134e-06, "loss": 0.3758, "step": 3330 }, { "epoch": 1.800865020724455, "grad_norm": 0.3422190845012665, "learning_rate": 8.07466935422762e-06, "loss": 0.4157, "step": 3331 }, { "epoch": 1.8014056586772391, "grad_norm": 0.34716933965682983, "learning_rate": 8.073180317171322e-06, "loss": 0.5095, "step": 3332 }, { "epoch": 1.8019462966300235, "grad_norm": 0.31180334091186523, "learning_rate": 8.071690841946547e-06, "loss": 0.3699, "step": 3333 }, { "epoch": 1.8024869345828076, "grad_norm": 0.3031768500804901, "learning_rate": 8.070200928765661e-06, "loss": 0.4465, "step": 3334 }, { "epoch": 1.803027572535592, "grad_norm": 0.31092366576194763, "learning_rate": 8.068710577841093e-06, "loss": 0.4141, "step": 3335 }, { "epoch": 1.8035682104883763, "grad_norm": 0.3233906030654907, "learning_rate": 8.067219789385335e-06, "loss": 0.4207, "step": 3336 }, { "epoch": 1.8041088484411607, "grad_norm": 0.3430812954902649, "learning_rate": 8.06572856361094e-06, "loss": 0.4896, "step": 3337 }, { "epoch": 1.8046494863939448, "grad_norm": 0.2886134684085846, "learning_rate": 8.064236900730526e-06, "loss": 0.4108, "step": 3338 }, { "epoch": 1.805190124346729, "grad_norm": 0.3320710062980652, "learning_rate": 8.06274480095677e-06, "loss": 0.3786, "step": 3339 }, { "epoch": 1.8057307622995133, "grad_norm": 0.34547311067581177, "learning_rate": 8.061252264502415e-06, "loss": 0.4575, "step": 3340 }, { "epoch": 1.8062714002522977, "grad_norm": 0.32114607095718384, "learning_rate": 8.05975929158026e-06, "loss": 0.4365, "step": 3341 }, { "epoch": 1.806812038205082, "grad_norm": 0.3137049973011017, "learning_rate": 8.058265882403174e-06, "loss": 0.4302, "step": 3342 }, { "epoch": 1.8073526761578664, "grad_norm": 0.33785489201545715, "learning_rate": 8.056772037184083e-06, "loss": 0.4535, "step": 3343 }, { "epoch": 1.8078933141106506, "grad_norm": 0.3604099452495575, "learning_rate": 8.055277756135978e-06, "loss": 0.4406, "step": 3344 }, { "epoch": 1.8084339520634347, "grad_norm": 0.35606586933135986, "learning_rate": 8.053783039471909e-06, "loss": 0.455, "step": 3345 }, { "epoch": 1.808974590016219, "grad_norm": 0.3593486547470093, "learning_rate": 8.052287887404992e-06, "loss": 0.4388, "step": 3346 }, { "epoch": 1.8095152279690034, "grad_norm": 0.3589855134487152, "learning_rate": 8.050792300148402e-06, "loss": 0.439, "step": 3347 }, { "epoch": 1.8100558659217878, "grad_norm": 0.3543134033679962, "learning_rate": 8.049296277915378e-06, "loss": 0.4468, "step": 3348 }, { "epoch": 1.8105965038745722, "grad_norm": 0.40692999958992004, "learning_rate": 8.047799820919218e-06, "loss": 0.4285, "step": 3349 }, { "epoch": 1.8111371418273563, "grad_norm": 0.35559090971946716, "learning_rate": 8.046302929373286e-06, "loss": 0.4082, "step": 3350 }, { "epoch": 1.8116777797801404, "grad_norm": 0.40692654252052307, "learning_rate": 8.044805603491005e-06, "loss": 0.4688, "step": 3351 }, { "epoch": 1.8122184177329248, "grad_norm": 0.36968672275543213, "learning_rate": 8.043307843485863e-06, "loss": 0.4103, "step": 3352 }, { "epoch": 1.8127590556857092, "grad_norm": 0.4615669548511505, "learning_rate": 8.041809649571406e-06, "loss": 0.4533, "step": 3353 }, { "epoch": 1.8132996936384935, "grad_norm": 0.3097493052482605, "learning_rate": 8.040311021961245e-06, "loss": 0.4162, "step": 3354 }, { "epoch": 1.8138403315912777, "grad_norm": 0.374896764755249, "learning_rate": 8.038811960869051e-06, "loss": 0.3935, "step": 3355 }, { "epoch": 1.814380969544062, "grad_norm": 0.3678896129131317, "learning_rate": 8.037312466508555e-06, "loss": 0.4263, "step": 3356 }, { "epoch": 1.8149216074968462, "grad_norm": 0.3088608682155609, "learning_rate": 8.035812539093557e-06, "loss": 0.4408, "step": 3357 }, { "epoch": 1.8154622454496305, "grad_norm": 0.3997863233089447, "learning_rate": 8.034312178837911e-06, "loss": 0.4149, "step": 3358 }, { "epoch": 1.816002883402415, "grad_norm": 0.3411961793899536, "learning_rate": 8.032811385955535e-06, "loss": 0.4484, "step": 3359 }, { "epoch": 1.8165435213551993, "grad_norm": 0.3319053649902344, "learning_rate": 8.031310160660411e-06, "loss": 0.4202, "step": 3360 }, { "epoch": 1.8170841593079834, "grad_norm": 0.36736491322517395, "learning_rate": 8.02980850316658e-06, "loss": 0.4341, "step": 3361 }, { "epoch": 1.8176247972607678, "grad_norm": 0.34897086024284363, "learning_rate": 8.028306413688147e-06, "loss": 0.4524, "step": 3362 }, { "epoch": 1.818165435213552, "grad_norm": 0.36966174840927124, "learning_rate": 8.026803892439276e-06, "loss": 0.4693, "step": 3363 }, { "epoch": 1.8187060731663363, "grad_norm": 0.32758980989456177, "learning_rate": 8.025300939634193e-06, "loss": 0.3934, "step": 3364 }, { "epoch": 1.8192467111191206, "grad_norm": 0.31811997294425964, "learning_rate": 8.023797555487188e-06, "loss": 0.4073, "step": 3365 }, { "epoch": 1.819787349071905, "grad_norm": 0.3481244742870331, "learning_rate": 8.02229374021261e-06, "loss": 0.4686, "step": 3366 }, { "epoch": 1.8203279870246891, "grad_norm": 0.2885033190250397, "learning_rate": 8.02078949402487e-06, "loss": 0.3879, "step": 3367 }, { "epoch": 1.8208686249774733, "grad_norm": 0.33795928955078125, "learning_rate": 8.019284817138442e-06, "loss": 0.4468, "step": 3368 }, { "epoch": 1.8214092629302576, "grad_norm": 0.35017502307891846, "learning_rate": 8.017779709767857e-06, "loss": 0.403, "step": 3369 }, { "epoch": 1.821949900883042, "grad_norm": 0.32768213748931885, "learning_rate": 8.016274172127715e-06, "loss": 0.4891, "step": 3370 }, { "epoch": 1.8224905388358263, "grad_norm": 0.37308111786842346, "learning_rate": 8.01476820443267e-06, "loss": 0.4717, "step": 3371 }, { "epoch": 1.8230311767886107, "grad_norm": 0.3486446440219879, "learning_rate": 8.01326180689744e-06, "loss": 0.3905, "step": 3372 }, { "epoch": 1.8235718147413948, "grad_norm": 0.3119492828845978, "learning_rate": 8.011754979736804e-06, "loss": 0.4412, "step": 3373 }, { "epoch": 1.824112452694179, "grad_norm": 0.41840484738349915, "learning_rate": 8.010247723165604e-06, "loss": 0.4477, "step": 3374 }, { "epoch": 1.8246530906469633, "grad_norm": 0.34604212641716003, "learning_rate": 8.008740037398742e-06, "loss": 0.4532, "step": 3375 }, { "epoch": 1.8251937285997477, "grad_norm": 0.3138395845890045, "learning_rate": 8.00723192265118e-06, "loss": 0.421, "step": 3376 }, { "epoch": 1.825734366552532, "grad_norm": 0.3667658865451813, "learning_rate": 8.005723379137944e-06, "loss": 0.4477, "step": 3377 }, { "epoch": 1.8262750045053164, "grad_norm": 0.3710743486881256, "learning_rate": 8.004214407074118e-06, "loss": 0.4334, "step": 3378 }, { "epoch": 1.8268156424581006, "grad_norm": 0.35465577244758606, "learning_rate": 8.002705006674849e-06, "loss": 0.4332, "step": 3379 }, { "epoch": 1.8273562804108847, "grad_norm": 0.3599276542663574, "learning_rate": 8.001195178155344e-06, "loss": 0.4571, "step": 3380 }, { "epoch": 1.827896918363669, "grad_norm": 0.35257986187934875, "learning_rate": 7.999684921730872e-06, "loss": 0.4335, "step": 3381 }, { "epoch": 1.8284375563164534, "grad_norm": 0.3256742060184479, "learning_rate": 7.998174237616763e-06, "loss": 0.4154, "step": 3382 }, { "epoch": 1.8289781942692378, "grad_norm": 0.3676372468471527, "learning_rate": 7.996663126028406e-06, "loss": 0.4634, "step": 3383 }, { "epoch": 1.829518832222022, "grad_norm": 0.2912156581878662, "learning_rate": 7.995151587181256e-06, "loss": 0.3677, "step": 3384 }, { "epoch": 1.8300594701748063, "grad_norm": 0.31365203857421875, "learning_rate": 7.99363962129082e-06, "loss": 0.4162, "step": 3385 }, { "epoch": 1.8306001081275904, "grad_norm": 0.3299397826194763, "learning_rate": 7.992127228572677e-06, "loss": 0.4345, "step": 3386 }, { "epoch": 1.8311407460803748, "grad_norm": 0.33139047026634216, "learning_rate": 7.990614409242458e-06, "loss": 0.4299, "step": 3387 }, { "epoch": 1.8316813840331592, "grad_norm": 0.3328225314617157, "learning_rate": 7.98910116351586e-06, "loss": 0.4504, "step": 3388 }, { "epoch": 1.8322220219859435, "grad_norm": 0.3534029424190521, "learning_rate": 7.987587491608636e-06, "loss": 0.4632, "step": 3389 }, { "epoch": 1.8327626599387277, "grad_norm": 0.3083217144012451, "learning_rate": 7.986073393736607e-06, "loss": 0.4132, "step": 3390 }, { "epoch": 1.833303297891512, "grad_norm": 0.3547525703907013, "learning_rate": 7.984558870115645e-06, "loss": 0.4629, "step": 3391 }, { "epoch": 1.8338439358442962, "grad_norm": 0.3502817749977112, "learning_rate": 7.983043920961692e-06, "loss": 0.4646, "step": 3392 }, { "epoch": 1.8343845737970805, "grad_norm": 0.33607080578804016, "learning_rate": 7.981528546490744e-06, "loss": 0.4354, "step": 3393 }, { "epoch": 1.834925211749865, "grad_norm": 0.33097922801971436, "learning_rate": 7.980012746918863e-06, "loss": 0.4483, "step": 3394 }, { "epoch": 1.8354658497026493, "grad_norm": 0.3429686725139618, "learning_rate": 7.978496522462167e-06, "loss": 0.4176, "step": 3395 }, { "epoch": 1.8360064876554334, "grad_norm": 0.34447550773620605, "learning_rate": 7.976979873336838e-06, "loss": 0.4116, "step": 3396 }, { "epoch": 1.8365471256082175, "grad_norm": 0.30354902148246765, "learning_rate": 7.975462799759115e-06, "loss": 0.4629, "step": 3397 }, { "epoch": 1.837087763561002, "grad_norm": 0.318607360124588, "learning_rate": 7.973945301945302e-06, "loss": 0.4459, "step": 3398 }, { "epoch": 1.8376284015137863, "grad_norm": 0.35816827416419983, "learning_rate": 7.97242738011176e-06, "loss": 0.4536, "step": 3399 }, { "epoch": 1.8381690394665706, "grad_norm": 0.31781914830207825, "learning_rate": 7.97090903447491e-06, "loss": 0.4347, "step": 3400 }, { "epoch": 1.838709677419355, "grad_norm": 0.34621742367744446, "learning_rate": 7.969390265251238e-06, "loss": 0.4336, "step": 3401 }, { "epoch": 1.8392503153721391, "grad_norm": 0.36284780502319336, "learning_rate": 7.967871072657285e-06, "loss": 0.4342, "step": 3402 }, { "epoch": 1.8397909533249233, "grad_norm": 0.3561781346797943, "learning_rate": 7.966351456909656e-06, "loss": 0.4454, "step": 3403 }, { "epoch": 1.8403315912777076, "grad_norm": 0.3318120241165161, "learning_rate": 7.964831418225015e-06, "loss": 0.4255, "step": 3404 }, { "epoch": 1.840872229230492, "grad_norm": 0.376350998878479, "learning_rate": 7.963310956820085e-06, "loss": 0.434, "step": 3405 }, { "epoch": 1.8414128671832763, "grad_norm": 0.3097081184387207, "learning_rate": 7.96179007291165e-06, "loss": 0.4006, "step": 3406 }, { "epoch": 1.8419535051360607, "grad_norm": 0.4447126090526581, "learning_rate": 7.960268766716561e-06, "loss": 0.4955, "step": 3407 }, { "epoch": 1.8424941430888448, "grad_norm": 0.3248273432254791, "learning_rate": 7.958747038451715e-06, "loss": 0.4237, "step": 3408 }, { "epoch": 1.843034781041629, "grad_norm": 0.3516506552696228, "learning_rate": 7.957224888334084e-06, "loss": 0.4339, "step": 3409 }, { "epoch": 1.8435754189944134, "grad_norm": 0.34608927369117737, "learning_rate": 7.955702316580686e-06, "loss": 0.3852, "step": 3410 }, { "epoch": 1.8441160569471977, "grad_norm": 0.37326952815055847, "learning_rate": 7.954179323408613e-06, "loss": 0.4479, "step": 3411 }, { "epoch": 1.844656694899982, "grad_norm": 0.36792048811912537, "learning_rate": 7.952655909035008e-06, "loss": 0.4638, "step": 3412 }, { "epoch": 1.8451973328527662, "grad_norm": 0.32509738206863403, "learning_rate": 7.951132073677077e-06, "loss": 0.4225, "step": 3413 }, { "epoch": 1.8457379708055506, "grad_norm": 0.3608253002166748, "learning_rate": 7.949607817552086e-06, "loss": 0.4148, "step": 3414 }, { "epoch": 1.8462786087583347, "grad_norm": 0.38289326429367065, "learning_rate": 7.94808314087736e-06, "loss": 0.4439, "step": 3415 }, { "epoch": 1.846819246711119, "grad_norm": 0.35737040638923645, "learning_rate": 7.946558043870286e-06, "loss": 0.4638, "step": 3416 }, { "epoch": 1.8473598846639034, "grad_norm": 0.32094112038612366, "learning_rate": 7.945032526748308e-06, "loss": 0.4111, "step": 3417 }, { "epoch": 1.8479005226166878, "grad_norm": 0.3625240921974182, "learning_rate": 7.943506589728931e-06, "loss": 0.4163, "step": 3418 }, { "epoch": 1.848441160569472, "grad_norm": 0.346235990524292, "learning_rate": 7.941980233029723e-06, "loss": 0.4296, "step": 3419 }, { "epoch": 1.8489817985222563, "grad_norm": 0.31741318106651306, "learning_rate": 7.940453456868304e-06, "loss": 0.4189, "step": 3420 }, { "epoch": 1.8495224364750404, "grad_norm": 0.39256808161735535, "learning_rate": 7.938926261462366e-06, "loss": 0.4774, "step": 3421 }, { "epoch": 1.8500630744278248, "grad_norm": 0.39664962887763977, "learning_rate": 7.93739864702965e-06, "loss": 0.4316, "step": 3422 }, { "epoch": 1.8506037123806092, "grad_norm": 0.328249990940094, "learning_rate": 7.93587061378796e-06, "loss": 0.4067, "step": 3423 }, { "epoch": 1.8511443503333935, "grad_norm": 0.4215303063392639, "learning_rate": 7.93434216195516e-06, "loss": 0.4366, "step": 3424 }, { "epoch": 1.8516849882861777, "grad_norm": 0.3874909281730652, "learning_rate": 7.932813291749177e-06, "loss": 0.4325, "step": 3425 }, { "epoch": 1.8522256262389618, "grad_norm": 0.30493733286857605, "learning_rate": 7.93128400338799e-06, "loss": 0.388, "step": 3426 }, { "epoch": 1.8527662641917462, "grad_norm": 0.4207254648208618, "learning_rate": 7.929754297089646e-06, "loss": 0.4686, "step": 3427 }, { "epoch": 1.8533069021445305, "grad_norm": 0.3271644115447998, "learning_rate": 7.928224173072247e-06, "loss": 0.4329, "step": 3428 }, { "epoch": 1.853847540097315, "grad_norm": 0.3401440978050232, "learning_rate": 7.926693631553955e-06, "loss": 0.4076, "step": 3429 }, { "epoch": 1.8543881780500993, "grad_norm": 0.44994547963142395, "learning_rate": 7.925162672752989e-06, "loss": 0.4323, "step": 3430 }, { "epoch": 1.8549288160028834, "grad_norm": 0.3586891293525696, "learning_rate": 7.923631296887634e-06, "loss": 0.4322, "step": 3431 }, { "epoch": 1.8554694539556675, "grad_norm": 0.3234873414039612, "learning_rate": 7.92209950417623e-06, "loss": 0.4233, "step": 3432 }, { "epoch": 1.856010091908452, "grad_norm": 0.3968302309513092, "learning_rate": 7.920567294837176e-06, "loss": 0.4589, "step": 3433 }, { "epoch": 1.8565507298612363, "grad_norm": 0.3297528028488159, "learning_rate": 7.919034669088933e-06, "loss": 0.4487, "step": 3434 }, { "epoch": 1.8570913678140206, "grad_norm": 0.3599737286567688, "learning_rate": 7.917501627150019e-06, "loss": 0.4642, "step": 3435 }, { "epoch": 1.857632005766805, "grad_norm": 0.3447016477584839, "learning_rate": 7.915968169239012e-06, "loss": 0.4301, "step": 3436 }, { "epoch": 1.8581726437195891, "grad_norm": 0.3434467315673828, "learning_rate": 7.914434295574552e-06, "loss": 0.4359, "step": 3437 }, { "epoch": 1.8587132816723733, "grad_norm": 0.3599534034729004, "learning_rate": 7.912900006375334e-06, "loss": 0.4322, "step": 3438 }, { "epoch": 1.8592539196251576, "grad_norm": 0.3040129244327545, "learning_rate": 7.911365301860114e-06, "loss": 0.3793, "step": 3439 }, { "epoch": 1.859794557577942, "grad_norm": 0.352145254611969, "learning_rate": 7.90983018224771e-06, "loss": 0.4731, "step": 3440 }, { "epoch": 1.8603351955307263, "grad_norm": 0.36871862411499023, "learning_rate": 7.908294647756992e-06, "loss": 0.4668, "step": 3441 }, { "epoch": 1.8608758334835105, "grad_norm": 0.3542006015777588, "learning_rate": 7.906758698606895e-06, "loss": 0.4712, "step": 3442 }, { "epoch": 1.8614164714362949, "grad_norm": 0.3433796465396881, "learning_rate": 7.905222335016417e-06, "loss": 0.3947, "step": 3443 }, { "epoch": 1.861957109389079, "grad_norm": 0.3266144394874573, "learning_rate": 7.903685557204601e-06, "loss": 0.461, "step": 3444 }, { "epoch": 1.8624977473418634, "grad_norm": 0.36006054282188416, "learning_rate": 7.902148365390567e-06, "loss": 0.414, "step": 3445 }, { "epoch": 1.8630383852946477, "grad_norm": 0.3393409550189972, "learning_rate": 7.90061075979348e-06, "loss": 0.4392, "step": 3446 }, { "epoch": 1.863579023247432, "grad_norm": 0.30579596757888794, "learning_rate": 7.89907274063257e-06, "loss": 0.4311, "step": 3447 }, { "epoch": 1.8641196612002162, "grad_norm": 0.37538549304008484, "learning_rate": 7.897534308127123e-06, "loss": 0.4638, "step": 3448 }, { "epoch": 1.8646602991530006, "grad_norm": 0.3323426842689514, "learning_rate": 7.895995462496491e-06, "loss": 0.4363, "step": 3449 }, { "epoch": 1.8652009371057847, "grad_norm": 0.30412882566452026, "learning_rate": 7.894456203960075e-06, "loss": 0.4247, "step": 3450 }, { "epoch": 1.865741575058569, "grad_norm": 0.3254438042640686, "learning_rate": 7.892916532737343e-06, "loss": 0.4238, "step": 3451 }, { "epoch": 1.8662822130113534, "grad_norm": 0.3321169316768646, "learning_rate": 7.891376449047813e-06, "loss": 0.4779, "step": 3452 }, { "epoch": 1.8668228509641378, "grad_norm": 0.28796014189720154, "learning_rate": 7.889835953111075e-06, "loss": 0.3889, "step": 3453 }, { "epoch": 1.867363488916922, "grad_norm": 0.33776959776878357, "learning_rate": 7.888295045146766e-06, "loss": 0.4302, "step": 3454 }, { "epoch": 1.8679041268697063, "grad_norm": 0.30615368485450745, "learning_rate": 7.886753725374586e-06, "loss": 0.4116, "step": 3455 }, { "epoch": 1.8684447648224904, "grad_norm": 0.3729323744773865, "learning_rate": 7.885211994014294e-06, "loss": 0.4937, "step": 3456 }, { "epoch": 1.8689854027752748, "grad_norm": 0.2820781469345093, "learning_rate": 7.883669851285707e-06, "loss": 0.3901, "step": 3457 }, { "epoch": 1.8695260407280592, "grad_norm": 0.3319065570831299, "learning_rate": 7.8821272974087e-06, "loss": 0.4388, "step": 3458 }, { "epoch": 1.8700666786808435, "grad_norm": 0.32416149973869324, "learning_rate": 7.88058433260321e-06, "loss": 0.4215, "step": 3459 }, { "epoch": 1.8706073166336277, "grad_norm": 0.37668269872665405, "learning_rate": 7.879040957089229e-06, "loss": 0.4589, "step": 3460 }, { "epoch": 1.8711479545864118, "grad_norm": 0.3454728126525879, "learning_rate": 7.877497171086805e-06, "loss": 0.4593, "step": 3461 }, { "epoch": 1.8716885925391962, "grad_norm": 0.2921670377254486, "learning_rate": 7.875952974816054e-06, "loss": 0.4146, "step": 3462 }, { "epoch": 1.8722292304919805, "grad_norm": 0.33144211769104004, "learning_rate": 7.874408368497142e-06, "loss": 0.4077, "step": 3463 }, { "epoch": 1.872769868444765, "grad_norm": 0.3379572033882141, "learning_rate": 7.872863352350298e-06, "loss": 0.4074, "step": 3464 }, { "epoch": 1.8733105063975493, "grad_norm": 0.34386518597602844, "learning_rate": 7.871317926595804e-06, "loss": 0.4467, "step": 3465 }, { "epoch": 1.8738511443503334, "grad_norm": 0.35571232438087463, "learning_rate": 7.869772091454007e-06, "loss": 0.481, "step": 3466 }, { "epoch": 1.8743917823031175, "grad_norm": 0.30590566992759705, "learning_rate": 7.868225847145308e-06, "loss": 0.3902, "step": 3467 }, { "epoch": 1.874932420255902, "grad_norm": 0.3952900767326355, "learning_rate": 7.86667919389017e-06, "loss": 0.4655, "step": 3468 }, { "epoch": 1.8754730582086863, "grad_norm": 0.31838780641555786, "learning_rate": 7.865132131909106e-06, "loss": 0.4241, "step": 3469 }, { "epoch": 1.8760136961614706, "grad_norm": 0.3257013261318207, "learning_rate": 7.8635846614227e-06, "loss": 0.4763, "step": 3470 }, { "epoch": 1.876554334114255, "grad_norm": 0.35523343086242676, "learning_rate": 7.862036782651586e-06, "loss": 0.4286, "step": 3471 }, { "epoch": 1.8770949720670391, "grad_norm": 0.3634128272533417, "learning_rate": 7.860488495816456e-06, "loss": 0.4624, "step": 3472 }, { "epoch": 1.8776356100198233, "grad_norm": 0.3118461072444916, "learning_rate": 7.858939801138061e-06, "loss": 0.3846, "step": 3473 }, { "epoch": 1.8781762479726076, "grad_norm": 0.39412805438041687, "learning_rate": 7.857390698837214e-06, "loss": 0.4756, "step": 3474 }, { "epoch": 1.878716885925392, "grad_norm": 0.33649295568466187, "learning_rate": 7.855841189134784e-06, "loss": 0.418, "step": 3475 }, { "epoch": 1.8792575238781764, "grad_norm": 0.3323207497596741, "learning_rate": 7.854291272251692e-06, "loss": 0.3778, "step": 3476 }, { "epoch": 1.8797981618309605, "grad_norm": 0.3266110122203827, "learning_rate": 7.852740948408928e-06, "loss": 0.4403, "step": 3477 }, { "epoch": 1.8803387997837449, "grad_norm": 0.35497087240219116, "learning_rate": 7.85119021782753e-06, "loss": 0.4386, "step": 3478 }, { "epoch": 1.880879437736529, "grad_norm": 0.3806227445602417, "learning_rate": 7.849639080728601e-06, "loss": 0.4426, "step": 3479 }, { "epoch": 1.8814200756893134, "grad_norm": 0.33290979266166687, "learning_rate": 7.848087537333298e-06, "loss": 0.46, "step": 3480 }, { "epoch": 1.8819607136420977, "grad_norm": 0.42978647351264954, "learning_rate": 7.846535587862838e-06, "loss": 0.409, "step": 3481 }, { "epoch": 1.882501351594882, "grad_norm": 0.33629342913627625, "learning_rate": 7.844983232538497e-06, "loss": 0.4345, "step": 3482 }, { "epoch": 1.8830419895476662, "grad_norm": 0.36188969016075134, "learning_rate": 7.843430471581603e-06, "loss": 0.4136, "step": 3483 }, { "epoch": 1.8835826275004506, "grad_norm": 0.34933236241340637, "learning_rate": 7.841877305213548e-06, "loss": 0.4289, "step": 3484 }, { "epoch": 1.8841232654532347, "grad_norm": 0.3108839988708496, "learning_rate": 7.84032373365578e-06, "loss": 0.4197, "step": 3485 }, { "epoch": 1.884663903406019, "grad_norm": 0.3623836934566498, "learning_rate": 7.838769757129804e-06, "loss": 0.4456, "step": 3486 }, { "epoch": 1.8852045413588034, "grad_norm": 0.34876587986946106, "learning_rate": 7.837215375857182e-06, "loss": 0.4237, "step": 3487 }, { "epoch": 1.8857451793115878, "grad_norm": 0.3331240713596344, "learning_rate": 7.835660590059537e-06, "loss": 0.4507, "step": 3488 }, { "epoch": 1.886285817264372, "grad_norm": 0.3328709006309509, "learning_rate": 7.834105399958545e-06, "loss": 0.4107, "step": 3489 }, { "epoch": 1.886826455217156, "grad_norm": 0.29972803592681885, "learning_rate": 7.832549805775945e-06, "loss": 0.3918, "step": 3490 }, { "epoch": 1.8873670931699404, "grad_norm": 0.3482237458229065, "learning_rate": 7.83099380773353e-06, "loss": 0.4382, "step": 3491 }, { "epoch": 1.8879077311227248, "grad_norm": 0.3243218660354614, "learning_rate": 7.829437406053149e-06, "loss": 0.4319, "step": 3492 }, { "epoch": 1.8884483690755092, "grad_norm": 0.3584705591201782, "learning_rate": 7.827880600956714e-06, "loss": 0.4782, "step": 3493 }, { "epoch": 1.8889890070282935, "grad_norm": 0.30217036604881287, "learning_rate": 7.82632339266619e-06, "loss": 0.4364, "step": 3494 }, { "epoch": 1.8895296449810777, "grad_norm": 0.3277549147605896, "learning_rate": 7.8247657814036e-06, "loss": 0.4096, "step": 3495 }, { "epoch": 1.8900702829338618, "grad_norm": 0.32769298553466797, "learning_rate": 7.823207767391027e-06, "loss": 0.4361, "step": 3496 }, { "epoch": 1.8906109208866462, "grad_norm": 0.2843538820743561, "learning_rate": 7.82164935085061e-06, "loss": 0.3908, "step": 3497 }, { "epoch": 1.8911515588394305, "grad_norm": 0.35002461075782776, "learning_rate": 7.820090532004546e-06, "loss": 0.4466, "step": 3498 }, { "epoch": 1.891692196792215, "grad_norm": 0.3506307899951935, "learning_rate": 7.818531311075084e-06, "loss": 0.4412, "step": 3499 }, { "epoch": 1.8922328347449993, "grad_norm": 0.3184232711791992, "learning_rate": 7.81697168828454e-06, "loss": 0.4426, "step": 3500 }, { "epoch": 1.8927734726977834, "grad_norm": 0.30824220180511475, "learning_rate": 7.815411663855279e-06, "loss": 0.389, "step": 3501 }, { "epoch": 1.8933141106505675, "grad_norm": 0.3077496588230133, "learning_rate": 7.813851238009728e-06, "loss": 0.435, "step": 3502 }, { "epoch": 1.893854748603352, "grad_norm": 0.350620299577713, "learning_rate": 7.81229041097037e-06, "loss": 0.4233, "step": 3503 }, { "epoch": 1.8943953865561363, "grad_norm": 0.34502118825912476, "learning_rate": 7.810729182959744e-06, "loss": 0.4362, "step": 3504 }, { "epoch": 1.8949360245089206, "grad_norm": 0.3388058841228485, "learning_rate": 7.809167554200446e-06, "loss": 0.4121, "step": 3505 }, { "epoch": 1.8954766624617048, "grad_norm": 0.3333636224269867, "learning_rate": 7.807605524915133e-06, "loss": 0.4545, "step": 3506 }, { "epoch": 1.8960173004144891, "grad_norm": 0.34134441614151, "learning_rate": 7.806043095326515e-06, "loss": 0.4315, "step": 3507 }, { "epoch": 1.8965579383672733, "grad_norm": 0.3529305160045624, "learning_rate": 7.804480265657359e-06, "loss": 0.4034, "step": 3508 }, { "epoch": 1.8970985763200576, "grad_norm": 0.3405922055244446, "learning_rate": 7.802917036130491e-06, "loss": 0.4338, "step": 3509 }, { "epoch": 1.897639214272842, "grad_norm": 0.34475529193878174, "learning_rate": 7.801353406968795e-06, "loss": 0.4081, "step": 3510 }, { "epoch": 1.8981798522256264, "grad_norm": 0.3404204845428467, "learning_rate": 7.79978937839521e-06, "loss": 0.4392, "step": 3511 }, { "epoch": 1.8987204901784105, "grad_norm": 0.3219848871231079, "learning_rate": 7.79822495063273e-06, "loss": 0.4109, "step": 3512 }, { "epoch": 1.8992611281311949, "grad_norm": 0.30592772364616394, "learning_rate": 7.796660123904412e-06, "loss": 0.415, "step": 3513 }, { "epoch": 1.899801766083979, "grad_norm": 0.2983476221561432, "learning_rate": 7.795094898433364e-06, "loss": 0.4261, "step": 3514 }, { "epoch": 1.9003424040367634, "grad_norm": 0.3173729479312897, "learning_rate": 7.793529274442753e-06, "loss": 0.4306, "step": 3515 }, { "epoch": 1.9008830419895477, "grad_norm": 0.27732619643211365, "learning_rate": 7.791963252155803e-06, "loss": 0.4165, "step": 3516 }, { "epoch": 1.901423679942332, "grad_norm": 0.31612640619277954, "learning_rate": 7.790396831795792e-06, "loss": 0.4575, "step": 3517 }, { "epoch": 1.9019643178951162, "grad_norm": 0.30507561564445496, "learning_rate": 7.788830013586064e-06, "loss": 0.4295, "step": 3518 }, { "epoch": 1.9025049558479004, "grad_norm": 0.34407278895378113, "learning_rate": 7.787262797750006e-06, "loss": 0.4468, "step": 3519 }, { "epoch": 1.9030455938006847, "grad_norm": 0.2905230224132538, "learning_rate": 7.785695184511074e-06, "loss": 0.4223, "step": 3520 }, { "epoch": 1.903586231753469, "grad_norm": 0.3119387924671173, "learning_rate": 7.784127174092773e-06, "loss": 0.4197, "step": 3521 }, { "epoch": 1.9041268697062534, "grad_norm": 0.30937427282333374, "learning_rate": 7.782558766718668e-06, "loss": 0.4294, "step": 3522 }, { "epoch": 1.9046675076590378, "grad_norm": 0.3073981702327728, "learning_rate": 7.780989962612377e-06, "loss": 0.408, "step": 3523 }, { "epoch": 1.905208145611822, "grad_norm": 0.3197193741798401, "learning_rate": 7.779420761997582e-06, "loss": 0.454, "step": 3524 }, { "epoch": 1.905748783564606, "grad_norm": 0.3320379853248596, "learning_rate": 7.777851165098012e-06, "loss": 0.4208, "step": 3525 }, { "epoch": 1.9062894215173904, "grad_norm": 0.3256038725376129, "learning_rate": 7.77628117213746e-06, "loss": 0.4337, "step": 3526 }, { "epoch": 1.9068300594701748, "grad_norm": 0.3060012459754944, "learning_rate": 7.774710783339772e-06, "loss": 0.4515, "step": 3527 }, { "epoch": 1.9073706974229592, "grad_norm": 0.33129894733428955, "learning_rate": 7.773139998928852e-06, "loss": 0.421, "step": 3528 }, { "epoch": 1.9079113353757435, "grad_norm": 0.3311441242694855, "learning_rate": 7.771568819128659e-06, "loss": 0.4309, "step": 3529 }, { "epoch": 1.9084519733285277, "grad_norm": 0.3432542383670807, "learning_rate": 7.769997244163209e-06, "loss": 0.4302, "step": 3530 }, { "epoch": 1.9089926112813118, "grad_norm": 0.33644434809684753, "learning_rate": 7.768425274256575e-06, "loss": 0.4412, "step": 3531 }, { "epoch": 1.9095332492340962, "grad_norm": 0.30734914541244507, "learning_rate": 7.766852909632882e-06, "loss": 0.4113, "step": 3532 }, { "epoch": 1.9100738871868805, "grad_norm": 0.34845325350761414, "learning_rate": 7.765280150516322e-06, "loss": 0.3858, "step": 3533 }, { "epoch": 1.910614525139665, "grad_norm": 0.35980260372161865, "learning_rate": 7.763706997131129e-06, "loss": 0.4582, "step": 3534 }, { "epoch": 1.911155163092449, "grad_norm": 0.32207977771759033, "learning_rate": 7.762133449701603e-06, "loss": 0.4366, "step": 3535 }, { "epoch": 1.9116958010452334, "grad_norm": 0.32457974553108215, "learning_rate": 7.760559508452099e-06, "loss": 0.4374, "step": 3536 }, { "epoch": 1.9122364389980175, "grad_norm": 0.33621159195899963, "learning_rate": 7.758985173607026e-06, "loss": 0.4739, "step": 3537 }, { "epoch": 1.912777076950802, "grad_norm": 0.3402005136013031, "learning_rate": 7.757410445390847e-06, "loss": 0.4149, "step": 3538 }, { "epoch": 1.9133177149035863, "grad_norm": 0.35434216260910034, "learning_rate": 7.755835324028089e-06, "loss": 0.424, "step": 3539 }, { "epoch": 1.9138583528563706, "grad_norm": 0.33663997054100037, "learning_rate": 7.754259809743325e-06, "loss": 0.4818, "step": 3540 }, { "epoch": 1.9143989908091548, "grad_norm": 0.30200737714767456, "learning_rate": 7.752683902761193e-06, "loss": 0.3807, "step": 3541 }, { "epoch": 1.9149396287619391, "grad_norm": 0.35024553537368774, "learning_rate": 7.75110760330638e-06, "loss": 0.4713, "step": 3542 }, { "epoch": 1.9154802667147233, "grad_norm": 0.3063545227050781, "learning_rate": 7.749530911603634e-06, "loss": 0.4605, "step": 3543 }, { "epoch": 1.9160209046675076, "grad_norm": 0.3189338147640228, "learning_rate": 7.747953827877754e-06, "loss": 0.4019, "step": 3544 }, { "epoch": 1.916561542620292, "grad_norm": 0.313576877117157, "learning_rate": 7.746376352353599e-06, "loss": 0.4276, "step": 3545 }, { "epoch": 1.9171021805730764, "grad_norm": 0.33931392431259155, "learning_rate": 7.744798485256085e-06, "loss": 0.4364, "step": 3546 }, { "epoch": 1.9176428185258605, "grad_norm": 0.3177691400051117, "learning_rate": 7.74322022681018e-06, "loss": 0.4454, "step": 3547 }, { "epoch": 1.9181834564786449, "grad_norm": 0.30451542139053345, "learning_rate": 7.741641577240908e-06, "loss": 0.4067, "step": 3548 }, { "epoch": 1.918724094431429, "grad_norm": 0.33761847019195557, "learning_rate": 7.740062536773352e-06, "loss": 0.4385, "step": 3549 }, { "epoch": 1.9192647323842134, "grad_norm": 0.3786282241344452, "learning_rate": 7.738483105632644e-06, "loss": 0.4336, "step": 3550 }, { "epoch": 1.9198053703369977, "grad_norm": 0.3104192912578583, "learning_rate": 7.736903284043985e-06, "loss": 0.4376, "step": 3551 }, { "epoch": 1.920346008289782, "grad_norm": 0.39415085315704346, "learning_rate": 7.735323072232615e-06, "loss": 0.4964, "step": 3552 }, { "epoch": 1.9208866462425662, "grad_norm": 0.32172977924346924, "learning_rate": 7.73374247042384e-06, "loss": 0.3928, "step": 3553 }, { "epoch": 1.9214272841953504, "grad_norm": 0.32149016857147217, "learning_rate": 7.732161478843021e-06, "loss": 0.4396, "step": 3554 }, { "epoch": 1.9219679221481347, "grad_norm": 0.32530462741851807, "learning_rate": 7.730580097715575e-06, "loss": 0.4396, "step": 3555 }, { "epoch": 1.922508560100919, "grad_norm": 0.3648417294025421, "learning_rate": 7.728998327266966e-06, "loss": 0.4428, "step": 3556 }, { "epoch": 1.9230491980537034, "grad_norm": 0.3257485032081604, "learning_rate": 7.727416167722724e-06, "loss": 0.4678, "step": 3557 }, { "epoch": 1.9235898360064878, "grad_norm": 0.3238253891468048, "learning_rate": 7.72583361930843e-06, "loss": 0.4, "step": 3558 }, { "epoch": 1.924130473959272, "grad_norm": 0.37113291025161743, "learning_rate": 7.724250682249723e-06, "loss": 0.4275, "step": 3559 }, { "epoch": 1.924671111912056, "grad_norm": 0.32570475339889526, "learning_rate": 7.722667356772291e-06, "loss": 0.4404, "step": 3560 }, { "epoch": 1.9252117498648404, "grad_norm": 0.28276491165161133, "learning_rate": 7.721083643101883e-06, "loss": 0.3831, "step": 3561 }, { "epoch": 1.9257523878176248, "grad_norm": 0.35973110795021057, "learning_rate": 7.719499541464304e-06, "loss": 0.4794, "step": 3562 }, { "epoch": 1.9262930257704092, "grad_norm": 0.3179782032966614, "learning_rate": 7.717915052085412e-06, "loss": 0.4124, "step": 3563 }, { "epoch": 1.9268336637231933, "grad_norm": 0.3386150598526001, "learning_rate": 7.716330175191118e-06, "loss": 0.4497, "step": 3564 }, { "epoch": 1.9273743016759777, "grad_norm": 0.29650476574897766, "learning_rate": 7.714744911007395e-06, "loss": 0.4274, "step": 3565 }, { "epoch": 1.9279149396287618, "grad_norm": 0.3160037696361542, "learning_rate": 7.713159259760262e-06, "loss": 0.4642, "step": 3566 }, { "epoch": 1.9284555775815462, "grad_norm": 0.35160017013549805, "learning_rate": 7.711573221675804e-06, "loss": 0.4352, "step": 3567 }, { "epoch": 1.9289962155343305, "grad_norm": 0.33104562759399414, "learning_rate": 7.709986796980148e-06, "loss": 0.4122, "step": 3568 }, { "epoch": 1.929536853487115, "grad_norm": 0.3283877670764923, "learning_rate": 7.708399985899492e-06, "loss": 0.431, "step": 3569 }, { "epoch": 1.930077491439899, "grad_norm": 0.34296366572380066, "learning_rate": 7.706812788660075e-06, "loss": 0.4243, "step": 3570 }, { "epoch": 1.9306181293926834, "grad_norm": 0.3678690195083618, "learning_rate": 7.705225205488201e-06, "loss": 0.4164, "step": 3571 }, { "epoch": 1.9311587673454675, "grad_norm": 0.331254243850708, "learning_rate": 7.703637236610217e-06, "loss": 0.4632, "step": 3572 }, { "epoch": 1.931699405298252, "grad_norm": 0.3256266415119171, "learning_rate": 7.702048882252541e-06, "loss": 0.4003, "step": 3573 }, { "epoch": 1.9322400432510363, "grad_norm": 0.3572953939437866, "learning_rate": 7.700460142641635e-06, "loss": 0.4536, "step": 3574 }, { "epoch": 1.9327806812038206, "grad_norm": 0.36617356538772583, "learning_rate": 7.698871018004016e-06, "loss": 0.4225, "step": 3575 }, { "epoch": 1.9333213191566048, "grad_norm": 0.3246054947376251, "learning_rate": 7.697281508566264e-06, "loss": 0.4174, "step": 3576 }, { "epoch": 1.9338619571093891, "grad_norm": 0.3796486258506775, "learning_rate": 7.695691614555002e-06, "loss": 0.4441, "step": 3577 }, { "epoch": 1.9344025950621733, "grad_norm": 0.4053588807582855, "learning_rate": 7.694101336196917e-06, "loss": 0.4574, "step": 3578 }, { "epoch": 1.9349432330149576, "grad_norm": 0.31780388951301575, "learning_rate": 7.69251067371875e-06, "loss": 0.3953, "step": 3579 }, { "epoch": 1.935483870967742, "grad_norm": 0.38607701659202576, "learning_rate": 7.690919627347292e-06, "loss": 0.4131, "step": 3580 }, { "epoch": 1.9360245089205264, "grad_norm": 0.39074504375457764, "learning_rate": 7.689328197309394e-06, "loss": 0.4427, "step": 3581 }, { "epoch": 1.9365651468733105, "grad_norm": 0.3454664647579193, "learning_rate": 7.687736383831956e-06, "loss": 0.4408, "step": 3582 }, { "epoch": 1.9371057848260946, "grad_norm": 0.3508855998516083, "learning_rate": 7.686144187141938e-06, "loss": 0.4216, "step": 3583 }, { "epoch": 1.937646422778879, "grad_norm": 0.38397687673568726, "learning_rate": 7.684551607466351e-06, "loss": 0.4718, "step": 3584 }, { "epoch": 1.9381870607316634, "grad_norm": 0.330385684967041, "learning_rate": 7.682958645032265e-06, "loss": 0.3939, "step": 3585 }, { "epoch": 1.9387276986844477, "grad_norm": 0.3208334445953369, "learning_rate": 7.681365300066798e-06, "loss": 0.4651, "step": 3586 }, { "epoch": 1.939268336637232, "grad_norm": 0.36974313855171204, "learning_rate": 7.67977157279713e-06, "loss": 0.4421, "step": 3587 }, { "epoch": 1.9398089745900162, "grad_norm": 0.32609590888023376, "learning_rate": 7.67817746345049e-06, "loss": 0.4336, "step": 3588 }, { "epoch": 1.9403496125428004, "grad_norm": 0.3425266742706299, "learning_rate": 7.676582972254162e-06, "loss": 0.4271, "step": 3589 }, { "epoch": 1.9408902504955847, "grad_norm": 0.3551659882068634, "learning_rate": 7.674988099435487e-06, "loss": 0.4233, "step": 3590 }, { "epoch": 1.941430888448369, "grad_norm": 0.36338621377944946, "learning_rate": 7.673392845221859e-06, "loss": 0.4265, "step": 3591 }, { "epoch": 1.9419715264011534, "grad_norm": 0.3407050669193268, "learning_rate": 7.671797209840725e-06, "loss": 0.4562, "step": 3592 }, { "epoch": 1.9425121643539378, "grad_norm": 0.3623160123825073, "learning_rate": 7.67020119351959e-06, "loss": 0.431, "step": 3593 }, { "epoch": 1.943052802306722, "grad_norm": 0.3682960271835327, "learning_rate": 7.668604796486013e-06, "loss": 0.4688, "step": 3594 }, { "epoch": 1.943593440259506, "grad_norm": 0.3230957090854645, "learning_rate": 7.667008018967598e-06, "loss": 0.405, "step": 3595 }, { "epoch": 1.9441340782122905, "grad_norm": 0.34176433086395264, "learning_rate": 7.665410861192018e-06, "loss": 0.4731, "step": 3596 }, { "epoch": 1.9446747161650748, "grad_norm": 0.35060107707977295, "learning_rate": 7.663813323386988e-06, "loss": 0.4242, "step": 3597 }, { "epoch": 1.9452153541178592, "grad_norm": 0.37970679998397827, "learning_rate": 7.662215405780287e-06, "loss": 0.4504, "step": 3598 }, { "epoch": 1.9457559920706433, "grad_norm": 0.3594030737876892, "learning_rate": 7.66061710859974e-06, "loss": 0.4249, "step": 3599 }, { "epoch": 1.9462966300234277, "grad_norm": 0.3039158582687378, "learning_rate": 7.65901843207323e-06, "loss": 0.4038, "step": 3600 }, { "epoch": 1.9468372679762118, "grad_norm": 0.3613775968551636, "learning_rate": 7.65741937642869e-06, "loss": 0.4651, "step": 3601 }, { "epoch": 1.9473779059289962, "grad_norm": 0.340748131275177, "learning_rate": 7.655819941894116e-06, "loss": 0.4911, "step": 3602 }, { "epoch": 1.9479185438817805, "grad_norm": 0.2853771150112152, "learning_rate": 7.654220128697547e-06, "loss": 0.3756, "step": 3603 }, { "epoch": 1.948459181834565, "grad_norm": 0.3236043453216553, "learning_rate": 7.652619937067087e-06, "loss": 0.4097, "step": 3604 }, { "epoch": 1.948999819787349, "grad_norm": 0.36772769689559937, "learning_rate": 7.651019367230886e-06, "loss": 0.4804, "step": 3605 }, { "epoch": 1.9495404577401334, "grad_norm": 0.31228798627853394, "learning_rate": 7.64941841941715e-06, "loss": 0.4372, "step": 3606 }, { "epoch": 1.9500810956929175, "grad_norm": 0.27597206830978394, "learning_rate": 7.64781709385414e-06, "loss": 0.3691, "step": 3607 }, { "epoch": 1.950621733645702, "grad_norm": 0.3294198513031006, "learning_rate": 7.646215390770167e-06, "loss": 0.4796, "step": 3608 }, { "epoch": 1.9511623715984863, "grad_norm": 0.3063039779663086, "learning_rate": 7.644613310393604e-06, "loss": 0.3831, "step": 3609 }, { "epoch": 1.9517030095512706, "grad_norm": 0.31528839468955994, "learning_rate": 7.643010852952871e-06, "loss": 0.4339, "step": 3610 }, { "epoch": 1.9522436475040548, "grad_norm": 0.3063347637653351, "learning_rate": 7.641408018676439e-06, "loss": 0.4614, "step": 3611 }, { "epoch": 1.952784285456839, "grad_norm": 0.3073441684246063, "learning_rate": 7.639804807792843e-06, "loss": 0.4229, "step": 3612 }, { "epoch": 1.9533249234096233, "grad_norm": 0.2667381167411804, "learning_rate": 7.638201220530664e-06, "loss": 0.4005, "step": 3613 }, { "epoch": 1.9538655613624076, "grad_norm": 0.3190470039844513, "learning_rate": 7.63659725711854e-06, "loss": 0.4365, "step": 3614 }, { "epoch": 1.954406199315192, "grad_norm": 0.29926618933677673, "learning_rate": 7.634992917785156e-06, "loss": 0.4455, "step": 3615 }, { "epoch": 1.9549468372679764, "grad_norm": 0.29153937101364136, "learning_rate": 7.633388202759262e-06, "loss": 0.4145, "step": 3616 }, { "epoch": 1.9554874752207605, "grad_norm": 0.35738757252693176, "learning_rate": 7.63178311226965e-06, "loss": 0.4494, "step": 3617 }, { "epoch": 1.9560281131735446, "grad_norm": 0.32931193709373474, "learning_rate": 7.630177646545176e-06, "loss": 0.4564, "step": 3618 }, { "epoch": 1.956568751126329, "grad_norm": 0.3191809058189392, "learning_rate": 7.628571805814742e-06, "loss": 0.4342, "step": 3619 }, { "epoch": 1.9571093890791134, "grad_norm": 0.3088025748729706, "learning_rate": 7.626965590307305e-06, "loss": 0.4067, "step": 3620 }, { "epoch": 1.9576500270318977, "grad_norm": 0.2964020371437073, "learning_rate": 7.625359000251875e-06, "loss": 0.3987, "step": 3621 }, { "epoch": 1.958190664984682, "grad_norm": 0.39133739471435547, "learning_rate": 7.623752035877523e-06, "loss": 0.4738, "step": 3622 }, { "epoch": 1.9587313029374662, "grad_norm": 0.3323116898536682, "learning_rate": 7.622144697413361e-06, "loss": 0.4001, "step": 3623 }, { "epoch": 1.9592719408902504, "grad_norm": 0.3124770522117615, "learning_rate": 7.620536985088562e-06, "loss": 0.4063, "step": 3624 }, { "epoch": 1.9598125788430347, "grad_norm": 0.359270304441452, "learning_rate": 7.6189288991323505e-06, "loss": 0.4482, "step": 3625 }, { "epoch": 1.960353216795819, "grad_norm": 0.3633440136909485, "learning_rate": 7.617320439774005e-06, "loss": 0.4319, "step": 3626 }, { "epoch": 1.9608938547486034, "grad_norm": 0.3513455390930176, "learning_rate": 7.615711607242857e-06, "loss": 0.4525, "step": 3627 }, { "epoch": 1.9614344927013876, "grad_norm": 0.2922728359699249, "learning_rate": 7.614102401768293e-06, "loss": 0.4126, "step": 3628 }, { "epoch": 1.961975130654172, "grad_norm": 0.3656744956970215, "learning_rate": 7.612492823579744e-06, "loss": 0.4565, "step": 3629 }, { "epoch": 1.962515768606956, "grad_norm": 0.3397183120250702, "learning_rate": 7.610882872906709e-06, "loss": 0.393, "step": 3630 }, { "epoch": 1.9630564065597405, "grad_norm": 0.41956302523612976, "learning_rate": 7.609272549978725e-06, "loss": 0.498, "step": 3631 }, { "epoch": 1.9635970445125248, "grad_norm": 0.3319515883922577, "learning_rate": 7.607661855025393e-06, "loss": 0.4167, "step": 3632 }, { "epoch": 1.9641376824653092, "grad_norm": 0.3784463703632355, "learning_rate": 7.606050788276361e-06, "loss": 0.4434, "step": 3633 }, { "epoch": 1.9646783204180933, "grad_norm": 0.3500726521015167, "learning_rate": 7.604439349961335e-06, "loss": 0.4038, "step": 3634 }, { "epoch": 1.9652189583708777, "grad_norm": 0.31805264949798584, "learning_rate": 7.602827540310065e-06, "loss": 0.3747, "step": 3635 }, { "epoch": 1.9657595963236618, "grad_norm": 0.35021552443504333, "learning_rate": 7.601215359552365e-06, "loss": 0.4301, "step": 3636 }, { "epoch": 1.9663002342764462, "grad_norm": 0.3575853407382965, "learning_rate": 7.599602807918096e-06, "loss": 0.4343, "step": 3637 }, { "epoch": 1.9668408722292305, "grad_norm": 0.3714921474456787, "learning_rate": 7.597989885637172e-06, "loss": 0.3832, "step": 3638 }, { "epoch": 1.967381510182015, "grad_norm": 0.3801950514316559, "learning_rate": 7.596376592939559e-06, "loss": 0.4923, "step": 3639 }, { "epoch": 1.967922148134799, "grad_norm": 0.3086744248867035, "learning_rate": 7.594762930055281e-06, "loss": 0.4363, "step": 3640 }, { "epoch": 1.9684627860875832, "grad_norm": 0.37697261571884155, "learning_rate": 7.593148897214409e-06, "loss": 0.409, "step": 3641 }, { "epoch": 1.9690034240403675, "grad_norm": 0.35460126399993896, "learning_rate": 7.591534494647066e-06, "loss": 0.4284, "step": 3642 }, { "epoch": 1.969544061993152, "grad_norm": 0.3188256621360779, "learning_rate": 7.5899197225834364e-06, "loss": 0.4454, "step": 3643 }, { "epoch": 1.9700846999459363, "grad_norm": 0.3555510342121124, "learning_rate": 7.5883045812537485e-06, "loss": 0.4528, "step": 3644 }, { "epoch": 1.9706253378987206, "grad_norm": 0.38000836968421936, "learning_rate": 7.586689070888284e-06, "loss": 0.4331, "step": 3645 }, { "epoch": 1.9711659758515048, "grad_norm": 0.2912016808986664, "learning_rate": 7.585073191717385e-06, "loss": 0.3979, "step": 3646 }, { "epoch": 1.971706613804289, "grad_norm": 0.3479340672492981, "learning_rate": 7.583456943971435e-06, "loss": 0.477, "step": 3647 }, { "epoch": 1.9722472517570733, "grad_norm": 0.31951528787612915, "learning_rate": 7.581840327880878e-06, "loss": 0.4319, "step": 3648 }, { "epoch": 1.9727878897098576, "grad_norm": 0.33479586243629456, "learning_rate": 7.580223343676209e-06, "loss": 0.4288, "step": 3649 }, { "epoch": 1.973328527662642, "grad_norm": 0.33754345774650574, "learning_rate": 7.578605991587974e-06, "loss": 0.4253, "step": 3650 }, { "epoch": 1.9738691656154264, "grad_norm": 0.3368953466415405, "learning_rate": 7.576988271846771e-06, "loss": 0.4239, "step": 3651 }, { "epoch": 1.9744098035682105, "grad_norm": 0.4046306908130646, "learning_rate": 7.575370184683255e-06, "loss": 0.4576, "step": 3652 }, { "epoch": 1.9749504415209946, "grad_norm": 0.3182411789894104, "learning_rate": 7.573751730328125e-06, "loss": 0.4007, "step": 3653 }, { "epoch": 1.975491079473779, "grad_norm": 0.34206026792526245, "learning_rate": 7.572132909012139e-06, "loss": 0.4597, "step": 3654 }, { "epoch": 1.9760317174265634, "grad_norm": 0.31874173879623413, "learning_rate": 7.570513720966108e-06, "loss": 0.4068, "step": 3655 }, { "epoch": 1.9765723553793477, "grad_norm": 0.37682193517684937, "learning_rate": 7.568894166420892e-06, "loss": 0.461, "step": 3656 }, { "epoch": 1.9771129933321319, "grad_norm": 0.3360116481781006, "learning_rate": 7.567274245607403e-06, "loss": 0.3982, "step": 3657 }, { "epoch": 1.9776536312849162, "grad_norm": 0.31616827845573425, "learning_rate": 7.5656539587566066e-06, "loss": 0.4308, "step": 3658 }, { "epoch": 1.9781942692377004, "grad_norm": 0.3326205909252167, "learning_rate": 7.5640333060995215e-06, "loss": 0.4388, "step": 3659 }, { "epoch": 1.9787349071904847, "grad_norm": 0.30274784564971924, "learning_rate": 7.562412287867214e-06, "loss": 0.4266, "step": 3660 }, { "epoch": 1.979275545143269, "grad_norm": 0.3377111256122589, "learning_rate": 7.5607909042908115e-06, "loss": 0.4476, "step": 3661 }, { "epoch": 1.9798161830960535, "grad_norm": 0.31110507249832153, "learning_rate": 7.559169155601483e-06, "loss": 0.4095, "step": 3662 }, { "epoch": 1.9803568210488376, "grad_norm": 0.310968816280365, "learning_rate": 7.557547042030458e-06, "loss": 0.4085, "step": 3663 }, { "epoch": 1.980897459001622, "grad_norm": 0.29778537154197693, "learning_rate": 7.555924563809011e-06, "loss": 0.4395, "step": 3664 }, { "epoch": 1.981438096954406, "grad_norm": 0.3150721788406372, "learning_rate": 7.5543017211684745e-06, "loss": 0.485, "step": 3665 }, { "epoch": 1.9819787349071905, "grad_norm": 0.3148729205131531, "learning_rate": 7.552678514340229e-06, "loss": 0.4126, "step": 3666 }, { "epoch": 1.9825193728599748, "grad_norm": 0.37179580330848694, "learning_rate": 7.551054943555711e-06, "loss": 0.4871, "step": 3667 }, { "epoch": 1.9830600108127592, "grad_norm": 0.32496505975723267, "learning_rate": 7.549431009046404e-06, "loss": 0.4354, "step": 3668 }, { "epoch": 1.9836006487655433, "grad_norm": 0.31917044520378113, "learning_rate": 7.547806711043846e-06, "loss": 0.4393, "step": 3669 }, { "epoch": 1.9841412867183277, "grad_norm": 0.3260762393474579, "learning_rate": 7.5461820497796255e-06, "loss": 0.429, "step": 3670 }, { "epoch": 1.9846819246711118, "grad_norm": 0.33916527032852173, "learning_rate": 7.544557025485386e-06, "loss": 0.4321, "step": 3671 }, { "epoch": 1.9852225626238962, "grad_norm": 0.34038153290748596, "learning_rate": 7.542931638392818e-06, "loss": 0.4241, "step": 3672 }, { "epoch": 1.9857632005766805, "grad_norm": 0.3504200577735901, "learning_rate": 7.54130588873367e-06, "loss": 0.4447, "step": 3673 }, { "epoch": 1.986303838529465, "grad_norm": 0.3235137164592743, "learning_rate": 7.5396797767397345e-06, "loss": 0.4268, "step": 3674 }, { "epoch": 1.986844476482249, "grad_norm": 0.34521469473838806, "learning_rate": 7.5380533026428625e-06, "loss": 0.4137, "step": 3675 }, { "epoch": 1.9873851144350332, "grad_norm": 0.38376936316490173, "learning_rate": 7.536426466674951e-06, "loss": 0.4826, "step": 3676 }, { "epoch": 1.9879257523878175, "grad_norm": 0.30524715781211853, "learning_rate": 7.534799269067952e-06, "loss": 0.3893, "step": 3677 }, { "epoch": 1.988466390340602, "grad_norm": 0.3902176022529602, "learning_rate": 7.533171710053871e-06, "loss": 0.4489, "step": 3678 }, { "epoch": 1.9890070282933863, "grad_norm": 0.4094945788383484, "learning_rate": 7.531543789864759e-06, "loss": 0.4433, "step": 3679 }, { "epoch": 1.9895476662461706, "grad_norm": 0.3604152798652649, "learning_rate": 7.529915508732725e-06, "loss": 0.4362, "step": 3680 }, { "epoch": 1.9900883041989548, "grad_norm": 0.4172627925872803, "learning_rate": 7.528286866889924e-06, "loss": 0.449, "step": 3681 }, { "epoch": 1.990628942151739, "grad_norm": 0.3344157636165619, "learning_rate": 7.526657864568565e-06, "loss": 0.4196, "step": 3682 }, { "epoch": 1.9911695801045233, "grad_norm": 0.3269338011741638, "learning_rate": 7.52502850200091e-06, "loss": 0.4154, "step": 3683 }, { "epoch": 1.9917102180573076, "grad_norm": 0.36076146364212036, "learning_rate": 7.5233987794192675e-06, "loss": 0.4295, "step": 3684 }, { "epoch": 1.992250856010092, "grad_norm": 0.3513505160808563, "learning_rate": 7.521768697056004e-06, "loss": 0.4206, "step": 3685 }, { "epoch": 1.9927914939628764, "grad_norm": 0.3319392502307892, "learning_rate": 7.520138255143532e-06, "loss": 0.4619, "step": 3686 }, { "epoch": 1.9933321319156605, "grad_norm": 0.3468388617038727, "learning_rate": 7.518507453914317e-06, "loss": 0.4198, "step": 3687 }, { "epoch": 1.9938727698684446, "grad_norm": 0.3449645936489105, "learning_rate": 7.5168762936008744e-06, "loss": 0.4282, "step": 3688 }, { "epoch": 1.994413407821229, "grad_norm": 0.33245494961738586, "learning_rate": 7.515244774435773e-06, "loss": 0.4228, "step": 3689 }, { "epoch": 1.9949540457740134, "grad_norm": 0.36770904064178467, "learning_rate": 7.513612896651632e-06, "loss": 0.4486, "step": 3690 }, { "epoch": 1.9954946837267977, "grad_norm": 0.31897082924842834, "learning_rate": 7.511980660481123e-06, "loss": 0.4252, "step": 3691 }, { "epoch": 1.9960353216795819, "grad_norm": 0.3040165603160858, "learning_rate": 7.510348066156965e-06, "loss": 0.4024, "step": 3692 }, { "epoch": 1.9965759596323662, "grad_norm": 0.4437829256057739, "learning_rate": 7.508715113911933e-06, "loss": 0.4148, "step": 3693 }, { "epoch": 1.9971165975851504, "grad_norm": 0.3649711608886719, "learning_rate": 7.5070818039788455e-06, "loss": 0.4666, "step": 3694 }, { "epoch": 1.9976572355379347, "grad_norm": 0.34586653113365173, "learning_rate": 7.505448136590583e-06, "loss": 0.4076, "step": 3695 }, { "epoch": 1.998197873490719, "grad_norm": 0.3936247229576111, "learning_rate": 7.5038141119800655e-06, "loss": 0.4342, "step": 3696 }, { "epoch": 1.9987385114435035, "grad_norm": 0.36371323466300964, "learning_rate": 7.502179730380274e-06, "loss": 0.4855, "step": 3697 }, { "epoch": 1.9992791493962876, "grad_norm": 0.3190724551677704, "learning_rate": 7.500544992024231e-06, "loss": 0.3891, "step": 3698 }, { "epoch": 1.999819787349072, "grad_norm": 0.4348657429218292, "learning_rate": 7.498909897145017e-06, "loss": 0.5114, "step": 3699 }, { "epoch": 2.000360425301856, "grad_norm": 0.4390939176082611, "learning_rate": 7.497274445975762e-06, "loss": 0.4686, "step": 3700 }, { "epoch": 2.0009010632546405, "grad_norm": 0.38029152154922485, "learning_rate": 7.495638638749645e-06, "loss": 0.4204, "step": 3701 }, { "epoch": 2.001441701207425, "grad_norm": 0.33925145864486694, "learning_rate": 7.494002475699893e-06, "loss": 0.4045, "step": 3702 }, { "epoch": 2.001982339160209, "grad_norm": 0.35955920815467834, "learning_rate": 7.492365957059793e-06, "loss": 0.3878, "step": 3703 }, { "epoch": 2.0025229771129935, "grad_norm": 0.35933130979537964, "learning_rate": 7.490729083062671e-06, "loss": 0.4152, "step": 3704 }, { "epoch": 2.0030636150657775, "grad_norm": 0.3439458906650543, "learning_rate": 7.489091853941914e-06, "loss": 0.4004, "step": 3705 }, { "epoch": 2.003604253018562, "grad_norm": 0.40505823493003845, "learning_rate": 7.487454269930953e-06, "loss": 0.4297, "step": 3706 }, { "epoch": 2.004144890971346, "grad_norm": 0.317466139793396, "learning_rate": 7.485816331263273e-06, "loss": 0.3772, "step": 3707 }, { "epoch": 2.0046855289241305, "grad_norm": 0.37041476368904114, "learning_rate": 7.484178038172407e-06, "loss": 0.4075, "step": 3708 }, { "epoch": 2.005226166876915, "grad_norm": 0.3804113566875458, "learning_rate": 7.482539390891941e-06, "loss": 0.4189, "step": 3709 }, { "epoch": 2.005766804829699, "grad_norm": 0.33350056409835815, "learning_rate": 7.480900389655508e-06, "loss": 0.3992, "step": 3710 }, { "epoch": 2.006307442782483, "grad_norm": 0.37814444303512573, "learning_rate": 7.479261034696797e-06, "loss": 0.3577, "step": 3711 }, { "epoch": 2.0068480807352675, "grad_norm": 0.35549354553222656, "learning_rate": 7.4776213262495425e-06, "loss": 0.4154, "step": 3712 }, { "epoch": 2.007388718688052, "grad_norm": 0.30540111660957336, "learning_rate": 7.475981264547531e-06, "loss": 0.3675, "step": 3713 }, { "epoch": 2.0079293566408363, "grad_norm": 0.33743345737457275, "learning_rate": 7.474340849824601e-06, "loss": 0.4002, "step": 3714 }, { "epoch": 2.0084699945936206, "grad_norm": 0.3345016539096832, "learning_rate": 7.4727000823146386e-06, "loss": 0.4265, "step": 3715 }, { "epoch": 2.0090106325464046, "grad_norm": 0.26939669251441956, "learning_rate": 7.471058962251582e-06, "loss": 0.3433, "step": 3716 }, { "epoch": 2.009551270499189, "grad_norm": 0.3526904284954071, "learning_rate": 7.4694174898694186e-06, "loss": 0.4127, "step": 3717 }, { "epoch": 2.0100919084519733, "grad_norm": 0.32508018612861633, "learning_rate": 7.467775665402186e-06, "loss": 0.3917, "step": 3718 }, { "epoch": 2.0106325464047576, "grad_norm": 0.3042299151420593, "learning_rate": 7.466133489083975e-06, "loss": 0.3881, "step": 3719 }, { "epoch": 2.011173184357542, "grad_norm": 0.288141667842865, "learning_rate": 7.464490961148921e-06, "loss": 0.3708, "step": 3720 }, { "epoch": 2.0117138223103264, "grad_norm": 0.31178662180900574, "learning_rate": 7.462848081831214e-06, "loss": 0.399, "step": 3721 }, { "epoch": 2.0122544602631103, "grad_norm": 0.33863207697868347, "learning_rate": 7.461204851365095e-06, "loss": 0.4177, "step": 3722 }, { "epoch": 2.0127950982158946, "grad_norm": 0.3190074861049652, "learning_rate": 7.459561269984848e-06, "loss": 0.408, "step": 3723 }, { "epoch": 2.013335736168679, "grad_norm": 0.38593724370002747, "learning_rate": 7.457917337924817e-06, "loss": 0.4353, "step": 3724 }, { "epoch": 2.0138763741214634, "grad_norm": 0.3085099458694458, "learning_rate": 7.4562730554193875e-06, "loss": 0.424, "step": 3725 }, { "epoch": 2.0144170120742477, "grad_norm": 0.3138388693332672, "learning_rate": 7.454628422703e-06, "loss": 0.3525, "step": 3726 }, { "epoch": 2.014957650027032, "grad_norm": 0.37040871381759644, "learning_rate": 7.452983440010141e-06, "loss": 0.4242, "step": 3727 }, { "epoch": 2.015498287979816, "grad_norm": 0.2976129651069641, "learning_rate": 7.451338107575351e-06, "loss": 0.3775, "step": 3728 }, { "epoch": 2.0160389259326004, "grad_norm": 0.40502285957336426, "learning_rate": 7.449692425633219e-06, "loss": 0.4125, "step": 3729 }, { "epoch": 2.0165795638853847, "grad_norm": 0.31863898038864136, "learning_rate": 7.448046394418383e-06, "loss": 0.3827, "step": 3730 }, { "epoch": 2.017120201838169, "grad_norm": 0.3395459055900574, "learning_rate": 7.446400014165529e-06, "loss": 0.4056, "step": 3731 }, { "epoch": 2.0176608397909535, "grad_norm": 0.32964348793029785, "learning_rate": 7.444753285109399e-06, "loss": 0.3992, "step": 3732 }, { "epoch": 2.018201477743738, "grad_norm": 0.33514976501464844, "learning_rate": 7.443106207484776e-06, "loss": 0.4061, "step": 3733 }, { "epoch": 2.0187421156965217, "grad_norm": 0.33783411979675293, "learning_rate": 7.4414587815265e-06, "loss": 0.3938, "step": 3734 }, { "epoch": 2.019282753649306, "grad_norm": 0.3240983784198761, "learning_rate": 7.439811007469457e-06, "loss": 0.4301, "step": 3735 }, { "epoch": 2.0198233916020905, "grad_norm": 0.3201411962509155, "learning_rate": 7.438162885548585e-06, "loss": 0.3957, "step": 3736 }, { "epoch": 2.020364029554875, "grad_norm": 0.3318484425544739, "learning_rate": 7.43651441599887e-06, "loss": 0.4134, "step": 3737 }, { "epoch": 2.020904667507659, "grad_norm": 0.3605779707431793, "learning_rate": 7.434865599055348e-06, "loss": 0.4148, "step": 3738 }, { "epoch": 2.0214453054604435, "grad_norm": 0.3019539415836334, "learning_rate": 7.433216434953101e-06, "loss": 0.3711, "step": 3739 }, { "epoch": 2.0219859434132275, "grad_norm": 0.3316117525100708, "learning_rate": 7.431566923927267e-06, "loss": 0.4522, "step": 3740 }, { "epoch": 2.022526581366012, "grad_norm": 0.34275633096694946, "learning_rate": 7.42991706621303e-06, "loss": 0.3895, "step": 3741 }, { "epoch": 2.023067219318796, "grad_norm": 0.3309865891933441, "learning_rate": 7.428266862045625e-06, "loss": 0.412, "step": 3742 }, { "epoch": 2.0236078572715805, "grad_norm": 0.29785841703414917, "learning_rate": 7.426616311660332e-06, "loss": 0.4133, "step": 3743 }, { "epoch": 2.024148495224365, "grad_norm": 0.29913681745529175, "learning_rate": 7.424965415292487e-06, "loss": 0.3711, "step": 3744 }, { "epoch": 2.024689133177149, "grad_norm": 0.32399049401283264, "learning_rate": 7.423314173177467e-06, "loss": 0.4411, "step": 3745 }, { "epoch": 2.025229771129933, "grad_norm": 0.29665037989616394, "learning_rate": 7.421662585550707e-06, "loss": 0.3682, "step": 3746 }, { "epoch": 2.0257704090827175, "grad_norm": 0.350752592086792, "learning_rate": 7.4200106526476865e-06, "loss": 0.4197, "step": 3747 }, { "epoch": 2.026311047035502, "grad_norm": 0.30756109952926636, "learning_rate": 7.418358374703936e-06, "loss": 0.3995, "step": 3748 }, { "epoch": 2.0268516849882863, "grad_norm": 0.3451012670993805, "learning_rate": 7.416705751955031e-06, "loss": 0.3939, "step": 3749 }, { "epoch": 2.0273923229410706, "grad_norm": 0.35276153683662415, "learning_rate": 7.415052784636603e-06, "loss": 0.4161, "step": 3750 }, { "epoch": 2.0279329608938546, "grad_norm": 0.33562684059143066, "learning_rate": 7.4133994729843275e-06, "loss": 0.3954, "step": 3751 }, { "epoch": 2.028473598846639, "grad_norm": 0.34942105412483215, "learning_rate": 7.41174581723393e-06, "loss": 0.4171, "step": 3752 }, { "epoch": 2.0290142367994233, "grad_norm": 0.3535307049751282, "learning_rate": 7.4100918176211876e-06, "loss": 0.4104, "step": 3753 }, { "epoch": 2.0295548747522076, "grad_norm": 0.32952383160591125, "learning_rate": 7.408437474381924e-06, "loss": 0.3924, "step": 3754 }, { "epoch": 2.030095512704992, "grad_norm": 0.3535063862800598, "learning_rate": 7.406782787752011e-06, "loss": 0.384, "step": 3755 }, { "epoch": 2.0306361506577764, "grad_norm": 0.29314327239990234, "learning_rate": 7.4051277579673746e-06, "loss": 0.3447, "step": 3756 }, { "epoch": 2.0311767886105603, "grad_norm": 0.3491775095462799, "learning_rate": 7.403472385263979e-06, "loss": 0.4355, "step": 3757 }, { "epoch": 2.0317174265633446, "grad_norm": 0.31308335065841675, "learning_rate": 7.401816669877852e-06, "loss": 0.4135, "step": 3758 }, { "epoch": 2.032258064516129, "grad_norm": 0.3441132605075836, "learning_rate": 7.400160612045057e-06, "loss": 0.4233, "step": 3759 }, { "epoch": 2.0327987024689134, "grad_norm": 0.3010082244873047, "learning_rate": 7.398504212001714e-06, "loss": 0.374, "step": 3760 }, { "epoch": 2.0333393404216977, "grad_norm": 0.36063721776008606, "learning_rate": 7.39684746998399e-06, "loss": 0.4582, "step": 3761 }, { "epoch": 2.033879978374482, "grad_norm": 0.3262293338775635, "learning_rate": 7.395190386228098e-06, "loss": 0.4108, "step": 3762 }, { "epoch": 2.034420616327266, "grad_norm": 0.3062325716018677, "learning_rate": 7.393532960970305e-06, "loss": 0.3861, "step": 3763 }, { "epoch": 2.0349612542800504, "grad_norm": 0.374900221824646, "learning_rate": 7.391875194446922e-06, "loss": 0.412, "step": 3764 }, { "epoch": 2.0355018922328347, "grad_norm": 0.3617575764656067, "learning_rate": 7.390217086894309e-06, "loss": 0.3655, "step": 3765 }, { "epoch": 2.036042530185619, "grad_norm": 0.345198392868042, "learning_rate": 7.38855863854888e-06, "loss": 0.4332, "step": 3766 }, { "epoch": 2.0365831681384035, "grad_norm": 0.3414783775806427, "learning_rate": 7.386899849647089e-06, "loss": 0.4083, "step": 3767 }, { "epoch": 2.037123806091188, "grad_norm": 0.3830156624317169, "learning_rate": 7.385240720425446e-06, "loss": 0.4146, "step": 3768 }, { "epoch": 2.0376644440439717, "grad_norm": 0.3343322277069092, "learning_rate": 7.3835812511205055e-06, "loss": 0.4011, "step": 3769 }, { "epoch": 2.038205081996756, "grad_norm": 0.35519006848335266, "learning_rate": 7.3819214419688725e-06, "loss": 0.4243, "step": 3770 }, { "epoch": 2.0387457199495405, "grad_norm": 0.3095337748527527, "learning_rate": 7.380261293207198e-06, "loss": 0.3586, "step": 3771 }, { "epoch": 2.039286357902325, "grad_norm": 0.37620463967323303, "learning_rate": 7.378600805072186e-06, "loss": 0.4157, "step": 3772 }, { "epoch": 2.039826995855109, "grad_norm": 0.31508979201316833, "learning_rate": 7.376939977800581e-06, "loss": 0.3668, "step": 3773 }, { "epoch": 2.040367633807893, "grad_norm": 0.3138459324836731, "learning_rate": 7.375278811629185e-06, "loss": 0.4474, "step": 3774 }, { "epoch": 2.0409082717606775, "grad_norm": 0.302555114030838, "learning_rate": 7.373617306794844e-06, "loss": 0.4138, "step": 3775 }, { "epoch": 2.041448909713462, "grad_norm": 0.2993220090866089, "learning_rate": 7.3719554635344505e-06, "loss": 0.3921, "step": 3776 }, { "epoch": 2.041989547666246, "grad_norm": 0.3078453242778778, "learning_rate": 7.370293282084946e-06, "loss": 0.419, "step": 3777 }, { "epoch": 2.0425301856190305, "grad_norm": 0.2940816581249237, "learning_rate": 7.368630762683324e-06, "loss": 0.3867, "step": 3778 }, { "epoch": 2.043070823571815, "grad_norm": 0.3160632848739624, "learning_rate": 7.366967905566622e-06, "loss": 0.4225, "step": 3779 }, { "epoch": 2.043611461524599, "grad_norm": 0.33914080262184143, "learning_rate": 7.365304710971928e-06, "loss": 0.4386, "step": 3780 }, { "epoch": 2.044152099477383, "grad_norm": 0.29692062735557556, "learning_rate": 7.363641179136377e-06, "loss": 0.3939, "step": 3781 }, { "epoch": 2.0446927374301676, "grad_norm": 0.33448755741119385, "learning_rate": 7.361977310297153e-06, "loss": 0.4324, "step": 3782 }, { "epoch": 2.045233375382952, "grad_norm": 0.2931983470916748, "learning_rate": 7.360313104691485e-06, "loss": 0.3849, "step": 3783 }, { "epoch": 2.0457740133357363, "grad_norm": 0.3646359145641327, "learning_rate": 7.358648562556656e-06, "loss": 0.4415, "step": 3784 }, { "epoch": 2.0463146512885206, "grad_norm": 0.3032764494419098, "learning_rate": 7.3569836841299905e-06, "loss": 0.4167, "step": 3785 }, { "epoch": 2.0468552892413046, "grad_norm": 0.2867969274520874, "learning_rate": 7.3553184696488625e-06, "loss": 0.3492, "step": 3786 }, { "epoch": 2.047395927194089, "grad_norm": 0.3223315477371216, "learning_rate": 7.3536529193507e-06, "loss": 0.4154, "step": 3787 }, { "epoch": 2.0479365651468733, "grad_norm": 0.3315862715244293, "learning_rate": 7.351987033472971e-06, "loss": 0.398, "step": 3788 }, { "epoch": 2.0484772030996576, "grad_norm": 0.3055437207221985, "learning_rate": 7.350320812253196e-06, "loss": 0.3715, "step": 3789 }, { "epoch": 2.049017841052442, "grad_norm": 0.29256078600883484, "learning_rate": 7.348654255928941e-06, "loss": 0.381, "step": 3790 }, { "epoch": 2.0495584790052264, "grad_norm": 0.29239529371261597, "learning_rate": 7.346987364737819e-06, "loss": 0.3993, "step": 3791 }, { "epoch": 2.0500991169580103, "grad_norm": 0.3444713056087494, "learning_rate": 7.345320138917496e-06, "loss": 0.4172, "step": 3792 }, { "epoch": 2.0506397549107946, "grad_norm": 0.32070884108543396, "learning_rate": 7.343652578705678e-06, "loss": 0.4356, "step": 3793 }, { "epoch": 2.051180392863579, "grad_norm": 0.2924729883670807, "learning_rate": 7.341984684340125e-06, "loss": 0.3917, "step": 3794 }, { "epoch": 2.0517210308163634, "grad_norm": 0.34132906794548035, "learning_rate": 7.340316456058644e-06, "loss": 0.4196, "step": 3795 }, { "epoch": 2.0522616687691477, "grad_norm": 0.2765282988548279, "learning_rate": 7.338647894099085e-06, "loss": 0.3524, "step": 3796 }, { "epoch": 2.052802306721932, "grad_norm": 0.29886043071746826, "learning_rate": 7.336978998699348e-06, "loss": 0.3891, "step": 3797 }, { "epoch": 2.053342944674716, "grad_norm": 0.3242138624191284, "learning_rate": 7.335309770097383e-06, "loss": 0.447, "step": 3798 }, { "epoch": 2.0538835826275004, "grad_norm": 0.3068273067474365, "learning_rate": 7.333640208531187e-06, "loss": 0.4029, "step": 3799 }, { "epoch": 2.0544242205802847, "grad_norm": 0.320049911737442, "learning_rate": 7.331970314238799e-06, "loss": 0.3743, "step": 3800 }, { "epoch": 2.054964858533069, "grad_norm": 0.33802640438079834, "learning_rate": 7.330300087458313e-06, "loss": 0.4385, "step": 3801 }, { "epoch": 2.0555054964858535, "grad_norm": 0.32195529341697693, "learning_rate": 7.328629528427865e-06, "loss": 0.4199, "step": 3802 }, { "epoch": 2.0560461344386374, "grad_norm": 0.30489280819892883, "learning_rate": 7.3269586373856415e-06, "loss": 0.4167, "step": 3803 }, { "epoch": 2.0565867723914217, "grad_norm": 0.3016655147075653, "learning_rate": 7.325287414569874e-06, "loss": 0.4095, "step": 3804 }, { "epoch": 2.057127410344206, "grad_norm": 0.31946998834609985, "learning_rate": 7.323615860218844e-06, "loss": 0.439, "step": 3805 }, { "epoch": 2.0576680482969905, "grad_norm": 0.29036518931388855, "learning_rate": 7.321943974570876e-06, "loss": 0.3623, "step": 3806 }, { "epoch": 2.058208686249775, "grad_norm": 0.3299275040626526, "learning_rate": 7.320271757864348e-06, "loss": 0.4034, "step": 3807 }, { "epoch": 2.058749324202559, "grad_norm": 0.31324502825737, "learning_rate": 7.318599210337678e-06, "loss": 0.4124, "step": 3808 }, { "epoch": 2.059289962155343, "grad_norm": 0.3337649703025818, "learning_rate": 7.316926332229337e-06, "loss": 0.4473, "step": 3809 }, { "epoch": 2.0598306001081275, "grad_norm": 0.27556607127189636, "learning_rate": 7.31525312377784e-06, "loss": 0.3664, "step": 3810 }, { "epoch": 2.060371238060912, "grad_norm": 0.3056291937828064, "learning_rate": 7.313579585221752e-06, "loss": 0.4191, "step": 3811 }, { "epoch": 2.060911876013696, "grad_norm": 0.34928005933761597, "learning_rate": 7.31190571679968e-06, "loss": 0.4415, "step": 3812 }, { "epoch": 2.0614525139664805, "grad_norm": 0.28535032272338867, "learning_rate": 7.310231518750284e-06, "loss": 0.3789, "step": 3813 }, { "epoch": 2.061993151919265, "grad_norm": 0.32170960307121277, "learning_rate": 7.308556991312263e-06, "loss": 0.392, "step": 3814 }, { "epoch": 2.062533789872049, "grad_norm": 0.351576030254364, "learning_rate": 7.306882134724376e-06, "loss": 0.4495, "step": 3815 }, { "epoch": 2.063074427824833, "grad_norm": 0.31351256370544434, "learning_rate": 7.3052069492254154e-06, "loss": 0.4306, "step": 3816 }, { "epoch": 2.0636150657776176, "grad_norm": 0.29618924856185913, "learning_rate": 7.303531435054229e-06, "loss": 0.3763, "step": 3817 }, { "epoch": 2.064155703730402, "grad_norm": 0.33462122082710266, "learning_rate": 7.301855592449704e-06, "loss": 0.4162, "step": 3818 }, { "epoch": 2.0646963416831863, "grad_norm": 0.3589819669723511, "learning_rate": 7.3001794216507845e-06, "loss": 0.4185, "step": 3819 }, { "epoch": 2.0652369796359706, "grad_norm": 0.3030599057674408, "learning_rate": 7.298502922896453e-06, "loss": 0.4091, "step": 3820 }, { "epoch": 2.0657776175887546, "grad_norm": 0.3502945005893707, "learning_rate": 7.296826096425743e-06, "loss": 0.4215, "step": 3821 }, { "epoch": 2.066318255541539, "grad_norm": 0.3764542043209076, "learning_rate": 7.295148942477732e-06, "loss": 0.3956, "step": 3822 }, { "epoch": 2.0668588934943233, "grad_norm": 0.2936761975288391, "learning_rate": 7.293471461291546e-06, "loss": 0.3914, "step": 3823 }, { "epoch": 2.0673995314471076, "grad_norm": 0.36366504430770874, "learning_rate": 7.291793653106357e-06, "loss": 0.4001, "step": 3824 }, { "epoch": 2.067940169399892, "grad_norm": 0.40998461842536926, "learning_rate": 7.290115518161385e-06, "loss": 0.4144, "step": 3825 }, { "epoch": 2.0684808073526764, "grad_norm": 0.2950108051300049, "learning_rate": 7.288437056695894e-06, "loss": 0.4165, "step": 3826 }, { "epoch": 2.0690214453054603, "grad_norm": 0.34533965587615967, "learning_rate": 7.286758268949198e-06, "loss": 0.3996, "step": 3827 }, { "epoch": 2.0695620832582446, "grad_norm": 0.34263163805007935, "learning_rate": 7.285079155160652e-06, "loss": 0.4258, "step": 3828 }, { "epoch": 2.070102721211029, "grad_norm": 0.333176851272583, "learning_rate": 7.283399715569666e-06, "loss": 0.3754, "step": 3829 }, { "epoch": 2.0706433591638134, "grad_norm": 0.3491484224796295, "learning_rate": 7.281719950415686e-06, "loss": 0.4263, "step": 3830 }, { "epoch": 2.0711839971165977, "grad_norm": 0.31822386384010315, "learning_rate": 7.280039859938213e-06, "loss": 0.3965, "step": 3831 }, { "epoch": 2.0717246350693816, "grad_norm": 0.3203738033771515, "learning_rate": 7.27835944437679e-06, "loss": 0.408, "step": 3832 }, { "epoch": 2.072265273022166, "grad_norm": 0.3189562261104584, "learning_rate": 7.276678703971011e-06, "loss": 0.4019, "step": 3833 }, { "epoch": 2.0728059109749504, "grad_norm": 0.3436689078807831, "learning_rate": 7.274997638960508e-06, "loss": 0.381, "step": 3834 }, { "epoch": 2.0733465489277347, "grad_norm": 0.33741891384124756, "learning_rate": 7.273316249584969e-06, "loss": 0.4172, "step": 3835 }, { "epoch": 2.073887186880519, "grad_norm": 0.32150623202323914, "learning_rate": 7.271634536084118e-06, "loss": 0.4375, "step": 3836 }, { "epoch": 2.0744278248333035, "grad_norm": 0.3251088261604309, "learning_rate": 7.269952498697734e-06, "loss": 0.3904, "step": 3837 }, { "epoch": 2.0749684627860874, "grad_norm": 0.3126635253429413, "learning_rate": 7.268270137665639e-06, "loss": 0.373, "step": 3838 }, { "epoch": 2.0755091007388717, "grad_norm": 0.3661896586418152, "learning_rate": 7.266587453227703e-06, "loss": 0.4026, "step": 3839 }, { "epoch": 2.076049738691656, "grad_norm": 0.3148078918457031, "learning_rate": 7.2649044456238334e-06, "loss": 0.3691, "step": 3840 }, { "epoch": 2.0765903766444405, "grad_norm": 0.35688409209251404, "learning_rate": 7.263221115093997e-06, "loss": 0.4114, "step": 3841 }, { "epoch": 2.077131014597225, "grad_norm": 0.371732234954834, "learning_rate": 7.261537461878196e-06, "loss": 0.4202, "step": 3842 }, { "epoch": 2.077671652550009, "grad_norm": 0.3141860365867615, "learning_rate": 7.259853486216485e-06, "loss": 0.3495, "step": 3843 }, { "epoch": 2.078212290502793, "grad_norm": 0.3581951856613159, "learning_rate": 7.2581691883489605e-06, "loss": 0.4252, "step": 3844 }, { "epoch": 2.0787529284555775, "grad_norm": 0.2928514778614044, "learning_rate": 7.256484568515769e-06, "loss": 0.3449, "step": 3845 }, { "epoch": 2.079293566408362, "grad_norm": 0.419791579246521, "learning_rate": 7.254799626957098e-06, "loss": 0.4256, "step": 3846 }, { "epoch": 2.079834204361146, "grad_norm": 0.33046984672546387, "learning_rate": 7.253114363913185e-06, "loss": 0.4195, "step": 3847 }, { "epoch": 2.0803748423139306, "grad_norm": 0.3584929406642914, "learning_rate": 7.251428779624309e-06, "loss": 0.4098, "step": 3848 }, { "epoch": 2.080915480266715, "grad_norm": 0.31421512365341187, "learning_rate": 7.249742874330802e-06, "loss": 0.3536, "step": 3849 }, { "epoch": 2.081456118219499, "grad_norm": 0.29763343930244446, "learning_rate": 7.248056648273034e-06, "loss": 0.3575, "step": 3850 }, { "epoch": 2.081996756172283, "grad_norm": 0.39056849479675293, "learning_rate": 7.246370101691424e-06, "loss": 0.4319, "step": 3851 }, { "epoch": 2.0825373941250676, "grad_norm": 0.34038469195365906, "learning_rate": 7.244683234826441e-06, "loss": 0.4107, "step": 3852 }, { "epoch": 2.083078032077852, "grad_norm": 0.3491818308830261, "learning_rate": 7.242996047918589e-06, "loss": 0.4068, "step": 3853 }, { "epoch": 2.0836186700306363, "grad_norm": 0.37478092312812805, "learning_rate": 7.241308541208429e-06, "loss": 0.3975, "step": 3854 }, { "epoch": 2.0841593079834206, "grad_norm": 0.33679676055908203, "learning_rate": 7.239620714936561e-06, "loss": 0.4176, "step": 3855 }, { "epoch": 2.0846999459362046, "grad_norm": 0.3508088290691376, "learning_rate": 7.237932569343632e-06, "loss": 0.4104, "step": 3856 }, { "epoch": 2.085240583888989, "grad_norm": 0.3401055932044983, "learning_rate": 7.2362441046703344e-06, "loss": 0.4029, "step": 3857 }, { "epoch": 2.0857812218417733, "grad_norm": 0.38918283581733704, "learning_rate": 7.2345553211574086e-06, "loss": 0.373, "step": 3858 }, { "epoch": 2.0863218597945576, "grad_norm": 0.321361780166626, "learning_rate": 7.232866219045634e-06, "loss": 0.4362, "step": 3859 }, { "epoch": 2.086862497747342, "grad_norm": 0.33438366651535034, "learning_rate": 7.231176798575843e-06, "loss": 0.4012, "step": 3860 }, { "epoch": 2.087403135700126, "grad_norm": 0.3418934941291809, "learning_rate": 7.22948705998891e-06, "loss": 0.403, "step": 3861 }, { "epoch": 2.0879437736529103, "grad_norm": 0.30169782042503357, "learning_rate": 7.227797003525755e-06, "loss": 0.389, "step": 3862 }, { "epoch": 2.0884844116056946, "grad_norm": 0.3699062466621399, "learning_rate": 7.226106629427342e-06, "loss": 0.4012, "step": 3863 }, { "epoch": 2.089025049558479, "grad_norm": 0.34036868810653687, "learning_rate": 7.2244159379346826e-06, "loss": 0.4144, "step": 3864 }, { "epoch": 2.0895656875112634, "grad_norm": 0.30740082263946533, "learning_rate": 7.22272492928883e-06, "loss": 0.4116, "step": 3865 }, { "epoch": 2.0901063254640477, "grad_norm": 0.4237309396266937, "learning_rate": 7.221033603730888e-06, "loss": 0.4236, "step": 3866 }, { "epoch": 2.0906469634168316, "grad_norm": 0.30990955233573914, "learning_rate": 7.219341961502002e-06, "loss": 0.4114, "step": 3867 }, { "epoch": 2.091187601369616, "grad_norm": 0.3375382423400879, "learning_rate": 7.217650002843364e-06, "loss": 0.4131, "step": 3868 }, { "epoch": 2.0917282393224004, "grad_norm": 0.32740265130996704, "learning_rate": 7.215957727996208e-06, "loss": 0.3892, "step": 3869 }, { "epoch": 2.0922688772751847, "grad_norm": 0.30971941351890564, "learning_rate": 7.214265137201817e-06, "loss": 0.4316, "step": 3870 }, { "epoch": 2.092809515227969, "grad_norm": 0.3367486596107483, "learning_rate": 7.212572230701517e-06, "loss": 0.3895, "step": 3871 }, { "epoch": 2.0933501531807535, "grad_norm": 0.3166569769382477, "learning_rate": 7.210879008736681e-06, "loss": 0.398, "step": 3872 }, { "epoch": 2.0938907911335374, "grad_norm": 0.2920517325401306, "learning_rate": 7.209185471548724e-06, "loss": 0.3801, "step": 3873 }, { "epoch": 2.0944314290863217, "grad_norm": 0.36852407455444336, "learning_rate": 7.207491619379109e-06, "loss": 0.4359, "step": 3874 }, { "epoch": 2.094972067039106, "grad_norm": 0.3067874610424042, "learning_rate": 7.205797452469341e-06, "loss": 0.3864, "step": 3875 }, { "epoch": 2.0955127049918905, "grad_norm": 0.2726293206214905, "learning_rate": 7.204102971060971e-06, "loss": 0.3885, "step": 3876 }, { "epoch": 2.096053342944675, "grad_norm": 0.3440646529197693, "learning_rate": 7.2024081753955944e-06, "loss": 0.4552, "step": 3877 }, { "epoch": 2.096593980897459, "grad_norm": 0.32205453515052795, "learning_rate": 7.200713065714856e-06, "loss": 0.3593, "step": 3878 }, { "epoch": 2.097134618850243, "grad_norm": 0.31225115060806274, "learning_rate": 7.1990176422604375e-06, "loss": 0.3778, "step": 3879 }, { "epoch": 2.0976752568030275, "grad_norm": 0.3493831157684326, "learning_rate": 7.197321905274071e-06, "loss": 0.4246, "step": 3880 }, { "epoch": 2.098215894755812, "grad_norm": 0.3429039418697357, "learning_rate": 7.195625854997531e-06, "loss": 0.4213, "step": 3881 }, { "epoch": 2.098756532708596, "grad_norm": 0.3157693147659302, "learning_rate": 7.1939294916726375e-06, "loss": 0.3945, "step": 3882 }, { "epoch": 2.0992971706613806, "grad_norm": 0.32176530361175537, "learning_rate": 7.1922328155412545e-06, "loss": 0.3816, "step": 3883 }, { "epoch": 2.099837808614165, "grad_norm": 0.3419540822505951, "learning_rate": 7.190535826845293e-06, "loss": 0.4148, "step": 3884 }, { "epoch": 2.100378446566949, "grad_norm": 0.3286668360233307, "learning_rate": 7.188838525826702e-06, "loss": 0.4135, "step": 3885 }, { "epoch": 2.100919084519733, "grad_norm": 0.34139060974121094, "learning_rate": 7.187140912727486e-06, "loss": 0.4178, "step": 3886 }, { "epoch": 2.1014597224725176, "grad_norm": 0.3621421456336975, "learning_rate": 7.185442987789683e-06, "loss": 0.4192, "step": 3887 }, { "epoch": 2.102000360425302, "grad_norm": 0.3126319944858551, "learning_rate": 7.18374475125538e-06, "loss": 0.3858, "step": 3888 }, { "epoch": 2.1025409983780863, "grad_norm": 0.3388730585575104, "learning_rate": 7.18204620336671e-06, "loss": 0.4164, "step": 3889 }, { "epoch": 2.1030816363308706, "grad_norm": 0.32302772998809814, "learning_rate": 7.18034734436585e-06, "loss": 0.4124, "step": 3890 }, { "epoch": 2.1036222742836546, "grad_norm": 0.32162100076675415, "learning_rate": 7.1786481744950186e-06, "loss": 0.4305, "step": 3891 }, { "epoch": 2.104162912236439, "grad_norm": 0.2945658266544342, "learning_rate": 7.17694869399648e-06, "loss": 0.3711, "step": 3892 }, { "epoch": 2.1047035501892233, "grad_norm": 0.3670465350151062, "learning_rate": 7.175248903112544e-06, "loss": 0.4306, "step": 3893 }, { "epoch": 2.1052441881420076, "grad_norm": 0.29766687750816345, "learning_rate": 7.173548802085564e-06, "loss": 0.3714, "step": 3894 }, { "epoch": 2.105784826094792, "grad_norm": 0.320868581533432, "learning_rate": 7.171848391157935e-06, "loss": 0.4301, "step": 3895 }, { "epoch": 2.1063254640475764, "grad_norm": 0.28945621848106384, "learning_rate": 7.170147670572102e-06, "loss": 0.3641, "step": 3896 }, { "epoch": 2.1068661020003603, "grad_norm": 0.3298172652721405, "learning_rate": 7.1684466405705475e-06, "loss": 0.4205, "step": 3897 }, { "epoch": 2.1074067399531446, "grad_norm": 0.318289577960968, "learning_rate": 7.166745301395804e-06, "loss": 0.4215, "step": 3898 }, { "epoch": 2.107947377905929, "grad_norm": 0.3189803957939148, "learning_rate": 7.165043653290443e-06, "loss": 0.3796, "step": 3899 }, { "epoch": 2.1084880158587134, "grad_norm": 0.29908958077430725, "learning_rate": 7.163341696497084e-06, "loss": 0.3637, "step": 3900 }, { "epoch": 2.1090286538114977, "grad_norm": 0.3295332193374634, "learning_rate": 7.161639431258387e-06, "loss": 0.4193, "step": 3901 }, { "epoch": 2.1095692917642817, "grad_norm": 0.31142479181289673, "learning_rate": 7.15993685781706e-06, "loss": 0.3817, "step": 3902 }, { "epoch": 2.110109929717066, "grad_norm": 0.33382973074913025, "learning_rate": 7.158233976415852e-06, "loss": 0.4067, "step": 3903 }, { "epoch": 2.1106505676698504, "grad_norm": 0.3381470739841461, "learning_rate": 7.1565307872975576e-06, "loss": 0.4123, "step": 3904 }, { "epoch": 2.1111912056226347, "grad_norm": 0.30384355783462524, "learning_rate": 7.154827290705012e-06, "loss": 0.3842, "step": 3905 }, { "epoch": 2.111731843575419, "grad_norm": 0.30277004837989807, "learning_rate": 7.1531234868811e-06, "loss": 0.3578, "step": 3906 }, { "epoch": 2.1122724815282035, "grad_norm": 0.390278160572052, "learning_rate": 7.151419376068743e-06, "loss": 0.4165, "step": 3907 }, { "epoch": 2.1128131194809874, "grad_norm": 0.3517399728298187, "learning_rate": 7.149714958510914e-06, "loss": 0.371, "step": 3908 }, { "epoch": 2.1133537574337717, "grad_norm": 0.3465571701526642, "learning_rate": 7.148010234450623e-06, "loss": 0.3995, "step": 3909 }, { "epoch": 2.113894395386556, "grad_norm": 0.4046260714530945, "learning_rate": 7.146305204130928e-06, "loss": 0.4302, "step": 3910 }, { "epoch": 2.1144350333393405, "grad_norm": 0.3325851559638977, "learning_rate": 7.144599867794927e-06, "loss": 0.3922, "step": 3911 }, { "epoch": 2.114975671292125, "grad_norm": 0.3606100082397461, "learning_rate": 7.142894225685767e-06, "loss": 0.4152, "step": 3912 }, { "epoch": 2.115516309244909, "grad_norm": 0.3804704546928406, "learning_rate": 7.141188278046632e-06, "loss": 0.3951, "step": 3913 }, { "epoch": 2.116056947197693, "grad_norm": 0.39560452103614807, "learning_rate": 7.139482025120757e-06, "loss": 0.431, "step": 3914 }, { "epoch": 2.1165975851504775, "grad_norm": 0.300865113735199, "learning_rate": 7.137775467151411e-06, "loss": 0.3932, "step": 3915 }, { "epoch": 2.117138223103262, "grad_norm": 0.3441906273365021, "learning_rate": 7.136068604381916e-06, "loss": 0.3983, "step": 3916 }, { "epoch": 2.117678861056046, "grad_norm": 0.40447837114334106, "learning_rate": 7.134361437055633e-06, "loss": 0.3983, "step": 3917 }, { "epoch": 2.1182194990088306, "grad_norm": 0.3152672052383423, "learning_rate": 7.132653965415965e-06, "loss": 0.3887, "step": 3918 }, { "epoch": 2.118760136961615, "grad_norm": 0.3489867150783539, "learning_rate": 7.130946189706364e-06, "loss": 0.434, "step": 3919 }, { "epoch": 2.119300774914399, "grad_norm": 0.33735814690589905, "learning_rate": 7.129238110170315e-06, "loss": 0.3816, "step": 3920 }, { "epoch": 2.119841412867183, "grad_norm": 0.3443851172924042, "learning_rate": 7.1275297270513614e-06, "loss": 0.4159, "step": 3921 }, { "epoch": 2.1203820508199676, "grad_norm": 0.32266682386398315, "learning_rate": 7.125821040593073e-06, "loss": 0.3909, "step": 3922 }, { "epoch": 2.120922688772752, "grad_norm": 0.31138888001441956, "learning_rate": 7.124112051039076e-06, "loss": 0.371, "step": 3923 }, { "epoch": 2.1214633267255363, "grad_norm": 0.3722877502441406, "learning_rate": 7.122402758633033e-06, "loss": 0.4169, "step": 3924 }, { "epoch": 2.1220039646783206, "grad_norm": 0.3135385811328888, "learning_rate": 7.120693163618656e-06, "loss": 0.4054, "step": 3925 }, { "epoch": 2.1225446026311046, "grad_norm": 0.3250928521156311, "learning_rate": 7.118983266239691e-06, "loss": 0.3783, "step": 3926 }, { "epoch": 2.123085240583889, "grad_norm": 0.3110595941543579, "learning_rate": 7.117273066739934e-06, "loss": 0.3776, "step": 3927 }, { "epoch": 2.1236258785366733, "grad_norm": 0.3043820858001709, "learning_rate": 7.115562565363221e-06, "loss": 0.4084, "step": 3928 }, { "epoch": 2.1241665164894576, "grad_norm": 0.31215688586235046, "learning_rate": 7.1138517623534346e-06, "loss": 0.3875, "step": 3929 }, { "epoch": 2.124707154442242, "grad_norm": 0.33761128783226013, "learning_rate": 7.112140657954495e-06, "loss": 0.4115, "step": 3930 }, { "epoch": 2.125247792395026, "grad_norm": 0.31252968311309814, "learning_rate": 7.110429252410371e-06, "loss": 0.4143, "step": 3931 }, { "epoch": 2.1257884303478103, "grad_norm": 0.31016919016838074, "learning_rate": 7.108717545965072e-06, "loss": 0.4142, "step": 3932 }, { "epoch": 2.1263290683005946, "grad_norm": 0.3337792158126831, "learning_rate": 7.107005538862647e-06, "loss": 0.4247, "step": 3933 }, { "epoch": 2.126869706253379, "grad_norm": 0.2973635792732239, "learning_rate": 7.105293231347192e-06, "loss": 0.3987, "step": 3934 }, { "epoch": 2.1274103442061634, "grad_norm": 0.30230849981307983, "learning_rate": 7.103580623662845e-06, "loss": 0.3848, "step": 3935 }, { "epoch": 2.1279509821589477, "grad_norm": 0.31636372208595276, "learning_rate": 7.101867716053787e-06, "loss": 0.4301, "step": 3936 }, { "epoch": 2.1284916201117317, "grad_norm": 0.30398645997047424, "learning_rate": 7.100154508764243e-06, "loss": 0.3712, "step": 3937 }, { "epoch": 2.129032258064516, "grad_norm": 0.334203839302063, "learning_rate": 7.098441002038476e-06, "loss": 0.4045, "step": 3938 }, { "epoch": 2.1295728960173004, "grad_norm": 0.368012934923172, "learning_rate": 7.096727196120796e-06, "loss": 0.3982, "step": 3939 }, { "epoch": 2.1301135339700847, "grad_norm": 0.35910269618034363, "learning_rate": 7.0950130912555515e-06, "loss": 0.4148, "step": 3940 }, { "epoch": 2.130654171922869, "grad_norm": 0.3062628507614136, "learning_rate": 7.093298687687141e-06, "loss": 0.3762, "step": 3941 }, { "epoch": 2.1311948098756535, "grad_norm": 0.3462604880332947, "learning_rate": 7.091583985659999e-06, "loss": 0.3984, "step": 3942 }, { "epoch": 2.1317354478284374, "grad_norm": 0.3089619278907776, "learning_rate": 7.089868985418605e-06, "loss": 0.379, "step": 3943 }, { "epoch": 2.1322760857812217, "grad_norm": 0.34678173065185547, "learning_rate": 7.088153687207479e-06, "loss": 0.4223, "step": 3944 }, { "epoch": 2.132816723734006, "grad_norm": 0.27627551555633545, "learning_rate": 7.086438091271186e-06, "loss": 0.3217, "step": 3945 }, { "epoch": 2.1333573616867905, "grad_norm": 0.3420870304107666, "learning_rate": 7.084722197854334e-06, "loss": 0.3856, "step": 3946 }, { "epoch": 2.133897999639575, "grad_norm": 0.3347543478012085, "learning_rate": 7.08300600720157e-06, "loss": 0.4363, "step": 3947 }, { "epoch": 2.134438637592359, "grad_norm": 0.30943727493286133, "learning_rate": 7.0812895195575875e-06, "loss": 0.3733, "step": 3948 }, { "epoch": 2.134979275545143, "grad_norm": 0.32437270879745483, "learning_rate": 7.079572735167119e-06, "loss": 0.4325, "step": 3949 }, { "epoch": 2.1355199134979275, "grad_norm": 0.30971601605415344, "learning_rate": 7.077855654274939e-06, "loss": 0.3662, "step": 3950 }, { "epoch": 2.136060551450712, "grad_norm": 0.3495504856109619, "learning_rate": 7.076138277125868e-06, "loss": 0.4172, "step": 3951 }, { "epoch": 2.136601189403496, "grad_norm": 0.28133028745651245, "learning_rate": 7.0744206039647645e-06, "loss": 0.3627, "step": 3952 }, { "epoch": 2.1371418273562806, "grad_norm": 0.32612937688827515, "learning_rate": 7.072702635036535e-06, "loss": 0.4179, "step": 3953 }, { "epoch": 2.137682465309065, "grad_norm": 0.304006427526474, "learning_rate": 7.070984370586119e-06, "loss": 0.3927, "step": 3954 }, { "epoch": 2.138223103261849, "grad_norm": 0.30550140142440796, "learning_rate": 7.069265810858509e-06, "loss": 0.3633, "step": 3955 }, { "epoch": 2.138763741214633, "grad_norm": 0.3439962565898895, "learning_rate": 7.0675469560987295e-06, "loss": 0.4209, "step": 3956 }, { "epoch": 2.1393043791674176, "grad_norm": 0.3123915195465088, "learning_rate": 7.065827806551855e-06, "loss": 0.3985, "step": 3957 }, { "epoch": 2.139845017120202, "grad_norm": 0.3467862904071808, "learning_rate": 7.064108362462996e-06, "loss": 0.409, "step": 3958 }, { "epoch": 2.1403856550729863, "grad_norm": 0.3011173605918884, "learning_rate": 7.062388624077311e-06, "loss": 0.3816, "step": 3959 }, { "epoch": 2.14092629302577, "grad_norm": 0.3171491026878357, "learning_rate": 7.0606685916399945e-06, "loss": 0.4161, "step": 3960 }, { "epoch": 2.1414669309785546, "grad_norm": 0.3087047338485718, "learning_rate": 7.0589482653962856e-06, "loss": 0.3937, "step": 3961 }, { "epoch": 2.142007568931339, "grad_norm": 0.30052605271339417, "learning_rate": 7.057227645591467e-06, "loss": 0.3914, "step": 3962 }, { "epoch": 2.1425482068841233, "grad_norm": 0.28875285387039185, "learning_rate": 7.0555067324708604e-06, "loss": 0.3875, "step": 3963 }, { "epoch": 2.1430888448369076, "grad_norm": 0.34616565704345703, "learning_rate": 7.05378552627983e-06, "loss": 0.4161, "step": 3964 }, { "epoch": 2.143629482789692, "grad_norm": 0.31041425466537476, "learning_rate": 7.052064027263785e-06, "loss": 0.3853, "step": 3965 }, { "epoch": 2.144170120742476, "grad_norm": 0.37378445267677307, "learning_rate": 7.05034223566817e-06, "loss": 0.4231, "step": 3966 }, { "epoch": 2.1447107586952603, "grad_norm": 0.30695459246635437, "learning_rate": 7.048620151738478e-06, "loss": 0.3955, "step": 3967 }, { "epoch": 2.1452513966480447, "grad_norm": 0.36416110396385193, "learning_rate": 7.0468977757202375e-06, "loss": 0.4027, "step": 3968 }, { "epoch": 2.145792034600829, "grad_norm": 0.305324524641037, "learning_rate": 7.045175107859024e-06, "loss": 0.3895, "step": 3969 }, { "epoch": 2.1463326725536134, "grad_norm": 0.32880768179893494, "learning_rate": 7.043452148400452e-06, "loss": 0.3969, "step": 3970 }, { "epoch": 2.1468733105063977, "grad_norm": 0.3543906807899475, "learning_rate": 7.041728897590178e-06, "loss": 0.4065, "step": 3971 }, { "epoch": 2.1474139484591817, "grad_norm": 0.3406868577003479, "learning_rate": 7.040005355673899e-06, "loss": 0.4306, "step": 3972 }, { "epoch": 2.147954586411966, "grad_norm": 0.316324919462204, "learning_rate": 7.038281522897356e-06, "loss": 0.365, "step": 3973 }, { "epoch": 2.1484952243647504, "grad_norm": 0.29952096939086914, "learning_rate": 7.036557399506327e-06, "loss": 0.4209, "step": 3974 }, { "epoch": 2.1490358623175347, "grad_norm": 0.3157758116722107, "learning_rate": 7.034832985746638e-06, "loss": 0.3852, "step": 3975 }, { "epoch": 2.149576500270319, "grad_norm": 0.34011179208755493, "learning_rate": 7.033108281864152e-06, "loss": 0.4282, "step": 3976 }, { "epoch": 2.1501171382231035, "grad_norm": 0.29371097683906555, "learning_rate": 7.0313832881047725e-06, "loss": 0.3558, "step": 3977 }, { "epoch": 2.1506577761758874, "grad_norm": 0.35102444887161255, "learning_rate": 7.029658004714447e-06, "loss": 0.4375, "step": 3978 }, { "epoch": 2.1511984141286717, "grad_norm": 0.30691254138946533, "learning_rate": 7.027932431939163e-06, "loss": 0.3707, "step": 3979 }, { "epoch": 2.151739052081456, "grad_norm": 0.294600248336792, "learning_rate": 7.026206570024949e-06, "loss": 0.3815, "step": 3980 }, { "epoch": 2.1522796900342405, "grad_norm": 0.30190160870552063, "learning_rate": 7.024480419217878e-06, "loss": 0.3601, "step": 3981 }, { "epoch": 2.152820327987025, "grad_norm": 0.31586983799934387, "learning_rate": 7.022753979764058e-06, "loss": 0.4305, "step": 3982 }, { "epoch": 2.153360965939809, "grad_norm": 0.31739407777786255, "learning_rate": 7.021027251909643e-06, "loss": 0.3953, "step": 3983 }, { "epoch": 2.153901603892593, "grad_norm": 0.3141368627548218, "learning_rate": 7.019300235900829e-06, "loss": 0.4127, "step": 3984 }, { "epoch": 2.1544422418453775, "grad_norm": 0.27301716804504395, "learning_rate": 7.017572931983846e-06, "loss": 0.3737, "step": 3985 }, { "epoch": 2.154982879798162, "grad_norm": 0.3058796226978302, "learning_rate": 7.015845340404973e-06, "loss": 0.4071, "step": 3986 }, { "epoch": 2.155523517750946, "grad_norm": 0.3368726968765259, "learning_rate": 7.014117461410526e-06, "loss": 0.3808, "step": 3987 }, { "epoch": 2.1560641557037306, "grad_norm": 0.3034706115722656, "learning_rate": 7.012389295246865e-06, "loss": 0.363, "step": 3988 }, { "epoch": 2.1566047936565145, "grad_norm": 0.33887624740600586, "learning_rate": 7.010660842160386e-06, "loss": 0.4696, "step": 3989 }, { "epoch": 2.157145431609299, "grad_norm": 0.29883840680122375, "learning_rate": 7.00893210239753e-06, "loss": 0.4038, "step": 3990 }, { "epoch": 2.157686069562083, "grad_norm": 0.30450916290283203, "learning_rate": 7.007203076204776e-06, "loss": 0.3788, "step": 3991 }, { "epoch": 2.1582267075148676, "grad_norm": 0.3299378454685211, "learning_rate": 7.005473763828647e-06, "loss": 0.3999, "step": 3992 }, { "epoch": 2.158767345467652, "grad_norm": 0.3248005211353302, "learning_rate": 7.0037441655157045e-06, "loss": 0.4304, "step": 3993 }, { "epoch": 2.1593079834204363, "grad_norm": 0.33100029826164246, "learning_rate": 7.0020142815125545e-06, "loss": 0.4253, "step": 3994 }, { "epoch": 2.15984862137322, "grad_norm": 0.31740209460258484, "learning_rate": 7.000284112065836e-06, "loss": 0.4396, "step": 3995 }, { "epoch": 2.1603892593260046, "grad_norm": 0.2936188876628876, "learning_rate": 6.998553657422236e-06, "loss": 0.3766, "step": 3996 }, { "epoch": 2.160929897278789, "grad_norm": 0.2977309226989746, "learning_rate": 6.9968229178284775e-06, "loss": 0.3841, "step": 3997 }, { "epoch": 2.1614705352315733, "grad_norm": 0.289065420627594, "learning_rate": 6.9950918935313305e-06, "loss": 0.3942, "step": 3998 }, { "epoch": 2.1620111731843576, "grad_norm": 0.2957701086997986, "learning_rate": 6.993360584777597e-06, "loss": 0.3966, "step": 3999 }, { "epoch": 2.162551811137142, "grad_norm": 0.29311293363571167, "learning_rate": 6.9916289918141265e-06, "loss": 0.3617, "step": 4000 }, { "epoch": 2.163092449089926, "grad_norm": 0.341141015291214, "learning_rate": 6.989897114887805e-06, "loss": 0.4497, "step": 4001 }, { "epoch": 2.1636330870427103, "grad_norm": 0.2881960868835449, "learning_rate": 6.98816495424556e-06, "loss": 0.4157, "step": 4002 }, { "epoch": 2.1641737249954947, "grad_norm": 0.2937532961368561, "learning_rate": 6.986432510134361e-06, "loss": 0.397, "step": 4003 }, { "epoch": 2.164714362948279, "grad_norm": 0.29233914613723755, "learning_rate": 6.9846997828012174e-06, "loss": 0.3754, "step": 4004 }, { "epoch": 2.1652550009010634, "grad_norm": 0.32360079884529114, "learning_rate": 6.982966772493176e-06, "loss": 0.4484, "step": 4005 }, { "epoch": 2.1657956388538477, "grad_norm": 0.320137083530426, "learning_rate": 6.9812334794573285e-06, "loss": 0.4113, "step": 4006 }, { "epoch": 2.1663362768066317, "grad_norm": 0.2784048914909363, "learning_rate": 6.979499903940803e-06, "loss": 0.3888, "step": 4007 }, { "epoch": 2.166876914759416, "grad_norm": 0.3299471437931061, "learning_rate": 6.977766046190771e-06, "loss": 0.4527, "step": 4008 }, { "epoch": 2.1674175527122004, "grad_norm": 0.3056192398071289, "learning_rate": 6.976031906454441e-06, "loss": 0.387, "step": 4009 }, { "epoch": 2.1679581906649847, "grad_norm": 0.31629693508148193, "learning_rate": 6.974297484979066e-06, "loss": 0.4065, "step": 4010 }, { "epoch": 2.168498828617769, "grad_norm": 0.307719886302948, "learning_rate": 6.972562782011934e-06, "loss": 0.4331, "step": 4011 }, { "epoch": 2.1690394665705535, "grad_norm": 0.31467607617378235, "learning_rate": 6.970827797800378e-06, "loss": 0.403, "step": 4012 }, { "epoch": 2.1695801045233374, "grad_norm": 0.2923955023288727, "learning_rate": 6.969092532591767e-06, "loss": 0.3769, "step": 4013 }, { "epoch": 2.1701207424761217, "grad_norm": 0.31039196252822876, "learning_rate": 6.967356986633512e-06, "loss": 0.3884, "step": 4014 }, { "epoch": 2.170661380428906, "grad_norm": 0.32309892773628235, "learning_rate": 6.965621160173066e-06, "loss": 0.4091, "step": 4015 }, { "epoch": 2.1712020183816905, "grad_norm": 0.3064098358154297, "learning_rate": 6.96388505345792e-06, "loss": 0.3954, "step": 4016 }, { "epoch": 2.171742656334475, "grad_norm": 0.32693055272102356, "learning_rate": 6.962148666735602e-06, "loss": 0.4027, "step": 4017 }, { "epoch": 2.1722832942872587, "grad_norm": 0.3119713366031647, "learning_rate": 6.960412000253687e-06, "loss": 0.4001, "step": 4018 }, { "epoch": 2.172823932240043, "grad_norm": 0.31579023599624634, "learning_rate": 6.95867505425978e-06, "loss": 0.4049, "step": 4019 }, { "epoch": 2.1733645701928275, "grad_norm": 0.32477495074272156, "learning_rate": 6.9569378290015375e-06, "loss": 0.3839, "step": 4020 }, { "epoch": 2.173905208145612, "grad_norm": 0.29349246621131897, "learning_rate": 6.9552003247266465e-06, "loss": 0.3928, "step": 4021 }, { "epoch": 2.174445846098396, "grad_norm": 0.30101755261421204, "learning_rate": 6.95346254168284e-06, "loss": 0.3904, "step": 4022 }, { "epoch": 2.1749864840511806, "grad_norm": 0.33197450637817383, "learning_rate": 6.951724480117884e-06, "loss": 0.4102, "step": 4023 }, { "epoch": 2.1755271220039645, "grad_norm": 0.342006117105484, "learning_rate": 6.949986140279592e-06, "loss": 0.4215, "step": 4024 }, { "epoch": 2.176067759956749, "grad_norm": 0.3066186308860779, "learning_rate": 6.948247522415811e-06, "loss": 0.3803, "step": 4025 }, { "epoch": 2.176608397909533, "grad_norm": 0.36611872911453247, "learning_rate": 6.94650862677443e-06, "loss": 0.4349, "step": 4026 }, { "epoch": 2.1771490358623176, "grad_norm": 0.3239299952983856, "learning_rate": 6.944769453603378e-06, "loss": 0.41, "step": 4027 }, { "epoch": 2.177689673815102, "grad_norm": 0.35315537452697754, "learning_rate": 6.9430300031506244e-06, "loss": 0.415, "step": 4028 }, { "epoch": 2.1782303117678863, "grad_norm": 0.38641583919525146, "learning_rate": 6.941290275664175e-06, "loss": 0.3841, "step": 4029 }, { "epoch": 2.17877094972067, "grad_norm": 0.3433842957019806, "learning_rate": 6.939550271392079e-06, "loss": 0.4018, "step": 4030 }, { "epoch": 2.1793115876734546, "grad_norm": 0.35578569769859314, "learning_rate": 6.937809990582421e-06, "loss": 0.4095, "step": 4031 }, { "epoch": 2.179852225626239, "grad_norm": 0.35455605387687683, "learning_rate": 6.936069433483329e-06, "loss": 0.3989, "step": 4032 }, { "epoch": 2.1803928635790233, "grad_norm": 0.32006603479385376, "learning_rate": 6.934328600342966e-06, "loss": 0.3701, "step": 4033 }, { "epoch": 2.1809335015318077, "grad_norm": 0.3517322242259979, "learning_rate": 6.93258749140954e-06, "loss": 0.4208, "step": 4034 }, { "epoch": 2.181474139484592, "grad_norm": 0.3352504372596741, "learning_rate": 6.930846106931292e-06, "loss": 0.4202, "step": 4035 }, { "epoch": 2.182014777437376, "grad_norm": 0.3257448971271515, "learning_rate": 6.929104447156508e-06, "loss": 0.3979, "step": 4036 }, { "epoch": 2.1825554153901603, "grad_norm": 0.3509262800216675, "learning_rate": 6.9273625123335085e-06, "loss": 0.4567, "step": 4037 }, { "epoch": 2.1830960533429447, "grad_norm": 0.28874605894088745, "learning_rate": 6.9256203027106585e-06, "loss": 0.3563, "step": 4038 }, { "epoch": 2.183636691295729, "grad_norm": 0.35471436381340027, "learning_rate": 6.923877818536355e-06, "loss": 0.4083, "step": 4039 }, { "epoch": 2.1841773292485134, "grad_norm": 0.3229536712169647, "learning_rate": 6.922135060059043e-06, "loss": 0.4115, "step": 4040 }, { "epoch": 2.1847179672012977, "grad_norm": 0.3280383050441742, "learning_rate": 6.9203920275271965e-06, "loss": 0.4076, "step": 4041 }, { "epoch": 2.1852586051540817, "grad_norm": 0.3501759171485901, "learning_rate": 6.9186487211893374e-06, "loss": 0.4334, "step": 4042 }, { "epoch": 2.185799243106866, "grad_norm": 0.33414164185523987, "learning_rate": 6.916905141294023e-06, "loss": 0.3978, "step": 4043 }, { "epoch": 2.1863398810596504, "grad_norm": 0.2950269281864166, "learning_rate": 6.915161288089849e-06, "loss": 0.4086, "step": 4044 }, { "epoch": 2.1868805190124347, "grad_norm": 0.33400097489356995, "learning_rate": 6.913417161825449e-06, "loss": 0.4427, "step": 4045 }, { "epoch": 2.187421156965219, "grad_norm": 0.32226991653442383, "learning_rate": 6.911672762749502e-06, "loss": 0.3979, "step": 4046 }, { "epoch": 2.187961794918003, "grad_norm": 0.3366377055644989, "learning_rate": 6.9099280911107166e-06, "loss": 0.4158, "step": 4047 }, { "epoch": 2.1885024328707874, "grad_norm": 0.3064139783382416, "learning_rate": 6.908183147157847e-06, "loss": 0.3948, "step": 4048 }, { "epoch": 2.1890430708235717, "grad_norm": 0.3099367320537567, "learning_rate": 6.906437931139686e-06, "loss": 0.3498, "step": 4049 }, { "epoch": 2.189583708776356, "grad_norm": 0.40618830919265747, "learning_rate": 6.904692443305059e-06, "loss": 0.3899, "step": 4050 }, { "epoch": 2.1901243467291405, "grad_norm": 0.3219248354434967, "learning_rate": 6.902946683902839e-06, "loss": 0.4097, "step": 4051 }, { "epoch": 2.190664984681925, "grad_norm": 0.349616676568985, "learning_rate": 6.90120065318193e-06, "loss": 0.4085, "step": 4052 }, { "epoch": 2.1912056226347087, "grad_norm": 0.3385084569454193, "learning_rate": 6.899454351391279e-06, "loss": 0.4001, "step": 4053 }, { "epoch": 2.191746260587493, "grad_norm": 0.3068590462207794, "learning_rate": 6.897707778779871e-06, "loss": 0.3806, "step": 4054 }, { "epoch": 2.1922868985402775, "grad_norm": 0.34392955899238586, "learning_rate": 6.895960935596728e-06, "loss": 0.4064, "step": 4055 }, { "epoch": 2.192827536493062, "grad_norm": 0.35632532835006714, "learning_rate": 6.8942138220909116e-06, "loss": 0.4246, "step": 4056 }, { "epoch": 2.193368174445846, "grad_norm": 0.293804794549942, "learning_rate": 6.892466438511525e-06, "loss": 0.4068, "step": 4057 }, { "epoch": 2.1939088123986306, "grad_norm": 0.3229387700557709, "learning_rate": 6.8907187851077026e-06, "loss": 0.385, "step": 4058 }, { "epoch": 2.1944494503514145, "grad_norm": 0.32888251543045044, "learning_rate": 6.888970862128627e-06, "loss": 0.3808, "step": 4059 }, { "epoch": 2.194990088304199, "grad_norm": 0.3496990501880646, "learning_rate": 6.8872226698235065e-06, "loss": 0.4257, "step": 4060 }, { "epoch": 2.195530726256983, "grad_norm": 0.29963988065719604, "learning_rate": 6.885474208441602e-06, "loss": 0.3935, "step": 4061 }, { "epoch": 2.1960713642097676, "grad_norm": 0.31232914328575134, "learning_rate": 6.883725478232204e-06, "loss": 0.3689, "step": 4062 }, { "epoch": 2.196612002162552, "grad_norm": 0.3286808133125305, "learning_rate": 6.8819764794446434e-06, "loss": 0.4064, "step": 4063 }, { "epoch": 2.1971526401153363, "grad_norm": 0.31781333684921265, "learning_rate": 6.880227212328285e-06, "loss": 0.4102, "step": 4064 }, { "epoch": 2.19769327806812, "grad_norm": 0.30479738116264343, "learning_rate": 6.8784776771325426e-06, "loss": 0.4077, "step": 4065 }, { "epoch": 2.1982339160209046, "grad_norm": 0.3371626138687134, "learning_rate": 6.876727874106858e-06, "loss": 0.3896, "step": 4066 }, { "epoch": 2.198774553973689, "grad_norm": 0.2924918532371521, "learning_rate": 6.874977803500716e-06, "loss": 0.3964, "step": 4067 }, { "epoch": 2.1993151919264733, "grad_norm": 0.30662932991981506, "learning_rate": 6.873227465563639e-06, "loss": 0.4427, "step": 4068 }, { "epoch": 2.1998558298792577, "grad_norm": 0.2745401859283447, "learning_rate": 6.8714768605451865e-06, "loss": 0.3562, "step": 4069 }, { "epoch": 2.200396467832042, "grad_norm": 0.3846108019351959, "learning_rate": 6.869725988694955e-06, "loss": 0.437, "step": 4070 }, { "epoch": 2.200937105784826, "grad_norm": 0.29725199937820435, "learning_rate": 6.867974850262582e-06, "loss": 0.3998, "step": 4071 }, { "epoch": 2.2014777437376103, "grad_norm": 0.3217564821243286, "learning_rate": 6.866223445497743e-06, "loss": 0.411, "step": 4072 }, { "epoch": 2.2020183816903947, "grad_norm": 0.32411977648735046, "learning_rate": 6.864471774650147e-06, "loss": 0.4095, "step": 4073 }, { "epoch": 2.202559019643179, "grad_norm": 0.33379268646240234, "learning_rate": 6.862719837969548e-06, "loss": 0.4391, "step": 4074 }, { "epoch": 2.2030996575959634, "grad_norm": 0.3779868483543396, "learning_rate": 6.860967635705732e-06, "loss": 0.4423, "step": 4075 }, { "epoch": 2.2036402955487473, "grad_norm": 0.3187415897846222, "learning_rate": 6.859215168108523e-06, "loss": 0.4474, "step": 4076 }, { "epoch": 2.2041809335015317, "grad_norm": 0.2879991829395294, "learning_rate": 6.8574624354277866e-06, "loss": 0.368, "step": 4077 }, { "epoch": 2.204721571454316, "grad_norm": 0.3639747202396393, "learning_rate": 6.855709437913424e-06, "loss": 0.3946, "step": 4078 }, { "epoch": 2.2052622094071004, "grad_norm": 0.305832177400589, "learning_rate": 6.853956175815375e-06, "loss": 0.3756, "step": 4079 }, { "epoch": 2.2058028473598847, "grad_norm": 0.32758861780166626, "learning_rate": 6.8522026493836144e-06, "loss": 0.4268, "step": 4080 }, { "epoch": 2.206343485312669, "grad_norm": 0.29522502422332764, "learning_rate": 6.850448858868161e-06, "loss": 0.4, "step": 4081 }, { "epoch": 2.2068841232654535, "grad_norm": 0.29559990763664246, "learning_rate": 6.848694804519063e-06, "loss": 0.4127, "step": 4082 }, { "epoch": 2.2074247612182374, "grad_norm": 0.3084014058113098, "learning_rate": 6.846940486586411e-06, "loss": 0.3849, "step": 4083 }, { "epoch": 2.2079653991710217, "grad_norm": 0.33527031540870667, "learning_rate": 6.845185905320333e-06, "loss": 0.3979, "step": 4084 }, { "epoch": 2.208506037123806, "grad_norm": 0.30264729261398315, "learning_rate": 6.843431060970995e-06, "loss": 0.3825, "step": 4085 }, { "epoch": 2.2090466750765905, "grad_norm": 0.323540061712265, "learning_rate": 6.841675953788598e-06, "loss": 0.4055, "step": 4086 }, { "epoch": 2.209587313029375, "grad_norm": 0.32292747497558594, "learning_rate": 6.839920584023384e-06, "loss": 0.4317, "step": 4087 }, { "epoch": 2.2101279509821588, "grad_norm": 0.3411714732646942, "learning_rate": 6.838164951925628e-06, "loss": 0.3958, "step": 4088 }, { "epoch": 2.210668588934943, "grad_norm": 0.31609079241752625, "learning_rate": 6.836409057745645e-06, "loss": 0.419, "step": 4089 }, { "epoch": 2.2112092268877275, "grad_norm": 0.30467334389686584, "learning_rate": 6.834652901733789e-06, "loss": 0.3877, "step": 4090 }, { "epoch": 2.211749864840512, "grad_norm": 0.33683645725250244, "learning_rate": 6.83289648414045e-06, "loss": 0.4336, "step": 4091 }, { "epoch": 2.212290502793296, "grad_norm": 0.29473188519477844, "learning_rate": 6.831139805216053e-06, "loss": 0.3971, "step": 4092 }, { "epoch": 2.2128311407460806, "grad_norm": 0.3108297884464264, "learning_rate": 6.829382865211063e-06, "loss": 0.4082, "step": 4093 }, { "epoch": 2.2133717786988645, "grad_norm": 0.30056747794151306, "learning_rate": 6.827625664375979e-06, "loss": 0.3676, "step": 4094 }, { "epoch": 2.213912416651649, "grad_norm": 0.31948915123939514, "learning_rate": 6.825868202961343e-06, "loss": 0.4109, "step": 4095 }, { "epoch": 2.214453054604433, "grad_norm": 0.32373663783073425, "learning_rate": 6.824110481217728e-06, "loss": 0.3751, "step": 4096 }, { "epoch": 2.2149936925572176, "grad_norm": 0.3314642608165741, "learning_rate": 6.822352499395751e-06, "loss": 0.3544, "step": 4097 }, { "epoch": 2.215534330510002, "grad_norm": 0.3104372024536133, "learning_rate": 6.820594257746055e-06, "loss": 0.4083, "step": 4098 }, { "epoch": 2.2160749684627863, "grad_norm": 0.3360132873058319, "learning_rate": 6.818835756519331e-06, "loss": 0.4017, "step": 4099 }, { "epoch": 2.21661560641557, "grad_norm": 0.3249875009059906, "learning_rate": 6.8170769959663045e-06, "loss": 0.434, "step": 4100 }, { "epoch": 2.2171562443683546, "grad_norm": 0.28293728828430176, "learning_rate": 6.815317976337734e-06, "loss": 0.3623, "step": 4101 }, { "epoch": 2.217696882321139, "grad_norm": 0.3353882133960724, "learning_rate": 6.8135586978844175e-06, "loss": 0.3761, "step": 4102 }, { "epoch": 2.2182375202739233, "grad_norm": 0.3167102038860321, "learning_rate": 6.811799160857191e-06, "loss": 0.3856, "step": 4103 }, { "epoch": 2.2187781582267077, "grad_norm": 0.31739798188209534, "learning_rate": 6.810039365506923e-06, "loss": 0.3971, "step": 4104 }, { "epoch": 2.2193187961794916, "grad_norm": 0.2923511266708374, "learning_rate": 6.808279312084525e-06, "loss": 0.3651, "step": 4105 }, { "epoch": 2.219859434132276, "grad_norm": 0.32602718472480774, "learning_rate": 6.806519000840941e-06, "loss": 0.4572, "step": 4106 }, { "epoch": 2.2204000720850603, "grad_norm": 0.3319365680217743, "learning_rate": 6.8047584320271555e-06, "loss": 0.4192, "step": 4107 }, { "epoch": 2.2209407100378447, "grad_norm": 0.33049488067626953, "learning_rate": 6.802997605894183e-06, "loss": 0.3878, "step": 4108 }, { "epoch": 2.221481347990629, "grad_norm": 0.3242778778076172, "learning_rate": 6.8012365226930825e-06, "loss": 0.4217, "step": 4109 }, { "epoch": 2.2220219859434134, "grad_norm": 0.33477145433425903, "learning_rate": 6.799475182674942e-06, "loss": 0.3831, "step": 4110 }, { "epoch": 2.2225626238961977, "grad_norm": 0.3455541133880615, "learning_rate": 6.797713586090893e-06, "loss": 0.4395, "step": 4111 }, { "epoch": 2.2231032618489817, "grad_norm": 0.3297395706176758, "learning_rate": 6.795951733192101e-06, "loss": 0.4273, "step": 4112 }, { "epoch": 2.223643899801766, "grad_norm": 0.3724072277545929, "learning_rate": 6.794189624229768e-06, "loss": 0.4298, "step": 4113 }, { "epoch": 2.2241845377545504, "grad_norm": 0.3061438500881195, "learning_rate": 6.792427259455131e-06, "loss": 0.3993, "step": 4114 }, { "epoch": 2.2247251757073347, "grad_norm": 0.3195669651031494, "learning_rate": 6.790664639119464e-06, "loss": 0.3715, "step": 4115 }, { "epoch": 2.225265813660119, "grad_norm": 0.4220083951950073, "learning_rate": 6.788901763474082e-06, "loss": 0.4554, "step": 4116 }, { "epoch": 2.225806451612903, "grad_norm": 0.29164981842041016, "learning_rate": 6.787138632770327e-06, "loss": 0.3788, "step": 4117 }, { "epoch": 2.2263470895656874, "grad_norm": 0.35630443692207336, "learning_rate": 6.785375247259588e-06, "loss": 0.4045, "step": 4118 }, { "epoch": 2.2268877275184717, "grad_norm": 0.41154932975769043, "learning_rate": 6.783611607193282e-06, "loss": 0.4645, "step": 4119 }, { "epoch": 2.227428365471256, "grad_norm": 0.3228520154953003, "learning_rate": 6.781847712822869e-06, "loss": 0.4128, "step": 4120 }, { "epoch": 2.2279690034240405, "grad_norm": 0.32682761549949646, "learning_rate": 6.7800835643998374e-06, "loss": 0.3776, "step": 4121 }, { "epoch": 2.228509641376825, "grad_norm": 0.3961796164512634, "learning_rate": 6.778319162175722e-06, "loss": 0.4248, "step": 4122 }, { "epoch": 2.2290502793296088, "grad_norm": 0.32837003469467163, "learning_rate": 6.776554506402081e-06, "loss": 0.418, "step": 4123 }, { "epoch": 2.229590917282393, "grad_norm": 0.3298190236091614, "learning_rate": 6.774789597330523e-06, "loss": 0.4255, "step": 4124 }, { "epoch": 2.2301315552351775, "grad_norm": 0.3101571202278137, "learning_rate": 6.773024435212678e-06, "loss": 0.4013, "step": 4125 }, { "epoch": 2.230672193187962, "grad_norm": 0.31095200777053833, "learning_rate": 6.771259020300227e-06, "loss": 0.3868, "step": 4126 }, { "epoch": 2.231212831140746, "grad_norm": 0.3008178770542145, "learning_rate": 6.769493352844876e-06, "loss": 0.3842, "step": 4127 }, { "epoch": 2.2317534690935306, "grad_norm": 0.3014555275440216, "learning_rate": 6.76772743309837e-06, "loss": 0.4122, "step": 4128 }, { "epoch": 2.2322941070463145, "grad_norm": 0.29356569051742554, "learning_rate": 6.765961261312492e-06, "loss": 0.3627, "step": 4129 }, { "epoch": 2.232834744999099, "grad_norm": 0.32311195135116577, "learning_rate": 6.76419483773906e-06, "loss": 0.4066, "step": 4130 }, { "epoch": 2.233375382951883, "grad_norm": 0.2994847893714905, "learning_rate": 6.762428162629925e-06, "loss": 0.3879, "step": 4131 }, { "epoch": 2.2339160209046676, "grad_norm": 0.32352471351623535, "learning_rate": 6.76066123623698e-06, "loss": 0.3943, "step": 4132 }, { "epoch": 2.234456658857452, "grad_norm": 0.30245065689086914, "learning_rate": 6.758894058812146e-06, "loss": 0.3753, "step": 4133 }, { "epoch": 2.234997296810236, "grad_norm": 0.3160868287086487, "learning_rate": 6.757126630607389e-06, "loss": 0.4269, "step": 4134 }, { "epoch": 2.23553793476302, "grad_norm": 0.3280041813850403, "learning_rate": 6.755358951874701e-06, "loss": 0.4646, "step": 4135 }, { "epoch": 2.2360785727158046, "grad_norm": 0.2949141561985016, "learning_rate": 6.753591022866117e-06, "loss": 0.4271, "step": 4136 }, { "epoch": 2.236619210668589, "grad_norm": 0.284419983625412, "learning_rate": 6.751822843833704e-06, "loss": 0.3972, "step": 4137 }, { "epoch": 2.2371598486213733, "grad_norm": 0.2773050367832184, "learning_rate": 6.750054415029567e-06, "loss": 0.3976, "step": 4138 }, { "epoch": 2.2377004865741577, "grad_norm": 0.33673977851867676, "learning_rate": 6.748285736705844e-06, "loss": 0.4122, "step": 4139 }, { "epoch": 2.238241124526942, "grad_norm": 0.3167836368083954, "learning_rate": 6.7465168091147094e-06, "loss": 0.3868, "step": 4140 }, { "epoch": 2.238781762479726, "grad_norm": 0.3082415461540222, "learning_rate": 6.7447476325083764e-06, "loss": 0.4109, "step": 4141 }, { "epoch": 2.2393224004325103, "grad_norm": 0.34712666273117065, "learning_rate": 6.7429782071390895e-06, "loss": 0.3989, "step": 4142 }, { "epoch": 2.2398630383852947, "grad_norm": 0.3211289346218109, "learning_rate": 6.741208533259128e-06, "loss": 0.4312, "step": 4143 }, { "epoch": 2.240403676338079, "grad_norm": 0.30233892798423767, "learning_rate": 6.739438611120813e-06, "loss": 0.4164, "step": 4144 }, { "epoch": 2.2409443142908634, "grad_norm": 0.3043302595615387, "learning_rate": 6.737668440976494e-06, "loss": 0.3808, "step": 4145 }, { "epoch": 2.2414849522436473, "grad_norm": 0.3419129550457001, "learning_rate": 6.735898023078558e-06, "loss": 0.4315, "step": 4146 }, { "epoch": 2.2420255901964317, "grad_norm": 0.3226799964904785, "learning_rate": 6.734127357679431e-06, "loss": 0.384, "step": 4147 }, { "epoch": 2.242566228149216, "grad_norm": 0.3249484896659851, "learning_rate": 6.732356445031569e-06, "loss": 0.4241, "step": 4148 }, { "epoch": 2.2431068661020004, "grad_norm": 0.36469215154647827, "learning_rate": 6.730585285387465e-06, "loss": 0.4364, "step": 4149 }, { "epoch": 2.2436475040547847, "grad_norm": 0.32166287302970886, "learning_rate": 6.728813878999652e-06, "loss": 0.3981, "step": 4150 }, { "epoch": 2.244188142007569, "grad_norm": 0.3083460330963135, "learning_rate": 6.727042226120686e-06, "loss": 0.3986, "step": 4151 }, { "epoch": 2.244728779960353, "grad_norm": 0.31005361676216125, "learning_rate": 6.725270327003174e-06, "loss": 0.3685, "step": 4152 }, { "epoch": 2.2452694179131374, "grad_norm": 0.347377747297287, "learning_rate": 6.723498181899746e-06, "loss": 0.4205, "step": 4153 }, { "epoch": 2.2458100558659218, "grad_norm": 0.3285289704799652, "learning_rate": 6.721725791063071e-06, "loss": 0.4045, "step": 4154 }, { "epoch": 2.246350693818706, "grad_norm": 0.32021549344062805, "learning_rate": 6.719953154745857e-06, "loss": 0.3981, "step": 4155 }, { "epoch": 2.2468913317714905, "grad_norm": 0.3265751600265503, "learning_rate": 6.7181802732008385e-06, "loss": 0.3944, "step": 4156 }, { "epoch": 2.247431969724275, "grad_norm": 0.36013728380203247, "learning_rate": 6.716407146680793e-06, "loss": 0.4218, "step": 4157 }, { "epoch": 2.2479726076770588, "grad_norm": 0.29418373107910156, "learning_rate": 6.714633775438528e-06, "loss": 0.3816, "step": 4158 }, { "epoch": 2.248513245629843, "grad_norm": 0.34002870321273804, "learning_rate": 6.712860159726887e-06, "loss": 0.4253, "step": 4159 }, { "epoch": 2.2490538835826275, "grad_norm": 0.34566155076026917, "learning_rate": 6.7110862997987525e-06, "loss": 0.4148, "step": 4160 }, { "epoch": 2.249594521535412, "grad_norm": 0.2925388216972351, "learning_rate": 6.709312195907034e-06, "loss": 0.3885, "step": 4161 }, { "epoch": 2.250135159488196, "grad_norm": 0.34234461188316345, "learning_rate": 6.707537848304682e-06, "loss": 0.4115, "step": 4162 }, { "epoch": 2.25067579744098, "grad_norm": 0.3295589089393616, "learning_rate": 6.705763257244679e-06, "loss": 0.4087, "step": 4163 }, { "epoch": 2.2512164353937645, "grad_norm": 0.32688218355178833, "learning_rate": 6.703988422980045e-06, "loss": 0.414, "step": 4164 }, { "epoch": 2.251757073346549, "grad_norm": 0.30534660816192627, "learning_rate": 6.70221334576383e-06, "loss": 0.3775, "step": 4165 }, { "epoch": 2.252297711299333, "grad_norm": 0.34004101157188416, "learning_rate": 6.7004380258491256e-06, "loss": 0.4108, "step": 4166 }, { "epoch": 2.2528383492521176, "grad_norm": 0.3068768382072449, "learning_rate": 6.698662463489047e-06, "loss": 0.3879, "step": 4167 }, { "epoch": 2.253378987204902, "grad_norm": 0.3664450943470001, "learning_rate": 6.696886658936754e-06, "loss": 0.4355, "step": 4168 }, { "epoch": 2.2539196251576863, "grad_norm": 0.35220062732696533, "learning_rate": 6.695110612445439e-06, "loss": 0.4041, "step": 4169 }, { "epoch": 2.25446026311047, "grad_norm": 0.35381558537483215, "learning_rate": 6.693334324268328e-06, "loss": 0.4549, "step": 4170 }, { "epoch": 2.2550009010632546, "grad_norm": 0.30724719166755676, "learning_rate": 6.691557794658676e-06, "loss": 0.3792, "step": 4171 }, { "epoch": 2.255541539016039, "grad_norm": 0.35158011317253113, "learning_rate": 6.689781023869784e-06, "loss": 0.4359, "step": 4172 }, { "epoch": 2.2560821769688233, "grad_norm": 0.33039185404777527, "learning_rate": 6.688004012154975e-06, "loss": 0.3976, "step": 4173 }, { "epoch": 2.2566228149216077, "grad_norm": 0.3083387315273285, "learning_rate": 6.686226759767616e-06, "loss": 0.3943, "step": 4174 }, { "epoch": 2.2571634528743916, "grad_norm": 0.3018893599510193, "learning_rate": 6.684449266961101e-06, "loss": 0.3741, "step": 4175 }, { "epoch": 2.257704090827176, "grad_norm": 0.3342258334159851, "learning_rate": 6.682671533988864e-06, "loss": 0.4128, "step": 4176 }, { "epoch": 2.2582447287799603, "grad_norm": 0.30471324920654297, "learning_rate": 6.680893561104373e-06, "loss": 0.4202, "step": 4177 }, { "epoch": 2.2587853667327447, "grad_norm": 0.32386910915374756, "learning_rate": 6.679115348561122e-06, "loss": 0.4405, "step": 4178 }, { "epoch": 2.259326004685529, "grad_norm": 0.2943861186504364, "learning_rate": 6.677336896612652e-06, "loss": 0.3788, "step": 4179 }, { "epoch": 2.2598666426383134, "grad_norm": 0.35374173521995544, "learning_rate": 6.675558205512527e-06, "loss": 0.408, "step": 4180 }, { "epoch": 2.2604072805910973, "grad_norm": 0.30802565813064575, "learning_rate": 6.673779275514351e-06, "loss": 0.4028, "step": 4181 }, { "epoch": 2.2609479185438817, "grad_norm": 0.2978096008300781, "learning_rate": 6.672000106871761e-06, "loss": 0.3682, "step": 4182 }, { "epoch": 2.261488556496666, "grad_norm": 0.31863123178482056, "learning_rate": 6.670220699838429e-06, "loss": 0.4376, "step": 4183 }, { "epoch": 2.2620291944494504, "grad_norm": 0.3239419758319855, "learning_rate": 6.668441054668055e-06, "loss": 0.4333, "step": 4184 }, { "epoch": 2.2625698324022347, "grad_norm": 0.31215062737464905, "learning_rate": 6.666661171614382e-06, "loss": 0.404, "step": 4185 }, { "epoch": 2.263110470355019, "grad_norm": 0.2932754456996918, "learning_rate": 6.66488105093118e-06, "loss": 0.3572, "step": 4186 }, { "epoch": 2.263651108307803, "grad_norm": 0.3539320230484009, "learning_rate": 6.663100692872259e-06, "loss": 0.4356, "step": 4187 }, { "epoch": 2.2641917462605874, "grad_norm": 0.2999046742916107, "learning_rate": 6.661320097691454e-06, "loss": 0.3999, "step": 4188 }, { "epoch": 2.2647323842133718, "grad_norm": 0.32621529698371887, "learning_rate": 6.659539265642643e-06, "loss": 0.407, "step": 4189 }, { "epoch": 2.265273022166156, "grad_norm": 0.2953600287437439, "learning_rate": 6.657758196979732e-06, "loss": 0.3681, "step": 4190 }, { "epoch": 2.2658136601189405, "grad_norm": 0.30013948678970337, "learning_rate": 6.655976891956662e-06, "loss": 0.3866, "step": 4191 }, { "epoch": 2.2663542980717244, "grad_norm": 0.34683653712272644, "learning_rate": 6.654195350827411e-06, "loss": 0.4192, "step": 4192 }, { "epoch": 2.2668949360245088, "grad_norm": 0.31711041927337646, "learning_rate": 6.652413573845985e-06, "loss": 0.3867, "step": 4193 }, { "epoch": 2.267435573977293, "grad_norm": 0.29812562465667725, "learning_rate": 6.650631561266427e-06, "loss": 0.3507, "step": 4194 }, { "epoch": 2.2679762119300775, "grad_norm": 0.349576473236084, "learning_rate": 6.648849313342816e-06, "loss": 0.4139, "step": 4195 }, { "epoch": 2.268516849882862, "grad_norm": 0.31229591369628906, "learning_rate": 6.647066830329258e-06, "loss": 0.3888, "step": 4196 }, { "epoch": 2.269057487835646, "grad_norm": 0.3425281047821045, "learning_rate": 6.645284112479897e-06, "loss": 0.42, "step": 4197 }, { "epoch": 2.2695981257884306, "grad_norm": 0.3798248767852783, "learning_rate": 6.643501160048911e-06, "loss": 0.429, "step": 4198 }, { "epoch": 2.2701387637412145, "grad_norm": 0.30689990520477295, "learning_rate": 6.6417179732905104e-06, "loss": 0.3916, "step": 4199 }, { "epoch": 2.270679401693999, "grad_norm": 0.3451610207557678, "learning_rate": 6.6399345524589366e-06, "loss": 0.4665, "step": 4200 }, { "epoch": 2.271220039646783, "grad_norm": 0.29163801670074463, "learning_rate": 6.638150897808469e-06, "loss": 0.3605, "step": 4201 }, { "epoch": 2.2717606775995676, "grad_norm": 0.3330785632133484, "learning_rate": 6.636367009593415e-06, "loss": 0.4162, "step": 4202 }, { "epoch": 2.272301315552352, "grad_norm": 0.3180115222930908, "learning_rate": 6.63458288806812e-06, "loss": 0.3684, "step": 4203 }, { "epoch": 2.272841953505136, "grad_norm": 0.33413010835647583, "learning_rate": 6.632798533486961e-06, "loss": 0.4144, "step": 4204 }, { "epoch": 2.27338259145792, "grad_norm": 0.3052087724208832, "learning_rate": 6.631013946104348e-06, "loss": 0.4008, "step": 4205 }, { "epoch": 2.2739232294107046, "grad_norm": 0.3361079692840576, "learning_rate": 6.6292291261747225e-06, "loss": 0.3795, "step": 4206 }, { "epoch": 2.274463867363489, "grad_norm": 0.29779067635536194, "learning_rate": 6.6274440739525635e-06, "loss": 0.3918, "step": 4207 }, { "epoch": 2.2750045053162733, "grad_norm": 0.3415556252002716, "learning_rate": 6.6256587896923785e-06, "loss": 0.4196, "step": 4208 }, { "epoch": 2.2755451432690577, "grad_norm": 0.31661680340766907, "learning_rate": 6.62387327364871e-06, "loss": 0.3929, "step": 4209 }, { "epoch": 2.276085781221842, "grad_norm": 0.2811526358127594, "learning_rate": 6.622087526076135e-06, "loss": 0.3871, "step": 4210 }, { "epoch": 2.276626419174626, "grad_norm": 0.33795827627182007, "learning_rate": 6.620301547229262e-06, "loss": 0.4119, "step": 4211 }, { "epoch": 2.2771670571274103, "grad_norm": 0.326340913772583, "learning_rate": 6.618515337362732e-06, "loss": 0.4251, "step": 4212 }, { "epoch": 2.2777076950801947, "grad_norm": 0.3061928153038025, "learning_rate": 6.61672889673122e-06, "loss": 0.4028, "step": 4213 }, { "epoch": 2.278248333032979, "grad_norm": 0.31582531332969666, "learning_rate": 6.614942225589432e-06, "loss": 0.3773, "step": 4214 }, { "epoch": 2.2787889709857634, "grad_norm": 0.30591699481010437, "learning_rate": 6.613155324192111e-06, "loss": 0.416, "step": 4215 }, { "epoch": 2.2793296089385473, "grad_norm": 0.3101104497909546, "learning_rate": 6.611368192794028e-06, "loss": 0.3711, "step": 4216 }, { "epoch": 2.2798702468913317, "grad_norm": 0.3451632857322693, "learning_rate": 6.609580831649991e-06, "loss": 0.3934, "step": 4217 }, { "epoch": 2.280410884844116, "grad_norm": 0.32937222719192505, "learning_rate": 6.607793241014835e-06, "loss": 0.4237, "step": 4218 }, { "epoch": 2.2809515227969004, "grad_norm": 0.3161192536354065, "learning_rate": 6.606005421143436e-06, "loss": 0.3976, "step": 4219 }, { "epoch": 2.2814921607496847, "grad_norm": 0.32822996377944946, "learning_rate": 6.604217372290693e-06, "loss": 0.4398, "step": 4220 }, { "epoch": 2.2820327987024687, "grad_norm": 0.30190369486808777, "learning_rate": 6.602429094711549e-06, "loss": 0.4031, "step": 4221 }, { "epoch": 2.282573436655253, "grad_norm": 0.3189171552658081, "learning_rate": 6.600640588660968e-06, "loss": 0.4208, "step": 4222 }, { "epoch": 2.2831140746080374, "grad_norm": 0.315018892288208, "learning_rate": 6.598851854393956e-06, "loss": 0.4253, "step": 4223 }, { "epoch": 2.2836547125608218, "grad_norm": 0.3788672089576721, "learning_rate": 6.5970628921655445e-06, "loss": 0.4235, "step": 4224 }, { "epoch": 2.284195350513606, "grad_norm": 0.3006134033203125, "learning_rate": 6.5952737022308e-06, "loss": 0.3976, "step": 4225 }, { "epoch": 2.2847359884663905, "grad_norm": 0.3113172948360443, "learning_rate": 6.5934842848448245e-06, "loss": 0.4296, "step": 4226 }, { "epoch": 2.285276626419175, "grad_norm": 0.2868853509426117, "learning_rate": 6.591694640262749e-06, "loss": 0.3771, "step": 4227 }, { "epoch": 2.2858172643719588, "grad_norm": 0.3638264536857605, "learning_rate": 6.589904768739737e-06, "loss": 0.4157, "step": 4228 }, { "epoch": 2.286357902324743, "grad_norm": 0.33750247955322266, "learning_rate": 6.588114670530989e-06, "loss": 0.3962, "step": 4229 }, { "epoch": 2.2868985402775275, "grad_norm": 0.29702359437942505, "learning_rate": 6.586324345891727e-06, "loss": 0.3742, "step": 4230 }, { "epoch": 2.287439178230312, "grad_norm": 0.31465309858322144, "learning_rate": 6.584533795077217e-06, "loss": 0.3886, "step": 4231 }, { "epoch": 2.287979816183096, "grad_norm": 0.3077332079410553, "learning_rate": 6.582743018342751e-06, "loss": 0.398, "step": 4232 }, { "epoch": 2.28852045413588, "grad_norm": 0.3539144992828369, "learning_rate": 6.580952015943656e-06, "loss": 0.4202, "step": 4233 }, { "epoch": 2.2890610920886645, "grad_norm": 0.2928106486797333, "learning_rate": 6.579160788135288e-06, "loss": 0.3878, "step": 4234 }, { "epoch": 2.289601730041449, "grad_norm": 0.318897545337677, "learning_rate": 6.57736933517304e-06, "loss": 0.4048, "step": 4235 }, { "epoch": 2.290142367994233, "grad_norm": 0.3368491530418396, "learning_rate": 6.57557765731233e-06, "loss": 0.3887, "step": 4236 }, { "epoch": 2.2906830059470176, "grad_norm": 0.3481080234050751, "learning_rate": 6.573785754808615e-06, "loss": 0.3766, "step": 4237 }, { "epoch": 2.291223643899802, "grad_norm": 0.31124183535575867, "learning_rate": 6.5719936279173805e-06, "loss": 0.4233, "step": 4238 }, { "epoch": 2.2917642818525863, "grad_norm": 0.3217078447341919, "learning_rate": 6.570201276894146e-06, "loss": 0.3749, "step": 4239 }, { "epoch": 2.29230491980537, "grad_norm": 0.36222043633461, "learning_rate": 6.568408701994459e-06, "loss": 0.3869, "step": 4240 }, { "epoch": 2.2928455577581546, "grad_norm": 0.32569992542266846, "learning_rate": 6.566615903473902e-06, "loss": 0.4175, "step": 4241 }, { "epoch": 2.293386195710939, "grad_norm": 0.3401198983192444, "learning_rate": 6.564822881588092e-06, "loss": 0.3587, "step": 4242 }, { "epoch": 2.2939268336637233, "grad_norm": 0.3378213047981262, "learning_rate": 6.563029636592671e-06, "loss": 0.3994, "step": 4243 }, { "epoch": 2.2944674716165077, "grad_norm": 0.3366250693798065, "learning_rate": 6.56123616874332e-06, "loss": 0.3975, "step": 4244 }, { "epoch": 2.2950081095692916, "grad_norm": 0.31765657663345337, "learning_rate": 6.559442478295745e-06, "loss": 0.4338, "step": 4245 }, { "epoch": 2.295548747522076, "grad_norm": 0.31671106815338135, "learning_rate": 6.557648565505691e-06, "loss": 0.4179, "step": 4246 }, { "epoch": 2.2960893854748603, "grad_norm": 0.331931471824646, "learning_rate": 6.555854430628927e-06, "loss": 0.3769, "step": 4247 }, { "epoch": 2.2966300234276447, "grad_norm": 0.3320377767086029, "learning_rate": 6.55406007392126e-06, "loss": 0.4169, "step": 4248 }, { "epoch": 2.297170661380429, "grad_norm": 0.2971826493740082, "learning_rate": 6.5522654956385254e-06, "loss": 0.3569, "step": 4249 }, { "epoch": 2.297711299333213, "grad_norm": 0.3325771391391754, "learning_rate": 6.550470696036591e-06, "loss": 0.44, "step": 4250 }, { "epoch": 2.2982519372859973, "grad_norm": 0.3099461495876312, "learning_rate": 6.548675675371356e-06, "loss": 0.4025, "step": 4251 }, { "epoch": 2.2987925752387817, "grad_norm": 0.3246251344680786, "learning_rate": 6.5468804338987515e-06, "loss": 0.3984, "step": 4252 }, { "epoch": 2.299333213191566, "grad_norm": 0.33402061462402344, "learning_rate": 6.545084971874738e-06, "loss": 0.4016, "step": 4253 }, { "epoch": 2.2998738511443504, "grad_norm": 0.28140220046043396, "learning_rate": 6.5432892895553115e-06, "loss": 0.3769, "step": 4254 }, { "epoch": 2.3004144890971348, "grad_norm": 0.3311731517314911, "learning_rate": 6.541493387196496e-06, "loss": 0.426, "step": 4255 }, { "epoch": 2.300955127049919, "grad_norm": 0.3210577666759491, "learning_rate": 6.539697265054348e-06, "loss": 0.3881, "step": 4256 }, { "epoch": 2.301495765002703, "grad_norm": 0.28738880157470703, "learning_rate": 6.537900923384956e-06, "loss": 0.3882, "step": 4257 }, { "epoch": 2.3020364029554874, "grad_norm": 0.3025340735912323, "learning_rate": 6.536104362444439e-06, "loss": 0.3794, "step": 4258 }, { "epoch": 2.3025770409082718, "grad_norm": 0.32594531774520874, "learning_rate": 6.534307582488946e-06, "loss": 0.4204, "step": 4259 }, { "epoch": 2.303117678861056, "grad_norm": 0.31777167320251465, "learning_rate": 6.5325105837746604e-06, "loss": 0.4193, "step": 4260 }, { "epoch": 2.3036583168138405, "grad_norm": 0.3318474292755127, "learning_rate": 6.5307133665577945e-06, "loss": 0.4251, "step": 4261 }, { "epoch": 2.3041989547666244, "grad_norm": 0.29136520624160767, "learning_rate": 6.528915931094594e-06, "loss": 0.3787, "step": 4262 }, { "epoch": 2.3047395927194088, "grad_norm": 0.29807668924331665, "learning_rate": 6.527118277641329e-06, "loss": 0.4066, "step": 4263 }, { "epoch": 2.305280230672193, "grad_norm": 0.301704466342926, "learning_rate": 6.525320406454312e-06, "loss": 0.4013, "step": 4264 }, { "epoch": 2.3058208686249775, "grad_norm": 0.31239601969718933, "learning_rate": 6.523522317789874e-06, "loss": 0.4284, "step": 4265 }, { "epoch": 2.306361506577762, "grad_norm": 0.2982839345932007, "learning_rate": 6.521724011904387e-06, "loss": 0.3791, "step": 4266 }, { "epoch": 2.306902144530546, "grad_norm": 0.47317105531692505, "learning_rate": 6.5199254890542496e-06, "loss": 0.4585, "step": 4267 }, { "epoch": 2.3074427824833306, "grad_norm": 0.3282627761363983, "learning_rate": 6.518126749495894e-06, "loss": 0.443, "step": 4268 }, { "epoch": 2.3079834204361145, "grad_norm": 0.32441818714141846, "learning_rate": 6.516327793485776e-06, "loss": 0.4201, "step": 4269 }, { "epoch": 2.308524058388899, "grad_norm": 0.2971220910549164, "learning_rate": 6.514528621280391e-06, "loss": 0.3927, "step": 4270 }, { "epoch": 2.309064696341683, "grad_norm": 0.361585795879364, "learning_rate": 6.512729233136262e-06, "loss": 0.393, "step": 4271 }, { "epoch": 2.3096053342944676, "grad_norm": 0.30100318789482117, "learning_rate": 6.510929629309941e-06, "loss": 0.4155, "step": 4272 }, { "epoch": 2.310145972247252, "grad_norm": 0.29078614711761475, "learning_rate": 6.509129810058014e-06, "loss": 0.3956, "step": 4273 }, { "epoch": 2.310686610200036, "grad_norm": 0.27215951681137085, "learning_rate": 6.507329775637095e-06, "loss": 0.343, "step": 4274 }, { "epoch": 2.31122724815282, "grad_norm": 0.3316851258277893, "learning_rate": 6.5055295263038286e-06, "loss": 0.4285, "step": 4275 }, { "epoch": 2.3117678861056046, "grad_norm": 0.3086435794830322, "learning_rate": 6.503729062314893e-06, "loss": 0.4265, "step": 4276 }, { "epoch": 2.312308524058389, "grad_norm": 0.2726685106754303, "learning_rate": 6.501928383926992e-06, "loss": 0.3544, "step": 4277 }, { "epoch": 2.3128491620111733, "grad_norm": 0.3564140498638153, "learning_rate": 6.500127491396867e-06, "loss": 0.448, "step": 4278 }, { "epoch": 2.3133897999639577, "grad_norm": 0.3078831434249878, "learning_rate": 6.4983263849812835e-06, "loss": 0.3846, "step": 4279 }, { "epoch": 2.3139304379167416, "grad_norm": 0.31032395362854004, "learning_rate": 6.496525064937042e-06, "loss": 0.4148, "step": 4280 }, { "epoch": 2.314471075869526, "grad_norm": 0.318835586309433, "learning_rate": 6.494723531520968e-06, "loss": 0.4032, "step": 4281 }, { "epoch": 2.3150117138223103, "grad_norm": 0.31365159153938293, "learning_rate": 6.492921784989924e-06, "loss": 0.3961, "step": 4282 }, { "epoch": 2.3155523517750947, "grad_norm": 0.3446863889694214, "learning_rate": 6.4911198256007994e-06, "loss": 0.4357, "step": 4283 }, { "epoch": 2.316092989727879, "grad_norm": 0.3149493336677551, "learning_rate": 6.489317653610513e-06, "loss": 0.4261, "step": 4284 }, { "epoch": 2.3166336276806634, "grad_norm": 0.29682353138923645, "learning_rate": 6.487515269276015e-06, "loss": 0.3862, "step": 4285 }, { "epoch": 2.3171742656334473, "grad_norm": 0.34198012948036194, "learning_rate": 6.485712672854289e-06, "loss": 0.4254, "step": 4286 }, { "epoch": 2.3177149035862317, "grad_norm": 0.30188503861427307, "learning_rate": 6.483909864602342e-06, "loss": 0.4027, "step": 4287 }, { "epoch": 2.318255541539016, "grad_norm": 0.29825958609580994, "learning_rate": 6.482106844777219e-06, "loss": 0.3976, "step": 4288 }, { "epoch": 2.3187961794918004, "grad_norm": 0.2877810299396515, "learning_rate": 6.480303613635986e-06, "loss": 0.3549, "step": 4289 }, { "epoch": 2.3193368174445848, "grad_norm": 0.3249087631702423, "learning_rate": 6.478500171435751e-06, "loss": 0.4514, "step": 4290 }, { "epoch": 2.3198774553973687, "grad_norm": 0.31923142075538635, "learning_rate": 6.476696518433641e-06, "loss": 0.404, "step": 4291 }, { "epoch": 2.320418093350153, "grad_norm": 0.32674330472946167, "learning_rate": 6.474892654886819e-06, "loss": 0.4099, "step": 4292 }, { "epoch": 2.3209587313029374, "grad_norm": 0.3069779872894287, "learning_rate": 6.473088581052476e-06, "loss": 0.3957, "step": 4293 }, { "epoch": 2.3214993692557218, "grad_norm": 0.30247175693511963, "learning_rate": 6.471284297187834e-06, "loss": 0.3873, "step": 4294 }, { "epoch": 2.322040007208506, "grad_norm": 0.3422676920890808, "learning_rate": 6.469479803550144e-06, "loss": 0.3926, "step": 4295 }, { "epoch": 2.3225806451612905, "grad_norm": 0.31129398941993713, "learning_rate": 6.46767510039669e-06, "loss": 0.4031, "step": 4296 }, { "epoch": 2.323121283114075, "grad_norm": 0.3049183189868927, "learning_rate": 6.46587018798478e-06, "loss": 0.3974, "step": 4297 }, { "epoch": 2.3236619210668588, "grad_norm": 0.31287881731987, "learning_rate": 6.464065066571756e-06, "loss": 0.3665, "step": 4298 }, { "epoch": 2.324202559019643, "grad_norm": 0.3058696985244751, "learning_rate": 6.46225973641499e-06, "loss": 0.4005, "step": 4299 }, { "epoch": 2.3247431969724275, "grad_norm": 0.2986745238304138, "learning_rate": 6.460454197771881e-06, "loss": 0.4264, "step": 4300 }, { "epoch": 2.325283834925212, "grad_norm": 0.3037396967411041, "learning_rate": 6.45864845089986e-06, "loss": 0.3759, "step": 4301 }, { "epoch": 2.325824472877996, "grad_norm": 0.322265088558197, "learning_rate": 6.45684249605639e-06, "loss": 0.3629, "step": 4302 }, { "epoch": 2.32636511083078, "grad_norm": 0.3143858015537262, "learning_rate": 6.455036333498956e-06, "loss": 0.3923, "step": 4303 }, { "epoch": 2.3269057487835645, "grad_norm": 0.34491589665412903, "learning_rate": 6.453229963485081e-06, "loss": 0.4167, "step": 4304 }, { "epoch": 2.327446386736349, "grad_norm": 0.32867443561553955, "learning_rate": 6.451423386272312e-06, "loss": 0.4052, "step": 4305 }, { "epoch": 2.327987024689133, "grad_norm": 0.306112676858902, "learning_rate": 6.449616602118228e-06, "loss": 0.3867, "step": 4306 }, { "epoch": 2.3285276626419176, "grad_norm": 0.3475956618785858, "learning_rate": 6.447809611280439e-06, "loss": 0.4371, "step": 4307 }, { "epoch": 2.329068300594702, "grad_norm": 0.3290245532989502, "learning_rate": 6.446002414016579e-06, "loss": 0.3924, "step": 4308 }, { "epoch": 2.329608938547486, "grad_norm": 0.34356066584587097, "learning_rate": 6.444195010584318e-06, "loss": 0.423, "step": 4309 }, { "epoch": 2.33014957650027, "grad_norm": 0.290546715259552, "learning_rate": 6.442387401241349e-06, "loss": 0.3812, "step": 4310 }, { "epoch": 2.3306902144530546, "grad_norm": 0.30830860137939453, "learning_rate": 6.4405795862454e-06, "loss": 0.4228, "step": 4311 }, { "epoch": 2.331230852405839, "grad_norm": 0.3429715037345886, "learning_rate": 6.438771565854226e-06, "loss": 0.4478, "step": 4312 }, { "epoch": 2.3317714903586233, "grad_norm": 0.2854730486869812, "learning_rate": 6.436963340325611e-06, "loss": 0.39, "step": 4313 }, { "epoch": 2.3323121283114077, "grad_norm": 0.3231770396232605, "learning_rate": 6.4351549099173685e-06, "loss": 0.4313, "step": 4314 }, { "epoch": 2.3328527662641916, "grad_norm": 0.31456172466278076, "learning_rate": 6.433346274887341e-06, "loss": 0.394, "step": 4315 }, { "epoch": 2.333393404216976, "grad_norm": 0.3068475127220154, "learning_rate": 6.4315374354934e-06, "loss": 0.3842, "step": 4316 }, { "epoch": 2.3339340421697603, "grad_norm": 0.3269546329975128, "learning_rate": 6.429728391993446e-06, "loss": 0.4229, "step": 4317 }, { "epoch": 2.3344746801225447, "grad_norm": 0.3204357326030731, "learning_rate": 6.427919144645411e-06, "loss": 0.3981, "step": 4318 }, { "epoch": 2.335015318075329, "grad_norm": 0.2805817127227783, "learning_rate": 6.426109693707254e-06, "loss": 0.3681, "step": 4319 }, { "epoch": 2.335555956028113, "grad_norm": 0.3602171540260315, "learning_rate": 6.4243000394369626e-06, "loss": 0.4511, "step": 4320 }, { "epoch": 2.3360965939808973, "grad_norm": 0.30108556151390076, "learning_rate": 6.4224901820925545e-06, "loss": 0.4065, "step": 4321 }, { "epoch": 2.3366372319336817, "grad_norm": 0.30365124344825745, "learning_rate": 6.420680121932074e-06, "loss": 0.3956, "step": 4322 }, { "epoch": 2.337177869886466, "grad_norm": 0.3073485195636749, "learning_rate": 6.418869859213598e-06, "loss": 0.4045, "step": 4323 }, { "epoch": 2.3377185078392504, "grad_norm": 0.31126657128334045, "learning_rate": 6.417059394195228e-06, "loss": 0.3786, "step": 4324 }, { "epoch": 2.3382591457920348, "grad_norm": 0.34042781591415405, "learning_rate": 6.415248727135103e-06, "loss": 0.4239, "step": 4325 }, { "epoch": 2.338799783744819, "grad_norm": 0.3334735631942749, "learning_rate": 6.413437858291378e-06, "loss": 0.404, "step": 4326 }, { "epoch": 2.339340421697603, "grad_norm": 0.3195256292819977, "learning_rate": 6.411626787922247e-06, "loss": 0.379, "step": 4327 }, { "epoch": 2.3398810596503874, "grad_norm": 0.3124770224094391, "learning_rate": 6.409815516285927e-06, "loss": 0.3759, "step": 4328 }, { "epoch": 2.3404216976031718, "grad_norm": 0.3149203062057495, "learning_rate": 6.408004043640667e-06, "loss": 0.4137, "step": 4329 }, { "epoch": 2.340962335555956, "grad_norm": 0.29894107580184937, "learning_rate": 6.406192370244742e-06, "loss": 0.4012, "step": 4330 }, { "epoch": 2.3415029735087405, "grad_norm": 0.3471241295337677, "learning_rate": 6.4043804963564616e-06, "loss": 0.418, "step": 4331 }, { "epoch": 2.3420436114615244, "grad_norm": 0.34101614356040955, "learning_rate": 6.402568422234154e-06, "loss": 0.4392, "step": 4332 }, { "epoch": 2.3425842494143088, "grad_norm": 0.29668599367141724, "learning_rate": 6.400756148136185e-06, "loss": 0.3709, "step": 4333 }, { "epoch": 2.343124887367093, "grad_norm": 0.28744280338287354, "learning_rate": 6.398943674320942e-06, "loss": 0.3881, "step": 4334 }, { "epoch": 2.3436655253198775, "grad_norm": 0.29911768436431885, "learning_rate": 6.397131001046849e-06, "loss": 0.4208, "step": 4335 }, { "epoch": 2.344206163272662, "grad_norm": 0.3078124225139618, "learning_rate": 6.39531812857235e-06, "loss": 0.3949, "step": 4336 }, { "epoch": 2.344746801225446, "grad_norm": 0.3062511682510376, "learning_rate": 6.393505057155922e-06, "loss": 0.4346, "step": 4337 }, { "epoch": 2.34528743917823, "grad_norm": 0.28572142124176025, "learning_rate": 6.3916917870560695e-06, "loss": 0.3935, "step": 4338 }, { "epoch": 2.3458280771310145, "grad_norm": 0.3043213188648224, "learning_rate": 6.389878318531325e-06, "loss": 0.3959, "step": 4339 }, { "epoch": 2.346368715083799, "grad_norm": 0.3265484571456909, "learning_rate": 6.38806465184025e-06, "loss": 0.4122, "step": 4340 }, { "epoch": 2.346909353036583, "grad_norm": 0.3467870354652405, "learning_rate": 6.3862507872414345e-06, "loss": 0.4254, "step": 4341 }, { "epoch": 2.3474499909893676, "grad_norm": 0.3298346996307373, "learning_rate": 6.384436724993494e-06, "loss": 0.4301, "step": 4342 }, { "epoch": 2.347990628942152, "grad_norm": 0.3124622106552124, "learning_rate": 6.382622465355077e-06, "loss": 0.3696, "step": 4343 }, { "epoch": 2.348531266894936, "grad_norm": 0.4205555021762848, "learning_rate": 6.3808080085848544e-06, "loss": 0.4354, "step": 4344 }, { "epoch": 2.34907190484772, "grad_norm": 0.3335787057876587, "learning_rate": 6.378993354941529e-06, "loss": 0.4019, "step": 4345 }, { "epoch": 2.3496125428005046, "grad_norm": 0.346219539642334, "learning_rate": 6.377178504683832e-06, "loss": 0.4124, "step": 4346 }, { "epoch": 2.350153180753289, "grad_norm": 0.34190288186073303, "learning_rate": 6.3753634580705225e-06, "loss": 0.4029, "step": 4347 }, { "epoch": 2.3506938187060733, "grad_norm": 0.34621718525886536, "learning_rate": 6.373548215360382e-06, "loss": 0.3996, "step": 4348 }, { "epoch": 2.351234456658857, "grad_norm": 0.3342035412788391, "learning_rate": 6.37173277681223e-06, "loss": 0.4408, "step": 4349 }, { "epoch": 2.3517750946116416, "grad_norm": 0.30481693148612976, "learning_rate": 6.3699171426849036e-06, "loss": 0.4001, "step": 4350 }, { "epoch": 2.352315732564426, "grad_norm": 0.3469946086406708, "learning_rate": 6.368101313237276e-06, "loss": 0.4217, "step": 4351 }, { "epoch": 2.3528563705172103, "grad_norm": 0.31573811173439026, "learning_rate": 6.366285288728242e-06, "loss": 0.3654, "step": 4352 }, { "epoch": 2.3533970084699947, "grad_norm": 0.32064908742904663, "learning_rate": 6.364469069416731e-06, "loss": 0.4058, "step": 4353 }, { "epoch": 2.353937646422779, "grad_norm": 0.310427188873291, "learning_rate": 6.362652655561693e-06, "loss": 0.3917, "step": 4354 }, { "epoch": 2.3544782843755634, "grad_norm": 0.3326384127140045, "learning_rate": 6.3608360474221106e-06, "loss": 0.462, "step": 4355 }, { "epoch": 2.3550189223283473, "grad_norm": 0.29486921429634094, "learning_rate": 6.359019245256992e-06, "loss": 0.3774, "step": 4356 }, { "epoch": 2.3555595602811317, "grad_norm": 0.36760416626930237, "learning_rate": 6.3572022493253715e-06, "loss": 0.4397, "step": 4357 }, { "epoch": 2.356100198233916, "grad_norm": 0.3134630024433136, "learning_rate": 6.355385059886316e-06, "loss": 0.4056, "step": 4358 }, { "epoch": 2.3566408361867004, "grad_norm": 0.31693926453590393, "learning_rate": 6.353567677198917e-06, "loss": 0.3867, "step": 4359 }, { "epoch": 2.3571814741394848, "grad_norm": 0.3252623975276947, "learning_rate": 6.3517501015222924e-06, "loss": 0.3738, "step": 4360 }, { "epoch": 2.3577221120922687, "grad_norm": 0.294716477394104, "learning_rate": 6.349932333115591e-06, "loss": 0.4094, "step": 4361 }, { "epoch": 2.358262750045053, "grad_norm": 0.28273141384124756, "learning_rate": 6.348114372237983e-06, "loss": 0.3618, "step": 4362 }, { "epoch": 2.3588033879978374, "grad_norm": 0.31758493185043335, "learning_rate": 6.346296219148671e-06, "loss": 0.407, "step": 4363 }, { "epoch": 2.3593440259506218, "grad_norm": 0.29638582468032837, "learning_rate": 6.344477874106887e-06, "loss": 0.3816, "step": 4364 }, { "epoch": 2.359884663903406, "grad_norm": 0.3291521370410919, "learning_rate": 6.342659337371884e-06, "loss": 0.4152, "step": 4365 }, { "epoch": 2.3604253018561905, "grad_norm": 0.3485001027584076, "learning_rate": 6.340840609202949e-06, "loss": 0.4198, "step": 4366 }, { "epoch": 2.3609659398089744, "grad_norm": 0.29301851987838745, "learning_rate": 6.33902168985939e-06, "loss": 0.4179, "step": 4367 }, { "epoch": 2.3615065777617588, "grad_norm": 0.3409198820590973, "learning_rate": 6.337202579600546e-06, "loss": 0.4069, "step": 4368 }, { "epoch": 2.362047215714543, "grad_norm": 0.28736093640327454, "learning_rate": 6.3353832786857825e-06, "loss": 0.3684, "step": 4369 }, { "epoch": 2.3625878536673275, "grad_norm": 0.3112078309059143, "learning_rate": 6.333563787374493e-06, "loss": 0.4161, "step": 4370 }, { "epoch": 2.363128491620112, "grad_norm": 0.3426651060581207, "learning_rate": 6.331744105926095e-06, "loss": 0.4033, "step": 4371 }, { "epoch": 2.363669129572896, "grad_norm": 0.31945520639419556, "learning_rate": 6.32992423460004e-06, "loss": 0.3956, "step": 4372 }, { "epoch": 2.36420976752568, "grad_norm": 0.29475951194763184, "learning_rate": 6.328104173655797e-06, "loss": 0.3612, "step": 4373 }, { "epoch": 2.3647504054784645, "grad_norm": 0.3905600309371948, "learning_rate": 6.326283923352868e-06, "loss": 0.433, "step": 4374 }, { "epoch": 2.365291043431249, "grad_norm": 0.29405343532562256, "learning_rate": 6.3244634839507834e-06, "loss": 0.3981, "step": 4375 }, { "epoch": 2.365831681384033, "grad_norm": 0.31816449761390686, "learning_rate": 6.3226428557090966e-06, "loss": 0.4358, "step": 4376 }, { "epoch": 2.3663723193368176, "grad_norm": 0.31002146005630493, "learning_rate": 6.320822038887388e-06, "loss": 0.3585, "step": 4377 }, { "epoch": 2.3669129572896015, "grad_norm": 0.31022676825523376, "learning_rate": 6.319001033745271e-06, "loss": 0.3939, "step": 4378 }, { "epoch": 2.367453595242386, "grad_norm": 0.33718812465667725, "learning_rate": 6.3171798405423755e-06, "loss": 0.4338, "step": 4379 }, { "epoch": 2.36799423319517, "grad_norm": 0.3083488345146179, "learning_rate": 6.315358459538367e-06, "loss": 0.3672, "step": 4380 }, { "epoch": 2.3685348711479546, "grad_norm": 0.2915855944156647, "learning_rate": 6.313536890992935e-06, "loss": 0.372, "step": 4381 }, { "epoch": 2.369075509100739, "grad_norm": 0.28754106163978577, "learning_rate": 6.3117151351657944e-06, "loss": 0.3773, "step": 4382 }, { "epoch": 2.3696161470535233, "grad_norm": 0.358699768781662, "learning_rate": 6.309893192316687e-06, "loss": 0.4501, "step": 4383 }, { "epoch": 2.3701567850063077, "grad_norm": 0.3254886865615845, "learning_rate": 6.308071062705385e-06, "loss": 0.417, "step": 4384 }, { "epoch": 2.3706974229590916, "grad_norm": 0.3160276412963867, "learning_rate": 6.3062487465916825e-06, "loss": 0.4035, "step": 4385 }, { "epoch": 2.371238060911876, "grad_norm": 0.2895281910896301, "learning_rate": 6.304426244235401e-06, "loss": 0.356, "step": 4386 }, { "epoch": 2.3717786988646603, "grad_norm": 0.32785364985466003, "learning_rate": 6.30260355589639e-06, "loss": 0.441, "step": 4387 }, { "epoch": 2.3723193368174447, "grad_norm": 0.31930238008499146, "learning_rate": 6.300780681834529e-06, "loss": 0.409, "step": 4388 }, { "epoch": 2.372859974770229, "grad_norm": 0.31189289689064026, "learning_rate": 6.298957622309713e-06, "loss": 0.3836, "step": 4389 }, { "epoch": 2.373400612723013, "grad_norm": 0.3160669505596161, "learning_rate": 6.297134377581877e-06, "loss": 0.3918, "step": 4390 }, { "epoch": 2.3739412506757973, "grad_norm": 0.27443766593933105, "learning_rate": 6.295310947910972e-06, "loss": 0.3779, "step": 4391 }, { "epoch": 2.3744818886285817, "grad_norm": 0.3024101257324219, "learning_rate": 6.2934873335569806e-06, "loss": 0.3866, "step": 4392 }, { "epoch": 2.375022526581366, "grad_norm": 0.3389159142971039, "learning_rate": 6.29166353477991e-06, "loss": 0.4335, "step": 4393 }, { "epoch": 2.3755631645341504, "grad_norm": 0.31065016984939575, "learning_rate": 6.289839551839796e-06, "loss": 0.3878, "step": 4394 }, { "epoch": 2.3761038024869348, "grad_norm": 0.3136639893054962, "learning_rate": 6.2880153849966966e-06, "loss": 0.427, "step": 4395 }, { "epoch": 2.3766444404397187, "grad_norm": 0.30515700578689575, "learning_rate": 6.2861910345107e-06, "loss": 0.4246, "step": 4396 }, { "epoch": 2.377185078392503, "grad_norm": 0.33984237909317017, "learning_rate": 6.284366500641914e-06, "loss": 0.4069, "step": 4397 }, { "epoch": 2.3777257163452874, "grad_norm": 0.3370453417301178, "learning_rate": 6.282541783650486e-06, "loss": 0.4111, "step": 4398 }, { "epoch": 2.3782663542980718, "grad_norm": 0.3021021783351898, "learning_rate": 6.280716883796573e-06, "loss": 0.351, "step": 4399 }, { "epoch": 2.378806992250856, "grad_norm": 0.34611067175865173, "learning_rate": 6.2788918013403695e-06, "loss": 0.4108, "step": 4400 }, { "epoch": 2.3793476302036405, "grad_norm": 0.2943975329399109, "learning_rate": 6.277066536542091e-06, "loss": 0.4119, "step": 4401 }, { "epoch": 2.3798882681564244, "grad_norm": 0.35113584995269775, "learning_rate": 6.275241089661982e-06, "loss": 0.4489, "step": 4402 }, { "epoch": 2.3804289061092088, "grad_norm": 0.3373362421989441, "learning_rate": 6.273415460960309e-06, "loss": 0.3663, "step": 4403 }, { "epoch": 2.380969544061993, "grad_norm": 0.32804322242736816, "learning_rate": 6.271589650697371e-06, "loss": 0.4132, "step": 4404 }, { "epoch": 2.3815101820147775, "grad_norm": 0.31782838702201843, "learning_rate": 6.269763659133486e-06, "loss": 0.4333, "step": 4405 }, { "epoch": 2.382050819967562, "grad_norm": 0.3200927674770355, "learning_rate": 6.267937486528999e-06, "loss": 0.3873, "step": 4406 }, { "epoch": 2.3825914579203458, "grad_norm": 0.3715694844722748, "learning_rate": 6.266111133144285e-06, "loss": 0.4296, "step": 4407 }, { "epoch": 2.38313209587313, "grad_norm": 0.283189058303833, "learning_rate": 6.264284599239741e-06, "loss": 0.3674, "step": 4408 }, { "epoch": 2.3836727338259145, "grad_norm": 0.34319746494293213, "learning_rate": 6.26245788507579e-06, "loss": 0.4131, "step": 4409 }, { "epoch": 2.384213371778699, "grad_norm": 0.32422542572021484, "learning_rate": 6.2606309909128845e-06, "loss": 0.3834, "step": 4410 }, { "epoch": 2.384754009731483, "grad_norm": 0.29863274097442627, "learning_rate": 6.258803917011497e-06, "loss": 0.3962, "step": 4411 }, { "epoch": 2.3852946476842676, "grad_norm": 0.3698817193508148, "learning_rate": 6.256976663632131e-06, "loss": 0.4527, "step": 4412 }, { "epoch": 2.385835285637052, "grad_norm": 0.278394877910614, "learning_rate": 6.2551492310353094e-06, "loss": 0.3525, "step": 4413 }, { "epoch": 2.386375923589836, "grad_norm": 0.2943514287471771, "learning_rate": 6.253321619481586e-06, "loss": 0.3689, "step": 4414 }, { "epoch": 2.38691656154262, "grad_norm": 0.3604004681110382, "learning_rate": 6.251493829231539e-06, "loss": 0.4397, "step": 4415 }, { "epoch": 2.3874571994954046, "grad_norm": 0.296670526266098, "learning_rate": 6.249665860545773e-06, "loss": 0.3612, "step": 4416 }, { "epoch": 2.387997837448189, "grad_norm": 0.3186405897140503, "learning_rate": 6.247837713684911e-06, "loss": 0.4183, "step": 4417 }, { "epoch": 2.3885384754009733, "grad_norm": 0.3029390275478363, "learning_rate": 6.246009388909613e-06, "loss": 0.3691, "step": 4418 }, { "epoch": 2.3890791133537572, "grad_norm": 0.33948981761932373, "learning_rate": 6.244180886480555e-06, "loss": 0.4404, "step": 4419 }, { "epoch": 2.3896197513065416, "grad_norm": 0.31117311120033264, "learning_rate": 6.24235220665844e-06, "loss": 0.3941, "step": 4420 }, { "epoch": 2.390160389259326, "grad_norm": 0.30091291666030884, "learning_rate": 6.240523349704002e-06, "loss": 0.3869, "step": 4421 }, { "epoch": 2.3907010272121103, "grad_norm": 0.3182219862937927, "learning_rate": 6.238694315877994e-06, "loss": 0.4322, "step": 4422 }, { "epoch": 2.3912416651648947, "grad_norm": 0.2877960801124573, "learning_rate": 6.236865105441194e-06, "loss": 0.3592, "step": 4423 }, { "epoch": 2.391782303117679, "grad_norm": 0.3085620105266571, "learning_rate": 6.235035718654413e-06, "loss": 0.4263, "step": 4424 }, { "epoch": 2.3923229410704634, "grad_norm": 0.3202997148036957, "learning_rate": 6.233206155778476e-06, "loss": 0.4291, "step": 4425 }, { "epoch": 2.3928635790232473, "grad_norm": 0.3051954209804535, "learning_rate": 6.231376417074243e-06, "loss": 0.387, "step": 4426 }, { "epoch": 2.3934042169760317, "grad_norm": 0.3193745017051697, "learning_rate": 6.229546502802591e-06, "loss": 0.4108, "step": 4427 }, { "epoch": 2.393944854928816, "grad_norm": 0.31878572702407837, "learning_rate": 6.2277164132244305e-06, "loss": 0.4014, "step": 4428 }, { "epoch": 2.3944854928816004, "grad_norm": 0.2973358631134033, "learning_rate": 6.225886148600688e-06, "loss": 0.389, "step": 4429 }, { "epoch": 2.3950261308343848, "grad_norm": 0.2974739670753479, "learning_rate": 6.224055709192323e-06, "loss": 0.454, "step": 4430 }, { "epoch": 2.3955667687871687, "grad_norm": 0.31804347038269043, "learning_rate": 6.222225095260311e-06, "loss": 0.4335, "step": 4431 }, { "epoch": 2.396107406739953, "grad_norm": 0.2907731533050537, "learning_rate": 6.220394307065665e-06, "loss": 0.3707, "step": 4432 }, { "epoch": 2.3966480446927374, "grad_norm": 0.2965898811817169, "learning_rate": 6.218563344869408e-06, "loss": 0.3832, "step": 4433 }, { "epoch": 2.3971886826455218, "grad_norm": 0.3112434148788452, "learning_rate": 6.216732208932601e-06, "loss": 0.457, "step": 4434 }, { "epoch": 2.397729320598306, "grad_norm": 0.28128254413604736, "learning_rate": 6.21490089951632e-06, "loss": 0.3674, "step": 4435 }, { "epoch": 2.39826995855109, "grad_norm": 0.33449041843414307, "learning_rate": 6.213069416881672e-06, "loss": 0.4326, "step": 4436 }, { "epoch": 2.3988105965038744, "grad_norm": 0.27020829916000366, "learning_rate": 6.211237761289787e-06, "loss": 0.3616, "step": 4437 }, { "epoch": 2.3993512344566588, "grad_norm": 0.3244174122810364, "learning_rate": 6.2094059330018165e-06, "loss": 0.384, "step": 4438 }, { "epoch": 2.399891872409443, "grad_norm": 0.3321855068206787, "learning_rate": 6.207573932278943e-06, "loss": 0.4299, "step": 4439 }, { "epoch": 2.4004325103622275, "grad_norm": 0.30754154920578003, "learning_rate": 6.205741759382365e-06, "loss": 0.388, "step": 4440 }, { "epoch": 2.400973148315012, "grad_norm": 0.321622759103775, "learning_rate": 6.203909414573316e-06, "loss": 0.3919, "step": 4441 }, { "epoch": 2.401513786267796, "grad_norm": 0.3470320701599121, "learning_rate": 6.202076898113043e-06, "loss": 0.4141, "step": 4442 }, { "epoch": 2.40205442422058, "grad_norm": 0.30662572383880615, "learning_rate": 6.200244210262827e-06, "loss": 0.3852, "step": 4443 }, { "epoch": 2.4025950621733645, "grad_norm": 0.34137198328971863, "learning_rate": 6.198411351283966e-06, "loss": 0.4033, "step": 4444 }, { "epoch": 2.403135700126149, "grad_norm": 0.34139108657836914, "learning_rate": 6.1965783214377895e-06, "loss": 0.4092, "step": 4445 }, { "epoch": 2.403676338078933, "grad_norm": 0.35481759905815125, "learning_rate": 6.194745120985644e-06, "loss": 0.4397, "step": 4446 }, { "epoch": 2.4042169760317176, "grad_norm": 0.3117373585700989, "learning_rate": 6.192911750188907e-06, "loss": 0.3776, "step": 4447 }, { "epoch": 2.4047576139845015, "grad_norm": 0.35322365164756775, "learning_rate": 6.191078209308974e-06, "loss": 0.4319, "step": 4448 }, { "epoch": 2.405298251937286, "grad_norm": 0.31928256154060364, "learning_rate": 6.1892444986072695e-06, "loss": 0.3733, "step": 4449 }, { "epoch": 2.40583888989007, "grad_norm": 0.3453699052333832, "learning_rate": 6.187410618345241e-06, "loss": 0.4663, "step": 4450 }, { "epoch": 2.4063795278428546, "grad_norm": 0.28178903460502625, "learning_rate": 6.1855765687843595e-06, "loss": 0.3854, "step": 4451 }, { "epoch": 2.406920165795639, "grad_norm": 0.37080490589141846, "learning_rate": 6.1837423501861205e-06, "loss": 0.4157, "step": 4452 }, { "epoch": 2.4074608037484233, "grad_norm": 0.3195529282093048, "learning_rate": 6.181907962812044e-06, "loss": 0.385, "step": 4453 }, { "epoch": 2.4080014417012077, "grad_norm": 0.3060106337070465, "learning_rate": 6.180073406923672e-06, "loss": 0.391, "step": 4454 }, { "epoch": 2.4085420796539916, "grad_norm": 0.37391820549964905, "learning_rate": 6.178238682782574e-06, "loss": 0.4299, "step": 4455 }, { "epoch": 2.409082717606776, "grad_norm": 0.32717540860176086, "learning_rate": 6.1764037906503395e-06, "loss": 0.4189, "step": 4456 }, { "epoch": 2.4096233555595603, "grad_norm": 0.28247183561325073, "learning_rate": 6.174568730788587e-06, "loss": 0.3802, "step": 4457 }, { "epoch": 2.4101639935123447, "grad_norm": 0.32408735156059265, "learning_rate": 6.172733503458954e-06, "loss": 0.4352, "step": 4458 }, { "epoch": 2.410704631465129, "grad_norm": 0.3047981858253479, "learning_rate": 6.170898108923105e-06, "loss": 0.3846, "step": 4459 }, { "epoch": 2.411245269417913, "grad_norm": 0.2936648726463318, "learning_rate": 6.169062547442724e-06, "loss": 0.4196, "step": 4460 }, { "epoch": 2.4117859073706973, "grad_norm": 0.3075091242790222, "learning_rate": 6.1672268192795285e-06, "loss": 0.4279, "step": 4461 }, { "epoch": 2.4123265453234817, "grad_norm": 0.3028499186038971, "learning_rate": 6.165390924695247e-06, "loss": 0.4138, "step": 4462 }, { "epoch": 2.412867183276266, "grad_norm": 0.30673113465309143, "learning_rate": 6.1635548639516415e-06, "loss": 0.396, "step": 4463 }, { "epoch": 2.4134078212290504, "grad_norm": 0.31158581376075745, "learning_rate": 6.161718637310492e-06, "loss": 0.4055, "step": 4464 }, { "epoch": 2.4139484591818343, "grad_norm": 0.3450443148612976, "learning_rate": 6.159882245033606e-06, "loss": 0.3807, "step": 4465 }, { "epoch": 2.4144890971346187, "grad_norm": 0.3178133964538574, "learning_rate": 6.158045687382812e-06, "loss": 0.4181, "step": 4466 }, { "epoch": 2.415029735087403, "grad_norm": 0.30661630630493164, "learning_rate": 6.156208964619965e-06, "loss": 0.3869, "step": 4467 }, { "epoch": 2.4155703730401874, "grad_norm": 0.31599074602127075, "learning_rate": 6.154372077006939e-06, "loss": 0.4319, "step": 4468 }, { "epoch": 2.4161110109929718, "grad_norm": 0.29334557056427, "learning_rate": 6.152535024805637e-06, "loss": 0.4171, "step": 4469 }, { "epoch": 2.416651648945756, "grad_norm": 0.2875959277153015, "learning_rate": 6.150697808277979e-06, "loss": 0.3593, "step": 4470 }, { "epoch": 2.4171922868985405, "grad_norm": 0.3488504886627197, "learning_rate": 6.148860427685914e-06, "loss": 0.4441, "step": 4471 }, { "epoch": 2.4177329248513244, "grad_norm": 0.30295583605766296, "learning_rate": 6.147022883291412e-06, "loss": 0.3676, "step": 4472 }, { "epoch": 2.4182735628041088, "grad_norm": 0.3115604817867279, "learning_rate": 6.145185175356468e-06, "loss": 0.3938, "step": 4473 }, { "epoch": 2.418814200756893, "grad_norm": 0.2997246980667114, "learning_rate": 6.143347304143098e-06, "loss": 0.3981, "step": 4474 }, { "epoch": 2.4193548387096775, "grad_norm": 0.33742642402648926, "learning_rate": 6.141509269913343e-06, "loss": 0.4301, "step": 4475 }, { "epoch": 2.419895476662462, "grad_norm": 0.31137093901634216, "learning_rate": 6.139671072929264e-06, "loss": 0.3888, "step": 4476 }, { "epoch": 2.4204361146152458, "grad_norm": 0.29388248920440674, "learning_rate": 6.13783271345295e-06, "loss": 0.3868, "step": 4477 }, { "epoch": 2.42097675256803, "grad_norm": 0.32453441619873047, "learning_rate": 6.135994191746511e-06, "loss": 0.4564, "step": 4478 }, { "epoch": 2.4215173905208145, "grad_norm": 0.3015061318874359, "learning_rate": 6.134155508072081e-06, "loss": 0.3957, "step": 4479 }, { "epoch": 2.422058028473599, "grad_norm": 0.2795189619064331, "learning_rate": 6.132316662691815e-06, "loss": 0.3913, "step": 4480 }, { "epoch": 2.422598666426383, "grad_norm": 0.31015628576278687, "learning_rate": 6.130477655867893e-06, "loss": 0.4248, "step": 4481 }, { "epoch": 2.4231393043791676, "grad_norm": 0.308458149433136, "learning_rate": 6.128638487862514e-06, "loss": 0.4203, "step": 4482 }, { "epoch": 2.423679942331952, "grad_norm": 0.2929111421108246, "learning_rate": 6.126799158937906e-06, "loss": 0.4078, "step": 4483 }, { "epoch": 2.424220580284736, "grad_norm": 0.3093794286251068, "learning_rate": 6.124959669356319e-06, "loss": 0.3899, "step": 4484 }, { "epoch": 2.42476121823752, "grad_norm": 0.28093525767326355, "learning_rate": 6.123120019380021e-06, "loss": 0.397, "step": 4485 }, { "epoch": 2.4253018561903046, "grad_norm": 0.2979103922843933, "learning_rate": 6.121280209271306e-06, "loss": 0.4463, "step": 4486 }, { "epoch": 2.425842494143089, "grad_norm": 0.2875688076019287, "learning_rate": 6.119440239292493e-06, "loss": 0.3858, "step": 4487 }, { "epoch": 2.4263831320958733, "grad_norm": 0.34129956364631653, "learning_rate": 6.117600109705919e-06, "loss": 0.4681, "step": 4488 }, { "epoch": 2.4269237700486572, "grad_norm": 0.28495529294013977, "learning_rate": 6.1157598207739496e-06, "loss": 0.3874, "step": 4489 }, { "epoch": 2.4274644080014416, "grad_norm": 0.3119986355304718, "learning_rate": 6.1139193727589665e-06, "loss": 0.386, "step": 4490 }, { "epoch": 2.428005045954226, "grad_norm": 0.3121979534626007, "learning_rate": 6.1120787659233805e-06, "loss": 0.4054, "step": 4491 }, { "epoch": 2.4285456839070103, "grad_norm": 0.29938867688179016, "learning_rate": 6.110238000529619e-06, "loss": 0.4111, "step": 4492 }, { "epoch": 2.4290863218597947, "grad_norm": 0.3388979136943817, "learning_rate": 6.108397076840137e-06, "loss": 0.3868, "step": 4493 }, { "epoch": 2.429626959812579, "grad_norm": 0.35237500071525574, "learning_rate": 6.106555995117408e-06, "loss": 0.4234, "step": 4494 }, { "epoch": 2.430167597765363, "grad_norm": 0.3059629499912262, "learning_rate": 6.1047147556239325e-06, "loss": 0.3874, "step": 4495 }, { "epoch": 2.4307082357181473, "grad_norm": 0.3108119070529938, "learning_rate": 6.10287335862223e-06, "loss": 0.4132, "step": 4496 }, { "epoch": 2.4312488736709317, "grad_norm": 0.31125408411026, "learning_rate": 6.101031804374845e-06, "loss": 0.3836, "step": 4497 }, { "epoch": 2.431789511623716, "grad_norm": 0.3597297668457031, "learning_rate": 6.099190093144341e-06, "loss": 0.4046, "step": 4498 }, { "epoch": 2.4323301495765004, "grad_norm": 0.3167576789855957, "learning_rate": 6.097348225193305e-06, "loss": 0.4121, "step": 4499 }, { "epoch": 2.4328707875292848, "grad_norm": 0.335818886756897, "learning_rate": 6.095506200784349e-06, "loss": 0.4143, "step": 4500 }, { "epoch": 2.4334114254820687, "grad_norm": 0.3068419098854065, "learning_rate": 6.093664020180106e-06, "loss": 0.3896, "step": 4501 }, { "epoch": 2.433952063434853, "grad_norm": 0.3358902037143707, "learning_rate": 6.091821683643231e-06, "loss": 0.423, "step": 4502 }, { "epoch": 2.4344927013876374, "grad_norm": 0.3129465579986572, "learning_rate": 6.089979191436398e-06, "loss": 0.3915, "step": 4503 }, { "epoch": 2.4350333393404218, "grad_norm": 0.3382420241832733, "learning_rate": 6.088136543822309e-06, "loss": 0.4193, "step": 4504 }, { "epoch": 2.435573977293206, "grad_norm": 0.33931517601013184, "learning_rate": 6.086293741063685e-06, "loss": 0.4202, "step": 4505 }, { "epoch": 2.43611461524599, "grad_norm": 0.31078824400901794, "learning_rate": 6.084450783423268e-06, "loss": 0.4075, "step": 4506 }, { "epoch": 2.4366552531987744, "grad_norm": 0.28337806463241577, "learning_rate": 6.082607671163823e-06, "loss": 0.3763, "step": 4507 }, { "epoch": 2.4371958911515588, "grad_norm": 0.3330620527267456, "learning_rate": 6.0807644045481425e-06, "loss": 0.3932, "step": 4508 }, { "epoch": 2.437736529104343, "grad_norm": 0.2874024212360382, "learning_rate": 6.078920983839032e-06, "loss": 0.3703, "step": 4509 }, { "epoch": 2.4382771670571275, "grad_norm": 0.3029569387435913, "learning_rate": 6.077077409299323e-06, "loss": 0.4456, "step": 4510 }, { "epoch": 2.438817805009912, "grad_norm": 0.31840410828590393, "learning_rate": 6.07523368119187e-06, "loss": 0.4065, "step": 4511 }, { "epoch": 2.439358442962696, "grad_norm": 0.29436418414115906, "learning_rate": 6.073389799779547e-06, "loss": 0.3617, "step": 4512 }, { "epoch": 2.43989908091548, "grad_norm": 0.32618609070777893, "learning_rate": 6.071545765325254e-06, "loss": 0.4419, "step": 4513 }, { "epoch": 2.4404397188682645, "grad_norm": 0.3147270977497101, "learning_rate": 6.069701578091909e-06, "loss": 0.4055, "step": 4514 }, { "epoch": 2.440980356821049, "grad_norm": 0.35851117968559265, "learning_rate": 6.067857238342451e-06, "loss": 0.4027, "step": 4515 }, { "epoch": 2.441520994773833, "grad_norm": 0.33268657326698303, "learning_rate": 6.066012746339847e-06, "loss": 0.3888, "step": 4516 }, { "epoch": 2.4420616327266176, "grad_norm": 0.3391202688217163, "learning_rate": 6.064168102347074e-06, "loss": 0.453, "step": 4517 }, { "epoch": 2.4426022706794015, "grad_norm": 0.3072931170463562, "learning_rate": 6.062323306627146e-06, "loss": 0.3971, "step": 4518 }, { "epoch": 2.443142908632186, "grad_norm": 0.3158687651157379, "learning_rate": 6.060478359443085e-06, "loss": 0.4018, "step": 4519 }, { "epoch": 2.4436835465849702, "grad_norm": 0.2785457968711853, "learning_rate": 6.058633261057945e-06, "loss": 0.4063, "step": 4520 }, { "epoch": 2.4442241845377546, "grad_norm": 0.2931409776210785, "learning_rate": 6.056788011734791e-06, "loss": 0.3744, "step": 4521 }, { "epoch": 2.444764822490539, "grad_norm": 0.33004578948020935, "learning_rate": 6.0549426117367195e-06, "loss": 0.4643, "step": 4522 }, { "epoch": 2.4453054604433233, "grad_norm": 0.2701852023601532, "learning_rate": 6.053097061326843e-06, "loss": 0.3744, "step": 4523 }, { "epoch": 2.4458460983961072, "grad_norm": 0.3357827961444855, "learning_rate": 6.0512513607682976e-06, "loss": 0.4313, "step": 4524 }, { "epoch": 2.4463867363488916, "grad_norm": 0.31330615282058716, "learning_rate": 6.049405510324237e-06, "loss": 0.4108, "step": 4525 }, { "epoch": 2.446927374301676, "grad_norm": 0.30435165762901306, "learning_rate": 6.0475595102578455e-06, "loss": 0.42, "step": 4526 }, { "epoch": 2.4474680122544603, "grad_norm": 0.3326108753681183, "learning_rate": 6.045713360832315e-06, "loss": 0.403, "step": 4527 }, { "epoch": 2.4480086502072447, "grad_norm": 0.30275386571884155, "learning_rate": 6.04386706231087e-06, "loss": 0.4118, "step": 4528 }, { "epoch": 2.448549288160029, "grad_norm": 0.2818882465362549, "learning_rate": 6.042020614956753e-06, "loss": 0.3621, "step": 4529 }, { "epoch": 2.449089926112813, "grad_norm": 0.3257218301296234, "learning_rate": 6.040174019033226e-06, "loss": 0.4251, "step": 4530 }, { "epoch": 2.4496305640655973, "grad_norm": 0.293617844581604, "learning_rate": 6.0383272748035724e-06, "loss": 0.389, "step": 4531 }, { "epoch": 2.4501712020183817, "grad_norm": 0.30719658732414246, "learning_rate": 6.036480382531099e-06, "loss": 0.4113, "step": 4532 }, { "epoch": 2.450711839971166, "grad_norm": 0.3174991309642792, "learning_rate": 6.0346333424791325e-06, "loss": 0.3894, "step": 4533 }, { "epoch": 2.4512524779239504, "grad_norm": 0.33805057406425476, "learning_rate": 6.032786154911019e-06, "loss": 0.392, "step": 4534 }, { "epoch": 2.4517931158767343, "grad_norm": 0.295246422290802, "learning_rate": 6.030938820090128e-06, "loss": 0.3868, "step": 4535 }, { "epoch": 2.4523337538295187, "grad_norm": 0.347849041223526, "learning_rate": 6.02909133827985e-06, "loss": 0.4459, "step": 4536 }, { "epoch": 2.452874391782303, "grad_norm": 0.35072389245033264, "learning_rate": 6.027243709743595e-06, "loss": 0.4103, "step": 4537 }, { "epoch": 2.4534150297350874, "grad_norm": 0.3043549954891205, "learning_rate": 6.025395934744793e-06, "loss": 0.4218, "step": 4538 }, { "epoch": 2.4539556676878718, "grad_norm": 0.32718175649642944, "learning_rate": 6.023548013546899e-06, "loss": 0.4026, "step": 4539 }, { "epoch": 2.454496305640656, "grad_norm": 0.3174917995929718, "learning_rate": 6.021699946413384e-06, "loss": 0.3996, "step": 4540 }, { "epoch": 2.4550369435934405, "grad_norm": 0.2845597565174103, "learning_rate": 6.019851733607744e-06, "loss": 0.4009, "step": 4541 }, { "epoch": 2.4555775815462244, "grad_norm": 0.3087173104286194, "learning_rate": 6.018003375393493e-06, "loss": 0.4119, "step": 4542 }, { "epoch": 2.4561182194990088, "grad_norm": 0.3086446523666382, "learning_rate": 6.016154872034167e-06, "loss": 0.4218, "step": 4543 }, { "epoch": 2.456658857451793, "grad_norm": 0.3066726326942444, "learning_rate": 6.014306223793321e-06, "loss": 0.3948, "step": 4544 }, { "epoch": 2.4571994954045775, "grad_norm": 0.31927090883255005, "learning_rate": 6.012457430934532e-06, "loss": 0.4437, "step": 4545 }, { "epoch": 2.457740133357362, "grad_norm": 0.27344468235969543, "learning_rate": 6.010608493721399e-06, "loss": 0.3903, "step": 4546 }, { "epoch": 2.4582807713101458, "grad_norm": 0.30820268392562866, "learning_rate": 6.008759412417539e-06, "loss": 0.3931, "step": 4547 }, { "epoch": 2.45882140926293, "grad_norm": 0.3438683748245239, "learning_rate": 6.006910187286592e-06, "loss": 0.4159, "step": 4548 }, { "epoch": 2.4593620472157145, "grad_norm": 0.2932104766368866, "learning_rate": 6.005060818592214e-06, "loss": 0.3802, "step": 4549 }, { "epoch": 2.459902685168499, "grad_norm": 0.3381466567516327, "learning_rate": 6.003211306598089e-06, "loss": 0.4422, "step": 4550 }, { "epoch": 2.460443323121283, "grad_norm": 0.33015963435173035, "learning_rate": 6.001361651567913e-06, "loss": 0.4047, "step": 4551 }, { "epoch": 2.4609839610740676, "grad_norm": 0.30845144391059875, "learning_rate": 5.99951185376541e-06, "loss": 0.4006, "step": 4552 }, { "epoch": 2.4615245990268515, "grad_norm": 0.3066163957118988, "learning_rate": 5.9976619134543175e-06, "loss": 0.393, "step": 4553 }, { "epoch": 2.462065236979636, "grad_norm": 0.2943369746208191, "learning_rate": 5.995811830898399e-06, "loss": 0.401, "step": 4554 }, { "epoch": 2.4626058749324202, "grad_norm": 0.35713574290275574, "learning_rate": 5.993961606361436e-06, "loss": 0.3973, "step": 4555 }, { "epoch": 2.4631465128852046, "grad_norm": 0.35209155082702637, "learning_rate": 5.9921112401072275e-06, "loss": 0.4029, "step": 4556 }, { "epoch": 2.463687150837989, "grad_norm": 0.31026819348335266, "learning_rate": 5.990260732399598e-06, "loss": 0.4012, "step": 4557 }, { "epoch": 2.4642277887907733, "grad_norm": 0.3545580804347992, "learning_rate": 5.988410083502389e-06, "loss": 0.3933, "step": 4558 }, { "epoch": 2.4647684267435572, "grad_norm": 0.33118224143981934, "learning_rate": 5.986559293679464e-06, "loss": 0.3995, "step": 4559 }, { "epoch": 2.4653090646963416, "grad_norm": 0.33403223752975464, "learning_rate": 5.984708363194702e-06, "loss": 0.4219, "step": 4560 }, { "epoch": 2.465849702649126, "grad_norm": 0.30222567915916443, "learning_rate": 5.982857292312007e-06, "loss": 0.392, "step": 4561 }, { "epoch": 2.4663903406019103, "grad_norm": 0.31281334161758423, "learning_rate": 5.981006081295301e-06, "loss": 0.3606, "step": 4562 }, { "epoch": 2.4669309785546947, "grad_norm": 0.32246240973472595, "learning_rate": 5.979154730408526e-06, "loss": 0.442, "step": 4563 }, { "epoch": 2.4674716165074786, "grad_norm": 0.3207295536994934, "learning_rate": 5.977303239915646e-06, "loss": 0.3983, "step": 4564 }, { "epoch": 2.468012254460263, "grad_norm": 0.3525555431842804, "learning_rate": 5.975451610080643e-06, "loss": 0.4066, "step": 4565 }, { "epoch": 2.4685528924130473, "grad_norm": 0.29853981733322144, "learning_rate": 5.973599841167516e-06, "loss": 0.411, "step": 4566 }, { "epoch": 2.4690935303658317, "grad_norm": 0.3197988271713257, "learning_rate": 5.97174793344029e-06, "loss": 0.4275, "step": 4567 }, { "epoch": 2.469634168318616, "grad_norm": 0.29205453395843506, "learning_rate": 5.969895887163005e-06, "loss": 0.3614, "step": 4568 }, { "epoch": 2.4701748062714004, "grad_norm": 0.28719770908355713, "learning_rate": 5.968043702599723e-06, "loss": 0.3925, "step": 4569 }, { "epoch": 2.4707154442241848, "grad_norm": 0.3598988652229309, "learning_rate": 5.966191380014524e-06, "loss": 0.4228, "step": 4570 }, { "epoch": 2.4712560821769687, "grad_norm": 0.28883805871009827, "learning_rate": 5.9643389196715125e-06, "loss": 0.3829, "step": 4571 }, { "epoch": 2.471796720129753, "grad_norm": 0.28611552715301514, "learning_rate": 5.962486321834805e-06, "loss": 0.3518, "step": 4572 }, { "epoch": 2.4723373580825374, "grad_norm": 0.3418418765068054, "learning_rate": 5.9606335867685424e-06, "loss": 0.4148, "step": 4573 }, { "epoch": 2.4728779960353218, "grad_norm": 0.31138092279434204, "learning_rate": 5.958780714736886e-06, "loss": 0.3593, "step": 4574 }, { "epoch": 2.473418633988106, "grad_norm": 0.3250178098678589, "learning_rate": 5.956927706004012e-06, "loss": 0.392, "step": 4575 }, { "epoch": 2.47395927194089, "grad_norm": 0.3134720027446747, "learning_rate": 5.955074560834121e-06, "loss": 0.4526, "step": 4576 }, { "epoch": 2.4744999098936744, "grad_norm": 0.28503695130348206, "learning_rate": 5.953221279491432e-06, "loss": 0.3592, "step": 4577 }, { "epoch": 2.4750405478464588, "grad_norm": 0.3523981273174286, "learning_rate": 5.95136786224018e-06, "loss": 0.4211, "step": 4578 }, { "epoch": 2.475581185799243, "grad_norm": 0.31300053000450134, "learning_rate": 5.949514309344624e-06, "loss": 0.3936, "step": 4579 }, { "epoch": 2.4761218237520275, "grad_norm": 0.30527982115745544, "learning_rate": 5.947660621069038e-06, "loss": 0.3958, "step": 4580 }, { "epoch": 2.476662461704812, "grad_norm": 0.32108503580093384, "learning_rate": 5.94580679767772e-06, "loss": 0.4074, "step": 4581 }, { "epoch": 2.4772030996575958, "grad_norm": 0.289093941450119, "learning_rate": 5.9439528394349835e-06, "loss": 0.3972, "step": 4582 }, { "epoch": 2.47774373761038, "grad_norm": 0.29553407430648804, "learning_rate": 5.942098746605164e-06, "loss": 0.3958, "step": 4583 }, { "epoch": 2.4782843755631645, "grad_norm": 0.32005220651626587, "learning_rate": 5.940244519452612e-06, "loss": 0.4394, "step": 4584 }, { "epoch": 2.478825013515949, "grad_norm": 0.27880796790122986, "learning_rate": 5.938390158241701e-06, "loss": 0.3645, "step": 4585 }, { "epoch": 2.4793656514687332, "grad_norm": 0.3102542459964752, "learning_rate": 5.936535663236822e-06, "loss": 0.3932, "step": 4586 }, { "epoch": 2.4799062894215176, "grad_norm": 0.31755635142326355, "learning_rate": 5.934681034702387e-06, "loss": 0.4033, "step": 4587 }, { "epoch": 2.4804469273743015, "grad_norm": 0.341374009847641, "learning_rate": 5.932826272902825e-06, "loss": 0.3751, "step": 4588 }, { "epoch": 2.480987565327086, "grad_norm": 0.3454676866531372, "learning_rate": 5.930971378102585e-06, "loss": 0.4437, "step": 4589 }, { "epoch": 2.4815282032798702, "grad_norm": 0.3300340175628662, "learning_rate": 5.929116350566132e-06, "loss": 0.4481, "step": 4590 }, { "epoch": 2.4820688412326546, "grad_norm": 0.3258576989173889, "learning_rate": 5.927261190557955e-06, "loss": 0.3914, "step": 4591 }, { "epoch": 2.482609479185439, "grad_norm": 0.35531044006347656, "learning_rate": 5.925405898342559e-06, "loss": 0.4038, "step": 4592 }, { "epoch": 2.483150117138223, "grad_norm": 0.3070998787879944, "learning_rate": 5.9235504741844686e-06, "loss": 0.4024, "step": 4593 }, { "epoch": 2.4836907550910072, "grad_norm": 0.3188576400279999, "learning_rate": 5.9216949183482245e-06, "loss": 0.3864, "step": 4594 }, { "epoch": 2.4842313930437916, "grad_norm": 0.32584765553474426, "learning_rate": 5.919839231098392e-06, "loss": 0.4016, "step": 4595 }, { "epoch": 2.484772030996576, "grad_norm": 0.3314954340457916, "learning_rate": 5.917983412699549e-06, "loss": 0.4243, "step": 4596 }, { "epoch": 2.4853126689493603, "grad_norm": 0.2937794327735901, "learning_rate": 5.916127463416293e-06, "loss": 0.3844, "step": 4597 }, { "epoch": 2.4858533069021447, "grad_norm": 0.29798558354377747, "learning_rate": 5.914271383513247e-06, "loss": 0.4113, "step": 4598 }, { "epoch": 2.486393944854929, "grad_norm": 0.3137997090816498, "learning_rate": 5.912415173255045e-06, "loss": 0.4324, "step": 4599 }, { "epoch": 2.486934582807713, "grad_norm": 0.3202819526195526, "learning_rate": 5.910558832906341e-06, "loss": 0.4093, "step": 4600 }, { "epoch": 2.4874752207604973, "grad_norm": 0.29597213864326477, "learning_rate": 5.90870236273181e-06, "loss": 0.3855, "step": 4601 }, { "epoch": 2.4880158587132817, "grad_norm": 0.3094213604927063, "learning_rate": 5.906845762996143e-06, "loss": 0.384, "step": 4602 }, { "epoch": 2.488556496666066, "grad_norm": 0.3382684290409088, "learning_rate": 5.904989033964051e-06, "loss": 0.4134, "step": 4603 }, { "epoch": 2.4890971346188504, "grad_norm": 0.31785866618156433, "learning_rate": 5.903132175900264e-06, "loss": 0.4363, "step": 4604 }, { "epoch": 2.4896377725716343, "grad_norm": 0.30754876136779785, "learning_rate": 5.90127518906953e-06, "loss": 0.4128, "step": 4605 }, { "epoch": 2.4901784105244187, "grad_norm": 0.28241950273513794, "learning_rate": 5.8994180737366125e-06, "loss": 0.3662, "step": 4606 }, { "epoch": 2.490719048477203, "grad_norm": 0.27633342146873474, "learning_rate": 5.897560830166297e-06, "loss": 0.3934, "step": 4607 }, { "epoch": 2.4912596864299874, "grad_norm": 0.2971936762332916, "learning_rate": 5.8957034586233855e-06, "loss": 0.3798, "step": 4608 }, { "epoch": 2.4918003243827718, "grad_norm": 0.3198434114456177, "learning_rate": 5.8938459593726985e-06, "loss": 0.3967, "step": 4609 }, { "epoch": 2.492340962335556, "grad_norm": 0.2966821789741516, "learning_rate": 5.891988332679075e-06, "loss": 0.3967, "step": 4610 }, { "epoch": 2.49288160028834, "grad_norm": 0.3066607415676117, "learning_rate": 5.8901305788073735e-06, "loss": 0.3915, "step": 4611 }, { "epoch": 2.4934222382411244, "grad_norm": 0.36355921626091003, "learning_rate": 5.888272698022468e-06, "loss": 0.4414, "step": 4612 }, { "epoch": 2.4939628761939088, "grad_norm": 0.3148326873779297, "learning_rate": 5.886414690589252e-06, "loss": 0.3541, "step": 4613 }, { "epoch": 2.494503514146693, "grad_norm": 0.33225294947624207, "learning_rate": 5.884556556772634e-06, "loss": 0.405, "step": 4614 }, { "epoch": 2.4950441520994775, "grad_norm": 0.3356441855430603, "learning_rate": 5.882698296837549e-06, "loss": 0.3887, "step": 4615 }, { "epoch": 2.495584790052262, "grad_norm": 0.3474816381931305, "learning_rate": 5.880839911048939e-06, "loss": 0.3972, "step": 4616 }, { "epoch": 2.4961254280050458, "grad_norm": 0.3433469235897064, "learning_rate": 5.878981399671774e-06, "loss": 0.3986, "step": 4617 }, { "epoch": 2.49666606595783, "grad_norm": 0.3132109045982361, "learning_rate": 5.877122762971033e-06, "loss": 0.4208, "step": 4618 }, { "epoch": 2.4972067039106145, "grad_norm": 0.3536732792854309, "learning_rate": 5.875264001211719e-06, "loss": 0.4096, "step": 4619 }, { "epoch": 2.497747341863399, "grad_norm": 0.35122716426849365, "learning_rate": 5.87340511465885e-06, "loss": 0.4056, "step": 4620 }, { "epoch": 2.4982879798161832, "grad_norm": 0.3390818238258362, "learning_rate": 5.871546103577464e-06, "loss": 0.3954, "step": 4621 }, { "epoch": 2.498828617768967, "grad_norm": 0.3005041778087616, "learning_rate": 5.869686968232615e-06, "loss": 0.3719, "step": 4622 }, { "epoch": 2.4993692557217515, "grad_norm": 0.33602702617645264, "learning_rate": 5.867827708889375e-06, "loss": 0.4081, "step": 4623 }, { "epoch": 2.499909893674536, "grad_norm": 0.31842952966690063, "learning_rate": 5.8659683258128344e-06, "loss": 0.4403, "step": 4624 }, { "epoch": 2.5004505316273202, "grad_norm": 0.2911980152130127, "learning_rate": 5.864108819268098e-06, "loss": 0.399, "step": 4625 }, { "epoch": 2.5009911695801046, "grad_norm": 0.2828127145767212, "learning_rate": 5.862249189520293e-06, "loss": 0.3872, "step": 4626 }, { "epoch": 2.501531807532889, "grad_norm": 0.2884853780269623, "learning_rate": 5.860389436834565e-06, "loss": 0.3947, "step": 4627 }, { "epoch": 2.5020724454856733, "grad_norm": 0.28914675116539, "learning_rate": 5.858529561476069e-06, "loss": 0.413, "step": 4628 }, { "epoch": 2.5026130834384572, "grad_norm": 0.3035869002342224, "learning_rate": 5.856669563709985e-06, "loss": 0.3678, "step": 4629 }, { "epoch": 2.5031537213912416, "grad_norm": 0.31376922130584717, "learning_rate": 5.8548094438015065e-06, "loss": 0.421, "step": 4630 }, { "epoch": 2.503694359344026, "grad_norm": 0.2984936833381653, "learning_rate": 5.852949202015849e-06, "loss": 0.4254, "step": 4631 }, { "epoch": 2.5042349972968103, "grad_norm": 0.2927659749984741, "learning_rate": 5.851088838618239e-06, "loss": 0.4027, "step": 4632 }, { "epoch": 2.5047756352495947, "grad_norm": 0.3192109763622284, "learning_rate": 5.849228353873927e-06, "loss": 0.405, "step": 4633 }, { "epoch": 2.5053162732023786, "grad_norm": 0.29724055528640747, "learning_rate": 5.847367748048177e-06, "loss": 0.3795, "step": 4634 }, { "epoch": 2.505856911155163, "grad_norm": 0.30973199009895325, "learning_rate": 5.8455070214062685e-06, "loss": 0.4421, "step": 4635 }, { "epoch": 2.5063975491079473, "grad_norm": 0.29204344749450684, "learning_rate": 5.843646174213502e-06, "loss": 0.3985, "step": 4636 }, { "epoch": 2.5069381870607317, "grad_norm": 0.29323068261146545, "learning_rate": 5.841785206735192e-06, "loss": 0.4091, "step": 4637 }, { "epoch": 2.507478825013516, "grad_norm": 0.32439741492271423, "learning_rate": 5.839924119236676e-06, "loss": 0.4158, "step": 4638 }, { "epoch": 2.5080194629663, "grad_norm": 0.3009544909000397, "learning_rate": 5.838062911983301e-06, "loss": 0.3853, "step": 4639 }, { "epoch": 2.5085601009190848, "grad_norm": 0.3224140405654907, "learning_rate": 5.8362015852404365e-06, "loss": 0.4289, "step": 4640 }, { "epoch": 2.5091007388718687, "grad_norm": 0.2925460636615753, "learning_rate": 5.834340139273465e-06, "loss": 0.3596, "step": 4641 }, { "epoch": 2.509641376824653, "grad_norm": 0.3109346926212311, "learning_rate": 5.832478574347789e-06, "loss": 0.3939, "step": 4642 }, { "epoch": 2.5101820147774374, "grad_norm": 0.34562817215919495, "learning_rate": 5.830616890728828e-06, "loss": 0.4246, "step": 4643 }, { "epoch": 2.5107226527302218, "grad_norm": 0.3096538186073303, "learning_rate": 5.828755088682016e-06, "loss": 0.4123, "step": 4644 }, { "epoch": 2.511263290683006, "grad_norm": 0.31189072132110596, "learning_rate": 5.826893168472807e-06, "loss": 0.3759, "step": 4645 }, { "epoch": 2.51180392863579, "grad_norm": 0.36504462361335754, "learning_rate": 5.82503113036667e-06, "loss": 0.3922, "step": 4646 }, { "epoch": 2.5123445665885744, "grad_norm": 0.3030308783054352, "learning_rate": 5.823168974629088e-06, "loss": 0.371, "step": 4647 }, { "epoch": 2.5128852045413588, "grad_norm": 0.3304159641265869, "learning_rate": 5.821306701525566e-06, "loss": 0.4055, "step": 4648 }, { "epoch": 2.513425842494143, "grad_norm": 0.31587693095207214, "learning_rate": 5.819444311321624e-06, "loss": 0.3859, "step": 4649 }, { "epoch": 2.5139664804469275, "grad_norm": 0.3149249851703644, "learning_rate": 5.8175818042828e-06, "loss": 0.4065, "step": 4650 }, { "epoch": 2.5145071183997114, "grad_norm": 0.2843349277973175, "learning_rate": 5.815719180674644e-06, "loss": 0.4158, "step": 4651 }, { "epoch": 2.5150477563524962, "grad_norm": 0.3453637659549713, "learning_rate": 5.813856440762726e-06, "loss": 0.4431, "step": 4652 }, { "epoch": 2.51558839430528, "grad_norm": 0.28882738947868347, "learning_rate": 5.811993584812631e-06, "loss": 0.3657, "step": 4653 }, { "epoch": 2.5161290322580645, "grad_norm": 0.3001982271671295, "learning_rate": 5.810130613089964e-06, "loss": 0.3916, "step": 4654 }, { "epoch": 2.516669670210849, "grad_norm": 0.3245398998260498, "learning_rate": 5.808267525860343e-06, "loss": 0.4098, "step": 4655 }, { "epoch": 2.5172103081636332, "grad_norm": 0.2959372401237488, "learning_rate": 5.806404323389403e-06, "loss": 0.3907, "step": 4656 }, { "epoch": 2.5177509461164176, "grad_norm": 0.33096209168434143, "learning_rate": 5.8045410059427964e-06, "loss": 0.4194, "step": 4657 }, { "epoch": 2.5182915840692015, "grad_norm": 0.33586692810058594, "learning_rate": 5.802677573786194e-06, "loss": 0.4213, "step": 4658 }, { "epoch": 2.518832222021986, "grad_norm": 0.30121517181396484, "learning_rate": 5.800814027185276e-06, "loss": 0.3844, "step": 4659 }, { "epoch": 2.5193728599747702, "grad_norm": 0.32516399025917053, "learning_rate": 5.798950366405748e-06, "loss": 0.3805, "step": 4660 }, { "epoch": 2.5199134979275546, "grad_norm": 0.3295672535896301, "learning_rate": 5.797086591713324e-06, "loss": 0.4169, "step": 4661 }, { "epoch": 2.520454135880339, "grad_norm": 0.2826218903064728, "learning_rate": 5.79522270337374e-06, "loss": 0.3613, "step": 4662 }, { "epoch": 2.520994773833123, "grad_norm": 0.34624770283699036, "learning_rate": 5.793358701652743e-06, "loss": 0.4241, "step": 4663 }, { "epoch": 2.5215354117859072, "grad_norm": 0.30984780192375183, "learning_rate": 5.7914945868161035e-06, "loss": 0.4, "step": 4664 }, { "epoch": 2.5220760497386916, "grad_norm": 0.3277011811733246, "learning_rate": 5.789630359129599e-06, "loss": 0.4549, "step": 4665 }, { "epoch": 2.522616687691476, "grad_norm": 0.2756793797016144, "learning_rate": 5.787766018859029e-06, "loss": 0.3835, "step": 4666 }, { "epoch": 2.5231573256442603, "grad_norm": 0.2876024842262268, "learning_rate": 5.785901566270209e-06, "loss": 0.4025, "step": 4667 }, { "epoch": 2.5236979635970442, "grad_norm": 0.2890456020832062, "learning_rate": 5.784037001628969e-06, "loss": 0.3946, "step": 4668 }, { "epoch": 2.524238601549829, "grad_norm": 0.2782418131828308, "learning_rate": 5.782172325201155e-06, "loss": 0.4002, "step": 4669 }, { "epoch": 2.524779239502613, "grad_norm": 0.3088947534561157, "learning_rate": 5.780307537252629e-06, "loss": 0.4331, "step": 4670 }, { "epoch": 2.5253198774553973, "grad_norm": 0.29838061332702637, "learning_rate": 5.778442638049269e-06, "loss": 0.3774, "step": 4671 }, { "epoch": 2.5258605154081817, "grad_norm": 0.28203192353248596, "learning_rate": 5.776577627856969e-06, "loss": 0.3978, "step": 4672 }, { "epoch": 2.526401153360966, "grad_norm": 0.29183122515678406, "learning_rate": 5.7747125069416374e-06, "loss": 0.4065, "step": 4673 }, { "epoch": 2.5269417913137504, "grad_norm": 0.3165287375450134, "learning_rate": 5.772847275569204e-06, "loss": 0.4, "step": 4674 }, { "epoch": 2.5274824292665343, "grad_norm": 0.30928778648376465, "learning_rate": 5.770981934005606e-06, "loss": 0.4046, "step": 4675 }, { "epoch": 2.5280230672193187, "grad_norm": 0.2827933132648468, "learning_rate": 5.769116482516801e-06, "loss": 0.3991, "step": 4676 }, { "epoch": 2.528563705172103, "grad_norm": 0.3062750995159149, "learning_rate": 5.767250921368763e-06, "loss": 0.437, "step": 4677 }, { "epoch": 2.5291043431248874, "grad_norm": 0.32716378569602966, "learning_rate": 5.7653852508274796e-06, "loss": 0.4036, "step": 4678 }, { "epoch": 2.5296449810776718, "grad_norm": 0.29937511682510376, "learning_rate": 5.763519471158956e-06, "loss": 0.3997, "step": 4679 }, { "epoch": 2.5301856190304557, "grad_norm": 0.2924257814884186, "learning_rate": 5.76165358262921e-06, "loss": 0.3738, "step": 4680 }, { "epoch": 2.5307262569832405, "grad_norm": 0.30196434259414673, "learning_rate": 5.7597875855042765e-06, "loss": 0.4284, "step": 4681 }, { "epoch": 2.5312668949360244, "grad_norm": 0.29077422618865967, "learning_rate": 5.757921480050206e-06, "loss": 0.3903, "step": 4682 }, { "epoch": 2.5318075328888088, "grad_norm": 0.3043310344219208, "learning_rate": 5.756055266533066e-06, "loss": 0.3825, "step": 4683 }, { "epoch": 2.532348170841593, "grad_norm": 0.3497193157672882, "learning_rate": 5.754188945218937e-06, "loss": 0.4463, "step": 4684 }, { "epoch": 2.5328888087943775, "grad_norm": 0.28372862935066223, "learning_rate": 5.752322516373916e-06, "loss": 0.3929, "step": 4685 }, { "epoch": 2.533429446747162, "grad_norm": 0.34064146876335144, "learning_rate": 5.7504559802641144e-06, "loss": 0.4059, "step": 4686 }, { "epoch": 2.533970084699946, "grad_norm": 0.30861422419548035, "learning_rate": 5.748589337155659e-06, "loss": 0.4047, "step": 4687 }, { "epoch": 2.53451072265273, "grad_norm": 0.29709404706954956, "learning_rate": 5.746722587314693e-06, "loss": 0.3686, "step": 4688 }, { "epoch": 2.5350513606055145, "grad_norm": 0.30919149518013, "learning_rate": 5.744855731007376e-06, "loss": 0.3693, "step": 4689 }, { "epoch": 2.535591998558299, "grad_norm": 0.29292532801628113, "learning_rate": 5.742988768499879e-06, "loss": 0.3915, "step": 4690 }, { "epoch": 2.5361326365110832, "grad_norm": 0.31115463376045227, "learning_rate": 5.74112170005839e-06, "loss": 0.4598, "step": 4691 }, { "epoch": 2.536673274463867, "grad_norm": 0.3015570342540741, "learning_rate": 5.739254525949113e-06, "loss": 0.4062, "step": 4692 }, { "epoch": 2.5372139124166515, "grad_norm": 0.3175840973854065, "learning_rate": 5.737387246438266e-06, "loss": 0.3894, "step": 4693 }, { "epoch": 2.537754550369436, "grad_norm": 0.3196827471256256, "learning_rate": 5.735519861792081e-06, "loss": 0.3792, "step": 4694 }, { "epoch": 2.5382951883222202, "grad_norm": 0.3393493890762329, "learning_rate": 5.733652372276809e-06, "loss": 0.4803, "step": 4695 }, { "epoch": 2.5388358262750046, "grad_norm": 0.2754509150981903, "learning_rate": 5.731784778158712e-06, "loss": 0.3826, "step": 4696 }, { "epoch": 2.539376464227789, "grad_norm": 0.31321465969085693, "learning_rate": 5.729917079704068e-06, "loss": 0.4181, "step": 4697 }, { "epoch": 2.5399171021805733, "grad_norm": 0.30417436361312866, "learning_rate": 5.72804927717917e-06, "loss": 0.3997, "step": 4698 }, { "epoch": 2.5404577401333572, "grad_norm": 0.3323020339012146, "learning_rate": 5.726181370850327e-06, "loss": 0.3878, "step": 4699 }, { "epoch": 2.5409983780861416, "grad_norm": 0.3090798854827881, "learning_rate": 5.724313360983859e-06, "loss": 0.4242, "step": 4700 }, { "epoch": 2.541539016038926, "grad_norm": 0.3007925748825073, "learning_rate": 5.722445247846107e-06, "loss": 0.395, "step": 4701 }, { "epoch": 2.5420796539917103, "grad_norm": 0.31759968400001526, "learning_rate": 5.72057703170342e-06, "loss": 0.4297, "step": 4702 }, { "epoch": 2.5426202919444947, "grad_norm": 0.3306910991668701, "learning_rate": 5.7187087128221685e-06, "loss": 0.4202, "step": 4703 }, { "epoch": 2.5431609298972786, "grad_norm": 0.30417653918266296, "learning_rate": 5.71684029146873e-06, "loss": 0.3866, "step": 4704 }, { "epoch": 2.543701567850063, "grad_norm": 0.29840660095214844, "learning_rate": 5.7149717679095026e-06, "loss": 0.3956, "step": 4705 }, { "epoch": 2.5442422058028473, "grad_norm": 0.3214104175567627, "learning_rate": 5.713103142410896e-06, "loss": 0.3872, "step": 4706 }, { "epoch": 2.5447828437556317, "grad_norm": 0.31197574734687805, "learning_rate": 5.71123441523934e-06, "loss": 0.3789, "step": 4707 }, { "epoch": 2.545323481708416, "grad_norm": 0.32361727952957153, "learning_rate": 5.709365586661266e-06, "loss": 0.4224, "step": 4708 }, { "epoch": 2.5458641196612, "grad_norm": 0.267334908246994, "learning_rate": 5.707496656943137e-06, "loss": 0.3408, "step": 4709 }, { "epoch": 2.5464047576139848, "grad_norm": 0.34889429807662964, "learning_rate": 5.705627626351415e-06, "loss": 0.4383, "step": 4710 }, { "epoch": 2.5469453955667687, "grad_norm": 0.28287893533706665, "learning_rate": 5.703758495152585e-06, "loss": 0.4015, "step": 4711 }, { "epoch": 2.547486033519553, "grad_norm": 0.29935476183891296, "learning_rate": 5.701889263613145e-06, "loss": 0.4103, "step": 4712 }, { "epoch": 2.5480266714723374, "grad_norm": 0.32376861572265625, "learning_rate": 5.700019931999607e-06, "loss": 0.3986, "step": 4713 }, { "epoch": 2.5485673094251218, "grad_norm": 0.2909325659275055, "learning_rate": 5.698150500578497e-06, "loss": 0.4137, "step": 4714 }, { "epoch": 2.549107947377906, "grad_norm": 0.28764599561691284, "learning_rate": 5.6962809696163536e-06, "loss": 0.4149, "step": 4715 }, { "epoch": 2.54964858533069, "grad_norm": 0.2751624286174774, "learning_rate": 5.6944113393797314e-06, "loss": 0.404, "step": 4716 }, { "epoch": 2.5501892232834744, "grad_norm": 0.28708869218826294, "learning_rate": 5.6925416101352e-06, "loss": 0.3879, "step": 4717 }, { "epoch": 2.5507298612362588, "grad_norm": 0.28639307618141174, "learning_rate": 5.690671782149342e-06, "loss": 0.377, "step": 4718 }, { "epoch": 2.551270499189043, "grad_norm": 0.3289453983306885, "learning_rate": 5.688801855688752e-06, "loss": 0.3934, "step": 4719 }, { "epoch": 2.5518111371418275, "grad_norm": 0.2868677079677582, "learning_rate": 5.686931831020044e-06, "loss": 0.4083, "step": 4720 }, { "epoch": 2.5523517750946114, "grad_norm": 0.3312520682811737, "learning_rate": 5.6850617084098416e-06, "loss": 0.4313, "step": 4721 }, { "epoch": 2.552892413047396, "grad_norm": 0.2790953516960144, "learning_rate": 5.683191488124782e-06, "loss": 0.3846, "step": 4722 }, { "epoch": 2.55343305100018, "grad_norm": 0.3344435393810272, "learning_rate": 5.681321170431517e-06, "loss": 0.4598, "step": 4723 }, { "epoch": 2.5539736889529645, "grad_norm": 0.3081229627132416, "learning_rate": 5.679450755596716e-06, "loss": 0.4078, "step": 4724 }, { "epoch": 2.554514326905749, "grad_norm": 0.31888335943222046, "learning_rate": 5.6775802438870596e-06, "loss": 0.412, "step": 4725 }, { "epoch": 2.5550549648585332, "grad_norm": 0.28536099195480347, "learning_rate": 5.67570963556924e-06, "loss": 0.3876, "step": 4726 }, { "epoch": 2.5555956028113176, "grad_norm": 0.3071342408657074, "learning_rate": 5.673838930909965e-06, "loss": 0.3933, "step": 4727 }, { "epoch": 2.5561362407641015, "grad_norm": 0.30499646067619324, "learning_rate": 5.671968130175957e-06, "loss": 0.4141, "step": 4728 }, { "epoch": 2.556676878716886, "grad_norm": 0.2743418216705322, "learning_rate": 5.670097233633951e-06, "loss": 0.4209, "step": 4729 }, { "epoch": 2.5572175166696702, "grad_norm": 0.29773473739624023, "learning_rate": 5.668226241550698e-06, "loss": 0.3932, "step": 4730 }, { "epoch": 2.5577581546224546, "grad_norm": 0.3202398121356964, "learning_rate": 5.666355154192958e-06, "loss": 0.3952, "step": 4731 }, { "epoch": 2.558298792575239, "grad_norm": 0.2851807475090027, "learning_rate": 5.664483971827508e-06, "loss": 0.3783, "step": 4732 }, { "epoch": 2.558839430528023, "grad_norm": 0.31066733598709106, "learning_rate": 5.662612694721139e-06, "loss": 0.4034, "step": 4733 }, { "epoch": 2.5593800684808072, "grad_norm": 0.29603439569473267, "learning_rate": 5.660741323140651e-06, "loss": 0.3991, "step": 4734 }, { "epoch": 2.5599207064335916, "grad_norm": 0.2804199159145355, "learning_rate": 5.658869857352866e-06, "loss": 0.3779, "step": 4735 }, { "epoch": 2.560461344386376, "grad_norm": 0.27277815341949463, "learning_rate": 5.65699829762461e-06, "loss": 0.3828, "step": 4736 }, { "epoch": 2.5610019823391603, "grad_norm": 0.28206098079681396, "learning_rate": 5.655126644222728e-06, "loss": 0.413, "step": 4737 }, { "epoch": 2.5615426202919442, "grad_norm": 0.28778842091560364, "learning_rate": 5.653254897414076e-06, "loss": 0.3704, "step": 4738 }, { "epoch": 2.562083258244729, "grad_norm": 0.29356199502944946, "learning_rate": 5.651383057465527e-06, "loss": 0.4005, "step": 4739 }, { "epoch": 2.562623896197513, "grad_norm": 0.32198256254196167, "learning_rate": 5.649511124643962e-06, "loss": 0.4573, "step": 4740 }, { "epoch": 2.5631645341502973, "grad_norm": 0.2882884442806244, "learning_rate": 5.647639099216278e-06, "loss": 0.407, "step": 4741 }, { "epoch": 2.5637051721030817, "grad_norm": 0.2837863862514496, "learning_rate": 5.6457669814493855e-06, "loss": 0.3928, "step": 4742 }, { "epoch": 2.564245810055866, "grad_norm": 0.29012438654899597, "learning_rate": 5.6438947716102085e-06, "loss": 0.4155, "step": 4743 }, { "epoch": 2.5647864480086504, "grad_norm": 0.2923259437084198, "learning_rate": 5.642022469965682e-06, "loss": 0.3967, "step": 4744 }, { "epoch": 2.5653270859614343, "grad_norm": 0.33380910754203796, "learning_rate": 5.640150076782755e-06, "loss": 0.4377, "step": 4745 }, { "epoch": 2.5658677239142187, "grad_norm": 0.2974168360233307, "learning_rate": 5.638277592328392e-06, "loss": 0.3964, "step": 4746 }, { "epoch": 2.566408361867003, "grad_norm": 0.34730857610702515, "learning_rate": 5.636405016869567e-06, "loss": 0.4124, "step": 4747 }, { "epoch": 2.5669489998197874, "grad_norm": 0.3285943865776062, "learning_rate": 5.634532350673267e-06, "loss": 0.3749, "step": 4748 }, { "epoch": 2.5674896377725718, "grad_norm": 0.2939063608646393, "learning_rate": 5.632659594006498e-06, "loss": 0.3693, "step": 4749 }, { "epoch": 2.5680302757253557, "grad_norm": 0.31589987874031067, "learning_rate": 5.630786747136269e-06, "loss": 0.4131, "step": 4750 }, { "epoch": 2.56857091367814, "grad_norm": 0.3158331513404846, "learning_rate": 5.628913810329608e-06, "loss": 0.3751, "step": 4751 }, { "epoch": 2.5691115516309244, "grad_norm": 0.30993759632110596, "learning_rate": 5.6270407838535575e-06, "loss": 0.4034, "step": 4752 }, { "epoch": 2.569652189583709, "grad_norm": 0.30420321226119995, "learning_rate": 5.625167667975171e-06, "loss": 0.4026, "step": 4753 }, { "epoch": 2.570192827536493, "grad_norm": 0.3265765309333801, "learning_rate": 5.623294462961509e-06, "loss": 0.4372, "step": 4754 }, { "epoch": 2.5707334654892775, "grad_norm": 0.3445880711078644, "learning_rate": 5.621421169079655e-06, "loss": 0.4083, "step": 4755 }, { "epoch": 2.571274103442062, "grad_norm": 0.3127903938293457, "learning_rate": 5.619547786596695e-06, "loss": 0.4445, "step": 4756 }, { "epoch": 2.571814741394846, "grad_norm": 0.2989613711833954, "learning_rate": 5.617674315779737e-06, "loss": 0.3752, "step": 4757 }, { "epoch": 2.57235537934763, "grad_norm": 0.3358243703842163, "learning_rate": 5.615800756895893e-06, "loss": 0.3885, "step": 4758 }, { "epoch": 2.5728960173004145, "grad_norm": 0.2883393168449402, "learning_rate": 5.613927110212295e-06, "loss": 0.4034, "step": 4759 }, { "epoch": 2.573436655253199, "grad_norm": 0.2877352237701416, "learning_rate": 5.612053375996082e-06, "loss": 0.4145, "step": 4760 }, { "epoch": 2.5739772932059832, "grad_norm": 0.30452650785446167, "learning_rate": 5.610179554514408e-06, "loss": 0.3687, "step": 4761 }, { "epoch": 2.574517931158767, "grad_norm": 0.3329680263996124, "learning_rate": 5.608305646034441e-06, "loss": 0.4156, "step": 4762 }, { "epoch": 2.5750585691115515, "grad_norm": 0.27887311577796936, "learning_rate": 5.6064316508233555e-06, "loss": 0.3646, "step": 4763 }, { "epoch": 2.575599207064336, "grad_norm": 0.291676789522171, "learning_rate": 5.604557569148347e-06, "loss": 0.4007, "step": 4764 }, { "epoch": 2.5761398450171202, "grad_norm": 0.3124271333217621, "learning_rate": 5.6026834012766155e-06, "loss": 0.409, "step": 4765 }, { "epoch": 2.5766804829699046, "grad_norm": 0.3063427805900574, "learning_rate": 5.600809147475378e-06, "loss": 0.3944, "step": 4766 }, { "epoch": 2.5772211209226885, "grad_norm": 0.290528267621994, "learning_rate": 5.598934808011861e-06, "loss": 0.4085, "step": 4767 }, { "epoch": 2.5777617588754733, "grad_norm": 0.2874440848827362, "learning_rate": 5.5970603831533055e-06, "loss": 0.401, "step": 4768 }, { "epoch": 2.5783023968282572, "grad_norm": 0.3331311345100403, "learning_rate": 5.595185873166961e-06, "loss": 0.3851, "step": 4769 }, { "epoch": 2.5788430347810416, "grad_norm": 0.31015604734420776, "learning_rate": 5.593311278320097e-06, "loss": 0.4194, "step": 4770 }, { "epoch": 2.579383672733826, "grad_norm": 0.306537002325058, "learning_rate": 5.5914365988799854e-06, "loss": 0.4077, "step": 4771 }, { "epoch": 2.5799243106866103, "grad_norm": 0.3082190752029419, "learning_rate": 5.589561835113917e-06, "loss": 0.3948, "step": 4772 }, { "epoch": 2.5804649486393947, "grad_norm": 0.2981390655040741, "learning_rate": 5.587686987289189e-06, "loss": 0.3974, "step": 4773 }, { "epoch": 2.5810055865921786, "grad_norm": 0.28734248876571655, "learning_rate": 5.585812055673117e-06, "loss": 0.3953, "step": 4774 }, { "epoch": 2.581546224544963, "grad_norm": 0.3071170747280121, "learning_rate": 5.583937040533023e-06, "loss": 0.4089, "step": 4775 }, { "epoch": 2.5820868624977473, "grad_norm": 0.31013354659080505, "learning_rate": 5.582061942136247e-06, "loss": 0.4042, "step": 4776 }, { "epoch": 2.5826275004505317, "grad_norm": 0.3202647566795349, "learning_rate": 5.580186760750132e-06, "loss": 0.3825, "step": 4777 }, { "epoch": 2.583168138403316, "grad_norm": 0.2962801158428192, "learning_rate": 5.578311496642042e-06, "loss": 0.3861, "step": 4778 }, { "epoch": 2.5837087763561, "grad_norm": 0.3064135015010834, "learning_rate": 5.576436150079347e-06, "loss": 0.4166, "step": 4779 }, { "epoch": 2.5842494143088843, "grad_norm": 0.3127712607383728, "learning_rate": 5.574560721329431e-06, "loss": 0.3752, "step": 4780 }, { "epoch": 2.5847900522616687, "grad_norm": 0.28814229369163513, "learning_rate": 5.572685210659688e-06, "loss": 0.389, "step": 4781 }, { "epoch": 2.585330690214453, "grad_norm": 0.28176191449165344, "learning_rate": 5.570809618337528e-06, "loss": 0.4222, "step": 4782 }, { "epoch": 2.5858713281672374, "grad_norm": 0.33255767822265625, "learning_rate": 5.568933944630367e-06, "loss": 0.4089, "step": 4783 }, { "epoch": 2.5864119661200218, "grad_norm": 0.29645800590515137, "learning_rate": 5.567058189805636e-06, "loss": 0.3528, "step": 4784 }, { "epoch": 2.586952604072806, "grad_norm": 0.3250539302825928, "learning_rate": 5.565182354130776e-06, "loss": 0.4067, "step": 4785 }, { "epoch": 2.58749324202559, "grad_norm": 0.31018659472465515, "learning_rate": 5.563306437873239e-06, "loss": 0.4024, "step": 4786 }, { "epoch": 2.5880338799783744, "grad_norm": 0.30082499980926514, "learning_rate": 5.561430441300493e-06, "loss": 0.3976, "step": 4787 }, { "epoch": 2.588574517931159, "grad_norm": 0.3291463255882263, "learning_rate": 5.5595543646800134e-06, "loss": 0.4123, "step": 4788 }, { "epoch": 2.589115155883943, "grad_norm": 0.2999454140663147, "learning_rate": 5.557678208279286e-06, "loss": 0.4015, "step": 4789 }, { "epoch": 2.5896557938367275, "grad_norm": 0.3320070803165436, "learning_rate": 5.555801972365812e-06, "loss": 0.3789, "step": 4790 }, { "epoch": 2.5901964317895114, "grad_norm": 0.3625023663043976, "learning_rate": 5.553925657207098e-06, "loss": 0.426, "step": 4791 }, { "epoch": 2.590737069742296, "grad_norm": 0.29068654775619507, "learning_rate": 5.5520492630706705e-06, "loss": 0.3966, "step": 4792 }, { "epoch": 2.59127770769508, "grad_norm": 0.3232583701610565, "learning_rate": 5.5501727902240584e-06, "loss": 0.4406, "step": 4793 }, { "epoch": 2.5918183456478645, "grad_norm": 0.3110728859901428, "learning_rate": 5.5482962389348084e-06, "loss": 0.3989, "step": 4794 }, { "epoch": 2.592358983600649, "grad_norm": 0.3199319839477539, "learning_rate": 5.5464196094704745e-06, "loss": 0.3721, "step": 4795 }, { "epoch": 2.592899621553433, "grad_norm": 0.29083722829818726, "learning_rate": 5.544542902098624e-06, "loss": 0.3863, "step": 4796 }, { "epoch": 2.5934402595062176, "grad_norm": 0.3540262281894684, "learning_rate": 5.542666117086832e-06, "loss": 0.4659, "step": 4797 }, { "epoch": 2.5939808974590015, "grad_norm": 0.2991962134838104, "learning_rate": 5.540789254702691e-06, "loss": 0.4013, "step": 4798 }, { "epoch": 2.594521535411786, "grad_norm": 0.28734034299850464, "learning_rate": 5.5389123152137965e-06, "loss": 0.4237, "step": 4799 }, { "epoch": 2.5950621733645702, "grad_norm": 0.28482699394226074, "learning_rate": 5.537035298887764e-06, "loss": 0.3689, "step": 4800 }, { "epoch": 2.5956028113173546, "grad_norm": 0.2997708320617676, "learning_rate": 5.53515820599221e-06, "loss": 0.4029, "step": 4801 }, { "epoch": 2.596143449270139, "grad_norm": 0.30606913566589355, "learning_rate": 5.53328103679477e-06, "loss": 0.4063, "step": 4802 }, { "epoch": 2.596684087222923, "grad_norm": 0.30242788791656494, "learning_rate": 5.5314037915630855e-06, "loss": 0.3998, "step": 4803 }, { "epoch": 2.5972247251757072, "grad_norm": 0.3239155411720276, "learning_rate": 5.529526470564814e-06, "loss": 0.3953, "step": 4804 }, { "epoch": 2.5977653631284916, "grad_norm": 0.2938888967037201, "learning_rate": 5.527649074067618e-06, "loss": 0.4338, "step": 4805 }, { "epoch": 2.598306001081276, "grad_norm": 0.2955934703350067, "learning_rate": 5.525771602339174e-06, "loss": 0.3807, "step": 4806 }, { "epoch": 2.5988466390340603, "grad_norm": 0.3063485026359558, "learning_rate": 5.523894055647167e-06, "loss": 0.397, "step": 4807 }, { "epoch": 2.5993872769868442, "grad_norm": 0.28256726264953613, "learning_rate": 5.522016434259295e-06, "loss": 0.3811, "step": 4808 }, { "epoch": 2.5999279149396286, "grad_norm": 0.3530399203300476, "learning_rate": 5.520138738443267e-06, "loss": 0.3994, "step": 4809 }, { "epoch": 2.600468552892413, "grad_norm": 0.31889668107032776, "learning_rate": 5.5182609684668024e-06, "loss": 0.4323, "step": 4810 }, { "epoch": 2.6010091908451973, "grad_norm": 0.3093354105949402, "learning_rate": 5.516383124597626e-06, "loss": 0.3996, "step": 4811 }, { "epoch": 2.6015498287979817, "grad_norm": 0.33319196105003357, "learning_rate": 5.514505207103482e-06, "loss": 0.3765, "step": 4812 }, { "epoch": 2.602090466750766, "grad_norm": 0.337973028421402, "learning_rate": 5.512627216252117e-06, "loss": 0.4089, "step": 4813 }, { "epoch": 2.6026311047035504, "grad_norm": 0.3009018898010254, "learning_rate": 5.510749152311293e-06, "loss": 0.4139, "step": 4814 }, { "epoch": 2.6031717426563343, "grad_norm": 0.3260372281074524, "learning_rate": 5.508871015548781e-06, "loss": 0.4199, "step": 4815 }, { "epoch": 2.6037123806091187, "grad_norm": 0.3295881450176239, "learning_rate": 5.506992806232363e-06, "loss": 0.379, "step": 4816 }, { "epoch": 2.604253018561903, "grad_norm": 0.3007758855819702, "learning_rate": 5.50511452462983e-06, "loss": 0.3894, "step": 4817 }, { "epoch": 2.6047936565146874, "grad_norm": 0.2632093131542206, "learning_rate": 5.503236171008983e-06, "loss": 0.3655, "step": 4818 }, { "epoch": 2.605334294467472, "grad_norm": 0.3454212248325348, "learning_rate": 5.501357745637635e-06, "loss": 0.4068, "step": 4819 }, { "epoch": 2.6058749324202557, "grad_norm": 0.3434267044067383, "learning_rate": 5.49947924878361e-06, "loss": 0.3986, "step": 4820 }, { "epoch": 2.60641557037304, "grad_norm": 0.27694422006607056, "learning_rate": 5.497600680714738e-06, "loss": 0.3915, "step": 4821 }, { "epoch": 2.6069562083258244, "grad_norm": 0.3544457256793976, "learning_rate": 5.495722041698864e-06, "loss": 0.436, "step": 4822 }, { "epoch": 2.607496846278609, "grad_norm": 0.30748435854911804, "learning_rate": 5.4938433320038395e-06, "loss": 0.3588, "step": 4823 }, { "epoch": 2.608037484231393, "grad_norm": 0.301878422498703, "learning_rate": 5.49196455189753e-06, "loss": 0.4161, "step": 4824 }, { "epoch": 2.608578122184177, "grad_norm": 0.297625333070755, "learning_rate": 5.490085701647805e-06, "loss": 0.4219, "step": 4825 }, { "epoch": 2.609118760136962, "grad_norm": 0.31237471103668213, "learning_rate": 5.488206781522547e-06, "loss": 0.4199, "step": 4826 }, { "epoch": 2.609659398089746, "grad_norm": 0.3142875134944916, "learning_rate": 5.486327791789654e-06, "loss": 0.4171, "step": 4827 }, { "epoch": 2.61020003604253, "grad_norm": 0.30160701274871826, "learning_rate": 5.484448732717026e-06, "loss": 0.3549, "step": 4828 }, { "epoch": 2.6107406739953145, "grad_norm": 0.30564194917678833, "learning_rate": 5.482569604572577e-06, "loss": 0.4039, "step": 4829 }, { "epoch": 2.611281311948099, "grad_norm": 0.357769638299942, "learning_rate": 5.480690407624227e-06, "loss": 0.4275, "step": 4830 }, { "epoch": 2.6118219499008832, "grad_norm": 0.31541019678115845, "learning_rate": 5.47881114213991e-06, "loss": 0.4173, "step": 4831 }, { "epoch": 2.612362587853667, "grad_norm": 0.28554439544677734, "learning_rate": 5.476931808387569e-06, "loss": 0.4039, "step": 4832 }, { "epoch": 2.6129032258064515, "grad_norm": 0.31113046407699585, "learning_rate": 5.475052406635158e-06, "loss": 0.4206, "step": 4833 }, { "epoch": 2.613443863759236, "grad_norm": 0.2959286868572235, "learning_rate": 5.473172937150633e-06, "loss": 0.3876, "step": 4834 }, { "epoch": 2.6139845017120202, "grad_norm": 0.31250184774398804, "learning_rate": 5.47129340020197e-06, "loss": 0.3951, "step": 4835 }, { "epoch": 2.6145251396648046, "grad_norm": 0.33424627780914307, "learning_rate": 5.469413796057147e-06, "loss": 0.4076, "step": 4836 }, { "epoch": 2.6150657776175885, "grad_norm": 0.29603421688079834, "learning_rate": 5.467534124984158e-06, "loss": 0.3878, "step": 4837 }, { "epoch": 2.615606415570373, "grad_norm": 0.3031003177165985, "learning_rate": 5.4656543872509994e-06, "loss": 0.4352, "step": 4838 }, { "epoch": 2.6161470535231572, "grad_norm": 0.2941995859146118, "learning_rate": 5.4637745831256835e-06, "loss": 0.3927, "step": 4839 }, { "epoch": 2.6166876914759416, "grad_norm": 0.2877275049686432, "learning_rate": 5.461894712876228e-06, "loss": 0.3931, "step": 4840 }, { "epoch": 2.617228329428726, "grad_norm": 0.2958478033542633, "learning_rate": 5.460014776770663e-06, "loss": 0.4354, "step": 4841 }, { "epoch": 2.6177689673815103, "grad_norm": 0.2891026735305786, "learning_rate": 5.458134775077024e-06, "loss": 0.3842, "step": 4842 }, { "epoch": 2.6183096053342947, "grad_norm": 0.28329116106033325, "learning_rate": 5.45625470806336e-06, "loss": 0.3872, "step": 4843 }, { "epoch": 2.6188502432870786, "grad_norm": 0.2999545931816101, "learning_rate": 5.4543745759977265e-06, "loss": 0.4215, "step": 4844 }, { "epoch": 2.619390881239863, "grad_norm": 0.30322304368019104, "learning_rate": 5.45249437914819e-06, "loss": 0.4065, "step": 4845 }, { "epoch": 2.6199315191926473, "grad_norm": 0.2818833887577057, "learning_rate": 5.4506141177828255e-06, "loss": 0.3785, "step": 4846 }, { "epoch": 2.6204721571454317, "grad_norm": 0.2854217290878296, "learning_rate": 5.448733792169717e-06, "loss": 0.4159, "step": 4847 }, { "epoch": 2.621012795098216, "grad_norm": 0.2914997339248657, "learning_rate": 5.446853402576958e-06, "loss": 0.4188, "step": 4848 }, { "epoch": 2.621553433051, "grad_norm": 0.2967655658721924, "learning_rate": 5.44497294927265e-06, "loss": 0.3725, "step": 4849 }, { "epoch": 2.6220940710037843, "grad_norm": 0.3074378967285156, "learning_rate": 5.443092432524906e-06, "loss": 0.4387, "step": 4850 }, { "epoch": 2.6226347089565687, "grad_norm": 0.3074101209640503, "learning_rate": 5.441211852601849e-06, "loss": 0.421, "step": 4851 }, { "epoch": 2.623175346909353, "grad_norm": 0.30987322330474854, "learning_rate": 5.439331209771604e-06, "loss": 0.3981, "step": 4852 }, { "epoch": 2.6237159848621374, "grad_norm": 0.29704779386520386, "learning_rate": 5.437450504302312e-06, "loss": 0.3864, "step": 4853 }, { "epoch": 2.6242566228149213, "grad_norm": 0.29757240414619446, "learning_rate": 5.435569736462119e-06, "loss": 0.4094, "step": 4854 }, { "epoch": 2.624797260767706, "grad_norm": 0.2916492819786072, "learning_rate": 5.433688906519183e-06, "loss": 0.3857, "step": 4855 }, { "epoch": 2.62533789872049, "grad_norm": 0.30935555696487427, "learning_rate": 5.4318080147416695e-06, "loss": 0.3868, "step": 4856 }, { "epoch": 2.6258785366732744, "grad_norm": 0.2876951992511749, "learning_rate": 5.429927061397754e-06, "loss": 0.3804, "step": 4857 }, { "epoch": 2.626419174626059, "grad_norm": 0.29807013273239136, "learning_rate": 5.428046046755615e-06, "loss": 0.3899, "step": 4858 }, { "epoch": 2.626959812578843, "grad_norm": 0.30588850378990173, "learning_rate": 5.426164971083447e-06, "loss": 0.4116, "step": 4859 }, { "epoch": 2.6275004505316275, "grad_norm": 0.2891402840614319, "learning_rate": 5.424283834649451e-06, "loss": 0.3746, "step": 4860 }, { "epoch": 2.6280410884844114, "grad_norm": 0.3078900873661041, "learning_rate": 5.4224026377218365e-06, "loss": 0.4146, "step": 4861 }, { "epoch": 2.628581726437196, "grad_norm": 0.2716504633426666, "learning_rate": 5.4205213805688174e-06, "loss": 0.3487, "step": 4862 }, { "epoch": 2.62912236438998, "grad_norm": 0.29706239700317383, "learning_rate": 5.4186400634586246e-06, "loss": 0.4373, "step": 4863 }, { "epoch": 2.6296630023427645, "grad_norm": 0.28729817271232605, "learning_rate": 5.416758686659488e-06, "loss": 0.3455, "step": 4864 }, { "epoch": 2.630203640295549, "grad_norm": 0.3301662802696228, "learning_rate": 5.414877250439654e-06, "loss": 0.4292, "step": 4865 }, { "epoch": 2.630744278248333, "grad_norm": 0.29704442620277405, "learning_rate": 5.412995755067375e-06, "loss": 0.402, "step": 4866 }, { "epoch": 2.631284916201117, "grad_norm": 0.30376604199409485, "learning_rate": 5.41111420081091e-06, "loss": 0.374, "step": 4867 }, { "epoch": 2.6318255541539015, "grad_norm": 0.2995263636112213, "learning_rate": 5.4092325879385264e-06, "loss": 0.3908, "step": 4868 }, { "epoch": 2.632366192106686, "grad_norm": 0.2971474528312683, "learning_rate": 5.4073509167185045e-06, "loss": 0.4179, "step": 4869 }, { "epoch": 2.6329068300594702, "grad_norm": 0.2971806228160858, "learning_rate": 5.405469187419126e-06, "loss": 0.3567, "step": 4870 }, { "epoch": 2.6334474680122546, "grad_norm": 0.2885863482952118, "learning_rate": 5.403587400308685e-06, "loss": 0.3911, "step": 4871 }, { "epoch": 2.633988105965039, "grad_norm": 0.30982890725135803, "learning_rate": 5.401705555655485e-06, "loss": 0.4027, "step": 4872 }, { "epoch": 2.634528743917823, "grad_norm": 0.29124149680137634, "learning_rate": 5.399823653727837e-06, "loss": 0.4034, "step": 4873 }, { "epoch": 2.6350693818706072, "grad_norm": 0.3088896572589874, "learning_rate": 5.3979416947940556e-06, "loss": 0.4279, "step": 4874 }, { "epoch": 2.6356100198233916, "grad_norm": 0.26539069414138794, "learning_rate": 5.39605967912247e-06, "loss": 0.3746, "step": 4875 }, { "epoch": 2.636150657776176, "grad_norm": 0.31384748220443726, "learning_rate": 5.3941776069814124e-06, "loss": 0.4475, "step": 4876 }, { "epoch": 2.6366912957289603, "grad_norm": 0.287557989358902, "learning_rate": 5.392295478639226e-06, "loss": 0.404, "step": 4877 }, { "epoch": 2.6372319336817442, "grad_norm": 0.2968330681324005, "learning_rate": 5.390413294364261e-06, "loss": 0.4049, "step": 4878 }, { "epoch": 2.6377725716345286, "grad_norm": 0.29474076628685, "learning_rate": 5.388531054424878e-06, "loss": 0.3785, "step": 4879 }, { "epoch": 2.638313209587313, "grad_norm": 0.3032850921154022, "learning_rate": 5.386648759089441e-06, "loss": 0.4237, "step": 4880 }, { "epoch": 2.6388538475400973, "grad_norm": 0.2936110496520996, "learning_rate": 5.3847664086263264e-06, "loss": 0.4111, "step": 4881 }, { "epoch": 2.6393944854928817, "grad_norm": 0.3138837516307831, "learning_rate": 5.382884003303913e-06, "loss": 0.4136, "step": 4882 }, { "epoch": 2.6399351234456656, "grad_norm": 0.28655844926834106, "learning_rate": 5.381001543390592e-06, "loss": 0.4019, "step": 4883 }, { "epoch": 2.6404757613984504, "grad_norm": 0.3090074360370636, "learning_rate": 5.379119029154763e-06, "loss": 0.3926, "step": 4884 }, { "epoch": 2.6410163993512343, "grad_norm": 0.3107156455516815, "learning_rate": 5.3772364608648304e-06, "loss": 0.4038, "step": 4885 }, { "epoch": 2.6415570373040187, "grad_norm": 0.28442808985710144, "learning_rate": 5.375353838789207e-06, "loss": 0.3933, "step": 4886 }, { "epoch": 2.642097675256803, "grad_norm": 0.2985711991786957, "learning_rate": 5.373471163196314e-06, "loss": 0.3881, "step": 4887 }, { "epoch": 2.6426383132095874, "grad_norm": 0.29716822504997253, "learning_rate": 5.371588434354579e-06, "loss": 0.3739, "step": 4888 }, { "epoch": 2.643178951162372, "grad_norm": 0.2983829975128174, "learning_rate": 5.36970565253244e-06, "loss": 0.4014, "step": 4889 }, { "epoch": 2.6437195891151557, "grad_norm": 0.28274622559547424, "learning_rate": 5.367822817998338e-06, "loss": 0.3979, "step": 4890 }, { "epoch": 2.64426022706794, "grad_norm": 0.2913801372051239, "learning_rate": 5.365939931020725e-06, "loss": 0.4069, "step": 4891 }, { "epoch": 2.6448008650207244, "grad_norm": 0.37011346220970154, "learning_rate": 5.364056991868063e-06, "loss": 0.4528, "step": 4892 }, { "epoch": 2.645341502973509, "grad_norm": 0.31129953265190125, "learning_rate": 5.362174000808813e-06, "loss": 0.3882, "step": 4893 }, { "epoch": 2.645882140926293, "grad_norm": 0.279175341129303, "learning_rate": 5.360290958111451e-06, "loss": 0.3741, "step": 4894 }, { "epoch": 2.646422778879077, "grad_norm": 0.290469765663147, "learning_rate": 5.358407864044456e-06, "loss": 0.3578, "step": 4895 }, { "epoch": 2.646963416831862, "grad_norm": 0.3076058030128479, "learning_rate": 5.35652471887632e-06, "loss": 0.393, "step": 4896 }, { "epoch": 2.647504054784646, "grad_norm": 0.3355569541454315, "learning_rate": 5.354641522875535e-06, "loss": 0.4244, "step": 4897 }, { "epoch": 2.64804469273743, "grad_norm": 0.2836383879184723, "learning_rate": 5.352758276310606e-06, "loss": 0.4281, "step": 4898 }, { "epoch": 2.6485853306902145, "grad_norm": 0.3006925582885742, "learning_rate": 5.3508749794500395e-06, "loss": 0.4293, "step": 4899 }, { "epoch": 2.649125968642999, "grad_norm": 0.33312705159187317, "learning_rate": 5.348991632562355e-06, "loss": 0.3727, "step": 4900 }, { "epoch": 2.6496666065957832, "grad_norm": 0.30963844060897827, "learning_rate": 5.347108235916077e-06, "loss": 0.3869, "step": 4901 }, { "epoch": 2.650207244548567, "grad_norm": 0.32791048288345337, "learning_rate": 5.345224789779735e-06, "loss": 0.4117, "step": 4902 }, { "epoch": 2.6507478825013515, "grad_norm": 0.2883923053741455, "learning_rate": 5.343341294421868e-06, "loss": 0.4054, "step": 4903 }, { "epoch": 2.651288520454136, "grad_norm": 0.2899568974971771, "learning_rate": 5.3414577501110255e-06, "loss": 0.4046, "step": 4904 }, { "epoch": 2.6518291584069202, "grad_norm": 0.3008023798465729, "learning_rate": 5.339574157115752e-06, "loss": 0.3975, "step": 4905 }, { "epoch": 2.6523697963597046, "grad_norm": 0.31351393461227417, "learning_rate": 5.337690515704612e-06, "loss": 0.4381, "step": 4906 }, { "epoch": 2.6529104343124885, "grad_norm": 0.2951430678367615, "learning_rate": 5.335806826146171e-06, "loss": 0.3764, "step": 4907 }, { "epoch": 2.653451072265273, "grad_norm": 0.2831011414527893, "learning_rate": 5.333923088709002e-06, "loss": 0.4136, "step": 4908 }, { "epoch": 2.6539917102180572, "grad_norm": 0.3104168176651001, "learning_rate": 5.332039303661683e-06, "loss": 0.4166, "step": 4909 }, { "epoch": 2.6545323481708416, "grad_norm": 0.26301953196525574, "learning_rate": 5.330155471272804e-06, "loss": 0.3623, "step": 4910 }, { "epoch": 2.655072986123626, "grad_norm": 0.3025093972682953, "learning_rate": 5.328271591810956e-06, "loss": 0.4161, "step": 4911 }, { "epoch": 2.6556136240764103, "grad_norm": 0.2944580912590027, "learning_rate": 5.326387665544739e-06, "loss": 0.3918, "step": 4912 }, { "epoch": 2.6561542620291947, "grad_norm": 0.2983967661857605, "learning_rate": 5.32450369274276e-06, "loss": 0.3895, "step": 4913 }, { "epoch": 2.6566948999819786, "grad_norm": 0.29173171520233154, "learning_rate": 5.3226196736736345e-06, "loss": 0.3957, "step": 4914 }, { "epoch": 2.657235537934763, "grad_norm": 0.3415560722351074, "learning_rate": 5.320735608605979e-06, "loss": 0.3912, "step": 4915 }, { "epoch": 2.6577761758875473, "grad_norm": 0.287569522857666, "learning_rate": 5.318851497808424e-06, "loss": 0.3931, "step": 4916 }, { "epoch": 2.6583168138403317, "grad_norm": 0.2986423671245575, "learning_rate": 5.316967341549598e-06, "loss": 0.4022, "step": 4917 }, { "epoch": 2.658857451793116, "grad_norm": 0.2977146804332733, "learning_rate": 5.315083140098145e-06, "loss": 0.3984, "step": 4918 }, { "epoch": 2.6593980897459, "grad_norm": 0.26954126358032227, "learning_rate": 5.313198893722708e-06, "loss": 0.4017, "step": 4919 }, { "epoch": 2.6599387276986843, "grad_norm": 0.29893094301223755, "learning_rate": 5.311314602691943e-06, "loss": 0.4225, "step": 4920 }, { "epoch": 2.6604793656514687, "grad_norm": 0.28754350543022156, "learning_rate": 5.309430267274503e-06, "loss": 0.4092, "step": 4921 }, { "epoch": 2.661020003604253, "grad_norm": 0.29370546340942383, "learning_rate": 5.307545887739059e-06, "loss": 0.4352, "step": 4922 }, { "epoch": 2.6615606415570374, "grad_norm": 0.28276485204696655, "learning_rate": 5.305661464354278e-06, "loss": 0.3883, "step": 4923 }, { "epoch": 2.6621012795098213, "grad_norm": 0.31154152750968933, "learning_rate": 5.303776997388842e-06, "loss": 0.3647, "step": 4924 }, { "epoch": 2.662641917462606, "grad_norm": 0.31019219756126404, "learning_rate": 5.301892487111431e-06, "loss": 0.4203, "step": 4925 }, { "epoch": 2.66318255541539, "grad_norm": 0.28385746479034424, "learning_rate": 5.300007933790737e-06, "loss": 0.4078, "step": 4926 }, { "epoch": 2.6637231933681744, "grad_norm": 0.298709511756897, "learning_rate": 5.298123337695455e-06, "loss": 0.4308, "step": 4927 }, { "epoch": 2.664263831320959, "grad_norm": 0.2809273898601532, "learning_rate": 5.296238699094288e-06, "loss": 0.3719, "step": 4928 }, { "epoch": 2.664804469273743, "grad_norm": 0.33656007051467896, "learning_rate": 5.294354018255945e-06, "loss": 0.4532, "step": 4929 }, { "epoch": 2.6653451072265275, "grad_norm": 0.31055185198783875, "learning_rate": 5.292469295449141e-06, "loss": 0.3959, "step": 4930 }, { "epoch": 2.6658857451793114, "grad_norm": 0.303205281496048, "learning_rate": 5.290584530942593e-06, "loss": 0.4183, "step": 4931 }, { "epoch": 2.666426383132096, "grad_norm": 0.3017641603946686, "learning_rate": 5.288699725005031e-06, "loss": 0.3883, "step": 4932 }, { "epoch": 2.66696702108488, "grad_norm": 0.34416377544403076, "learning_rate": 5.286814877905186e-06, "loss": 0.4114, "step": 4933 }, { "epoch": 2.6675076590376645, "grad_norm": 0.29349640011787415, "learning_rate": 5.284929989911793e-06, "loss": 0.3976, "step": 4934 }, { "epoch": 2.668048296990449, "grad_norm": 0.2900095582008362, "learning_rate": 5.2830450612936e-06, "loss": 0.3908, "step": 4935 }, { "epoch": 2.668588934943233, "grad_norm": 0.31933918595314026, "learning_rate": 5.281160092319358e-06, "loss": 0.406, "step": 4936 }, { "epoch": 2.669129572896017, "grad_norm": 0.32243990898132324, "learning_rate": 5.2792750832578164e-06, "loss": 0.4107, "step": 4937 }, { "epoch": 2.6696702108488015, "grad_norm": 0.2958493232727051, "learning_rate": 5.277390034377742e-06, "loss": 0.4199, "step": 4938 }, { "epoch": 2.670210848801586, "grad_norm": 0.31553322076797485, "learning_rate": 5.275504945947898e-06, "loss": 0.3843, "step": 4939 }, { "epoch": 2.6707514867543702, "grad_norm": 0.3030598759651184, "learning_rate": 5.273619818237058e-06, "loss": 0.3627, "step": 4940 }, { "epoch": 2.6712921247071546, "grad_norm": 0.2761203944683075, "learning_rate": 5.271734651514001e-06, "loss": 0.3991, "step": 4941 }, { "epoch": 2.671832762659939, "grad_norm": 0.26242905855178833, "learning_rate": 5.26984944604751e-06, "loss": 0.3785, "step": 4942 }, { "epoch": 2.672373400612723, "grad_norm": 0.3502192497253418, "learning_rate": 5.267964202106375e-06, "loss": 0.4364, "step": 4943 }, { "epoch": 2.6729140385655072, "grad_norm": 0.3181464672088623, "learning_rate": 5.26607891995939e-06, "loss": 0.4247, "step": 4944 }, { "epoch": 2.6734546765182916, "grad_norm": 0.2903212010860443, "learning_rate": 5.264193599875353e-06, "loss": 0.384, "step": 4945 }, { "epoch": 2.673995314471076, "grad_norm": 0.30827388167381287, "learning_rate": 5.2623082421230735e-06, "loss": 0.4271, "step": 4946 }, { "epoch": 2.6745359524238603, "grad_norm": 0.3044445514678955, "learning_rate": 5.260422846971359e-06, "loss": 0.4173, "step": 4947 }, { "epoch": 2.6750765903766442, "grad_norm": 0.3259648382663727, "learning_rate": 5.258537414689029e-06, "loss": 0.4217, "step": 4948 }, { "epoch": 2.6756172283294286, "grad_norm": 0.2715407907962799, "learning_rate": 5.256651945544902e-06, "loss": 0.3924, "step": 4949 }, { "epoch": 2.676157866282213, "grad_norm": 0.3154553174972534, "learning_rate": 5.254766439807807e-06, "loss": 0.3936, "step": 4950 }, { "epoch": 2.6766985042349973, "grad_norm": 0.3301718533039093, "learning_rate": 5.252880897746573e-06, "loss": 0.4239, "step": 4951 }, { "epoch": 2.6772391421877817, "grad_norm": 0.3474384546279907, "learning_rate": 5.25099531963004e-06, "loss": 0.417, "step": 4952 }, { "epoch": 2.6777797801405656, "grad_norm": 0.29553794860839844, "learning_rate": 5.249109705727049e-06, "loss": 0.3806, "step": 4953 }, { "epoch": 2.6783204180933504, "grad_norm": 0.2954624593257904, "learning_rate": 5.24722405630645e-06, "loss": 0.4002, "step": 4954 }, { "epoch": 2.6788610560461343, "grad_norm": 0.3270542621612549, "learning_rate": 5.245338371637091e-06, "loss": 0.397, "step": 4955 }, { "epoch": 2.6794016939989187, "grad_norm": 0.3222622275352478, "learning_rate": 5.243452651987833e-06, "loss": 0.3858, "step": 4956 }, { "epoch": 2.679942331951703, "grad_norm": 0.28066954016685486, "learning_rate": 5.241566897627536e-06, "loss": 0.3499, "step": 4957 }, { "epoch": 2.6804829699044874, "grad_norm": 0.33865922689437866, "learning_rate": 5.239681108825069e-06, "loss": 0.4373, "step": 4958 }, { "epoch": 2.681023607857272, "grad_norm": 0.318342387676239, "learning_rate": 5.237795285849305e-06, "loss": 0.3795, "step": 4959 }, { "epoch": 2.6815642458100557, "grad_norm": 0.31130820512771606, "learning_rate": 5.235909428969119e-06, "loss": 0.4446, "step": 4960 }, { "epoch": 2.68210488376284, "grad_norm": 0.29390275478363037, "learning_rate": 5.234023538453396e-06, "loss": 0.39, "step": 4961 }, { "epoch": 2.6826455217156244, "grad_norm": 0.2983420491218567, "learning_rate": 5.23213761457102e-06, "loss": 0.3953, "step": 4962 }, { "epoch": 2.683186159668409, "grad_norm": 0.3359537124633789, "learning_rate": 5.230251657590884e-06, "loss": 0.4291, "step": 4963 }, { "epoch": 2.683726797621193, "grad_norm": 0.30198025703430176, "learning_rate": 5.228365667781885e-06, "loss": 0.376, "step": 4964 }, { "epoch": 2.684267435573977, "grad_norm": 0.3557318449020386, "learning_rate": 5.226479645412923e-06, "loss": 0.451, "step": 4965 }, { "epoch": 2.6848080735267614, "grad_norm": 0.328320175409317, "learning_rate": 5.224593590752902e-06, "loss": 0.3907, "step": 4966 }, { "epoch": 2.685348711479546, "grad_norm": 0.31203341484069824, "learning_rate": 5.222707504070737e-06, "loss": 0.3777, "step": 4967 }, { "epoch": 2.68588934943233, "grad_norm": 0.3094787001609802, "learning_rate": 5.220821385635337e-06, "loss": 0.3683, "step": 4968 }, { "epoch": 2.6864299873851145, "grad_norm": 0.30369725823402405, "learning_rate": 5.218935235715625e-06, "loss": 0.4159, "step": 4969 }, { "epoch": 2.686970625337899, "grad_norm": 0.3582439124584198, "learning_rate": 5.2170490545805255e-06, "loss": 0.4446, "step": 4970 }, { "epoch": 2.6875112632906832, "grad_norm": 0.2925317585468292, "learning_rate": 5.215162842498964e-06, "loss": 0.3696, "step": 4971 }, { "epoch": 2.688051901243467, "grad_norm": 0.3087409734725952, "learning_rate": 5.213276599739875e-06, "loss": 0.423, "step": 4972 }, { "epoch": 2.6885925391962515, "grad_norm": 0.351330429315567, "learning_rate": 5.211390326572196e-06, "loss": 0.4392, "step": 4973 }, { "epoch": 2.689133177149036, "grad_norm": 0.3118128478527069, "learning_rate": 5.209504023264865e-06, "loss": 0.4046, "step": 4974 }, { "epoch": 2.6896738151018202, "grad_norm": 0.3023654520511627, "learning_rate": 5.207617690086831e-06, "loss": 0.4049, "step": 4975 }, { "epoch": 2.6902144530546046, "grad_norm": 0.27988362312316895, "learning_rate": 5.205731327307044e-06, "loss": 0.4134, "step": 4976 }, { "epoch": 2.6907550910073885, "grad_norm": 0.2854805290699005, "learning_rate": 5.2038449351944585e-06, "loss": 0.4033, "step": 4977 }, { "epoch": 2.691295728960173, "grad_norm": 0.2994115352630615, "learning_rate": 5.2019585140180295e-06, "loss": 0.4255, "step": 4978 }, { "epoch": 2.6918363669129572, "grad_norm": 0.2791630029678345, "learning_rate": 5.200072064046724e-06, "loss": 0.3844, "step": 4979 }, { "epoch": 2.6923770048657416, "grad_norm": 0.28842294216156006, "learning_rate": 5.1981855855495035e-06, "loss": 0.3967, "step": 4980 }, { "epoch": 2.692917642818526, "grad_norm": 0.31106212735176086, "learning_rate": 5.1962990787953436e-06, "loss": 0.3962, "step": 4981 }, { "epoch": 2.69345828077131, "grad_norm": 0.30779221653938293, "learning_rate": 5.194412544053217e-06, "loss": 0.4067, "step": 4982 }, { "epoch": 2.6939989187240947, "grad_norm": 0.31400802731513977, "learning_rate": 5.192525981592101e-06, "loss": 0.4035, "step": 4983 }, { "epoch": 2.6945395566768786, "grad_norm": 0.3023657202720642, "learning_rate": 5.190639391680981e-06, "loss": 0.4192, "step": 4984 }, { "epoch": 2.695080194629663, "grad_norm": 0.27142763137817383, "learning_rate": 5.188752774588841e-06, "loss": 0.3672, "step": 4985 }, { "epoch": 2.6956208325824473, "grad_norm": 0.33573058247566223, "learning_rate": 5.186866130584674e-06, "loss": 0.421, "step": 4986 }, { "epoch": 2.6961614705352317, "grad_norm": 0.3077828288078308, "learning_rate": 5.184979459937471e-06, "loss": 0.4268, "step": 4987 }, { "epoch": 2.696702108488016, "grad_norm": 0.28344956040382385, "learning_rate": 5.183092762916234e-06, "loss": 0.4138, "step": 4988 }, { "epoch": 2.6972427464408, "grad_norm": 0.2885357737541199, "learning_rate": 5.1812060397899624e-06, "loss": 0.4027, "step": 4989 }, { "epoch": 2.6977833843935843, "grad_norm": 0.2987276315689087, "learning_rate": 5.179319290827661e-06, "loss": 0.3597, "step": 4990 }, { "epoch": 2.6983240223463687, "grad_norm": 0.30471739172935486, "learning_rate": 5.177432516298341e-06, "loss": 0.4168, "step": 4991 }, { "epoch": 2.698864660299153, "grad_norm": 0.3163413405418396, "learning_rate": 5.175545716471014e-06, "loss": 0.4396, "step": 4992 }, { "epoch": 2.6994052982519374, "grad_norm": 0.28372013568878174, "learning_rate": 5.173658891614699e-06, "loss": 0.3795, "step": 4993 }, { "epoch": 2.6999459362047213, "grad_norm": 0.2838355004787445, "learning_rate": 5.171772041998412e-06, "loss": 0.3829, "step": 4994 }, { "epoch": 2.7004865741575057, "grad_norm": 0.3122652471065521, "learning_rate": 5.16988516789118e-06, "loss": 0.3888, "step": 4995 }, { "epoch": 2.70102721211029, "grad_norm": 0.32194578647613525, "learning_rate": 5.167998269562028e-06, "loss": 0.4292, "step": 4996 }, { "epoch": 2.7015678500630744, "grad_norm": 0.27087098360061646, "learning_rate": 5.166111347279987e-06, "loss": 0.3972, "step": 4997 }, { "epoch": 2.702108488015859, "grad_norm": 0.2952033579349518, "learning_rate": 5.164224401314092e-06, "loss": 0.3987, "step": 4998 }, { "epoch": 2.702649125968643, "grad_norm": 0.3722441494464874, "learning_rate": 5.16233743193338e-06, "loss": 0.4366, "step": 4999 }, { "epoch": 2.7031897639214275, "grad_norm": 0.27760955691337585, "learning_rate": 5.16045043940689e-06, "loss": 0.4061, "step": 5000 }, { "epoch": 2.7037304018742114, "grad_norm": 0.29309314489364624, "learning_rate": 5.158563424003669e-06, "loss": 0.4173, "step": 5001 }, { "epoch": 2.704271039826996, "grad_norm": 0.30780085921287537, "learning_rate": 5.15667638599276e-06, "loss": 0.4378, "step": 5002 }, { "epoch": 2.70481167777978, "grad_norm": 0.2956397533416748, "learning_rate": 5.154789325643218e-06, "loss": 0.4055, "step": 5003 }, { "epoch": 2.7053523157325645, "grad_norm": 0.3192119598388672, "learning_rate": 5.152902243224093e-06, "loss": 0.4091, "step": 5004 }, { "epoch": 2.705892953685349, "grad_norm": 0.28897231817245483, "learning_rate": 5.151015139004445e-06, "loss": 0.3974, "step": 5005 }, { "epoch": 2.706433591638133, "grad_norm": 0.34239473938941956, "learning_rate": 5.149128013253332e-06, "loss": 0.4338, "step": 5006 }, { "epoch": 2.706974229590917, "grad_norm": 0.29271113872528076, "learning_rate": 5.147240866239817e-06, "loss": 0.3649, "step": 5007 }, { "epoch": 2.7075148675437015, "grad_norm": 0.3110845685005188, "learning_rate": 5.145353698232966e-06, "loss": 0.4095, "step": 5008 }, { "epoch": 2.708055505496486, "grad_norm": 0.3172319829463959, "learning_rate": 5.143466509501849e-06, "loss": 0.3673, "step": 5009 }, { "epoch": 2.7085961434492702, "grad_norm": 0.29760801792144775, "learning_rate": 5.141579300315536e-06, "loss": 0.385, "step": 5010 }, { "epoch": 2.709136781402054, "grad_norm": 0.29533249139785767, "learning_rate": 5.139692070943104e-06, "loss": 0.368, "step": 5011 }, { "epoch": 2.709677419354839, "grad_norm": 0.3004533648490906, "learning_rate": 5.137804821653629e-06, "loss": 0.4129, "step": 5012 }, { "epoch": 2.710218057307623, "grad_norm": 0.2970389425754547, "learning_rate": 5.135917552716194e-06, "loss": 0.3935, "step": 5013 }, { "epoch": 2.7107586952604072, "grad_norm": 0.34341931343078613, "learning_rate": 5.1340302643998775e-06, "loss": 0.4217, "step": 5014 }, { "epoch": 2.7112993332131916, "grad_norm": 0.28611016273498535, "learning_rate": 5.132142956973773e-06, "loss": 0.3685, "step": 5015 }, { "epoch": 2.711839971165976, "grad_norm": 0.3188721835613251, "learning_rate": 5.130255630706962e-06, "loss": 0.3965, "step": 5016 }, { "epoch": 2.7123806091187603, "grad_norm": 0.32119816541671753, "learning_rate": 5.128368285868542e-06, "loss": 0.4312, "step": 5017 }, { "epoch": 2.7129212470715443, "grad_norm": 0.3183414340019226, "learning_rate": 5.126480922727602e-06, "loss": 0.3982, "step": 5018 }, { "epoch": 2.7134618850243286, "grad_norm": 0.2703632414340973, "learning_rate": 5.124593541553243e-06, "loss": 0.395, "step": 5019 }, { "epoch": 2.714002522977113, "grad_norm": 0.30107101798057556, "learning_rate": 5.122706142614562e-06, "loss": 0.3866, "step": 5020 }, { "epoch": 2.7145431609298973, "grad_norm": 0.35960131883621216, "learning_rate": 5.120818726180662e-06, "loss": 0.4124, "step": 5021 }, { "epoch": 2.7150837988826817, "grad_norm": 0.310278058052063, "learning_rate": 5.118931292520647e-06, "loss": 0.3942, "step": 5022 }, { "epoch": 2.7156244368354656, "grad_norm": 0.3234735429286957, "learning_rate": 5.117043841903624e-06, "loss": 0.4256, "step": 5023 }, { "epoch": 2.71616507478825, "grad_norm": 0.30350586771965027, "learning_rate": 5.115156374598703e-06, "loss": 0.3811, "step": 5024 }, { "epoch": 2.7167057127410343, "grad_norm": 0.3205268979072571, "learning_rate": 5.113268890874994e-06, "loss": 0.4017, "step": 5025 }, { "epoch": 2.7172463506938187, "grad_norm": 0.2997487783432007, "learning_rate": 5.111381391001612e-06, "loss": 0.4003, "step": 5026 }, { "epoch": 2.717786988646603, "grad_norm": 0.28808310627937317, "learning_rate": 5.109493875247672e-06, "loss": 0.3904, "step": 5027 }, { "epoch": 2.7183276265993874, "grad_norm": 0.30357658863067627, "learning_rate": 5.1076063438822965e-06, "loss": 0.3867, "step": 5028 }, { "epoch": 2.718868264552172, "grad_norm": 0.32023051381111145, "learning_rate": 5.105718797174601e-06, "loss": 0.4064, "step": 5029 }, { "epoch": 2.7194089025049557, "grad_norm": 0.2869778871536255, "learning_rate": 5.103831235393714e-06, "loss": 0.3559, "step": 5030 }, { "epoch": 2.71994954045774, "grad_norm": 0.38330546021461487, "learning_rate": 5.1019436588087555e-06, "loss": 0.4117, "step": 5031 }, { "epoch": 2.7204901784105244, "grad_norm": 0.30030086636543274, "learning_rate": 5.100056067688854e-06, "loss": 0.4111, "step": 5032 }, { "epoch": 2.721030816363309, "grad_norm": 0.29235854744911194, "learning_rate": 5.098168462303141e-06, "loss": 0.3544, "step": 5033 }, { "epoch": 2.721571454316093, "grad_norm": 0.34840068221092224, "learning_rate": 5.096280842920748e-06, "loss": 0.4158, "step": 5034 }, { "epoch": 2.722112092268877, "grad_norm": 0.34116119146347046, "learning_rate": 5.094393209810806e-06, "loss": 0.4145, "step": 5035 }, { "epoch": 2.7226527302216614, "grad_norm": 0.30736395716667175, "learning_rate": 5.092505563242451e-06, "loss": 0.4202, "step": 5036 }, { "epoch": 2.723193368174446, "grad_norm": 0.32079383730888367, "learning_rate": 5.09061790348482e-06, "loss": 0.4001, "step": 5037 }, { "epoch": 2.72373400612723, "grad_norm": 0.32415834069252014, "learning_rate": 5.088730230807054e-06, "loss": 0.4203, "step": 5038 }, { "epoch": 2.7242746440800145, "grad_norm": 0.3238414525985718, "learning_rate": 5.086842545478291e-06, "loss": 0.4165, "step": 5039 }, { "epoch": 2.7248152820327984, "grad_norm": 0.2785307765007019, "learning_rate": 5.084954847767677e-06, "loss": 0.3935, "step": 5040 }, { "epoch": 2.7253559199855832, "grad_norm": 0.34599483013153076, "learning_rate": 5.083067137944354e-06, "loss": 0.4417, "step": 5041 }, { "epoch": 2.725896557938367, "grad_norm": 0.3113754689693451, "learning_rate": 5.081179416277469e-06, "loss": 0.4182, "step": 5042 }, { "epoch": 2.7264371958911515, "grad_norm": 0.2806205451488495, "learning_rate": 5.079291683036169e-06, "loss": 0.3837, "step": 5043 }, { "epoch": 2.726977833843936, "grad_norm": 0.3233984410762787, "learning_rate": 5.077403938489607e-06, "loss": 0.4077, "step": 5044 }, { "epoch": 2.7275184717967202, "grad_norm": 0.32577744126319885, "learning_rate": 5.07551618290693e-06, "loss": 0.3978, "step": 5045 }, { "epoch": 2.7280591097495046, "grad_norm": 0.35160160064697266, "learning_rate": 5.073628416557293e-06, "loss": 0.4106, "step": 5046 }, { "epoch": 2.7285997477022885, "grad_norm": 0.34330466389656067, "learning_rate": 5.07174063970985e-06, "loss": 0.4159, "step": 5047 }, { "epoch": 2.729140385655073, "grad_norm": 0.289131224155426, "learning_rate": 5.069852852633757e-06, "loss": 0.3944, "step": 5048 }, { "epoch": 2.7296810236078572, "grad_norm": 0.28988659381866455, "learning_rate": 5.06796505559817e-06, "loss": 0.3709, "step": 5049 }, { "epoch": 2.7302216615606416, "grad_norm": 0.3208027780056, "learning_rate": 5.06607724887225e-06, "loss": 0.4192, "step": 5050 }, { "epoch": 2.730762299513426, "grad_norm": 0.3088265657424927, "learning_rate": 5.064189432725154e-06, "loss": 0.4046, "step": 5051 }, { "epoch": 2.73130293746621, "grad_norm": 0.2850970923900604, "learning_rate": 5.062301607426047e-06, "loss": 0.3722, "step": 5052 }, { "epoch": 2.7318435754189943, "grad_norm": 0.32113608717918396, "learning_rate": 5.0604137732440875e-06, "loss": 0.424, "step": 5053 }, { "epoch": 2.7323842133717786, "grad_norm": 0.29261910915374756, "learning_rate": 5.058525930448443e-06, "loss": 0.3948, "step": 5054 }, { "epoch": 2.732924851324563, "grad_norm": 0.28865715861320496, "learning_rate": 5.056638079308277e-06, "loss": 0.3758, "step": 5055 }, { "epoch": 2.7334654892773473, "grad_norm": 0.3075926601886749, "learning_rate": 5.054750220092757e-06, "loss": 0.3927, "step": 5056 }, { "epoch": 2.7340061272301317, "grad_norm": 0.30959898233413696, "learning_rate": 5.05286235307105e-06, "loss": 0.4275, "step": 5057 }, { "epoch": 2.734546765182916, "grad_norm": 0.2836802005767822, "learning_rate": 5.050974478512324e-06, "loss": 0.3876, "step": 5058 }, { "epoch": 2.7350874031357, "grad_norm": 0.2989635169506073, "learning_rate": 5.049086596685749e-06, "loss": 0.4404, "step": 5059 }, { "epoch": 2.7356280410884843, "grad_norm": 0.31573420763015747, "learning_rate": 5.047198707860496e-06, "loss": 0.3916, "step": 5060 }, { "epoch": 2.7361686790412687, "grad_norm": 0.33187058568000793, "learning_rate": 5.045310812305737e-06, "loss": 0.4139, "step": 5061 }, { "epoch": 2.736709316994053, "grad_norm": 0.3388705551624298, "learning_rate": 5.043422910290645e-06, "loss": 0.3856, "step": 5062 }, { "epoch": 2.7372499549468374, "grad_norm": 0.30032768845558167, "learning_rate": 5.041535002084394e-06, "loss": 0.3926, "step": 5063 }, { "epoch": 2.7377905928996213, "grad_norm": 0.31506726145744324, "learning_rate": 5.0396470879561564e-06, "loss": 0.4287, "step": 5064 }, { "epoch": 2.7383312308524057, "grad_norm": 0.2879992425441742, "learning_rate": 5.037759168175109e-06, "loss": 0.3782, "step": 5065 }, { "epoch": 2.73887186880519, "grad_norm": 0.2926962673664093, "learning_rate": 5.035871243010427e-06, "loss": 0.429, "step": 5066 }, { "epoch": 2.7394125067579744, "grad_norm": 0.3311244249343872, "learning_rate": 5.0339833127312885e-06, "loss": 0.4295, "step": 5067 }, { "epoch": 2.739953144710759, "grad_norm": 0.34304389357566833, "learning_rate": 5.032095377606873e-06, "loss": 0.4186, "step": 5068 }, { "epoch": 2.7404937826635427, "grad_norm": 0.27116817235946655, "learning_rate": 5.030207437906354e-06, "loss": 0.3807, "step": 5069 }, { "epoch": 2.7410344206163275, "grad_norm": 0.3297407031059265, "learning_rate": 5.028319493898916e-06, "loss": 0.4224, "step": 5070 }, { "epoch": 2.7415750585691114, "grad_norm": 0.3084923326969147, "learning_rate": 5.026431545853734e-06, "loss": 0.3962, "step": 5071 }, { "epoch": 2.742115696521896, "grad_norm": 0.29288437962532043, "learning_rate": 5.024543594039991e-06, "loss": 0.4289, "step": 5072 }, { "epoch": 2.74265633447468, "grad_norm": 0.30632689595222473, "learning_rate": 5.022655638726866e-06, "loss": 0.3883, "step": 5073 }, { "epoch": 2.7431969724274645, "grad_norm": 0.3066025972366333, "learning_rate": 5.020767680183543e-06, "loss": 0.4027, "step": 5074 }, { "epoch": 2.743737610380249, "grad_norm": 0.32585495710372925, "learning_rate": 5.018879718679199e-06, "loss": 0.4198, "step": 5075 }, { "epoch": 2.744278248333033, "grad_norm": 0.2831990718841553, "learning_rate": 5.0169917544830205e-06, "loss": 0.4104, "step": 5076 }, { "epoch": 2.744818886285817, "grad_norm": 0.29489606618881226, "learning_rate": 5.015103787864187e-06, "loss": 0.3778, "step": 5077 }, { "epoch": 2.7453595242386015, "grad_norm": 0.3362656831741333, "learning_rate": 5.013215819091886e-06, "loss": 0.438, "step": 5078 }, { "epoch": 2.745900162191386, "grad_norm": 0.27341189980506897, "learning_rate": 5.0113278484352945e-06, "loss": 0.3881, "step": 5079 }, { "epoch": 2.7464408001441702, "grad_norm": 0.30783629417419434, "learning_rate": 5.009439876163601e-06, "loss": 0.3815, "step": 5080 }, { "epoch": 2.746981438096954, "grad_norm": 0.3110550343990326, "learning_rate": 5.007551902545986e-06, "loss": 0.4104, "step": 5081 }, { "epoch": 2.7475220760497385, "grad_norm": 0.32011663913726807, "learning_rate": 5.0056639278516335e-06, "loss": 0.4023, "step": 5082 }, { "epoch": 2.748062714002523, "grad_norm": 0.29512232542037964, "learning_rate": 5.00377595234973e-06, "loss": 0.4089, "step": 5083 }, { "epoch": 2.7486033519553073, "grad_norm": 0.2946411371231079, "learning_rate": 5.0018879763094575e-06, "loss": 0.3855, "step": 5084 }, { "epoch": 2.7491439899080916, "grad_norm": 0.31604093313217163, "learning_rate": 5e-06, "loss": 0.3985, "step": 5085 }, { "epoch": 2.749684627860876, "grad_norm": 0.29789918661117554, "learning_rate": 4.998112023690543e-06, "loss": 0.4138, "step": 5086 }, { "epoch": 2.7502252658136603, "grad_norm": 0.30474555492401123, "learning_rate": 4.996224047650271e-06, "loss": 0.4167, "step": 5087 }, { "epoch": 2.7507659037664443, "grad_norm": 0.31081917881965637, "learning_rate": 4.994336072148367e-06, "loss": 0.4325, "step": 5088 }, { "epoch": 2.7513065417192286, "grad_norm": 0.2845231592655182, "learning_rate": 4.992448097454016e-06, "loss": 0.3983, "step": 5089 }, { "epoch": 2.751847179672013, "grad_norm": 0.29780399799346924, "learning_rate": 4.9905601238364006e-06, "loss": 0.4036, "step": 5090 }, { "epoch": 2.7523878176247973, "grad_norm": 0.29843631386756897, "learning_rate": 4.9886721515647055e-06, "loss": 0.3982, "step": 5091 }, { "epoch": 2.7529284555775817, "grad_norm": 0.29134443402290344, "learning_rate": 4.986784180908117e-06, "loss": 0.3975, "step": 5092 }, { "epoch": 2.7534690935303656, "grad_norm": 0.2783971130847931, "learning_rate": 4.984896212135814e-06, "loss": 0.3691, "step": 5093 }, { "epoch": 2.75400973148315, "grad_norm": 0.3527921438217163, "learning_rate": 4.983008245516981e-06, "loss": 0.4279, "step": 5094 }, { "epoch": 2.7545503694359343, "grad_norm": 0.28707146644592285, "learning_rate": 4.981120281320801e-06, "loss": 0.3835, "step": 5095 }, { "epoch": 2.7550910073887187, "grad_norm": 0.30036625266075134, "learning_rate": 4.979232319816461e-06, "loss": 0.4288, "step": 5096 }, { "epoch": 2.755631645341503, "grad_norm": 0.3112487494945526, "learning_rate": 4.977344361273135e-06, "loss": 0.399, "step": 5097 }, { "epoch": 2.756172283294287, "grad_norm": 0.31154346466064453, "learning_rate": 4.97545640596001e-06, "loss": 0.3928, "step": 5098 }, { "epoch": 2.756712921247072, "grad_norm": 0.2694031596183777, "learning_rate": 4.973568454146267e-06, "loss": 0.3935, "step": 5099 }, { "epoch": 2.7572535591998557, "grad_norm": 0.3083007037639618, "learning_rate": 4.971680506101086e-06, "loss": 0.4266, "step": 5100 }, { "epoch": 2.75779419715264, "grad_norm": 0.307720810174942, "learning_rate": 4.9697925620936464e-06, "loss": 0.4154, "step": 5101 }, { "epoch": 2.7583348351054244, "grad_norm": 0.3052947521209717, "learning_rate": 4.967904622393128e-06, "loss": 0.3713, "step": 5102 }, { "epoch": 2.758875473058209, "grad_norm": 0.29163241386413574, "learning_rate": 4.966016687268711e-06, "loss": 0.3715, "step": 5103 }, { "epoch": 2.759416111010993, "grad_norm": 0.30811169743537903, "learning_rate": 4.964128756989575e-06, "loss": 0.45, "step": 5104 }, { "epoch": 2.759956748963777, "grad_norm": 0.31915563344955444, "learning_rate": 4.9622408318248925e-06, "loss": 0.3916, "step": 5105 }, { "epoch": 2.7604973869165614, "grad_norm": 0.34357088804244995, "learning_rate": 4.960352912043845e-06, "loss": 0.4074, "step": 5106 }, { "epoch": 2.761038024869346, "grad_norm": 0.28964367508888245, "learning_rate": 4.958464997915607e-06, "loss": 0.4198, "step": 5107 }, { "epoch": 2.76157866282213, "grad_norm": 0.28185534477233887, "learning_rate": 4.9565770897093565e-06, "loss": 0.3914, "step": 5108 }, { "epoch": 2.7621193007749145, "grad_norm": 0.3085354268550873, "learning_rate": 4.954689187694265e-06, "loss": 0.3799, "step": 5109 }, { "epoch": 2.7626599387276984, "grad_norm": 0.3020288050174713, "learning_rate": 4.952801292139505e-06, "loss": 0.4126, "step": 5110 }, { "epoch": 2.7632005766804832, "grad_norm": 0.2997831404209137, "learning_rate": 4.9509134033142525e-06, "loss": 0.4141, "step": 5111 }, { "epoch": 2.763741214633267, "grad_norm": 0.2812916934490204, "learning_rate": 4.9490255214876785e-06, "loss": 0.4075, "step": 5112 }, { "epoch": 2.7642818525860515, "grad_norm": 0.3080768883228302, "learning_rate": 4.947137646928952e-06, "loss": 0.4211, "step": 5113 }, { "epoch": 2.764822490538836, "grad_norm": 0.3185937702655792, "learning_rate": 4.945249779907244e-06, "loss": 0.443, "step": 5114 }, { "epoch": 2.7653631284916202, "grad_norm": 0.2650339603424072, "learning_rate": 4.9433619206917234e-06, "loss": 0.354, "step": 5115 }, { "epoch": 2.7659037664444046, "grad_norm": 0.30960190296173096, "learning_rate": 4.941474069551559e-06, "loss": 0.4216, "step": 5116 }, { "epoch": 2.7664444043971885, "grad_norm": 0.3096233904361725, "learning_rate": 4.939586226755913e-06, "loss": 0.4039, "step": 5117 }, { "epoch": 2.766985042349973, "grad_norm": 0.28972771763801575, "learning_rate": 4.937698392573955e-06, "loss": 0.39, "step": 5118 }, { "epoch": 2.7675256803027573, "grad_norm": 0.3311610519886017, "learning_rate": 4.935810567274846e-06, "loss": 0.4151, "step": 5119 }, { "epoch": 2.7680663182555416, "grad_norm": 0.29001107811927795, "learning_rate": 4.933922751127753e-06, "loss": 0.395, "step": 5120 }, { "epoch": 2.768606956208326, "grad_norm": 0.3485519289970398, "learning_rate": 4.932034944401832e-06, "loss": 0.4181, "step": 5121 }, { "epoch": 2.76914759416111, "grad_norm": 0.2745487093925476, "learning_rate": 4.930147147366245e-06, "loss": 0.3793, "step": 5122 }, { "epoch": 2.7696882321138943, "grad_norm": 0.35208848118782043, "learning_rate": 4.928259360290151e-06, "loss": 0.4083, "step": 5123 }, { "epoch": 2.7702288700666786, "grad_norm": 0.3201996088027954, "learning_rate": 4.926371583442709e-06, "loss": 0.3866, "step": 5124 }, { "epoch": 2.770769508019463, "grad_norm": 0.31646934151649475, "learning_rate": 4.924483817093071e-06, "loss": 0.4419, "step": 5125 }, { "epoch": 2.7713101459722473, "grad_norm": 0.2841111719608307, "learning_rate": 4.922596061510394e-06, "loss": 0.3893, "step": 5126 }, { "epoch": 2.7718507839250317, "grad_norm": 0.33771705627441406, "learning_rate": 4.920708316963831e-06, "loss": 0.419, "step": 5127 }, { "epoch": 2.772391421877816, "grad_norm": 0.30035245418548584, "learning_rate": 4.918820583722533e-06, "loss": 0.3993, "step": 5128 }, { "epoch": 2.7729320598306, "grad_norm": 0.29486024379730225, "learning_rate": 4.916932862055648e-06, "loss": 0.4014, "step": 5129 }, { "epoch": 2.7734726977833843, "grad_norm": 0.31270942091941833, "learning_rate": 4.915045152232324e-06, "loss": 0.3945, "step": 5130 }, { "epoch": 2.7740133357361687, "grad_norm": 0.30171117186546326, "learning_rate": 4.9131574545217095e-06, "loss": 0.3831, "step": 5131 }, { "epoch": 2.774553973688953, "grad_norm": 0.31151530146598816, "learning_rate": 4.911269769192949e-06, "loss": 0.4271, "step": 5132 }, { "epoch": 2.7750946116417374, "grad_norm": 0.33287256956100464, "learning_rate": 4.909382096515182e-06, "loss": 0.4108, "step": 5133 }, { "epoch": 2.7756352495945213, "grad_norm": 0.280910849571228, "learning_rate": 4.90749443675755e-06, "loss": 0.3765, "step": 5134 }, { "epoch": 2.7761758875473057, "grad_norm": 0.2836659252643585, "learning_rate": 4.9056067901891945e-06, "loss": 0.3699, "step": 5135 }, { "epoch": 2.77671652550009, "grad_norm": 0.32560449838638306, "learning_rate": 4.903719157079254e-06, "loss": 0.4183, "step": 5136 }, { "epoch": 2.7772571634528744, "grad_norm": 0.3005795180797577, "learning_rate": 4.90183153769686e-06, "loss": 0.3789, "step": 5137 }, { "epoch": 2.777797801405659, "grad_norm": 0.30739063024520874, "learning_rate": 4.8999439323111465e-06, "loss": 0.4145, "step": 5138 }, { "epoch": 2.7783384393584427, "grad_norm": 0.26225677132606506, "learning_rate": 4.898056341191246e-06, "loss": 0.3773, "step": 5139 }, { "epoch": 2.7788790773112275, "grad_norm": 0.32766658067703247, "learning_rate": 4.896168764606289e-06, "loss": 0.4375, "step": 5140 }, { "epoch": 2.7794197152640114, "grad_norm": 0.29584774374961853, "learning_rate": 4.8942812028254e-06, "loss": 0.3759, "step": 5141 }, { "epoch": 2.779960353216796, "grad_norm": 0.2836137115955353, "learning_rate": 4.892393656117705e-06, "loss": 0.3962, "step": 5142 }, { "epoch": 2.78050099116958, "grad_norm": 0.2864339053630829, "learning_rate": 4.890506124752328e-06, "loss": 0.391, "step": 5143 }, { "epoch": 2.7810416291223645, "grad_norm": 0.3437691628932953, "learning_rate": 4.88861860899839e-06, "loss": 0.39, "step": 5144 }, { "epoch": 2.781582267075149, "grad_norm": 0.30174973607063293, "learning_rate": 4.886731109125007e-06, "loss": 0.3956, "step": 5145 }, { "epoch": 2.782122905027933, "grad_norm": 0.30526629090309143, "learning_rate": 4.884843625401298e-06, "loss": 0.4309, "step": 5146 }, { "epoch": 2.782663542980717, "grad_norm": 0.2743600308895111, "learning_rate": 4.882956158096376e-06, "loss": 0.3689, "step": 5147 }, { "epoch": 2.7832041809335015, "grad_norm": 0.35046494007110596, "learning_rate": 4.881068707479355e-06, "loss": 0.4318, "step": 5148 }, { "epoch": 2.783744818886286, "grad_norm": 0.2684626579284668, "learning_rate": 4.87918127381934e-06, "loss": 0.372, "step": 5149 }, { "epoch": 2.7842854568390702, "grad_norm": 0.28792130947113037, "learning_rate": 4.87729385738544e-06, "loss": 0.3874, "step": 5150 }, { "epoch": 2.784826094791854, "grad_norm": 0.3143690824508667, "learning_rate": 4.8754064584467585e-06, "loss": 0.4189, "step": 5151 }, { "epoch": 2.7853667327446385, "grad_norm": 0.2900733947753906, "learning_rate": 4.873519077272398e-06, "loss": 0.3864, "step": 5152 }, { "epoch": 2.785907370697423, "grad_norm": 0.3068050742149353, "learning_rate": 4.871631714131461e-06, "loss": 0.425, "step": 5153 }, { "epoch": 2.7864480086502073, "grad_norm": 0.29391971230506897, "learning_rate": 4.869744369293039e-06, "loss": 0.3719, "step": 5154 }, { "epoch": 2.7869886466029916, "grad_norm": 0.30774518847465515, "learning_rate": 4.867857043026229e-06, "loss": 0.4271, "step": 5155 }, { "epoch": 2.787529284555776, "grad_norm": 0.32500559091567993, "learning_rate": 4.8659697356001225e-06, "loss": 0.4138, "step": 5156 }, { "epoch": 2.7880699225085603, "grad_norm": 0.2907654941082001, "learning_rate": 4.864082447283809e-06, "loss": 0.3994, "step": 5157 }, { "epoch": 2.7886105604613443, "grad_norm": 0.2912384867668152, "learning_rate": 4.862195178346372e-06, "loss": 0.3792, "step": 5158 }, { "epoch": 2.7891511984141286, "grad_norm": 0.27517130970954895, "learning_rate": 4.860307929056897e-06, "loss": 0.39, "step": 5159 }, { "epoch": 2.789691836366913, "grad_norm": 0.2983931601047516, "learning_rate": 4.858420699684464e-06, "loss": 0.3849, "step": 5160 }, { "epoch": 2.7902324743196973, "grad_norm": 0.3149608075618744, "learning_rate": 4.856533490498155e-06, "loss": 0.4299, "step": 5161 }, { "epoch": 2.7907731122724817, "grad_norm": 0.2737068235874176, "learning_rate": 4.854646301767037e-06, "loss": 0.3624, "step": 5162 }, { "epoch": 2.7913137502252656, "grad_norm": 0.32693517208099365, "learning_rate": 4.852759133760184e-06, "loss": 0.3982, "step": 5163 }, { "epoch": 2.79185438817805, "grad_norm": 0.2964058220386505, "learning_rate": 4.850871986746668e-06, "loss": 0.3978, "step": 5164 }, { "epoch": 2.7923950261308343, "grad_norm": 0.2971150577068329, "learning_rate": 4.848984860995557e-06, "loss": 0.3665, "step": 5165 }, { "epoch": 2.7929356640836187, "grad_norm": 0.31982165575027466, "learning_rate": 4.847097756775908e-06, "loss": 0.388, "step": 5166 }, { "epoch": 2.793476302036403, "grad_norm": 0.2987110912799835, "learning_rate": 4.845210674356784e-06, "loss": 0.4078, "step": 5167 }, { "epoch": 2.794016939989187, "grad_norm": 0.3030100166797638, "learning_rate": 4.843323614007241e-06, "loss": 0.4205, "step": 5168 }, { "epoch": 2.794557577941972, "grad_norm": 0.301750123500824, "learning_rate": 4.841436575996334e-06, "loss": 0.4118, "step": 5169 }, { "epoch": 2.7950982158947557, "grad_norm": 0.29823026061058044, "learning_rate": 4.839549560593111e-06, "loss": 0.4, "step": 5170 }, { "epoch": 2.79563885384754, "grad_norm": 0.2757793962955475, "learning_rate": 4.837662568066622e-06, "loss": 0.39, "step": 5171 }, { "epoch": 2.7961794918003244, "grad_norm": 0.2817678153514862, "learning_rate": 4.835775598685909e-06, "loss": 0.3878, "step": 5172 }, { "epoch": 2.796720129753109, "grad_norm": 0.30766770243644714, "learning_rate": 4.833888652720015e-06, "loss": 0.4055, "step": 5173 }, { "epoch": 2.797260767705893, "grad_norm": 0.3036001920700073, "learning_rate": 4.832001730437973e-06, "loss": 0.428, "step": 5174 }, { "epoch": 2.797801405658677, "grad_norm": 0.2731790840625763, "learning_rate": 4.830114832108822e-06, "loss": 0.4067, "step": 5175 }, { "epoch": 2.7983420436114614, "grad_norm": 0.34506237506866455, "learning_rate": 4.828227958001589e-06, "loss": 0.4352, "step": 5176 }, { "epoch": 2.798882681564246, "grad_norm": 0.2925313413143158, "learning_rate": 4.826341108385304e-06, "loss": 0.3855, "step": 5177 }, { "epoch": 2.79942331951703, "grad_norm": 0.2907009720802307, "learning_rate": 4.824454283528987e-06, "loss": 0.4019, "step": 5178 }, { "epoch": 2.7999639574698145, "grad_norm": 0.3044201731681824, "learning_rate": 4.82256748370166e-06, "loss": 0.4271, "step": 5179 }, { "epoch": 2.8005045954225984, "grad_norm": 0.3054400384426117, "learning_rate": 4.82068070917234e-06, "loss": 0.4082, "step": 5180 }, { "epoch": 2.801045233375383, "grad_norm": 0.277808278799057, "learning_rate": 4.81879396021004e-06, "loss": 0.3911, "step": 5181 }, { "epoch": 2.801585871328167, "grad_norm": 0.29107189178466797, "learning_rate": 4.816907237083768e-06, "loss": 0.3831, "step": 5182 }, { "epoch": 2.8021265092809515, "grad_norm": 0.29215705394744873, "learning_rate": 4.81502054006253e-06, "loss": 0.3933, "step": 5183 }, { "epoch": 2.802667147233736, "grad_norm": 0.32308441400527954, "learning_rate": 4.813133869415327e-06, "loss": 0.3916, "step": 5184 }, { "epoch": 2.8032077851865203, "grad_norm": 0.3158899247646332, "learning_rate": 4.81124722541116e-06, "loss": 0.4313, "step": 5185 }, { "epoch": 2.8037484231393046, "grad_norm": 0.3029237687587738, "learning_rate": 4.80936060831902e-06, "loss": 0.423, "step": 5186 }, { "epoch": 2.8042890610920885, "grad_norm": 0.2986266613006592, "learning_rate": 4.807474018407899e-06, "loss": 0.374, "step": 5187 }, { "epoch": 2.804829699044873, "grad_norm": 0.3070341944694519, "learning_rate": 4.805587455946784e-06, "loss": 0.4146, "step": 5188 }, { "epoch": 2.8053703369976573, "grad_norm": 0.2837662696838379, "learning_rate": 4.803700921204659e-06, "loss": 0.3944, "step": 5189 }, { "epoch": 2.8059109749504416, "grad_norm": 0.3108964264392853, "learning_rate": 4.801814414450498e-06, "loss": 0.3902, "step": 5190 }, { "epoch": 2.806451612903226, "grad_norm": 0.2998983860015869, "learning_rate": 4.799927935953278e-06, "loss": 0.4153, "step": 5191 }, { "epoch": 2.80699225085601, "grad_norm": 0.31420305371284485, "learning_rate": 4.7980414859819705e-06, "loss": 0.4208, "step": 5192 }, { "epoch": 2.8075328888087943, "grad_norm": 0.26373064517974854, "learning_rate": 4.796155064805544e-06, "loss": 0.3538, "step": 5193 }, { "epoch": 2.8080735267615786, "grad_norm": 0.29200348258018494, "learning_rate": 4.7942686726929575e-06, "loss": 0.4091, "step": 5194 }, { "epoch": 2.808614164714363, "grad_norm": 0.3131158947944641, "learning_rate": 4.7923823099131694e-06, "loss": 0.4298, "step": 5195 }, { "epoch": 2.8091548026671473, "grad_norm": 0.29583874344825745, "learning_rate": 4.790495976735136e-06, "loss": 0.4342, "step": 5196 }, { "epoch": 2.8096954406199313, "grad_norm": 0.25764796137809753, "learning_rate": 4.788609673427807e-06, "loss": 0.3699, "step": 5197 }, { "epoch": 2.810236078572716, "grad_norm": 0.31256401538848877, "learning_rate": 4.786723400260127e-06, "loss": 0.4407, "step": 5198 }, { "epoch": 2.8107767165255, "grad_norm": 0.30436599254608154, "learning_rate": 4.784837157501037e-06, "loss": 0.4169, "step": 5199 }, { "epoch": 2.8113173544782843, "grad_norm": 0.2923033833503723, "learning_rate": 4.782950945419475e-06, "loss": 0.3958, "step": 5200 }, { "epoch": 2.8118579924310687, "grad_norm": 0.28773999214172363, "learning_rate": 4.781064764284376e-06, "loss": 0.4236, "step": 5201 }, { "epoch": 2.812398630383853, "grad_norm": 0.282601535320282, "learning_rate": 4.779178614364664e-06, "loss": 0.3776, "step": 5202 }, { "epoch": 2.8129392683366374, "grad_norm": 0.3376825749874115, "learning_rate": 4.777292495929264e-06, "loss": 0.462, "step": 5203 }, { "epoch": 2.8134799062894214, "grad_norm": 0.27360770106315613, "learning_rate": 4.775406409247097e-06, "loss": 0.381, "step": 5204 }, { "epoch": 2.8140205442422057, "grad_norm": 0.29848596453666687, "learning_rate": 4.7735203545870794e-06, "loss": 0.4162, "step": 5205 }, { "epoch": 2.81456118219499, "grad_norm": 0.31024810671806335, "learning_rate": 4.771634332218117e-06, "loss": 0.3737, "step": 5206 }, { "epoch": 2.8151018201477744, "grad_norm": 0.3096352517604828, "learning_rate": 4.7697483424091166e-06, "loss": 0.4111, "step": 5207 }, { "epoch": 2.815642458100559, "grad_norm": 0.29883527755737305, "learning_rate": 4.767862385428981e-06, "loss": 0.4147, "step": 5208 }, { "epoch": 2.8161830960533427, "grad_norm": 0.31646376848220825, "learning_rate": 4.765976461546606e-06, "loss": 0.4131, "step": 5209 }, { "epoch": 2.816723734006127, "grad_norm": 0.307599276304245, "learning_rate": 4.764090571030882e-06, "loss": 0.3846, "step": 5210 }, { "epoch": 2.8172643719589114, "grad_norm": 0.2796662747859955, "learning_rate": 4.762204714150696e-06, "loss": 0.39, "step": 5211 }, { "epoch": 2.817805009911696, "grad_norm": 0.2873065173625946, "learning_rate": 4.760318891174932e-06, "loss": 0.407, "step": 5212 }, { "epoch": 2.81834564786448, "grad_norm": 0.29299312829971313, "learning_rate": 4.758433102372466e-06, "loss": 0.4046, "step": 5213 }, { "epoch": 2.8188862858172645, "grad_norm": 0.331016480922699, "learning_rate": 4.75654734801217e-06, "loss": 0.4342, "step": 5214 }, { "epoch": 2.819426923770049, "grad_norm": 0.27721527218818665, "learning_rate": 4.75466162836291e-06, "loss": 0.3645, "step": 5215 }, { "epoch": 2.819967561722833, "grad_norm": 0.29178667068481445, "learning_rate": 4.7527759436935516e-06, "loss": 0.3988, "step": 5216 }, { "epoch": 2.820508199675617, "grad_norm": 0.34997060894966125, "learning_rate": 4.750890294272951e-06, "loss": 0.4273, "step": 5217 }, { "epoch": 2.8210488376284015, "grad_norm": 0.2890905439853668, "learning_rate": 4.749004680369963e-06, "loss": 0.3943, "step": 5218 }, { "epoch": 2.821589475581186, "grad_norm": 0.3408333659172058, "learning_rate": 4.747119102253429e-06, "loss": 0.4074, "step": 5219 }, { "epoch": 2.8221301135339703, "grad_norm": 0.33997246623039246, "learning_rate": 4.745233560192195e-06, "loss": 0.4061, "step": 5220 }, { "epoch": 2.822670751486754, "grad_norm": 0.3487636148929596, "learning_rate": 4.743348054455099e-06, "loss": 0.4412, "step": 5221 }, { "epoch": 2.8232113894395385, "grad_norm": 0.2814512550830841, "learning_rate": 4.741462585310973e-06, "loss": 0.3424, "step": 5222 }, { "epoch": 2.823752027392323, "grad_norm": 0.33952921628952026, "learning_rate": 4.739577153028642e-06, "loss": 0.4312, "step": 5223 }, { "epoch": 2.8242926653451073, "grad_norm": 0.2770572602748871, "learning_rate": 4.737691757876928e-06, "loss": 0.3893, "step": 5224 }, { "epoch": 2.8248333032978916, "grad_norm": 0.30399399995803833, "learning_rate": 4.735806400124648e-06, "loss": 0.3823, "step": 5225 }, { "epoch": 2.8253739412506755, "grad_norm": 0.3472537696361542, "learning_rate": 4.733921080040613e-06, "loss": 0.4421, "step": 5226 }, { "epoch": 2.8259145792034603, "grad_norm": 0.3420334756374359, "learning_rate": 4.7320357978936264e-06, "loss": 0.4464, "step": 5227 }, { "epoch": 2.8264552171562443, "grad_norm": 0.2712843418121338, "learning_rate": 4.730150553952491e-06, "loss": 0.3556, "step": 5228 }, { "epoch": 2.8269958551090286, "grad_norm": 0.3301163613796234, "learning_rate": 4.728265348486e-06, "loss": 0.4097, "step": 5229 }, { "epoch": 2.827536493061813, "grad_norm": 0.2986605167388916, "learning_rate": 4.726380181762943e-06, "loss": 0.4089, "step": 5230 }, { "epoch": 2.8280771310145973, "grad_norm": 0.28485170006752014, "learning_rate": 4.724495054052104e-06, "loss": 0.3546, "step": 5231 }, { "epoch": 2.8286177689673817, "grad_norm": 0.3346530497074127, "learning_rate": 4.72260996562226e-06, "loss": 0.3961, "step": 5232 }, { "epoch": 2.8291584069201656, "grad_norm": 0.39170563220977783, "learning_rate": 4.720724916742184e-06, "loss": 0.4424, "step": 5233 }, { "epoch": 2.82969904487295, "grad_norm": 0.26930177211761475, "learning_rate": 4.718839907680646e-06, "loss": 0.3744, "step": 5234 }, { "epoch": 2.8302396828257343, "grad_norm": 0.3284896910190582, "learning_rate": 4.716954938706401e-06, "loss": 0.4225, "step": 5235 }, { "epoch": 2.8307803207785187, "grad_norm": 0.3415636122226715, "learning_rate": 4.715070010088208e-06, "loss": 0.4, "step": 5236 }, { "epoch": 2.831320958731303, "grad_norm": 0.32044726610183716, "learning_rate": 4.713185122094816e-06, "loss": 0.3905, "step": 5237 }, { "epoch": 2.831861596684087, "grad_norm": 0.3255140781402588, "learning_rate": 4.711300274994971e-06, "loss": 0.4328, "step": 5238 }, { "epoch": 2.8324022346368714, "grad_norm": 0.31972357630729675, "learning_rate": 4.709415469057408e-06, "loss": 0.364, "step": 5239 }, { "epoch": 2.8329428725896557, "grad_norm": 0.33498212695121765, "learning_rate": 4.707530704550861e-06, "loss": 0.4336, "step": 5240 }, { "epoch": 2.83348351054244, "grad_norm": 0.34578371047973633, "learning_rate": 4.705645981744055e-06, "loss": 0.4, "step": 5241 }, { "epoch": 2.8340241484952244, "grad_norm": 0.327239990234375, "learning_rate": 4.703761300905712e-06, "loss": 0.3677, "step": 5242 }, { "epoch": 2.834564786448009, "grad_norm": 0.3138897716999054, "learning_rate": 4.701876662304546e-06, "loss": 0.4205, "step": 5243 }, { "epoch": 2.835105424400793, "grad_norm": 0.31926262378692627, "learning_rate": 4.699992066209264e-06, "loss": 0.4351, "step": 5244 }, { "epoch": 2.835646062353577, "grad_norm": 0.35049229860305786, "learning_rate": 4.69810751288857e-06, "loss": 0.3678, "step": 5245 }, { "epoch": 2.8361867003063614, "grad_norm": 0.3599819839000702, "learning_rate": 4.696223002611161e-06, "loss": 0.4248, "step": 5246 }, { "epoch": 2.836727338259146, "grad_norm": 0.28022849559783936, "learning_rate": 4.6943385356457235e-06, "loss": 0.3645, "step": 5247 }, { "epoch": 2.83726797621193, "grad_norm": 0.33059850335121155, "learning_rate": 4.692454112260943e-06, "loss": 0.4147, "step": 5248 }, { "epoch": 2.8378086141647145, "grad_norm": 0.30869242548942566, "learning_rate": 4.690569732725497e-06, "loss": 0.3879, "step": 5249 }, { "epoch": 2.8383492521174984, "grad_norm": 0.3129158318042755, "learning_rate": 4.688685397308061e-06, "loss": 0.3778, "step": 5250 }, { "epoch": 2.838889890070283, "grad_norm": 0.3148750960826874, "learning_rate": 4.686801106277293e-06, "loss": 0.4033, "step": 5251 }, { "epoch": 2.839430528023067, "grad_norm": 0.35224729776382446, "learning_rate": 4.684916859901856e-06, "loss": 0.4547, "step": 5252 }, { "epoch": 2.8399711659758515, "grad_norm": 0.3005257546901703, "learning_rate": 4.6830326584504026e-06, "loss": 0.4178, "step": 5253 }, { "epoch": 2.840511803928636, "grad_norm": 0.29805541038513184, "learning_rate": 4.6811485021915784e-06, "loss": 0.3445, "step": 5254 }, { "epoch": 2.84105244188142, "grad_norm": 0.3516843020915985, "learning_rate": 4.679264391394022e-06, "loss": 0.3812, "step": 5255 }, { "epoch": 2.8415930798342046, "grad_norm": 0.36728399991989136, "learning_rate": 4.677380326326367e-06, "loss": 0.4471, "step": 5256 }, { "epoch": 2.8421337177869885, "grad_norm": 0.3440658152103424, "learning_rate": 4.67549630725724e-06, "loss": 0.3866, "step": 5257 }, { "epoch": 2.842674355739773, "grad_norm": 0.3516998887062073, "learning_rate": 4.673612334455264e-06, "loss": 0.4219, "step": 5258 }, { "epoch": 2.8432149936925573, "grad_norm": 0.4027232825756073, "learning_rate": 4.671728408189046e-06, "loss": 0.391, "step": 5259 }, { "epoch": 2.8437556316453416, "grad_norm": 0.33593621850013733, "learning_rate": 4.669844528727197e-06, "loss": 0.4254, "step": 5260 }, { "epoch": 2.844296269598126, "grad_norm": 0.2753894031047821, "learning_rate": 4.6679606963383166e-06, "loss": 0.3736, "step": 5261 }, { "epoch": 2.84483690755091, "grad_norm": 0.3739554286003113, "learning_rate": 4.666076911291001e-06, "loss": 0.4039, "step": 5262 }, { "epoch": 2.8453775455036943, "grad_norm": 0.35964787006378174, "learning_rate": 4.66419317385383e-06, "loss": 0.4048, "step": 5263 }, { "epoch": 2.8459181834564786, "grad_norm": 0.2805907130241394, "learning_rate": 4.662309484295389e-06, "loss": 0.3859, "step": 5264 }, { "epoch": 2.846458821409263, "grad_norm": 0.28541240096092224, "learning_rate": 4.660425842884249e-06, "loss": 0.4036, "step": 5265 }, { "epoch": 2.8469994593620473, "grad_norm": 0.37052708864212036, "learning_rate": 4.658542249888978e-06, "loss": 0.415, "step": 5266 }, { "epoch": 2.8475400973148313, "grad_norm": 0.31775808334350586, "learning_rate": 4.6566587055781324e-06, "loss": 0.3948, "step": 5267 }, { "epoch": 2.8480807352676156, "grad_norm": 0.32983338832855225, "learning_rate": 4.654775210220266e-06, "loss": 0.4242, "step": 5268 }, { "epoch": 2.8486213732204, "grad_norm": 0.28167930245399475, "learning_rate": 4.652891764083924e-06, "loss": 0.3789, "step": 5269 }, { "epoch": 2.8491620111731844, "grad_norm": 0.2902034521102905, "learning_rate": 4.651008367437646e-06, "loss": 0.4115, "step": 5270 }, { "epoch": 2.8497026491259687, "grad_norm": 0.3520466983318329, "learning_rate": 4.649125020549962e-06, "loss": 0.3903, "step": 5271 }, { "epoch": 2.850243287078753, "grad_norm": 0.3077685534954071, "learning_rate": 4.647241723689396e-06, "loss": 0.4187, "step": 5272 }, { "epoch": 2.8507839250315374, "grad_norm": 0.3050772249698639, "learning_rate": 4.645358477124465e-06, "loss": 0.3745, "step": 5273 }, { "epoch": 2.8513245629843214, "grad_norm": 0.3538419008255005, "learning_rate": 4.643475281123683e-06, "loss": 0.4066, "step": 5274 }, { "epoch": 2.8518652009371057, "grad_norm": 0.31801462173461914, "learning_rate": 4.641592135955545e-06, "loss": 0.4074, "step": 5275 }, { "epoch": 2.85240583888989, "grad_norm": 0.2762812077999115, "learning_rate": 4.639709041888552e-06, "loss": 0.4242, "step": 5276 }, { "epoch": 2.8529464768426744, "grad_norm": 0.30232739448547363, "learning_rate": 4.637825999191189e-06, "loss": 0.3955, "step": 5277 }, { "epoch": 2.853487114795459, "grad_norm": 0.34319016337394714, "learning_rate": 4.63594300813194e-06, "loss": 0.4274, "step": 5278 }, { "epoch": 2.8540277527482427, "grad_norm": 0.3106715679168701, "learning_rate": 4.634060068979276e-06, "loss": 0.3634, "step": 5279 }, { "epoch": 2.854568390701027, "grad_norm": 0.30517441034317017, "learning_rate": 4.6321771820016635e-06, "loss": 0.4139, "step": 5280 }, { "epoch": 2.8551090286538114, "grad_norm": 0.2777802050113678, "learning_rate": 4.6302943474675625e-06, "loss": 0.3912, "step": 5281 }, { "epoch": 2.855649666606596, "grad_norm": 0.33198273181915283, "learning_rate": 4.628411565645422e-06, "loss": 0.4017, "step": 5282 }, { "epoch": 2.85619030455938, "grad_norm": 0.29595592617988586, "learning_rate": 4.626528836803688e-06, "loss": 0.4181, "step": 5283 }, { "epoch": 2.856730942512164, "grad_norm": 0.3072974383831024, "learning_rate": 4.624646161210795e-06, "loss": 0.3844, "step": 5284 }, { "epoch": 2.857271580464949, "grad_norm": 0.32644233107566833, "learning_rate": 4.62276353913517e-06, "loss": 0.4185, "step": 5285 }, { "epoch": 2.857812218417733, "grad_norm": 0.3057146370410919, "learning_rate": 4.6208809708452375e-06, "loss": 0.3726, "step": 5286 }, { "epoch": 2.858352856370517, "grad_norm": 0.29417580366134644, "learning_rate": 4.61899845660941e-06, "loss": 0.3738, "step": 5287 }, { "epoch": 2.8588934943233015, "grad_norm": 0.315688818693161, "learning_rate": 4.6171159966960885e-06, "loss": 0.392, "step": 5288 }, { "epoch": 2.859434132276086, "grad_norm": 0.3033626675605774, "learning_rate": 4.615233591373676e-06, "loss": 0.377, "step": 5289 }, { "epoch": 2.8599747702288703, "grad_norm": 0.33641305565834045, "learning_rate": 4.6133512409105595e-06, "loss": 0.4405, "step": 5290 }, { "epoch": 2.860515408181654, "grad_norm": 0.28855594992637634, "learning_rate": 4.6114689455751245e-06, "loss": 0.3829, "step": 5291 }, { "epoch": 2.8610560461344385, "grad_norm": 0.30093568563461304, "learning_rate": 4.60958670563574e-06, "loss": 0.3877, "step": 5292 }, { "epoch": 2.861596684087223, "grad_norm": 0.3042225241661072, "learning_rate": 4.6077045213607765e-06, "loss": 0.3645, "step": 5293 }, { "epoch": 2.8621373220400073, "grad_norm": 0.31260597705841064, "learning_rate": 4.60582239301859e-06, "loss": 0.4032, "step": 5294 }, { "epoch": 2.8626779599927916, "grad_norm": 0.3076440095901489, "learning_rate": 4.603940320877533e-06, "loss": 0.3937, "step": 5295 }, { "epoch": 2.8632185979455755, "grad_norm": 0.311769038438797, "learning_rate": 4.602058305205946e-06, "loss": 0.419, "step": 5296 }, { "epoch": 2.86375923589836, "grad_norm": 0.33874577283859253, "learning_rate": 4.600176346272165e-06, "loss": 0.4025, "step": 5297 }, { "epoch": 2.8642998738511443, "grad_norm": 0.31491518020629883, "learning_rate": 4.598294444344515e-06, "loss": 0.423, "step": 5298 }, { "epoch": 2.8648405118039286, "grad_norm": 0.33583423495292664, "learning_rate": 4.596412599691316e-06, "loss": 0.41, "step": 5299 }, { "epoch": 2.865381149756713, "grad_norm": 0.2975970506668091, "learning_rate": 4.594530812580876e-06, "loss": 0.3777, "step": 5300 }, { "epoch": 2.8659217877094973, "grad_norm": 0.32264795899391174, "learning_rate": 4.592649083281497e-06, "loss": 0.3981, "step": 5301 }, { "epoch": 2.8664624256622817, "grad_norm": 0.30629464983940125, "learning_rate": 4.5907674120614735e-06, "loss": 0.3899, "step": 5302 }, { "epoch": 2.8670030636150656, "grad_norm": 0.285491406917572, "learning_rate": 4.5888857991890925e-06, "loss": 0.4198, "step": 5303 }, { "epoch": 2.86754370156785, "grad_norm": 0.3029111325740814, "learning_rate": 4.5870042449326265e-06, "loss": 0.3949, "step": 5304 }, { "epoch": 2.8680843395206344, "grad_norm": 0.2939251661300659, "learning_rate": 4.585122749560347e-06, "loss": 0.3983, "step": 5305 }, { "epoch": 2.8686249774734187, "grad_norm": 0.30623742938041687, "learning_rate": 4.583241313340512e-06, "loss": 0.4184, "step": 5306 }, { "epoch": 2.869165615426203, "grad_norm": 0.29048800468444824, "learning_rate": 4.581359936541379e-06, "loss": 0.3976, "step": 5307 }, { "epoch": 2.869706253378987, "grad_norm": 0.3232608437538147, "learning_rate": 4.579478619431184e-06, "loss": 0.4461, "step": 5308 }, { "epoch": 2.8702468913317714, "grad_norm": 0.2938759922981262, "learning_rate": 4.577597362278165e-06, "loss": 0.3785, "step": 5309 }, { "epoch": 2.8707875292845557, "grad_norm": 0.31486600637435913, "learning_rate": 4.575716165350549e-06, "loss": 0.4096, "step": 5310 }, { "epoch": 2.87132816723734, "grad_norm": 0.31167715787887573, "learning_rate": 4.573835028916554e-06, "loss": 0.3929, "step": 5311 }, { "epoch": 2.8718688051901244, "grad_norm": 0.3271113932132721, "learning_rate": 4.5719539532443865e-06, "loss": 0.4228, "step": 5312 }, { "epoch": 2.8724094431429084, "grad_norm": 0.27412769198417664, "learning_rate": 4.570072938602248e-06, "loss": 0.3602, "step": 5313 }, { "epoch": 2.872950081095693, "grad_norm": 0.3018361032009125, "learning_rate": 4.5681919852583304e-06, "loss": 0.4308, "step": 5314 }, { "epoch": 2.873490719048477, "grad_norm": 0.3016452193260193, "learning_rate": 4.566311093480818e-06, "loss": 0.4124, "step": 5315 }, { "epoch": 2.8740313570012614, "grad_norm": 0.2964125871658325, "learning_rate": 4.564430263537884e-06, "loss": 0.4095, "step": 5316 }, { "epoch": 2.874571994954046, "grad_norm": 0.2763165533542633, "learning_rate": 4.56254949569769e-06, "loss": 0.3888, "step": 5317 }, { "epoch": 2.87511263290683, "grad_norm": 0.2857678234577179, "learning_rate": 4.560668790228397e-06, "loss": 0.4014, "step": 5318 }, { "epoch": 2.8756532708596145, "grad_norm": 0.3083879053592682, "learning_rate": 4.5587881473981535e-06, "loss": 0.4118, "step": 5319 }, { "epoch": 2.8761939088123984, "grad_norm": 0.31368550658226013, "learning_rate": 4.556907567475094e-06, "loss": 0.4348, "step": 5320 }, { "epoch": 2.876734546765183, "grad_norm": 0.2863789200782776, "learning_rate": 4.555027050727351e-06, "loss": 0.3506, "step": 5321 }, { "epoch": 2.877275184717967, "grad_norm": 0.27195513248443604, "learning_rate": 4.553146597423044e-06, "loss": 0.3719, "step": 5322 }, { "epoch": 2.8778158226707515, "grad_norm": 0.3159834146499634, "learning_rate": 4.551266207830285e-06, "loss": 0.4071, "step": 5323 }, { "epoch": 2.878356460623536, "grad_norm": 0.28979089856147766, "learning_rate": 4.549385882217177e-06, "loss": 0.4029, "step": 5324 }, { "epoch": 2.87889709857632, "grad_norm": 0.3067147433757782, "learning_rate": 4.547505620851812e-06, "loss": 0.4145, "step": 5325 }, { "epoch": 2.8794377365291046, "grad_norm": 0.29320406913757324, "learning_rate": 4.545625424002274e-06, "loss": 0.3894, "step": 5326 }, { "epoch": 2.8799783744818885, "grad_norm": 0.2783004343509674, "learning_rate": 4.543745291936642e-06, "loss": 0.3781, "step": 5327 }, { "epoch": 2.880519012434673, "grad_norm": 0.30723005533218384, "learning_rate": 4.541865224922977e-06, "loss": 0.4331, "step": 5328 }, { "epoch": 2.8810596503874573, "grad_norm": 0.27082914113998413, "learning_rate": 4.5399852232293384e-06, "loss": 0.3764, "step": 5329 }, { "epoch": 2.8816002883402416, "grad_norm": 0.2909558415412903, "learning_rate": 4.538105287123772e-06, "loss": 0.3847, "step": 5330 }, { "epoch": 2.882140926293026, "grad_norm": 0.26408976316452026, "learning_rate": 4.536225416874319e-06, "loss": 0.374, "step": 5331 }, { "epoch": 2.88268156424581, "grad_norm": 0.3040916919708252, "learning_rate": 4.534345612749002e-06, "loss": 0.392, "step": 5332 }, { "epoch": 2.8832222021985943, "grad_norm": 0.2888509929180145, "learning_rate": 4.532465875015845e-06, "loss": 0.4131, "step": 5333 }, { "epoch": 2.8837628401513786, "grad_norm": 0.2653530538082123, "learning_rate": 4.530586203942854e-06, "loss": 0.3732, "step": 5334 }, { "epoch": 2.884303478104163, "grad_norm": 0.3019903898239136, "learning_rate": 4.528706599798033e-06, "loss": 0.4276, "step": 5335 }, { "epoch": 2.8848441160569473, "grad_norm": 0.29084232449531555, "learning_rate": 4.526827062849369e-06, "loss": 0.3941, "step": 5336 }, { "epoch": 2.8853847540097313, "grad_norm": 0.3075839579105377, "learning_rate": 4.524947593364845e-06, "loss": 0.3991, "step": 5337 }, { "epoch": 2.8859253919625156, "grad_norm": 0.2972691059112549, "learning_rate": 4.5230681916124305e-06, "loss": 0.4216, "step": 5338 }, { "epoch": 2.8864660299153, "grad_norm": 0.28446921706199646, "learning_rate": 4.521188857860091e-06, "loss": 0.4271, "step": 5339 }, { "epoch": 2.8870066678680844, "grad_norm": 0.28713396191596985, "learning_rate": 4.5193095923757745e-06, "loss": 0.3991, "step": 5340 }, { "epoch": 2.8875473058208687, "grad_norm": 0.29077884554862976, "learning_rate": 4.517430395427424e-06, "loss": 0.4136, "step": 5341 }, { "epoch": 2.888087943773653, "grad_norm": 0.27056440711021423, "learning_rate": 4.515551267282974e-06, "loss": 0.3811, "step": 5342 }, { "epoch": 2.8886285817264374, "grad_norm": 0.29237234592437744, "learning_rate": 4.5136722082103476e-06, "loss": 0.4162, "step": 5343 }, { "epoch": 2.8891692196792214, "grad_norm": 0.317898154258728, "learning_rate": 4.511793218477454e-06, "loss": 0.4328, "step": 5344 }, { "epoch": 2.8897098576320057, "grad_norm": 0.2820221781730652, "learning_rate": 4.509914298352197e-06, "loss": 0.4096, "step": 5345 }, { "epoch": 2.89025049558479, "grad_norm": 0.3054172992706299, "learning_rate": 4.508035448102472e-06, "loss": 0.3753, "step": 5346 }, { "epoch": 2.8907911335375744, "grad_norm": 0.2961260974407196, "learning_rate": 4.5061566679961605e-06, "loss": 0.4046, "step": 5347 }, { "epoch": 2.891331771490359, "grad_norm": 0.3012271523475647, "learning_rate": 4.504277958301138e-06, "loss": 0.4176, "step": 5348 }, { "epoch": 2.8918724094431427, "grad_norm": 0.3134300410747528, "learning_rate": 4.502399319285263e-06, "loss": 0.3887, "step": 5349 }, { "epoch": 2.892413047395927, "grad_norm": 0.28506678342819214, "learning_rate": 4.5005207512163914e-06, "loss": 0.3928, "step": 5350 }, { "epoch": 2.8929536853487114, "grad_norm": 0.29594171047210693, "learning_rate": 4.4986422543623655e-06, "loss": 0.4026, "step": 5351 }, { "epoch": 2.893494323301496, "grad_norm": 0.2881558835506439, "learning_rate": 4.496763828991019e-06, "loss": 0.3788, "step": 5352 }, { "epoch": 2.89403496125428, "grad_norm": 0.2928634583950043, "learning_rate": 4.494885475370172e-06, "loss": 0.3898, "step": 5353 }, { "epoch": 2.894575599207064, "grad_norm": 0.30739927291870117, "learning_rate": 4.493007193767638e-06, "loss": 0.3915, "step": 5354 }, { "epoch": 2.895116237159849, "grad_norm": 0.3106321692466736, "learning_rate": 4.491128984451219e-06, "loss": 0.4095, "step": 5355 }, { "epoch": 2.895656875112633, "grad_norm": 0.30263790488243103, "learning_rate": 4.489250847688708e-06, "loss": 0.4198, "step": 5356 }, { "epoch": 2.896197513065417, "grad_norm": 0.29731953144073486, "learning_rate": 4.487372783747884e-06, "loss": 0.4124, "step": 5357 }, { "epoch": 2.8967381510182015, "grad_norm": 0.27237144112586975, "learning_rate": 4.485494792896519e-06, "loss": 0.4029, "step": 5358 }, { "epoch": 2.897278788970986, "grad_norm": 0.2843637764453888, "learning_rate": 4.483616875402374e-06, "loss": 0.3669, "step": 5359 }, { "epoch": 2.8978194269237703, "grad_norm": 0.31389912962913513, "learning_rate": 4.481739031533201e-06, "loss": 0.3815, "step": 5360 }, { "epoch": 2.898360064876554, "grad_norm": 0.3242151439189911, "learning_rate": 4.4798612615567345e-06, "loss": 0.4115, "step": 5361 }, { "epoch": 2.8989007028293385, "grad_norm": 0.3388815224170685, "learning_rate": 4.477983565740706e-06, "loss": 0.4132, "step": 5362 }, { "epoch": 2.899441340782123, "grad_norm": 0.2897244393825531, "learning_rate": 4.476105944352834e-06, "loss": 0.3665, "step": 5363 }, { "epoch": 2.8999819787349073, "grad_norm": 0.32335197925567627, "learning_rate": 4.474228397660829e-06, "loss": 0.4141, "step": 5364 }, { "epoch": 2.9005226166876916, "grad_norm": 0.35047903656959534, "learning_rate": 4.472350925932384e-06, "loss": 0.4355, "step": 5365 }, { "epoch": 2.9010632546404755, "grad_norm": 0.2771613597869873, "learning_rate": 4.470473529435187e-06, "loss": 0.3756, "step": 5366 }, { "epoch": 2.90160389259326, "grad_norm": 0.29492348432540894, "learning_rate": 4.468596208436914e-06, "loss": 0.3888, "step": 5367 }, { "epoch": 2.9021445305460443, "grad_norm": 0.2763018012046814, "learning_rate": 4.466718963205231e-06, "loss": 0.3867, "step": 5368 }, { "epoch": 2.9026851684988286, "grad_norm": 0.3185831308364868, "learning_rate": 4.464841794007791e-06, "loss": 0.3711, "step": 5369 }, { "epoch": 2.903225806451613, "grad_norm": 0.29999685287475586, "learning_rate": 4.462964701112237e-06, "loss": 0.3877, "step": 5370 }, { "epoch": 2.9037664444043974, "grad_norm": 0.32476794719696045, "learning_rate": 4.4610876847862034e-06, "loss": 0.4353, "step": 5371 }, { "epoch": 2.9043070823571817, "grad_norm": 0.28298261761665344, "learning_rate": 4.459210745297312e-06, "loss": 0.3844, "step": 5372 }, { "epoch": 2.9048477203099656, "grad_norm": 0.31960076093673706, "learning_rate": 4.45733388291317e-06, "loss": 0.4018, "step": 5373 }, { "epoch": 2.90538835826275, "grad_norm": 0.3158363997936249, "learning_rate": 4.455457097901377e-06, "loss": 0.4069, "step": 5374 }, { "epoch": 2.9059289962155344, "grad_norm": 0.3054044544696808, "learning_rate": 4.453580390529526e-06, "loss": 0.4169, "step": 5375 }, { "epoch": 2.9064696341683187, "grad_norm": 0.28569933772087097, "learning_rate": 4.451703761065193e-06, "loss": 0.3805, "step": 5376 }, { "epoch": 2.907010272121103, "grad_norm": 0.280813068151474, "learning_rate": 4.449827209775943e-06, "loss": 0.4047, "step": 5377 }, { "epoch": 2.907550910073887, "grad_norm": 0.2897249758243561, "learning_rate": 4.447950736929331e-06, "loss": 0.3892, "step": 5378 }, { "epoch": 2.9080915480266714, "grad_norm": 0.3146097958087921, "learning_rate": 4.4460743427929024e-06, "loss": 0.4322, "step": 5379 }, { "epoch": 2.9086321859794557, "grad_norm": 0.264935165643692, "learning_rate": 4.444198027634191e-06, "loss": 0.3782, "step": 5380 }, { "epoch": 2.90917282393224, "grad_norm": 0.27351197600364685, "learning_rate": 4.4423217917207155e-06, "loss": 0.3905, "step": 5381 }, { "epoch": 2.9097134618850244, "grad_norm": 0.30898234248161316, "learning_rate": 4.440445635319987e-06, "loss": 0.4077, "step": 5382 }, { "epoch": 2.9102540998378084, "grad_norm": 0.31730180978775024, "learning_rate": 4.438569558699507e-06, "loss": 0.4121, "step": 5383 }, { "epoch": 2.910794737790593, "grad_norm": 0.2789078950881958, "learning_rate": 4.436693562126762e-06, "loss": 0.3892, "step": 5384 }, { "epoch": 2.911335375743377, "grad_norm": 0.2933134138584137, "learning_rate": 4.434817645869226e-06, "loss": 0.3922, "step": 5385 }, { "epoch": 2.9118760136961614, "grad_norm": 0.3190993368625641, "learning_rate": 4.4329418101943655e-06, "loss": 0.427, "step": 5386 }, { "epoch": 2.912416651648946, "grad_norm": 0.2673807442188263, "learning_rate": 4.431066055369633e-06, "loss": 0.3726, "step": 5387 }, { "epoch": 2.91295728960173, "grad_norm": 0.29508569836616516, "learning_rate": 4.429190381662473e-06, "loss": 0.4127, "step": 5388 }, { "epoch": 2.9134979275545145, "grad_norm": 0.28597328066825867, "learning_rate": 4.4273147893403126e-06, "loss": 0.3588, "step": 5389 }, { "epoch": 2.9140385655072985, "grad_norm": 0.31585755944252014, "learning_rate": 4.42543927867057e-06, "loss": 0.4496, "step": 5390 }, { "epoch": 2.914579203460083, "grad_norm": 0.29464495182037354, "learning_rate": 4.4235638499206544e-06, "loss": 0.3817, "step": 5391 }, { "epoch": 2.915119841412867, "grad_norm": 0.33141279220581055, "learning_rate": 4.42168850335796e-06, "loss": 0.428, "step": 5392 }, { "epoch": 2.9156604793656515, "grad_norm": 0.29135453701019287, "learning_rate": 4.4198132392498695e-06, "loss": 0.3828, "step": 5393 }, { "epoch": 2.916201117318436, "grad_norm": 0.2641162872314453, "learning_rate": 4.417938057863755e-06, "loss": 0.3823, "step": 5394 }, { "epoch": 2.91674175527122, "grad_norm": 0.2904815375804901, "learning_rate": 4.416062959466978e-06, "loss": 0.4163, "step": 5395 }, { "epoch": 2.917282393224004, "grad_norm": 0.3001211881637573, "learning_rate": 4.414187944326885e-06, "loss": 0.3864, "step": 5396 }, { "epoch": 2.9178230311767885, "grad_norm": 0.28068217635154724, "learning_rate": 4.4123130127108125e-06, "loss": 0.4227, "step": 5397 }, { "epoch": 2.918363669129573, "grad_norm": 0.28638994693756104, "learning_rate": 4.410438164886085e-06, "loss": 0.4238, "step": 5398 }, { "epoch": 2.9189043070823573, "grad_norm": 0.2821596562862396, "learning_rate": 4.408563401120015e-06, "loss": 0.4073, "step": 5399 }, { "epoch": 2.9194449450351416, "grad_norm": 0.27199241518974304, "learning_rate": 4.4066887216799055e-06, "loss": 0.3903, "step": 5400 }, { "epoch": 2.919985582987926, "grad_norm": 0.26883572340011597, "learning_rate": 4.4048141268330395e-06, "loss": 0.4008, "step": 5401 }, { "epoch": 2.92052622094071, "grad_norm": 0.3116496503353119, "learning_rate": 4.402939616846696e-06, "loss": 0.3721, "step": 5402 }, { "epoch": 2.9210668588934943, "grad_norm": 0.2937193214893341, "learning_rate": 4.401065191988139e-06, "loss": 0.3932, "step": 5403 }, { "epoch": 2.9216074968462786, "grad_norm": 0.3178521990776062, "learning_rate": 4.399190852524624e-06, "loss": 0.4182, "step": 5404 }, { "epoch": 2.922148134799063, "grad_norm": 0.27353766560554504, "learning_rate": 4.397316598723385e-06, "loss": 0.3572, "step": 5405 }, { "epoch": 2.9226887727518474, "grad_norm": 0.3278379738330841, "learning_rate": 4.395442430851654e-06, "loss": 0.4477, "step": 5406 }, { "epoch": 2.9232294107046313, "grad_norm": 0.30886223912239075, "learning_rate": 4.3935683491766445e-06, "loss": 0.3951, "step": 5407 }, { "epoch": 2.9237700486574156, "grad_norm": 0.31581950187683105, "learning_rate": 4.391694353965562e-06, "loss": 0.424, "step": 5408 }, { "epoch": 2.9243106866102, "grad_norm": 0.247950479388237, "learning_rate": 4.389820445485593e-06, "loss": 0.3396, "step": 5409 }, { "epoch": 2.9248513245629844, "grad_norm": 0.30187222361564636, "learning_rate": 4.38794662400392e-06, "loss": 0.4398, "step": 5410 }, { "epoch": 2.9253919625157687, "grad_norm": 0.2787235975265503, "learning_rate": 4.386072889787706e-06, "loss": 0.3721, "step": 5411 }, { "epoch": 2.9259326004685526, "grad_norm": 0.29677945375442505, "learning_rate": 4.384199243104107e-06, "loss": 0.4313, "step": 5412 }, { "epoch": 2.9264732384213374, "grad_norm": 0.2835438847541809, "learning_rate": 4.382325684220266e-06, "loss": 0.4192, "step": 5413 }, { "epoch": 2.9270138763741214, "grad_norm": 0.31162241101264954, "learning_rate": 4.380452213403306e-06, "loss": 0.4169, "step": 5414 }, { "epoch": 2.9275545143269057, "grad_norm": 0.3062303066253662, "learning_rate": 4.3785788309203466e-06, "loss": 0.4379, "step": 5415 }, { "epoch": 2.92809515227969, "grad_norm": 0.3071049153804779, "learning_rate": 4.376705537038491e-06, "loss": 0.3876, "step": 5416 }, { "epoch": 2.9286357902324744, "grad_norm": 0.3191245198249817, "learning_rate": 4.3748323320248325e-06, "loss": 0.4023, "step": 5417 }, { "epoch": 2.929176428185259, "grad_norm": 0.2853299379348755, "learning_rate": 4.372959216146443e-06, "loss": 0.366, "step": 5418 }, { "epoch": 2.9297170661380427, "grad_norm": 0.3157528340816498, "learning_rate": 4.371086189670393e-06, "loss": 0.3921, "step": 5419 }, { "epoch": 2.930257704090827, "grad_norm": 0.30653080344200134, "learning_rate": 4.369213252863733e-06, "loss": 0.3965, "step": 5420 }, { "epoch": 2.9307983420436114, "grad_norm": 0.2971093952655792, "learning_rate": 4.367340405993505e-06, "loss": 0.4146, "step": 5421 }, { "epoch": 2.931338979996396, "grad_norm": 0.3022221624851227, "learning_rate": 4.3654676493267335e-06, "loss": 0.3983, "step": 5422 }, { "epoch": 2.93187961794918, "grad_norm": 0.30804184079170227, "learning_rate": 4.363594983130435e-06, "loss": 0.4365, "step": 5423 }, { "epoch": 2.932420255901964, "grad_norm": 0.29444482922554016, "learning_rate": 4.361722407671609e-06, "loss": 0.3646, "step": 5424 }, { "epoch": 2.9329608938547485, "grad_norm": 0.325641930103302, "learning_rate": 4.359849923217246e-06, "loss": 0.4196, "step": 5425 }, { "epoch": 2.933501531807533, "grad_norm": 0.2978176474571228, "learning_rate": 4.357977530034319e-06, "loss": 0.3721, "step": 5426 }, { "epoch": 2.934042169760317, "grad_norm": 0.3437287211418152, "learning_rate": 4.356105228389792e-06, "loss": 0.4467, "step": 5427 }, { "epoch": 2.9345828077131015, "grad_norm": 0.3110363483428955, "learning_rate": 4.3542330185506145e-06, "loss": 0.3747, "step": 5428 }, { "epoch": 2.935123445665886, "grad_norm": 0.310379296541214, "learning_rate": 4.352360900783724e-06, "loss": 0.403, "step": 5429 }, { "epoch": 2.9356640836186703, "grad_norm": 0.34377607703208923, "learning_rate": 4.350488875356041e-06, "loss": 0.4229, "step": 5430 }, { "epoch": 2.936204721571454, "grad_norm": 0.3300040364265442, "learning_rate": 4.348616942534475e-06, "loss": 0.3755, "step": 5431 }, { "epoch": 2.9367453595242385, "grad_norm": 0.28814318776130676, "learning_rate": 4.346745102585923e-06, "loss": 0.3635, "step": 5432 }, { "epoch": 2.937285997477023, "grad_norm": 0.2987310588359833, "learning_rate": 4.344873355777274e-06, "loss": 0.4341, "step": 5433 }, { "epoch": 2.9378266354298073, "grad_norm": 0.2876870334148407, "learning_rate": 4.3430017023753925e-06, "loss": 0.4048, "step": 5434 }, { "epoch": 2.9383672733825916, "grad_norm": 0.31516033411026, "learning_rate": 4.341130142647136e-06, "loss": 0.4341, "step": 5435 }, { "epoch": 2.9389079113353755, "grad_norm": 0.2953851819038391, "learning_rate": 4.339258676859349e-06, "loss": 0.3732, "step": 5436 }, { "epoch": 2.93944854928816, "grad_norm": 0.28782448172569275, "learning_rate": 4.337387305278864e-06, "loss": 0.4046, "step": 5437 }, { "epoch": 2.9399891872409443, "grad_norm": 0.2743951082229614, "learning_rate": 4.3355160281724935e-06, "loss": 0.3728, "step": 5438 }, { "epoch": 2.9405298251937286, "grad_norm": 0.28903907537460327, "learning_rate": 4.333644845807044e-06, "loss": 0.3713, "step": 5439 }, { "epoch": 2.941070463146513, "grad_norm": 0.2945643365383148, "learning_rate": 4.331773758449303e-06, "loss": 0.3941, "step": 5440 }, { "epoch": 2.941611101099297, "grad_norm": 0.3084539473056793, "learning_rate": 4.329902766366051e-06, "loss": 0.4243, "step": 5441 }, { "epoch": 2.9421517390520817, "grad_norm": 0.2908715009689331, "learning_rate": 4.328031869824044e-06, "loss": 0.4025, "step": 5442 }, { "epoch": 2.9426923770048656, "grad_norm": 0.29896068572998047, "learning_rate": 4.3261610690900365e-06, "loss": 0.4012, "step": 5443 }, { "epoch": 2.94323301495765, "grad_norm": 0.29760220646858215, "learning_rate": 4.324290364430761e-06, "loss": 0.3691, "step": 5444 }, { "epoch": 2.9437736529104344, "grad_norm": 0.2887779474258423, "learning_rate": 4.322419756112943e-06, "loss": 0.4186, "step": 5445 }, { "epoch": 2.9443142908632187, "grad_norm": 0.3118552565574646, "learning_rate": 4.320549244403285e-06, "loss": 0.4232, "step": 5446 }, { "epoch": 2.944854928816003, "grad_norm": 0.3034192621707916, "learning_rate": 4.318678829568484e-06, "loss": 0.3947, "step": 5447 }, { "epoch": 2.945395566768787, "grad_norm": 0.294358491897583, "learning_rate": 4.3168085118752205e-06, "loss": 0.4275, "step": 5448 }, { "epoch": 2.9459362047215714, "grad_norm": 0.31848475337028503, "learning_rate": 4.314938291590161e-06, "loss": 0.3882, "step": 5449 }, { "epoch": 2.9464768426743557, "grad_norm": 0.279231458902359, "learning_rate": 4.313068168979957e-06, "loss": 0.4152, "step": 5450 }, { "epoch": 2.94701748062714, "grad_norm": 0.32018351554870605, "learning_rate": 4.3111981443112486e-06, "loss": 0.4382, "step": 5451 }, { "epoch": 2.9475581185799244, "grad_norm": 0.30242928862571716, "learning_rate": 4.309328217850659e-06, "loss": 0.3996, "step": 5452 }, { "epoch": 2.9480987565327084, "grad_norm": 0.272429883480072, "learning_rate": 4.3074583898648016e-06, "loss": 0.3735, "step": 5453 }, { "epoch": 2.9486393944854927, "grad_norm": 0.2945402264595032, "learning_rate": 4.305588660620269e-06, "loss": 0.4183, "step": 5454 }, { "epoch": 2.949180032438277, "grad_norm": 0.31038814783096313, "learning_rate": 4.303719030383648e-06, "loss": 0.4091, "step": 5455 }, { "epoch": 2.9497206703910615, "grad_norm": 0.301114946603775, "learning_rate": 4.301849499421504e-06, "loss": 0.4221, "step": 5456 }, { "epoch": 2.950261308343846, "grad_norm": 0.28582969307899475, "learning_rate": 4.299980068000395e-06, "loss": 0.415, "step": 5457 }, { "epoch": 2.95080194629663, "grad_norm": 0.2952626049518585, "learning_rate": 4.2981107363868564e-06, "loss": 0.4163, "step": 5458 }, { "epoch": 2.9513425842494145, "grad_norm": 0.27767154574394226, "learning_rate": 4.296241504847417e-06, "loss": 0.3702, "step": 5459 }, { "epoch": 2.9518832222021985, "grad_norm": 0.28652843832969666, "learning_rate": 4.294372373648587e-06, "loss": 0.3967, "step": 5460 }, { "epoch": 2.952423860154983, "grad_norm": 0.3140363395214081, "learning_rate": 4.292503343056866e-06, "loss": 0.407, "step": 5461 }, { "epoch": 2.952964498107767, "grad_norm": 0.30203577876091003, "learning_rate": 4.290634413338735e-06, "loss": 0.4303, "step": 5462 }, { "epoch": 2.9535051360605515, "grad_norm": 0.2728308439254761, "learning_rate": 4.288765584760663e-06, "loss": 0.382, "step": 5463 }, { "epoch": 2.954045774013336, "grad_norm": 0.28560322523117065, "learning_rate": 4.286896857589103e-06, "loss": 0.4026, "step": 5464 }, { "epoch": 2.95458641196612, "grad_norm": 0.2892841398715973, "learning_rate": 4.285028232090499e-06, "loss": 0.4071, "step": 5465 }, { "epoch": 2.955127049918904, "grad_norm": 0.27572664618492126, "learning_rate": 4.283159708531272e-06, "loss": 0.3624, "step": 5466 }, { "epoch": 2.9556676878716885, "grad_norm": 0.3437039852142334, "learning_rate": 4.281291287177833e-06, "loss": 0.4019, "step": 5467 }, { "epoch": 2.956208325824473, "grad_norm": 0.3084211051464081, "learning_rate": 4.27942296829658e-06, "loss": 0.4061, "step": 5468 }, { "epoch": 2.9567489637772573, "grad_norm": 0.29442712664604187, "learning_rate": 4.277554752153895e-06, "loss": 0.3714, "step": 5469 }, { "epoch": 2.957289601730041, "grad_norm": 0.2936548590660095, "learning_rate": 4.275686639016142e-06, "loss": 0.3708, "step": 5470 }, { "epoch": 2.957830239682826, "grad_norm": 0.29006555676460266, "learning_rate": 4.273818629149674e-06, "loss": 0.3731, "step": 5471 }, { "epoch": 2.95837087763561, "grad_norm": 0.2901497781276703, "learning_rate": 4.2719507228208305e-06, "loss": 0.4264, "step": 5472 }, { "epoch": 2.9589115155883943, "grad_norm": 0.3107115626335144, "learning_rate": 4.270082920295934e-06, "loss": 0.3747, "step": 5473 }, { "epoch": 2.9594521535411786, "grad_norm": 0.2886500060558319, "learning_rate": 4.26821522184129e-06, "loss": 0.4096, "step": 5474 }, { "epoch": 2.959992791493963, "grad_norm": 0.2953193485736847, "learning_rate": 4.266347627723192e-06, "loss": 0.4331, "step": 5475 }, { "epoch": 2.9605334294467474, "grad_norm": 0.3297775387763977, "learning_rate": 4.26448013820792e-06, "loss": 0.3978, "step": 5476 }, { "epoch": 2.9610740673995313, "grad_norm": 0.3226925730705261, "learning_rate": 4.262612753561736e-06, "loss": 0.411, "step": 5477 }, { "epoch": 2.9616147053523156, "grad_norm": 0.29509931802749634, "learning_rate": 4.260745474050889e-06, "loss": 0.4123, "step": 5478 }, { "epoch": 2.9621553433051, "grad_norm": 0.2667893171310425, "learning_rate": 4.258878299941612e-06, "loss": 0.3607, "step": 5479 }, { "epoch": 2.9626959812578844, "grad_norm": 0.3503236174583435, "learning_rate": 4.257011231500122e-06, "loss": 0.4425, "step": 5480 }, { "epoch": 2.9632366192106687, "grad_norm": 0.27424386143684387, "learning_rate": 4.2551442689926246e-06, "loss": 0.3407, "step": 5481 }, { "epoch": 2.9637772571634526, "grad_norm": 0.36829856038093567, "learning_rate": 4.2532774126853075e-06, "loss": 0.4627, "step": 5482 }, { "epoch": 2.964317895116237, "grad_norm": 0.29745393991470337, "learning_rate": 4.2514106628443415e-06, "loss": 0.3851, "step": 5483 }, { "epoch": 2.9648585330690214, "grad_norm": 0.3074614107608795, "learning_rate": 4.249544019735886e-06, "loss": 0.4216, "step": 5484 }, { "epoch": 2.9653991710218057, "grad_norm": 0.290180504322052, "learning_rate": 4.247677483626085e-06, "loss": 0.4299, "step": 5485 }, { "epoch": 2.96593980897459, "grad_norm": 0.3519513010978699, "learning_rate": 4.245811054781065e-06, "loss": 0.3814, "step": 5486 }, { "epoch": 2.9664804469273744, "grad_norm": 0.34274551272392273, "learning_rate": 4.243944733466935e-06, "loss": 0.433, "step": 5487 }, { "epoch": 2.967021084880159, "grad_norm": 0.3035910725593567, "learning_rate": 4.242078519949795e-06, "loss": 0.3677, "step": 5488 }, { "epoch": 2.9675617228329427, "grad_norm": 0.3082733750343323, "learning_rate": 4.240212414495724e-06, "loss": 0.4269, "step": 5489 }, { "epoch": 2.968102360785727, "grad_norm": 0.31705421209335327, "learning_rate": 4.238346417370793e-06, "loss": 0.4254, "step": 5490 }, { "epoch": 2.9686429987385115, "grad_norm": 0.3229638934135437, "learning_rate": 4.236480528841046e-06, "loss": 0.3812, "step": 5491 }, { "epoch": 2.969183636691296, "grad_norm": 0.3046262860298157, "learning_rate": 4.234614749172521e-06, "loss": 0.3773, "step": 5492 }, { "epoch": 2.96972427464408, "grad_norm": 0.34981080889701843, "learning_rate": 4.232749078631237e-06, "loss": 0.4321, "step": 5493 }, { "epoch": 2.970264912596864, "grad_norm": 0.27616679668426514, "learning_rate": 4.2308835174832e-06, "loss": 0.3885, "step": 5494 }, { "epoch": 2.9708055505496485, "grad_norm": 0.3084140419960022, "learning_rate": 4.229018065994396e-06, "loss": 0.423, "step": 5495 }, { "epoch": 2.971346188502433, "grad_norm": 0.3051964342594147, "learning_rate": 4.2271527244307975e-06, "loss": 0.4013, "step": 5496 }, { "epoch": 2.971886826455217, "grad_norm": 0.31605881452560425, "learning_rate": 4.225287493058362e-06, "loss": 0.3895, "step": 5497 }, { "epoch": 2.9724274644080015, "grad_norm": 0.3115125894546509, "learning_rate": 4.223422372143034e-06, "loss": 0.4296, "step": 5498 }, { "epoch": 2.9729681023607855, "grad_norm": 0.2768317759037018, "learning_rate": 4.221557361950734e-06, "loss": 0.3807, "step": 5499 }, { "epoch": 2.9735087403135703, "grad_norm": 0.3032225966453552, "learning_rate": 4.2196924627473715e-06, "loss": 0.3855, "step": 5500 }, { "epoch": 2.974049378266354, "grad_norm": 0.29233628511428833, "learning_rate": 4.217827674798845e-06, "loss": 0.3735, "step": 5501 }, { "epoch": 2.9745900162191385, "grad_norm": 0.313637375831604, "learning_rate": 4.215962998371032e-06, "loss": 0.4408, "step": 5502 }, { "epoch": 2.975130654171923, "grad_norm": 0.27353334426879883, "learning_rate": 4.214098433729792e-06, "loss": 0.3802, "step": 5503 }, { "epoch": 2.9756712921247073, "grad_norm": 0.31625521183013916, "learning_rate": 4.212233981140972e-06, "loss": 0.4286, "step": 5504 }, { "epoch": 2.9762119300774916, "grad_norm": 0.28535178303718567, "learning_rate": 4.210369640870403e-06, "loss": 0.3642, "step": 5505 }, { "epoch": 2.9767525680302755, "grad_norm": 0.26238372921943665, "learning_rate": 4.208505413183899e-06, "loss": 0.3838, "step": 5506 }, { "epoch": 2.97729320598306, "grad_norm": 0.3076592683792114, "learning_rate": 4.206641298347258e-06, "loss": 0.3996, "step": 5507 }, { "epoch": 2.9778338439358443, "grad_norm": 0.3326891362667084, "learning_rate": 4.204777296626262e-06, "loss": 0.4509, "step": 5508 }, { "epoch": 2.9783744818886286, "grad_norm": 0.32030439376831055, "learning_rate": 4.202913408286677e-06, "loss": 0.3774, "step": 5509 }, { "epoch": 2.978915119841413, "grad_norm": 0.31616464257240295, "learning_rate": 4.201049633594254e-06, "loss": 0.4099, "step": 5510 }, { "epoch": 2.979455757794197, "grad_norm": 0.28750118613243103, "learning_rate": 4.1991859728147245e-06, "loss": 0.3809, "step": 5511 }, { "epoch": 2.9799963957469813, "grad_norm": 0.3051993250846863, "learning_rate": 4.1973224262138075e-06, "loss": 0.4296, "step": 5512 }, { "epoch": 2.9805370336997656, "grad_norm": 0.29199936985969543, "learning_rate": 4.1954589940572035e-06, "loss": 0.3975, "step": 5513 }, { "epoch": 2.98107767165255, "grad_norm": 0.27910029888153076, "learning_rate": 4.193595676610599e-06, "loss": 0.4004, "step": 5514 }, { "epoch": 2.9816183096053344, "grad_norm": 0.290517121553421, "learning_rate": 4.1917324741396595e-06, "loss": 0.3889, "step": 5515 }, { "epoch": 2.9821589475581187, "grad_norm": 0.2989426851272583, "learning_rate": 4.189869386910038e-06, "loss": 0.3773, "step": 5516 }, { "epoch": 2.982699585510903, "grad_norm": 0.31459930539131165, "learning_rate": 4.18800641518737e-06, "loss": 0.4488, "step": 5517 }, { "epoch": 2.983240223463687, "grad_norm": 0.312425434589386, "learning_rate": 4.1861435592372766e-06, "loss": 0.422, "step": 5518 }, { "epoch": 2.9837808614164714, "grad_norm": 0.29273882508277893, "learning_rate": 4.184280819325358e-06, "loss": 0.4171, "step": 5519 }, { "epoch": 2.9843214993692557, "grad_norm": 0.30007869005203247, "learning_rate": 4.1824181957172014e-06, "loss": 0.3704, "step": 5520 }, { "epoch": 2.98486213732204, "grad_norm": 0.3161482810974121, "learning_rate": 4.180555688678376e-06, "loss": 0.3924, "step": 5521 }, { "epoch": 2.9854027752748244, "grad_norm": 0.3036750257015228, "learning_rate": 4.1786932984744345e-06, "loss": 0.3936, "step": 5522 }, { "epoch": 2.9859434132276084, "grad_norm": 0.3443000316619873, "learning_rate": 4.176831025370914e-06, "loss": 0.395, "step": 5523 }, { "epoch": 2.9864840511803927, "grad_norm": 0.34082910418510437, "learning_rate": 4.174968869633333e-06, "loss": 0.4453, "step": 5524 }, { "epoch": 2.987024689133177, "grad_norm": 0.29087314009666443, "learning_rate": 4.173106831527194e-06, "loss": 0.3861, "step": 5525 }, { "epoch": 2.9875653270859615, "grad_norm": 0.32361748814582825, "learning_rate": 4.171244911317986e-06, "loss": 0.4255, "step": 5526 }, { "epoch": 2.988105965038746, "grad_norm": 0.28390243649482727, "learning_rate": 4.169383109271174e-06, "loss": 0.3854, "step": 5527 }, { "epoch": 2.9886466029915297, "grad_norm": 0.35260480642318726, "learning_rate": 4.167521425652212e-06, "loss": 0.4224, "step": 5528 }, { "epoch": 2.9891872409443145, "grad_norm": 0.32985785603523254, "learning_rate": 4.165659860726535e-06, "loss": 0.4329, "step": 5529 }, { "epoch": 2.9897278788970985, "grad_norm": 0.29288801550865173, "learning_rate": 4.163798414759566e-06, "loss": 0.4038, "step": 5530 }, { "epoch": 2.990268516849883, "grad_norm": 0.29671889543533325, "learning_rate": 4.161937088016701e-06, "loss": 0.4096, "step": 5531 }, { "epoch": 2.990809154802667, "grad_norm": 0.2902495265007019, "learning_rate": 4.160075880763325e-06, "loss": 0.3764, "step": 5532 }, { "epoch": 2.9913497927554515, "grad_norm": 0.28445449471473694, "learning_rate": 4.158214793264808e-06, "loss": 0.3758, "step": 5533 }, { "epoch": 2.991890430708236, "grad_norm": 0.3205009698867798, "learning_rate": 4.1563538257865e-06, "loss": 0.4267, "step": 5534 }, { "epoch": 2.99243106866102, "grad_norm": 0.2863154113292694, "learning_rate": 4.154492978593733e-06, "loss": 0.396, "step": 5535 }, { "epoch": 2.992971706613804, "grad_norm": 0.28448721766471863, "learning_rate": 4.1526322519518245e-06, "loss": 0.4027, "step": 5536 }, { "epoch": 2.9935123445665885, "grad_norm": 0.2850733995437622, "learning_rate": 4.150771646126073e-06, "loss": 0.3698, "step": 5537 }, { "epoch": 2.994052982519373, "grad_norm": 0.34360191226005554, "learning_rate": 4.148911161381763e-06, "loss": 0.4403, "step": 5538 }, { "epoch": 2.9945936204721573, "grad_norm": 0.2677406668663025, "learning_rate": 4.147050797984152e-06, "loss": 0.3659, "step": 5539 }, { "epoch": 2.995134258424941, "grad_norm": 0.3009760081768036, "learning_rate": 4.145190556198494e-06, "loss": 0.4231, "step": 5540 }, { "epoch": 2.995674896377726, "grad_norm": 0.30065473914146423, "learning_rate": 4.143330436290016e-06, "loss": 0.3978, "step": 5541 }, { "epoch": 2.99621553433051, "grad_norm": 0.2900971472263336, "learning_rate": 4.141470438523932e-06, "loss": 0.3595, "step": 5542 }, { "epoch": 2.9967561722832943, "grad_norm": 0.31957828998565674, "learning_rate": 4.139610563165438e-06, "loss": 0.4176, "step": 5543 }, { "epoch": 2.9972968102360786, "grad_norm": 0.3145180344581604, "learning_rate": 4.1377508104797075e-06, "loss": 0.4469, "step": 5544 }, { "epoch": 2.997837448188863, "grad_norm": 0.3263266980648041, "learning_rate": 4.135891180731903e-06, "loss": 0.4027, "step": 5545 }, { "epoch": 2.9983780861416474, "grad_norm": 0.28440696001052856, "learning_rate": 4.134031674187167e-06, "loss": 0.399, "step": 5546 }, { "epoch": 2.9989187240944313, "grad_norm": 0.30456775426864624, "learning_rate": 4.132172291110626e-06, "loss": 0.4125, "step": 5547 }, { "epoch": 2.9994593620472156, "grad_norm": 0.3088439702987671, "learning_rate": 4.130313031767386e-06, "loss": 0.3781, "step": 5548 }, { "epoch": 3.0, "grad_norm": 0.4863080382347107, "learning_rate": 4.1284538964225364e-06, "loss": 0.5696, "step": 5549 }, { "epoch": 3.0005406379527844, "grad_norm": 0.33911392092704773, "learning_rate": 4.1265948853411506e-06, "loss": 0.3871, "step": 5550 }, { "epoch": 3.0010812759055687, "grad_norm": 0.3231358528137207, "learning_rate": 4.124735998788283e-06, "loss": 0.3744, "step": 5551 }, { "epoch": 3.0016219138583526, "grad_norm": 0.3206525146961212, "learning_rate": 4.122877237028969e-06, "loss": 0.3783, "step": 5552 }, { "epoch": 3.002162551811137, "grad_norm": 0.3616611957550049, "learning_rate": 4.1210186003282275e-06, "loss": 0.3955, "step": 5553 }, { "epoch": 3.0027031897639214, "grad_norm": 0.31090232729911804, "learning_rate": 4.119160088951061e-06, "loss": 0.3653, "step": 5554 }, { "epoch": 3.0032438277167057, "grad_norm": 0.30760252475738525, "learning_rate": 4.1173017031624544e-06, "loss": 0.3766, "step": 5555 }, { "epoch": 3.00378446566949, "grad_norm": 0.3399474322795868, "learning_rate": 4.115443443227367e-06, "loss": 0.3756, "step": 5556 }, { "epoch": 3.0043251036222745, "grad_norm": 0.3576647937297821, "learning_rate": 4.11358530941075e-06, "loss": 0.3586, "step": 5557 }, { "epoch": 3.0048657415750584, "grad_norm": 0.3041995167732239, "learning_rate": 4.1117273019775326e-06, "loss": 0.3681, "step": 5558 }, { "epoch": 3.0054063795278427, "grad_norm": 0.31850144267082214, "learning_rate": 4.109869421192628e-06, "loss": 0.3922, "step": 5559 }, { "epoch": 3.005947017480627, "grad_norm": 0.3466080129146576, "learning_rate": 4.108011667320926e-06, "loss": 0.4141, "step": 5560 }, { "epoch": 3.0064876554334115, "grad_norm": 0.3188363313674927, "learning_rate": 4.106154040627302e-06, "loss": 0.3696, "step": 5561 }, { "epoch": 3.007028293386196, "grad_norm": 0.32207629084587097, "learning_rate": 4.104296541376616e-06, "loss": 0.3754, "step": 5562 }, { "epoch": 3.00756893133898, "grad_norm": 0.291013240814209, "learning_rate": 4.102439169833705e-06, "loss": 0.3674, "step": 5563 }, { "epoch": 3.008109569291764, "grad_norm": 0.3080962896347046, "learning_rate": 4.100581926263389e-06, "loss": 0.3742, "step": 5564 }, { "epoch": 3.0086502072445485, "grad_norm": 0.316063791513443, "learning_rate": 4.098724810930472e-06, "loss": 0.3762, "step": 5565 }, { "epoch": 3.009190845197333, "grad_norm": 0.3149975836277008, "learning_rate": 4.096867824099736e-06, "loss": 0.3629, "step": 5566 }, { "epoch": 3.009731483150117, "grad_norm": 0.2868281602859497, "learning_rate": 4.09501096603595e-06, "loss": 0.3745, "step": 5567 }, { "epoch": 3.0102721211029015, "grad_norm": 0.3042278587818146, "learning_rate": 4.093154237003858e-06, "loss": 0.341, "step": 5568 }, { "epoch": 3.010812759055686, "grad_norm": 0.32166436314582825, "learning_rate": 4.091297637268191e-06, "loss": 0.3601, "step": 5569 }, { "epoch": 3.01135339700847, "grad_norm": 0.264839231967926, "learning_rate": 4.08944116709366e-06, "loss": 0.3842, "step": 5570 }, { "epoch": 3.011894034961254, "grad_norm": 0.2903689444065094, "learning_rate": 4.087584826744957e-06, "loss": 0.3755, "step": 5571 }, { "epoch": 3.0124346729140385, "grad_norm": 0.2931392192840576, "learning_rate": 4.085728616486754e-06, "loss": 0.3777, "step": 5572 }, { "epoch": 3.012975310866823, "grad_norm": 0.2999896705150604, "learning_rate": 4.083872536583708e-06, "loss": 0.3731, "step": 5573 }, { "epoch": 3.0135159488196073, "grad_norm": 0.30667611956596375, "learning_rate": 4.082016587300453e-06, "loss": 0.3729, "step": 5574 }, { "epoch": 3.0140565867723916, "grad_norm": 0.3090130090713501, "learning_rate": 4.08016076890161e-06, "loss": 0.4096, "step": 5575 }, { "epoch": 3.0145972247251756, "grad_norm": 0.30549952387809753, "learning_rate": 4.078305081651776e-06, "loss": 0.3932, "step": 5576 }, { "epoch": 3.01513786267796, "grad_norm": 0.29738929867744446, "learning_rate": 4.076449525815533e-06, "loss": 0.3804, "step": 5577 }, { "epoch": 3.0156785006307443, "grad_norm": 0.3099241256713867, "learning_rate": 4.074594101657441e-06, "loss": 0.3697, "step": 5578 }, { "epoch": 3.0162191385835286, "grad_norm": 0.2834163308143616, "learning_rate": 4.072738809442046e-06, "loss": 0.3692, "step": 5579 }, { "epoch": 3.016759776536313, "grad_norm": 0.3082650899887085, "learning_rate": 4.0708836494338695e-06, "loss": 0.3759, "step": 5580 }, { "epoch": 3.017300414489097, "grad_norm": 0.3224112391471863, "learning_rate": 4.069028621897417e-06, "loss": 0.3847, "step": 5581 }, { "epoch": 3.0178410524418813, "grad_norm": 0.3178497850894928, "learning_rate": 4.067173727097176e-06, "loss": 0.3752, "step": 5582 }, { "epoch": 3.0183816903946656, "grad_norm": 0.34015393257141113, "learning_rate": 4.065318965297615e-06, "loss": 0.3724, "step": 5583 }, { "epoch": 3.01892232834745, "grad_norm": 0.28791576623916626, "learning_rate": 4.06346433676318e-06, "loss": 0.3669, "step": 5584 }, { "epoch": 3.0194629663002344, "grad_norm": 0.3029351532459259, "learning_rate": 4.061609841758302e-06, "loss": 0.3673, "step": 5585 }, { "epoch": 3.0200036042530187, "grad_norm": 0.2851192057132721, "learning_rate": 4.059755480547389e-06, "loss": 0.3712, "step": 5586 }, { "epoch": 3.0205442422058026, "grad_norm": 0.296670526266098, "learning_rate": 4.057901253394839e-06, "loss": 0.3641, "step": 5587 }, { "epoch": 3.021084880158587, "grad_norm": 0.3029664158821106, "learning_rate": 4.056047160565017e-06, "loss": 0.3707, "step": 5588 }, { "epoch": 3.0216255181113714, "grad_norm": 0.305296927690506, "learning_rate": 4.0541932023222806e-06, "loss": 0.3651, "step": 5589 }, { "epoch": 3.0221661560641557, "grad_norm": 0.2943899631500244, "learning_rate": 4.0523393789309625e-06, "loss": 0.3876, "step": 5590 }, { "epoch": 3.02270679401694, "grad_norm": 0.3069864511489868, "learning_rate": 4.050485690655378e-06, "loss": 0.393, "step": 5591 }, { "epoch": 3.0232474319697245, "grad_norm": 0.3245830237865448, "learning_rate": 4.048632137759821e-06, "loss": 0.3731, "step": 5592 }, { "epoch": 3.0237880699225084, "grad_norm": 0.3169190287590027, "learning_rate": 4.0467787205085694e-06, "loss": 0.3733, "step": 5593 }, { "epoch": 3.0243287078752927, "grad_norm": 0.28116780519485474, "learning_rate": 4.044925439165879e-06, "loss": 0.3721, "step": 5594 }, { "epoch": 3.024869345828077, "grad_norm": 0.27948346734046936, "learning_rate": 4.04307229399599e-06, "loss": 0.3731, "step": 5595 }, { "epoch": 3.0254099837808615, "grad_norm": 0.32127025723457336, "learning_rate": 4.041219285263116e-06, "loss": 0.3831, "step": 5596 }, { "epoch": 3.025950621733646, "grad_norm": 0.30989739298820496, "learning_rate": 4.039366413231458e-06, "loss": 0.3855, "step": 5597 }, { "epoch": 3.02649125968643, "grad_norm": 0.2933104336261749, "learning_rate": 4.037513678165196e-06, "loss": 0.3745, "step": 5598 }, { "epoch": 3.027031897639214, "grad_norm": 0.31462398171424866, "learning_rate": 4.03566108032849e-06, "loss": 0.3724, "step": 5599 }, { "epoch": 3.0275725355919985, "grad_norm": 0.2990002930164337, "learning_rate": 4.0338086199854765e-06, "loss": 0.3613, "step": 5600 }, { "epoch": 3.028113173544783, "grad_norm": 0.30223512649536133, "learning_rate": 4.031956297400279e-06, "loss": 0.406, "step": 5601 }, { "epoch": 3.028653811497567, "grad_norm": 0.3021981716156006, "learning_rate": 4.030104112836997e-06, "loss": 0.3753, "step": 5602 }, { "epoch": 3.0291944494503515, "grad_norm": 0.2970791757106781, "learning_rate": 4.028252066559712e-06, "loss": 0.3922, "step": 5603 }, { "epoch": 3.029735087403136, "grad_norm": 0.2738272547721863, "learning_rate": 4.026400158832486e-06, "loss": 0.3916, "step": 5604 }, { "epoch": 3.03027572535592, "grad_norm": 0.30851078033447266, "learning_rate": 4.02454838991936e-06, "loss": 0.3699, "step": 5605 }, { "epoch": 3.030816363308704, "grad_norm": 0.2815330922603607, "learning_rate": 4.022696760084355e-06, "loss": 0.3881, "step": 5606 }, { "epoch": 3.0313570012614885, "grad_norm": 0.2881687581539154, "learning_rate": 4.020845269591474e-06, "loss": 0.3756, "step": 5607 }, { "epoch": 3.031897639214273, "grad_norm": 0.29471635818481445, "learning_rate": 4.018993918704701e-06, "loss": 0.3869, "step": 5608 }, { "epoch": 3.0324382771670573, "grad_norm": 0.29964473843574524, "learning_rate": 4.017142707687995e-06, "loss": 0.3849, "step": 5609 }, { "epoch": 3.032978915119841, "grad_norm": 0.3093760907649994, "learning_rate": 4.0152916368053e-06, "loss": 0.3662, "step": 5610 }, { "epoch": 3.0335195530726256, "grad_norm": 0.2925645112991333, "learning_rate": 4.013440706320537e-06, "loss": 0.3823, "step": 5611 }, { "epoch": 3.03406019102541, "grad_norm": 0.32020100951194763, "learning_rate": 4.0115899164976125e-06, "loss": 0.3713, "step": 5612 }, { "epoch": 3.0346008289781943, "grad_norm": 0.2816184163093567, "learning_rate": 4.009739267600403e-06, "loss": 0.3553, "step": 5613 }, { "epoch": 3.0351414669309786, "grad_norm": 0.30468878149986267, "learning_rate": 4.007888759892773e-06, "loss": 0.3694, "step": 5614 }, { "epoch": 3.035682104883763, "grad_norm": 0.2784128487110138, "learning_rate": 4.006038393638565e-06, "loss": 0.3943, "step": 5615 }, { "epoch": 3.036222742836547, "grad_norm": 0.2900444269180298, "learning_rate": 4.004188169101603e-06, "loss": 0.3794, "step": 5616 }, { "epoch": 3.0367633807893313, "grad_norm": 0.2990294396877289, "learning_rate": 4.002338086545684e-06, "loss": 0.368, "step": 5617 }, { "epoch": 3.0373040187421156, "grad_norm": 0.29112526774406433, "learning_rate": 4.000488146234592e-06, "loss": 0.4074, "step": 5618 }, { "epoch": 3.0378446566949, "grad_norm": 0.2832828164100647, "learning_rate": 3.9986383484320875e-06, "loss": 0.3825, "step": 5619 }, { "epoch": 3.0383852946476844, "grad_norm": 0.29137280583381653, "learning_rate": 3.996788693401914e-06, "loss": 0.3766, "step": 5620 }, { "epoch": 3.0389259326004687, "grad_norm": 0.2979818880558014, "learning_rate": 3.994939181407787e-06, "loss": 0.378, "step": 5621 }, { "epoch": 3.0394665705532526, "grad_norm": 0.3082057237625122, "learning_rate": 3.99308981271341e-06, "loss": 0.3754, "step": 5622 }, { "epoch": 3.040007208506037, "grad_norm": 0.2824176251888275, "learning_rate": 3.991240587582461e-06, "loss": 0.362, "step": 5623 }, { "epoch": 3.0405478464588214, "grad_norm": 0.2940672039985657, "learning_rate": 3.989391506278603e-06, "loss": 0.394, "step": 5624 }, { "epoch": 3.0410884844116057, "grad_norm": 0.2896587550640106, "learning_rate": 3.987542569065469e-06, "loss": 0.3864, "step": 5625 }, { "epoch": 3.04162912236439, "grad_norm": 0.2874264419078827, "learning_rate": 3.98569377620668e-06, "loss": 0.3654, "step": 5626 }, { "epoch": 3.0421697603171745, "grad_norm": 0.28716790676116943, "learning_rate": 3.983845127965834e-06, "loss": 0.3939, "step": 5627 }, { "epoch": 3.0427103982699584, "grad_norm": 0.2853327691555023, "learning_rate": 3.981996624606509e-06, "loss": 0.3952, "step": 5628 }, { "epoch": 3.0432510362227427, "grad_norm": 0.28403279185295105, "learning_rate": 3.980148266392257e-06, "loss": 0.3773, "step": 5629 }, { "epoch": 3.043791674175527, "grad_norm": 0.27723419666290283, "learning_rate": 3.978300053586617e-06, "loss": 0.407, "step": 5630 }, { "epoch": 3.0443323121283115, "grad_norm": 0.3116014897823334, "learning_rate": 3.9764519864531026e-06, "loss": 0.3884, "step": 5631 }, { "epoch": 3.044872950081096, "grad_norm": 0.27871468663215637, "learning_rate": 3.974604065255208e-06, "loss": 0.3865, "step": 5632 }, { "epoch": 3.04541358803388, "grad_norm": 0.3040054440498352, "learning_rate": 3.972756290256407e-06, "loss": 0.3831, "step": 5633 }, { "epoch": 3.045954225986664, "grad_norm": 0.30204376578330994, "learning_rate": 3.970908661720151e-06, "loss": 0.3485, "step": 5634 }, { "epoch": 3.0464948639394485, "grad_norm": 0.33566614985466003, "learning_rate": 3.969061179909872e-06, "loss": 0.4, "step": 5635 }, { "epoch": 3.047035501892233, "grad_norm": 0.28941500186920166, "learning_rate": 3.967213845088983e-06, "loss": 0.3739, "step": 5636 }, { "epoch": 3.047576139845017, "grad_norm": 0.2895706593990326, "learning_rate": 3.965366657520869e-06, "loss": 0.3738, "step": 5637 }, { "epoch": 3.0481167777978015, "grad_norm": 0.34246695041656494, "learning_rate": 3.963519617468902e-06, "loss": 0.3986, "step": 5638 }, { "epoch": 3.048657415750586, "grad_norm": 0.30092760920524597, "learning_rate": 3.961672725196428e-06, "loss": 0.3581, "step": 5639 }, { "epoch": 3.04919805370337, "grad_norm": 0.2948783040046692, "learning_rate": 3.959825980966777e-06, "loss": 0.3714, "step": 5640 }, { "epoch": 3.049738691656154, "grad_norm": 0.2991983890533447, "learning_rate": 3.957979385043249e-06, "loss": 0.3702, "step": 5641 }, { "epoch": 3.0502793296089385, "grad_norm": 0.32025763392448425, "learning_rate": 3.956132937689131e-06, "loss": 0.3529, "step": 5642 }, { "epoch": 3.050819967561723, "grad_norm": 0.30374646186828613, "learning_rate": 3.954286639167686e-06, "loss": 0.3803, "step": 5643 }, { "epoch": 3.0513606055145073, "grad_norm": 0.3011425733566284, "learning_rate": 3.952440489742158e-06, "loss": 0.378, "step": 5644 }, { "epoch": 3.051901243467291, "grad_norm": 0.28585678339004517, "learning_rate": 3.9505944896757635e-06, "loss": 0.3779, "step": 5645 }, { "epoch": 3.0524418814200756, "grad_norm": 0.2828434407711029, "learning_rate": 3.948748639231704e-06, "loss": 0.3628, "step": 5646 }, { "epoch": 3.05298251937286, "grad_norm": 0.3154783248901367, "learning_rate": 3.946902938673158e-06, "loss": 0.375, "step": 5647 }, { "epoch": 3.0535231573256443, "grad_norm": 0.3142322897911072, "learning_rate": 3.945057388263282e-06, "loss": 0.3565, "step": 5648 }, { "epoch": 3.0540637952784286, "grad_norm": 0.30780693888664246, "learning_rate": 3.943211988265211e-06, "loss": 0.3578, "step": 5649 }, { "epoch": 3.054604433231213, "grad_norm": 0.30567556619644165, "learning_rate": 3.941366738942058e-06, "loss": 0.3922, "step": 5650 }, { "epoch": 3.055145071183997, "grad_norm": 0.3061469793319702, "learning_rate": 3.939521640556915e-06, "loss": 0.3899, "step": 5651 }, { "epoch": 3.0556857091367813, "grad_norm": 0.3018254041671753, "learning_rate": 3.937676693372857e-06, "loss": 0.3567, "step": 5652 }, { "epoch": 3.0562263470895656, "grad_norm": 0.3027184307575226, "learning_rate": 3.935831897652927e-06, "loss": 0.3733, "step": 5653 }, { "epoch": 3.05676698504235, "grad_norm": 0.29756540060043335, "learning_rate": 3.933987253660156e-06, "loss": 0.4123, "step": 5654 }, { "epoch": 3.0573076229951344, "grad_norm": 0.28348982334136963, "learning_rate": 3.932142761657549e-06, "loss": 0.3723, "step": 5655 }, { "epoch": 3.0578482609479187, "grad_norm": 0.2800956070423126, "learning_rate": 3.930298421908093e-06, "loss": 0.3664, "step": 5656 }, { "epoch": 3.0583888989007026, "grad_norm": 0.30221107602119446, "learning_rate": 3.928454234674748e-06, "loss": 0.3762, "step": 5657 }, { "epoch": 3.058929536853487, "grad_norm": 0.29003018140792847, "learning_rate": 3.926610200220453e-06, "loss": 0.3943, "step": 5658 }, { "epoch": 3.0594701748062714, "grad_norm": 0.27501335740089417, "learning_rate": 3.924766318808132e-06, "loss": 0.367, "step": 5659 }, { "epoch": 3.0600108127590557, "grad_norm": 0.2978987693786621, "learning_rate": 3.922922590700679e-06, "loss": 0.3734, "step": 5660 }, { "epoch": 3.06055145071184, "grad_norm": 0.295075386762619, "learning_rate": 3.92107901616097e-06, "loss": 0.3926, "step": 5661 }, { "epoch": 3.0610920886646245, "grad_norm": 0.3045724034309387, "learning_rate": 3.919235595451858e-06, "loss": 0.3673, "step": 5662 }, { "epoch": 3.0616327266174084, "grad_norm": 0.28967294096946716, "learning_rate": 3.917392328836177e-06, "loss": 0.3806, "step": 5663 }, { "epoch": 3.0621733645701927, "grad_norm": 0.3140982687473297, "learning_rate": 3.9155492165767336e-06, "loss": 0.3908, "step": 5664 }, { "epoch": 3.062714002522977, "grad_norm": 0.2868986427783966, "learning_rate": 3.913706258936317e-06, "loss": 0.3851, "step": 5665 }, { "epoch": 3.0632546404757615, "grad_norm": 0.28521233797073364, "learning_rate": 3.911863456177692e-06, "loss": 0.3717, "step": 5666 }, { "epoch": 3.063795278428546, "grad_norm": 0.3208141326904297, "learning_rate": 3.910020808563603e-06, "loss": 0.3994, "step": 5667 }, { "epoch": 3.06433591638133, "grad_norm": 0.3301263749599457, "learning_rate": 3.908178316356772e-06, "loss": 0.3587, "step": 5668 }, { "epoch": 3.064876554334114, "grad_norm": 0.2989030182361603, "learning_rate": 3.906335979819896e-06, "loss": 0.3682, "step": 5669 }, { "epoch": 3.0654171922868985, "grad_norm": 0.28814154863357544, "learning_rate": 3.904493799215652e-06, "loss": 0.3759, "step": 5670 }, { "epoch": 3.065957830239683, "grad_norm": 0.3023150861263275, "learning_rate": 3.902651774806696e-06, "loss": 0.3759, "step": 5671 }, { "epoch": 3.066498468192467, "grad_norm": 0.29809585213661194, "learning_rate": 3.90080990685566e-06, "loss": 0.3781, "step": 5672 }, { "epoch": 3.0670391061452515, "grad_norm": 0.28152433037757874, "learning_rate": 3.898968195625157e-06, "loss": 0.396, "step": 5673 }, { "epoch": 3.067579744098036, "grad_norm": 0.282197505235672, "learning_rate": 3.897126641377771e-06, "loss": 0.3671, "step": 5674 }, { "epoch": 3.06812038205082, "grad_norm": 0.301152765750885, "learning_rate": 3.895285244376068e-06, "loss": 0.3644, "step": 5675 }, { "epoch": 3.068661020003604, "grad_norm": 0.30550146102905273, "learning_rate": 3.893444004882593e-06, "loss": 0.3612, "step": 5676 }, { "epoch": 3.0692016579563886, "grad_norm": 0.280722051858902, "learning_rate": 3.8916029231598655e-06, "loss": 0.3675, "step": 5677 }, { "epoch": 3.069742295909173, "grad_norm": 0.3012804388999939, "learning_rate": 3.889761999470383e-06, "loss": 0.3529, "step": 5678 }, { "epoch": 3.0702829338619573, "grad_norm": 0.2924235463142395, "learning_rate": 3.887921234076621e-06, "loss": 0.3994, "step": 5679 }, { "epoch": 3.070823571814741, "grad_norm": 0.26820337772369385, "learning_rate": 3.886080627241034e-06, "loss": 0.3565, "step": 5680 }, { "epoch": 3.0713642097675256, "grad_norm": 0.3064356744289398, "learning_rate": 3.884240179226053e-06, "loss": 0.3973, "step": 5681 }, { "epoch": 3.07190484772031, "grad_norm": 0.3161008656024933, "learning_rate": 3.882399890294083e-06, "loss": 0.3618, "step": 5682 }, { "epoch": 3.0724454856730943, "grad_norm": 0.28452223539352417, "learning_rate": 3.880559760707508e-06, "loss": 0.3775, "step": 5683 }, { "epoch": 3.0729861236258786, "grad_norm": 0.29817309975624084, "learning_rate": 3.878719790728695e-06, "loss": 0.3659, "step": 5684 }, { "epoch": 3.073526761578663, "grad_norm": 0.2943325638771057, "learning_rate": 3.876879980619982e-06, "loss": 0.3801, "step": 5685 }, { "epoch": 3.074067399531447, "grad_norm": 0.2672979533672333, "learning_rate": 3.875040330643684e-06, "loss": 0.3571, "step": 5686 }, { "epoch": 3.0746080374842313, "grad_norm": 0.2672359347343445, "learning_rate": 3.873200841062095e-06, "loss": 0.3749, "step": 5687 }, { "epoch": 3.0751486754370156, "grad_norm": 0.3029986321926117, "learning_rate": 3.871361512137487e-06, "loss": 0.3842, "step": 5688 }, { "epoch": 3.0756893133898, "grad_norm": 0.30264905095100403, "learning_rate": 3.86952234413211e-06, "loss": 0.3935, "step": 5689 }, { "epoch": 3.0762299513425844, "grad_norm": 0.2739836275577545, "learning_rate": 3.8676833373081864e-06, "loss": 0.3643, "step": 5690 }, { "epoch": 3.0767705892953687, "grad_norm": 0.285391241312027, "learning_rate": 3.8658444919279195e-06, "loss": 0.3678, "step": 5691 }, { "epoch": 3.0773112272481526, "grad_norm": 0.2950049638748169, "learning_rate": 3.864005808253488e-06, "loss": 0.3632, "step": 5692 }, { "epoch": 3.077851865200937, "grad_norm": 0.26679345965385437, "learning_rate": 3.8621672865470505e-06, "loss": 0.3662, "step": 5693 }, { "epoch": 3.0783925031537214, "grad_norm": 0.2964516878128052, "learning_rate": 3.860328927070737e-06, "loss": 0.3809, "step": 5694 }, { "epoch": 3.0789331411065057, "grad_norm": 0.2846154272556305, "learning_rate": 3.8584907300866595e-06, "loss": 0.3832, "step": 5695 }, { "epoch": 3.07947377905929, "grad_norm": 0.28660333156585693, "learning_rate": 3.8566526958569025e-06, "loss": 0.3679, "step": 5696 }, { "epoch": 3.0800144170120745, "grad_norm": 0.2944757640361786, "learning_rate": 3.8548148246435345e-06, "loss": 0.371, "step": 5697 }, { "epoch": 3.0805550549648584, "grad_norm": 0.29508352279663086, "learning_rate": 3.8529771167085894e-06, "loss": 0.3701, "step": 5698 }, { "epoch": 3.0810956929176427, "grad_norm": 0.2802375257015228, "learning_rate": 3.851139572314088e-06, "loss": 0.4006, "step": 5699 }, { "epoch": 3.081636330870427, "grad_norm": 0.294456422328949, "learning_rate": 3.8493021917220225e-06, "loss": 0.3991, "step": 5700 }, { "epoch": 3.0821769688232115, "grad_norm": 0.31199342012405396, "learning_rate": 3.847464975194366e-06, "loss": 0.389, "step": 5701 }, { "epoch": 3.082717606775996, "grad_norm": 0.28415027260780334, "learning_rate": 3.845627922993062e-06, "loss": 0.3664, "step": 5702 }, { "epoch": 3.08325824472878, "grad_norm": 0.3120895326137543, "learning_rate": 3.843791035380036e-06, "loss": 0.3991, "step": 5703 }, { "epoch": 3.083798882681564, "grad_norm": 0.29247161746025085, "learning_rate": 3.841954312617188e-06, "loss": 0.3734, "step": 5704 }, { "epoch": 3.0843395206343485, "grad_norm": 0.28878575563430786, "learning_rate": 3.840117754966396e-06, "loss": 0.3824, "step": 5705 }, { "epoch": 3.084880158587133, "grad_norm": 0.2994827926158905, "learning_rate": 3.8382813626895095e-06, "loss": 0.3827, "step": 5706 }, { "epoch": 3.085420796539917, "grad_norm": 0.28692129254341125, "learning_rate": 3.83644513604836e-06, "loss": 0.3502, "step": 5707 }, { "epoch": 3.0859614344927015, "grad_norm": 0.2684290409088135, "learning_rate": 3.834609075304754e-06, "loss": 0.3819, "step": 5708 }, { "epoch": 3.0865020724454855, "grad_norm": 0.3084404766559601, "learning_rate": 3.832773180720475e-06, "loss": 0.3682, "step": 5709 }, { "epoch": 3.08704271039827, "grad_norm": 0.28687548637390137, "learning_rate": 3.8309374525572765e-06, "loss": 0.4114, "step": 5710 }, { "epoch": 3.087583348351054, "grad_norm": 0.2810053825378418, "learning_rate": 3.829101891076896e-06, "loss": 0.3751, "step": 5711 }, { "epoch": 3.0881239863038386, "grad_norm": 0.2930857241153717, "learning_rate": 3.827266496541047e-06, "loss": 0.3969, "step": 5712 }, { "epoch": 3.088664624256623, "grad_norm": 0.3972669243812561, "learning_rate": 3.825431269211416e-06, "loss": 0.4011, "step": 5713 }, { "epoch": 3.0892052622094073, "grad_norm": 0.28189221024513245, "learning_rate": 3.823596209349662e-06, "loss": 0.3759, "step": 5714 }, { "epoch": 3.089745900162191, "grad_norm": 0.2784714996814728, "learning_rate": 3.821761317217428e-06, "loss": 0.3675, "step": 5715 }, { "epoch": 3.0902865381149756, "grad_norm": 0.3123396337032318, "learning_rate": 3.819926593076329e-06, "loss": 0.3839, "step": 5716 }, { "epoch": 3.09082717606776, "grad_norm": 0.2724792957305908, "learning_rate": 3.818092037187959e-06, "loss": 0.3636, "step": 5717 }, { "epoch": 3.0913678140205443, "grad_norm": 0.27904802560806274, "learning_rate": 3.816257649813881e-06, "loss": 0.3567, "step": 5718 }, { "epoch": 3.0919084519733286, "grad_norm": 0.29857680201530457, "learning_rate": 3.8144234312156413e-06, "loss": 0.3951, "step": 5719 }, { "epoch": 3.092449089926113, "grad_norm": 0.29130810499191284, "learning_rate": 3.81258938165476e-06, "loss": 0.3747, "step": 5720 }, { "epoch": 3.092989727878897, "grad_norm": 0.2917025089263916, "learning_rate": 3.8107555013927334e-06, "loss": 0.3658, "step": 5721 }, { "epoch": 3.0935303658316813, "grad_norm": 0.2853851020336151, "learning_rate": 3.8089217906910274e-06, "loss": 0.3988, "step": 5722 }, { "epoch": 3.0940710037844656, "grad_norm": 0.2891107499599457, "learning_rate": 3.8070882498110946e-06, "loss": 0.3775, "step": 5723 }, { "epoch": 3.09461164173725, "grad_norm": 0.2973926067352295, "learning_rate": 3.805254879014356e-06, "loss": 0.3951, "step": 5724 }, { "epoch": 3.0951522796900344, "grad_norm": 0.2941666841506958, "learning_rate": 3.803421678562213e-06, "loss": 0.3773, "step": 5725 }, { "epoch": 3.0956929176428187, "grad_norm": 0.2840958833694458, "learning_rate": 3.8015886487160347e-06, "loss": 0.363, "step": 5726 }, { "epoch": 3.0962335555956026, "grad_norm": 0.30144110321998596, "learning_rate": 3.799755789737175e-06, "loss": 0.3756, "step": 5727 }, { "epoch": 3.096774193548387, "grad_norm": 0.28447234630584717, "learning_rate": 3.7979231018869578e-06, "loss": 0.3637, "step": 5728 }, { "epoch": 3.0973148315011714, "grad_norm": 0.29409652948379517, "learning_rate": 3.7960905854266865e-06, "loss": 0.4036, "step": 5729 }, { "epoch": 3.0978554694539557, "grad_norm": 0.2620141804218292, "learning_rate": 3.794258240617636e-06, "loss": 0.3884, "step": 5730 }, { "epoch": 3.09839610740674, "grad_norm": 0.2847660183906555, "learning_rate": 3.792426067721059e-06, "loss": 0.3819, "step": 5731 }, { "epoch": 3.0989367453595245, "grad_norm": 0.31244954466819763, "learning_rate": 3.790594066998184e-06, "loss": 0.3752, "step": 5732 }, { "epoch": 3.0994773833123084, "grad_norm": 0.28319838643074036, "learning_rate": 3.788762238710215e-06, "loss": 0.3779, "step": 5733 }, { "epoch": 3.1000180212650927, "grad_norm": 0.29199621081352234, "learning_rate": 3.786930583118329e-06, "loss": 0.3779, "step": 5734 }, { "epoch": 3.100558659217877, "grad_norm": 0.3003768026828766, "learning_rate": 3.7850991004836813e-06, "loss": 0.376, "step": 5735 }, { "epoch": 3.1010992971706615, "grad_norm": 0.3079928457736969, "learning_rate": 3.7832677910674005e-06, "loss": 0.3879, "step": 5736 }, { "epoch": 3.101639935123446, "grad_norm": 0.29220160841941833, "learning_rate": 3.781436655130592e-06, "loss": 0.3818, "step": 5737 }, { "epoch": 3.1021805730762297, "grad_norm": 0.32593148946762085, "learning_rate": 3.7796056929343384e-06, "loss": 0.3809, "step": 5738 }, { "epoch": 3.102721211029014, "grad_norm": 0.2868834137916565, "learning_rate": 3.77777490473969e-06, "loss": 0.3781, "step": 5739 }, { "epoch": 3.1032618489817985, "grad_norm": 0.2728521227836609, "learning_rate": 3.7759442908076786e-06, "loss": 0.3779, "step": 5740 }, { "epoch": 3.103802486934583, "grad_norm": 0.2837616503238678, "learning_rate": 3.774113851399312e-06, "loss": 0.3562, "step": 5741 }, { "epoch": 3.104343124887367, "grad_norm": 0.2798760235309601, "learning_rate": 3.772283586775572e-06, "loss": 0.3721, "step": 5742 }, { "epoch": 3.1048837628401516, "grad_norm": 0.27324506640434265, "learning_rate": 3.77045349719741e-06, "loss": 0.3916, "step": 5743 }, { "epoch": 3.1054244007929355, "grad_norm": 0.2995244264602661, "learning_rate": 3.7686235829257587e-06, "loss": 0.383, "step": 5744 }, { "epoch": 3.10596503874572, "grad_norm": 0.29339131712913513, "learning_rate": 3.7667938442215247e-06, "loss": 0.3611, "step": 5745 }, { "epoch": 3.106505676698504, "grad_norm": 0.2979373335838318, "learning_rate": 3.7649642813455893e-06, "loss": 0.3562, "step": 5746 }, { "epoch": 3.1070463146512886, "grad_norm": 0.3166741728782654, "learning_rate": 3.7631348945588064e-06, "loss": 0.3613, "step": 5747 }, { "epoch": 3.107586952604073, "grad_norm": 0.29804494976997375, "learning_rate": 3.761305684122008e-06, "loss": 0.3868, "step": 5748 }, { "epoch": 3.1081275905568573, "grad_norm": 0.2938231825828552, "learning_rate": 3.759476650295999e-06, "loss": 0.3866, "step": 5749 }, { "epoch": 3.108668228509641, "grad_norm": 0.3072431981563568, "learning_rate": 3.7576477933415612e-06, "loss": 0.373, "step": 5750 }, { "epoch": 3.1092088664624256, "grad_norm": 0.3000989556312561, "learning_rate": 3.755819113519447e-06, "loss": 0.4068, "step": 5751 }, { "epoch": 3.10974950441521, "grad_norm": 0.30347174406051636, "learning_rate": 3.7539906110903885e-06, "loss": 0.3819, "step": 5752 }, { "epoch": 3.1102901423679943, "grad_norm": 0.2933652698993683, "learning_rate": 3.7521622863150887e-06, "loss": 0.3845, "step": 5753 }, { "epoch": 3.1108307803207786, "grad_norm": 0.3027891218662262, "learning_rate": 3.7503341394542305e-06, "loss": 0.3929, "step": 5754 }, { "epoch": 3.111371418273563, "grad_norm": 0.31118011474609375, "learning_rate": 3.748506170768462e-06, "loss": 0.3432, "step": 5755 }, { "epoch": 3.111912056226347, "grad_norm": 0.29796212911605835, "learning_rate": 3.7466783805184146e-06, "loss": 0.3837, "step": 5756 }, { "epoch": 3.1124526941791313, "grad_norm": 0.26798829436302185, "learning_rate": 3.744850768964692e-06, "loss": 0.3675, "step": 5757 }, { "epoch": 3.1129933321319156, "grad_norm": 0.27877551317214966, "learning_rate": 3.743023336367872e-06, "loss": 0.379, "step": 5758 }, { "epoch": 3.1135339700847, "grad_norm": 0.3038302958011627, "learning_rate": 3.7411960829885042e-06, "loss": 0.3816, "step": 5759 }, { "epoch": 3.1140746080374844, "grad_norm": 0.3026898205280304, "learning_rate": 3.739369009087117e-06, "loss": 0.3847, "step": 5760 }, { "epoch": 3.1146152459902687, "grad_norm": 0.29449212551116943, "learning_rate": 3.7375421149242102e-06, "loss": 0.3868, "step": 5761 }, { "epoch": 3.1151558839430527, "grad_norm": 0.2972925007343292, "learning_rate": 3.7357154007602612e-06, "loss": 0.3549, "step": 5762 }, { "epoch": 3.115696521895837, "grad_norm": 0.3053343892097473, "learning_rate": 3.733888866855717e-06, "loss": 0.3998, "step": 5763 }, { "epoch": 3.1162371598486214, "grad_norm": 0.28995615243911743, "learning_rate": 3.732062513471002e-06, "loss": 0.4114, "step": 5764 }, { "epoch": 3.1167777978014057, "grad_norm": 0.2779656946659088, "learning_rate": 3.7302363408665155e-06, "loss": 0.3538, "step": 5765 }, { "epoch": 3.11731843575419, "grad_norm": 0.3097536861896515, "learning_rate": 3.7284103493026312e-06, "loss": 0.3837, "step": 5766 }, { "epoch": 3.117859073706974, "grad_norm": 0.2779688239097595, "learning_rate": 3.7265845390396915e-06, "loss": 0.3603, "step": 5767 }, { "epoch": 3.1183997116597584, "grad_norm": 0.2756264805793762, "learning_rate": 3.72475891033802e-06, "loss": 0.3803, "step": 5768 }, { "epoch": 3.1189403496125427, "grad_norm": 0.27909591794013977, "learning_rate": 3.7229334634579093e-06, "loss": 0.378, "step": 5769 }, { "epoch": 3.119480987565327, "grad_norm": 0.27548834681510925, "learning_rate": 3.721108198659633e-06, "loss": 0.3862, "step": 5770 }, { "epoch": 3.1200216255181115, "grad_norm": 0.28045108914375305, "learning_rate": 3.7192831162034292e-06, "loss": 0.3906, "step": 5771 }, { "epoch": 3.120562263470896, "grad_norm": 0.28375497460365295, "learning_rate": 3.7174582163495167e-06, "loss": 0.3755, "step": 5772 }, { "epoch": 3.1211029014236797, "grad_norm": 0.26914161443710327, "learning_rate": 3.7156334993580854e-06, "loss": 0.3632, "step": 5773 }, { "epoch": 3.121643539376464, "grad_norm": 0.2826704978942871, "learning_rate": 3.7138089654893027e-06, "loss": 0.3828, "step": 5774 }, { "epoch": 3.1221841773292485, "grad_norm": 0.2840440273284912, "learning_rate": 3.7119846150033047e-06, "loss": 0.3661, "step": 5775 }, { "epoch": 3.122724815282033, "grad_norm": 0.30267229676246643, "learning_rate": 3.710160448160205e-06, "loss": 0.3846, "step": 5776 }, { "epoch": 3.123265453234817, "grad_norm": 0.3090881109237671, "learning_rate": 3.7083364652200902e-06, "loss": 0.3697, "step": 5777 }, { "epoch": 3.1238060911876016, "grad_norm": 0.2867450416088104, "learning_rate": 3.706512666443022e-06, "loss": 0.3787, "step": 5778 }, { "epoch": 3.1243467291403855, "grad_norm": 0.3012264370918274, "learning_rate": 3.7046890520890295e-06, "loss": 0.3548, "step": 5779 }, { "epoch": 3.12488736709317, "grad_norm": 0.28534871339797974, "learning_rate": 3.702865622418125e-06, "loss": 0.3618, "step": 5780 }, { "epoch": 3.125428005045954, "grad_norm": 0.2811122238636017, "learning_rate": 3.701042377690287e-06, "loss": 0.3762, "step": 5781 }, { "epoch": 3.1259686429987386, "grad_norm": 0.2708125114440918, "learning_rate": 3.6992193181654747e-06, "loss": 0.3701, "step": 5782 }, { "epoch": 3.126509280951523, "grad_norm": 0.2966213524341583, "learning_rate": 3.697396444103611e-06, "loss": 0.3905, "step": 5783 }, { "epoch": 3.1270499189043073, "grad_norm": 0.3003375828266144, "learning_rate": 3.695573755764601e-06, "loss": 0.3591, "step": 5784 }, { "epoch": 3.127590556857091, "grad_norm": 0.295940637588501, "learning_rate": 3.693751253408319e-06, "loss": 0.3811, "step": 5785 }, { "epoch": 3.1281311948098756, "grad_norm": 0.2861578166484833, "learning_rate": 3.6919289372946167e-06, "loss": 0.3657, "step": 5786 }, { "epoch": 3.12867183276266, "grad_norm": 0.3032360374927521, "learning_rate": 3.6901068076833136e-06, "loss": 0.3778, "step": 5787 }, { "epoch": 3.1292124707154443, "grad_norm": 0.2989215850830078, "learning_rate": 3.688284864834207e-06, "loss": 0.361, "step": 5788 }, { "epoch": 3.1297531086682286, "grad_norm": 0.31468385457992554, "learning_rate": 3.6864631090070656e-06, "loss": 0.3696, "step": 5789 }, { "epoch": 3.130293746621013, "grad_norm": 0.3105284571647644, "learning_rate": 3.6846415404616344e-06, "loss": 0.3659, "step": 5790 }, { "epoch": 3.130834384573797, "grad_norm": 0.2899421751499176, "learning_rate": 3.6828201594576253e-06, "loss": 0.3778, "step": 5791 }, { "epoch": 3.1313750225265813, "grad_norm": 0.3059687912464142, "learning_rate": 3.6809989662547306e-06, "loss": 0.388, "step": 5792 }, { "epoch": 3.1319156604793656, "grad_norm": 0.29407617449760437, "learning_rate": 3.679177961112611e-06, "loss": 0.3737, "step": 5793 }, { "epoch": 3.13245629843215, "grad_norm": 0.3061511218547821, "learning_rate": 3.6773571442909055e-06, "loss": 0.4101, "step": 5794 }, { "epoch": 3.1329969363849344, "grad_norm": 0.29926207661628723, "learning_rate": 3.6755365160492187e-06, "loss": 0.388, "step": 5795 }, { "epoch": 3.1335375743377183, "grad_norm": 0.31455034017562866, "learning_rate": 3.673716076647133e-06, "loss": 0.3842, "step": 5796 }, { "epoch": 3.1340782122905027, "grad_norm": 0.2747032642364502, "learning_rate": 3.6718958263442052e-06, "loss": 0.3595, "step": 5797 }, { "epoch": 3.134618850243287, "grad_norm": 0.2993357479572296, "learning_rate": 3.670075765399963e-06, "loss": 0.3736, "step": 5798 }, { "epoch": 3.1351594881960714, "grad_norm": 0.29780399799346924, "learning_rate": 3.6682558940739053e-06, "loss": 0.3625, "step": 5799 }, { "epoch": 3.1357001261488557, "grad_norm": 0.29055240750312805, "learning_rate": 3.6664362126255087e-06, "loss": 0.3645, "step": 5800 }, { "epoch": 3.13624076410164, "grad_norm": 0.27076342701911926, "learning_rate": 3.6646167213142187e-06, "loss": 0.3953, "step": 5801 }, { "epoch": 3.136781402054424, "grad_norm": 0.2839217185974121, "learning_rate": 3.6627974203994555e-06, "loss": 0.3804, "step": 5802 }, { "epoch": 3.1373220400072084, "grad_norm": 0.30638810992240906, "learning_rate": 3.660978310140612e-06, "loss": 0.373, "step": 5803 }, { "epoch": 3.1378626779599927, "grad_norm": 0.2920389175415039, "learning_rate": 3.659159390797053e-06, "loss": 0.3669, "step": 5804 }, { "epoch": 3.138403315912777, "grad_norm": 0.29665204882621765, "learning_rate": 3.657340662628116e-06, "loss": 0.3778, "step": 5805 }, { "epoch": 3.1389439538655615, "grad_norm": 0.31666800379753113, "learning_rate": 3.6555221258931137e-06, "loss": 0.3883, "step": 5806 }, { "epoch": 3.139484591818346, "grad_norm": 0.2841288149356842, "learning_rate": 3.653703780851331e-06, "loss": 0.3593, "step": 5807 }, { "epoch": 3.1400252297711297, "grad_norm": 0.2947778105735779, "learning_rate": 3.651885627762019e-06, "loss": 0.3727, "step": 5808 }, { "epoch": 3.140565867723914, "grad_norm": 0.32678353786468506, "learning_rate": 3.650067666884411e-06, "loss": 0.3583, "step": 5809 }, { "epoch": 3.1411065056766985, "grad_norm": 0.2785697281360626, "learning_rate": 3.648249898477707e-06, "loss": 0.3766, "step": 5810 }, { "epoch": 3.141647143629483, "grad_norm": 0.2932497560977936, "learning_rate": 3.6464323228010845e-06, "loss": 0.369, "step": 5811 }, { "epoch": 3.142187781582267, "grad_norm": 0.323253333568573, "learning_rate": 3.6446149401136847e-06, "loss": 0.3988, "step": 5812 }, { "epoch": 3.1427284195350516, "grad_norm": 0.27933064103126526, "learning_rate": 3.6427977506746293e-06, "loss": 0.3695, "step": 5813 }, { "epoch": 3.1432690574878355, "grad_norm": 0.293927937746048, "learning_rate": 3.64098075474301e-06, "loss": 0.3955, "step": 5814 }, { "epoch": 3.14380969544062, "grad_norm": 0.30490684509277344, "learning_rate": 3.6391639525778915e-06, "loss": 0.3935, "step": 5815 }, { "epoch": 3.144350333393404, "grad_norm": 0.2889328598976135, "learning_rate": 3.6373473444383083e-06, "loss": 0.3622, "step": 5816 }, { "epoch": 3.1448909713461886, "grad_norm": 0.2749510705471039, "learning_rate": 3.6355309305832698e-06, "loss": 0.3764, "step": 5817 }, { "epoch": 3.145431609298973, "grad_norm": 0.2956562936306, "learning_rate": 3.6337147112717575e-06, "loss": 0.3717, "step": 5818 }, { "epoch": 3.1459722472517573, "grad_norm": 0.29461973905563354, "learning_rate": 3.631898686762726e-06, "loss": 0.3658, "step": 5819 }, { "epoch": 3.146512885204541, "grad_norm": 0.2857483923435211, "learning_rate": 3.6300828573150977e-06, "loss": 0.3576, "step": 5820 }, { "epoch": 3.1470535231573256, "grad_norm": 0.29898321628570557, "learning_rate": 3.6282672231877714e-06, "loss": 0.3526, "step": 5821 }, { "epoch": 3.14759416111011, "grad_norm": 0.3071466088294983, "learning_rate": 3.6264517846396174e-06, "loss": 0.3775, "step": 5822 }, { "epoch": 3.1481347990628943, "grad_norm": 0.30894705653190613, "learning_rate": 3.6246365419294805e-06, "loss": 0.3945, "step": 5823 }, { "epoch": 3.1486754370156786, "grad_norm": 0.31548750400543213, "learning_rate": 3.622821495316169e-06, "loss": 0.3726, "step": 5824 }, { "epoch": 3.1492160749684626, "grad_norm": 0.29202592372894287, "learning_rate": 3.621006645058472e-06, "loss": 0.3875, "step": 5825 }, { "epoch": 3.149756712921247, "grad_norm": 0.2818850874900818, "learning_rate": 3.619191991415146e-06, "loss": 0.3945, "step": 5826 }, { "epoch": 3.1502973508740313, "grad_norm": 0.2867623269557953, "learning_rate": 3.6173775346449253e-06, "loss": 0.3725, "step": 5827 }, { "epoch": 3.1508379888268156, "grad_norm": 0.3019481301307678, "learning_rate": 3.6155632750065074e-06, "loss": 0.3734, "step": 5828 }, { "epoch": 3.1513786267796, "grad_norm": 0.282046377658844, "learning_rate": 3.6137492127585667e-06, "loss": 0.3599, "step": 5829 }, { "epoch": 3.1519192647323844, "grad_norm": 0.29858216643333435, "learning_rate": 3.6119353481597504e-06, "loss": 0.3739, "step": 5830 }, { "epoch": 3.1524599026851683, "grad_norm": 0.30497896671295166, "learning_rate": 3.610121681468676e-06, "loss": 0.3975, "step": 5831 }, { "epoch": 3.1530005406379527, "grad_norm": 0.3183683753013611, "learning_rate": 3.608308212943932e-06, "loss": 0.3812, "step": 5832 }, { "epoch": 3.153541178590737, "grad_norm": 0.29859915375709534, "learning_rate": 3.6064949428440787e-06, "loss": 0.3697, "step": 5833 }, { "epoch": 3.1540818165435214, "grad_norm": 0.3005802035331726, "learning_rate": 3.6046818714276512e-06, "loss": 0.3741, "step": 5834 }, { "epoch": 3.1546224544963057, "grad_norm": 0.27503278851509094, "learning_rate": 3.6028689989531533e-06, "loss": 0.3855, "step": 5835 }, { "epoch": 3.15516309244909, "grad_norm": 0.2953731417655945, "learning_rate": 3.6010563256790587e-06, "loss": 0.3773, "step": 5836 }, { "epoch": 3.155703730401874, "grad_norm": 0.28386104106903076, "learning_rate": 3.599243851863816e-06, "loss": 0.3872, "step": 5837 }, { "epoch": 3.1562443683546584, "grad_norm": 0.275812029838562, "learning_rate": 3.5974315777658463e-06, "loss": 0.3592, "step": 5838 }, { "epoch": 3.1567850063074427, "grad_norm": 0.2900319993495941, "learning_rate": 3.595619503643541e-06, "loss": 0.3715, "step": 5839 }, { "epoch": 3.157325644260227, "grad_norm": 0.2936703562736511, "learning_rate": 3.593807629755258e-06, "loss": 0.3966, "step": 5840 }, { "epoch": 3.1578662822130115, "grad_norm": 0.27260488271713257, "learning_rate": 3.591995956359335e-06, "loss": 0.3958, "step": 5841 }, { "epoch": 3.158406920165796, "grad_norm": 0.2719656825065613, "learning_rate": 3.5901844837140743e-06, "loss": 0.363, "step": 5842 }, { "epoch": 3.1589475581185797, "grad_norm": 0.29166799783706665, "learning_rate": 3.588373212077756e-06, "loss": 0.3669, "step": 5843 }, { "epoch": 3.159488196071364, "grad_norm": 0.29750218987464905, "learning_rate": 3.586562141708624e-06, "loss": 0.396, "step": 5844 }, { "epoch": 3.1600288340241485, "grad_norm": 0.2957509458065033, "learning_rate": 3.584751272864899e-06, "loss": 0.3881, "step": 5845 }, { "epoch": 3.160569471976933, "grad_norm": 0.28142228722572327, "learning_rate": 3.582940605804771e-06, "loss": 0.3775, "step": 5846 }, { "epoch": 3.161110109929717, "grad_norm": 0.3182094991207123, "learning_rate": 3.581130140786404e-06, "loss": 0.3683, "step": 5847 }, { "epoch": 3.1616507478825016, "grad_norm": 0.32549846172332764, "learning_rate": 3.579319878067927e-06, "loss": 0.3822, "step": 5848 }, { "epoch": 3.1621913858352855, "grad_norm": 0.28233757615089417, "learning_rate": 3.5775098179074476e-06, "loss": 0.3824, "step": 5849 }, { "epoch": 3.16273202378807, "grad_norm": 0.2875683903694153, "learning_rate": 3.575699960563038e-06, "loss": 0.3833, "step": 5850 }, { "epoch": 3.163272661740854, "grad_norm": 0.28352779150009155, "learning_rate": 3.5738903062927477e-06, "loss": 0.3551, "step": 5851 }, { "epoch": 3.1638132996936386, "grad_norm": 0.27940163016319275, "learning_rate": 3.5720808553545894e-06, "loss": 0.3766, "step": 5852 }, { "epoch": 3.164353937646423, "grad_norm": 0.2728763818740845, "learning_rate": 3.5702716080065546e-06, "loss": 0.3512, "step": 5853 }, { "epoch": 3.164894575599207, "grad_norm": 0.2832372188568115, "learning_rate": 3.568462564506602e-06, "loss": 0.3751, "step": 5854 }, { "epoch": 3.165435213551991, "grad_norm": 0.2728310525417328, "learning_rate": 3.566653725112661e-06, "loss": 0.3546, "step": 5855 }, { "epoch": 3.1659758515047756, "grad_norm": 0.26566481590270996, "learning_rate": 3.564845090082633e-06, "loss": 0.3927, "step": 5856 }, { "epoch": 3.16651648945756, "grad_norm": 0.31351128220558167, "learning_rate": 3.56303665967439e-06, "loss": 0.392, "step": 5857 }, { "epoch": 3.1670571274103443, "grad_norm": 0.3172876536846161, "learning_rate": 3.5612284341457743e-06, "loss": 0.3841, "step": 5858 }, { "epoch": 3.1675977653631286, "grad_norm": 0.28172582387924194, "learning_rate": 3.5594204137546005e-06, "loss": 0.3869, "step": 5859 }, { "epoch": 3.168138403315913, "grad_norm": 0.2776242792606354, "learning_rate": 3.557612598758652e-06, "loss": 0.3898, "step": 5860 }, { "epoch": 3.168679041268697, "grad_norm": 0.2968648672103882, "learning_rate": 3.5558049894156836e-06, "loss": 0.3938, "step": 5861 }, { "epoch": 3.1692196792214813, "grad_norm": 0.3051604926586151, "learning_rate": 3.5539975859834216e-06, "loss": 0.3765, "step": 5862 }, { "epoch": 3.1697603171742657, "grad_norm": 0.296024888753891, "learning_rate": 3.5521903887195637e-06, "loss": 0.3815, "step": 5863 }, { "epoch": 3.17030095512705, "grad_norm": 0.2797988951206207, "learning_rate": 3.5503833978817733e-06, "loss": 0.3605, "step": 5864 }, { "epoch": 3.1708415930798344, "grad_norm": 0.2781578004360199, "learning_rate": 3.5485766137276894e-06, "loss": 0.3932, "step": 5865 }, { "epoch": 3.1713822310326183, "grad_norm": 0.27553510665893555, "learning_rate": 3.546770036514919e-06, "loss": 0.3753, "step": 5866 }, { "epoch": 3.1719228689854027, "grad_norm": 0.2807295024394989, "learning_rate": 3.5449636665010433e-06, "loss": 0.3821, "step": 5867 }, { "epoch": 3.172463506938187, "grad_norm": 0.29495182633399963, "learning_rate": 3.543157503943613e-06, "loss": 0.3627, "step": 5868 }, { "epoch": 3.1730041448909714, "grad_norm": 0.2861941158771515, "learning_rate": 3.541351549100141e-06, "loss": 0.3734, "step": 5869 }, { "epoch": 3.1735447828437557, "grad_norm": 0.32167086005210876, "learning_rate": 3.5395458022281205e-06, "loss": 0.3863, "step": 5870 }, { "epoch": 3.17408542079654, "grad_norm": 0.2828007936477661, "learning_rate": 3.5377402635850123e-06, "loss": 0.3767, "step": 5871 }, { "epoch": 3.174626058749324, "grad_norm": 0.31658950448036194, "learning_rate": 3.5359349334282466e-06, "loss": 0.358, "step": 5872 }, { "epoch": 3.1751666967021084, "grad_norm": 0.2916194498538971, "learning_rate": 3.5341298120152224e-06, "loss": 0.3965, "step": 5873 }, { "epoch": 3.1757073346548927, "grad_norm": 0.30083373188972473, "learning_rate": 3.532324899603312e-06, "loss": 0.3728, "step": 5874 }, { "epoch": 3.176247972607677, "grad_norm": 0.31124067306518555, "learning_rate": 3.5305201964498557e-06, "loss": 0.3932, "step": 5875 }, { "epoch": 3.1767886105604615, "grad_norm": 0.2781218886375427, "learning_rate": 3.5287157028121676e-06, "loss": 0.3709, "step": 5876 }, { "epoch": 3.177329248513246, "grad_norm": 0.3084629774093628, "learning_rate": 3.5269114189475255e-06, "loss": 0.4255, "step": 5877 }, { "epoch": 3.1778698864660297, "grad_norm": 0.31070947647094727, "learning_rate": 3.5251073451131824e-06, "loss": 0.3753, "step": 5878 }, { "epoch": 3.178410524418814, "grad_norm": 0.3050396144390106, "learning_rate": 3.52330348156636e-06, "loss": 0.3829, "step": 5879 }, { "epoch": 3.1789511623715985, "grad_norm": 0.29054388403892517, "learning_rate": 3.5214998285642517e-06, "loss": 0.3592, "step": 5880 }, { "epoch": 3.179491800324383, "grad_norm": 0.2981637418270111, "learning_rate": 3.5196963863640147e-06, "loss": 0.3663, "step": 5881 }, { "epoch": 3.180032438277167, "grad_norm": 0.2945329248905182, "learning_rate": 3.5178931552227837e-06, "loss": 0.3823, "step": 5882 }, { "epoch": 3.180573076229951, "grad_norm": 0.31379660964012146, "learning_rate": 3.516090135397659e-06, "loss": 0.3866, "step": 5883 }, { "epoch": 3.1811137141827355, "grad_norm": 0.28403589129447937, "learning_rate": 3.5142873271457132e-06, "loss": 0.3963, "step": 5884 }, { "epoch": 3.18165435213552, "grad_norm": 0.2573620676994324, "learning_rate": 3.5124847307239863e-06, "loss": 0.3902, "step": 5885 }, { "epoch": 3.182194990088304, "grad_norm": 0.26139920949935913, "learning_rate": 3.5106823463894884e-06, "loss": 0.3825, "step": 5886 }, { "epoch": 3.1827356280410886, "grad_norm": 0.290723979473114, "learning_rate": 3.508880174399202e-06, "loss": 0.3845, "step": 5887 }, { "epoch": 3.183276265993873, "grad_norm": 0.31544339656829834, "learning_rate": 3.507078215010077e-06, "loss": 0.3835, "step": 5888 }, { "epoch": 3.1838169039466573, "grad_norm": 0.27778783440589905, "learning_rate": 3.505276468479033e-06, "loss": 0.383, "step": 5889 }, { "epoch": 3.184357541899441, "grad_norm": 0.2908482551574707, "learning_rate": 3.5034749350629593e-06, "loss": 0.3796, "step": 5890 }, { "epoch": 3.1848981798522256, "grad_norm": 0.3057006895542145, "learning_rate": 3.501673615018717e-06, "loss": 0.3382, "step": 5891 }, { "epoch": 3.18543881780501, "grad_norm": 0.29850953817367554, "learning_rate": 3.4998725086031353e-06, "loss": 0.3731, "step": 5892 }, { "epoch": 3.1859794557577943, "grad_norm": 0.3245144486427307, "learning_rate": 3.49807161607301e-06, "loss": 0.3903, "step": 5893 }, { "epoch": 3.1865200937105786, "grad_norm": 0.2751791477203369, "learning_rate": 3.496270937685109e-06, "loss": 0.3835, "step": 5894 }, { "epoch": 3.1870607316633626, "grad_norm": 0.3082123398780823, "learning_rate": 3.4944704736961722e-06, "loss": 0.3621, "step": 5895 }, { "epoch": 3.187601369616147, "grad_norm": 0.3176043927669525, "learning_rate": 3.4926702243629075e-06, "loss": 0.3404, "step": 5896 }, { "epoch": 3.1881420075689313, "grad_norm": 0.26966592669487, "learning_rate": 3.490870189941987e-06, "loss": 0.3726, "step": 5897 }, { "epoch": 3.1886826455217157, "grad_norm": 0.29819998145103455, "learning_rate": 3.4890703706900596e-06, "loss": 0.4085, "step": 5898 }, { "epoch": 3.1892232834745, "grad_norm": 0.2900310158729553, "learning_rate": 3.4872707668637387e-06, "loss": 0.3579, "step": 5899 }, { "epoch": 3.1897639214272844, "grad_norm": 0.3027432858943939, "learning_rate": 3.4854713787196105e-06, "loss": 0.3992, "step": 5900 }, { "epoch": 3.1903045593800683, "grad_norm": 0.2828446328639984, "learning_rate": 3.483672206514226e-06, "loss": 0.3548, "step": 5901 }, { "epoch": 3.1908451973328527, "grad_norm": 0.2956213057041168, "learning_rate": 3.4818732505041085e-06, "loss": 0.3749, "step": 5902 }, { "epoch": 3.191385835285637, "grad_norm": 0.30007094144821167, "learning_rate": 3.48007451094575e-06, "loss": 0.3817, "step": 5903 }, { "epoch": 3.1919264732384214, "grad_norm": 0.28123414516448975, "learning_rate": 3.478275988095615e-06, "loss": 0.3866, "step": 5904 }, { "epoch": 3.1924671111912057, "grad_norm": 0.31397712230682373, "learning_rate": 3.4764776822101275e-06, "loss": 0.3592, "step": 5905 }, { "epoch": 3.19300774914399, "grad_norm": 0.28460144996643066, "learning_rate": 3.47467959354569e-06, "loss": 0.3806, "step": 5906 }, { "epoch": 3.193548387096774, "grad_norm": 0.3189926743507385, "learning_rate": 3.472881722358671e-06, "loss": 0.3946, "step": 5907 }, { "epoch": 3.1940890250495584, "grad_norm": 0.2741137444972992, "learning_rate": 3.471084068905409e-06, "loss": 0.3796, "step": 5908 }, { "epoch": 3.1946296630023427, "grad_norm": 0.2794220745563507, "learning_rate": 3.4692866334422063e-06, "loss": 0.3621, "step": 5909 }, { "epoch": 3.195170300955127, "grad_norm": 0.2943795323371887, "learning_rate": 3.4674894162253404e-06, "loss": 0.3867, "step": 5910 }, { "epoch": 3.1957109389079115, "grad_norm": 0.27503079175949097, "learning_rate": 3.4656924175110544e-06, "loss": 0.3585, "step": 5911 }, { "epoch": 3.1962515768606954, "grad_norm": 0.3133552074432373, "learning_rate": 3.463895637555563e-06, "loss": 0.3775, "step": 5912 }, { "epoch": 3.1967922148134797, "grad_norm": 0.2888537049293518, "learning_rate": 3.4620990766150453e-06, "loss": 0.3638, "step": 5913 }, { "epoch": 3.197332852766264, "grad_norm": 0.2793637812137604, "learning_rate": 3.460302734945653e-06, "loss": 0.3757, "step": 5914 }, { "epoch": 3.1978734907190485, "grad_norm": 0.2881743907928467, "learning_rate": 3.458506612803505e-06, "loss": 0.3795, "step": 5915 }, { "epoch": 3.198414128671833, "grad_norm": 0.2902590334415436, "learning_rate": 3.4567107104446906e-06, "loss": 0.375, "step": 5916 }, { "epoch": 3.198954766624617, "grad_norm": 0.3024289608001709, "learning_rate": 3.4549150281252635e-06, "loss": 0.3933, "step": 5917 }, { "epoch": 3.1994954045774016, "grad_norm": 0.29701340198516846, "learning_rate": 3.4531195661012506e-06, "loss": 0.3992, "step": 5918 }, { "epoch": 3.2000360425301855, "grad_norm": 0.28090935945510864, "learning_rate": 3.451324324628645e-06, "loss": 0.3847, "step": 5919 }, { "epoch": 3.20057668048297, "grad_norm": 0.29121866822242737, "learning_rate": 3.4495293039634113e-06, "loss": 0.3854, "step": 5920 }, { "epoch": 3.201117318435754, "grad_norm": 0.2985329031944275, "learning_rate": 3.4477345043614762e-06, "loss": 0.3944, "step": 5921 }, { "epoch": 3.2016579563885386, "grad_norm": 0.2983471751213074, "learning_rate": 3.445939926078741e-06, "loss": 0.3774, "step": 5922 }, { "epoch": 3.202198594341323, "grad_norm": 0.30166491866111755, "learning_rate": 3.444145569371073e-06, "loss": 0.4013, "step": 5923 }, { "epoch": 3.202739232294107, "grad_norm": 0.2873641550540924, "learning_rate": 3.442351434494311e-06, "loss": 0.3625, "step": 5924 }, { "epoch": 3.203279870246891, "grad_norm": 0.29263344407081604, "learning_rate": 3.440557521704256e-06, "loss": 0.3699, "step": 5925 }, { "epoch": 3.2038205081996756, "grad_norm": 0.30022427439689636, "learning_rate": 3.4387638312566817e-06, "loss": 0.3887, "step": 5926 }, { "epoch": 3.20436114615246, "grad_norm": 0.2768106162548065, "learning_rate": 3.4369703634073293e-06, "loss": 0.373, "step": 5927 }, { "epoch": 3.2049017841052443, "grad_norm": 0.30571630597114563, "learning_rate": 3.4351771184119104e-06, "loss": 0.3851, "step": 5928 }, { "epoch": 3.2054424220580287, "grad_norm": 0.29890185594558716, "learning_rate": 3.433384096526099e-06, "loss": 0.3643, "step": 5929 }, { "epoch": 3.2059830600108126, "grad_norm": 0.346992164850235, "learning_rate": 3.4315912980055433e-06, "loss": 0.387, "step": 5930 }, { "epoch": 3.206523697963597, "grad_norm": 0.25692644715309143, "learning_rate": 3.429798723105856e-06, "loss": 0.3648, "step": 5931 }, { "epoch": 3.2070643359163813, "grad_norm": 0.27923527359962463, "learning_rate": 3.4280063720826203e-06, "loss": 0.3656, "step": 5932 }, { "epoch": 3.2076049738691657, "grad_norm": 0.36623239517211914, "learning_rate": 3.4262142451913865e-06, "loss": 0.3856, "step": 5933 }, { "epoch": 3.20814561182195, "grad_norm": 0.31749051809310913, "learning_rate": 3.424422342687671e-06, "loss": 0.364, "step": 5934 }, { "epoch": 3.2086862497747344, "grad_norm": 0.28651195764541626, "learning_rate": 3.4226306648269616e-06, "loss": 0.3564, "step": 5935 }, { "epoch": 3.2092268877275183, "grad_norm": 0.29628702998161316, "learning_rate": 3.420839211864712e-06, "loss": 0.3576, "step": 5936 }, { "epoch": 3.2097675256803027, "grad_norm": 0.289095401763916, "learning_rate": 3.419047984056346e-06, "loss": 0.3795, "step": 5937 }, { "epoch": 3.210308163633087, "grad_norm": 0.2880508601665497, "learning_rate": 3.417256981657251e-06, "loss": 0.3869, "step": 5938 }, { "epoch": 3.2108488015858714, "grad_norm": 0.30657216906547546, "learning_rate": 3.4154662049227848e-06, "loss": 0.3809, "step": 5939 }, { "epoch": 3.2113894395386557, "grad_norm": 0.2976471483707428, "learning_rate": 3.413675654108275e-06, "loss": 0.373, "step": 5940 }, { "epoch": 3.21193007749144, "grad_norm": 0.27903828024864197, "learning_rate": 3.4118853294690148e-06, "loss": 0.3802, "step": 5941 }, { "epoch": 3.212470715444224, "grad_norm": 0.2901027500629425, "learning_rate": 3.410095231260263e-06, "loss": 0.3923, "step": 5942 }, { "epoch": 3.2130113533970084, "grad_norm": 0.2960743308067322, "learning_rate": 3.4083053597372517e-06, "loss": 0.3554, "step": 5943 }, { "epoch": 3.2135519913497927, "grad_norm": 0.2988324463367462, "learning_rate": 3.406515715155176e-06, "loss": 0.3729, "step": 5944 }, { "epoch": 3.214092629302577, "grad_norm": 0.2973558306694031, "learning_rate": 3.4047262977692014e-06, "loss": 0.3838, "step": 5945 }, { "epoch": 3.2146332672553615, "grad_norm": 0.2790868282318115, "learning_rate": 3.4029371078344576e-06, "loss": 0.3736, "step": 5946 }, { "epoch": 3.215173905208146, "grad_norm": 0.30476298928260803, "learning_rate": 3.4011481456060457e-06, "loss": 0.3829, "step": 5947 }, { "epoch": 3.2157145431609298, "grad_norm": 0.29460281133651733, "learning_rate": 3.3993594113390316e-06, "loss": 0.4076, "step": 5948 }, { "epoch": 3.216255181113714, "grad_norm": 0.2972364127635956, "learning_rate": 3.397570905288453e-06, "loss": 0.377, "step": 5949 }, { "epoch": 3.2167958190664985, "grad_norm": 0.2778155207633972, "learning_rate": 3.3957826277093074e-06, "loss": 0.3802, "step": 5950 }, { "epoch": 3.217336457019283, "grad_norm": 0.26630061864852905, "learning_rate": 3.3939945788565664e-06, "loss": 0.3561, "step": 5951 }, { "epoch": 3.217877094972067, "grad_norm": 0.28079941868782043, "learning_rate": 3.392206758985165e-06, "loss": 0.3567, "step": 5952 }, { "epoch": 3.218417732924851, "grad_norm": 0.2812572121620178, "learning_rate": 3.390419168350012e-06, "loss": 0.3904, "step": 5953 }, { "epoch": 3.2189583708776355, "grad_norm": 0.3071571886539459, "learning_rate": 3.3886318072059733e-06, "loss": 0.3763, "step": 5954 }, { "epoch": 3.21949900883042, "grad_norm": 0.28525233268737793, "learning_rate": 3.3868446758078897e-06, "loss": 0.3895, "step": 5955 }, { "epoch": 3.220039646783204, "grad_norm": 0.2900713384151459, "learning_rate": 3.3850577744105682e-06, "loss": 0.38, "step": 5956 }, { "epoch": 3.2205802847359886, "grad_norm": 0.2950497269630432, "learning_rate": 3.383271103268782e-06, "loss": 0.3707, "step": 5957 }, { "epoch": 3.221120922688773, "grad_norm": 0.27890312671661377, "learning_rate": 3.3814846626372693e-06, "loss": 0.3807, "step": 5958 }, { "epoch": 3.221661560641557, "grad_norm": 0.3092116415500641, "learning_rate": 3.379698452770739e-06, "loss": 0.3523, "step": 5959 }, { "epoch": 3.222202198594341, "grad_norm": 0.2828221619129181, "learning_rate": 3.3779124739238657e-06, "loss": 0.3718, "step": 5960 }, { "epoch": 3.2227428365471256, "grad_norm": 0.2936552166938782, "learning_rate": 3.376126726351292e-06, "loss": 0.3725, "step": 5961 }, { "epoch": 3.22328347449991, "grad_norm": 0.307039350271225, "learning_rate": 3.3743412103076235e-06, "loss": 0.3881, "step": 5962 }, { "epoch": 3.2238241124526943, "grad_norm": 0.29379069805145264, "learning_rate": 3.3725559260474378e-06, "loss": 0.3699, "step": 5963 }, { "epoch": 3.2243647504054787, "grad_norm": 0.30865761637687683, "learning_rate": 3.3707708738252774e-06, "loss": 0.3749, "step": 5964 }, { "epoch": 3.2249053883582626, "grad_norm": 0.29781603813171387, "learning_rate": 3.3689860538956547e-06, "loss": 0.3577, "step": 5965 }, { "epoch": 3.225446026311047, "grad_norm": 0.2828763723373413, "learning_rate": 3.3672014665130404e-06, "loss": 0.3856, "step": 5966 }, { "epoch": 3.2259866642638313, "grad_norm": 0.29489558935165405, "learning_rate": 3.3654171119318814e-06, "loss": 0.3837, "step": 5967 }, { "epoch": 3.2265273022166157, "grad_norm": 0.2914468050003052, "learning_rate": 3.3636329904065863e-06, "loss": 0.3527, "step": 5968 }, { "epoch": 3.2270679401694, "grad_norm": 0.27329519391059875, "learning_rate": 3.3618491021915334e-06, "loss": 0.3659, "step": 5969 }, { "epoch": 3.2276085781221844, "grad_norm": 0.29091718792915344, "learning_rate": 3.3600654475410643e-06, "loss": 0.3832, "step": 5970 }, { "epoch": 3.2281492160749683, "grad_norm": 0.33960139751434326, "learning_rate": 3.358282026709491e-06, "loss": 0.3927, "step": 5971 }, { "epoch": 3.2286898540277527, "grad_norm": 0.32778263092041016, "learning_rate": 3.356498839951089e-06, "loss": 0.3613, "step": 5972 }, { "epoch": 3.229230491980537, "grad_norm": 0.285845011472702, "learning_rate": 3.354715887520104e-06, "loss": 0.3569, "step": 5973 }, { "epoch": 3.2297711299333214, "grad_norm": 0.2771826386451721, "learning_rate": 3.3529331696707434e-06, "loss": 0.4028, "step": 5974 }, { "epoch": 3.2303117678861057, "grad_norm": 0.32097023725509644, "learning_rate": 3.351150686657185e-06, "loss": 0.3813, "step": 5975 }, { "epoch": 3.23085240583889, "grad_norm": 0.3273971676826477, "learning_rate": 3.349368438733572e-06, "loss": 0.3857, "step": 5976 }, { "epoch": 3.231393043791674, "grad_norm": 0.28685319423675537, "learning_rate": 3.347586426154017e-06, "loss": 0.4034, "step": 5977 }, { "epoch": 3.2319336817444584, "grad_norm": 0.2998667061328888, "learning_rate": 3.3458046491725915e-06, "loss": 0.4105, "step": 5978 }, { "epoch": 3.2324743196972427, "grad_norm": 0.2772698402404785, "learning_rate": 3.344023108043339e-06, "loss": 0.377, "step": 5979 }, { "epoch": 3.233014957650027, "grad_norm": 0.2977933883666992, "learning_rate": 3.3422418030202696e-06, "loss": 0.3806, "step": 5980 }, { "epoch": 3.2335555956028115, "grad_norm": 0.311739057302475, "learning_rate": 3.340460734357359e-06, "loss": 0.3769, "step": 5981 }, { "epoch": 3.2340962335555954, "grad_norm": 0.28269141912460327, "learning_rate": 3.338679902308547e-06, "loss": 0.3553, "step": 5982 }, { "epoch": 3.2346368715083798, "grad_norm": 0.302408367395401, "learning_rate": 3.3368993071277426e-06, "loss": 0.4017, "step": 5983 }, { "epoch": 3.235177509461164, "grad_norm": 0.3035907745361328, "learning_rate": 3.33511894906882e-06, "loss": 0.3826, "step": 5984 }, { "epoch": 3.2357181474139485, "grad_norm": 0.2984257638454437, "learning_rate": 3.3333388283856195e-06, "loss": 0.3719, "step": 5985 }, { "epoch": 3.236258785366733, "grad_norm": 0.28472962975502014, "learning_rate": 3.331558945331946e-06, "loss": 0.3894, "step": 5986 }, { "epoch": 3.236799423319517, "grad_norm": 0.303316205739975, "learning_rate": 3.329779300161573e-06, "loss": 0.3767, "step": 5987 }, { "epoch": 3.237340061272301, "grad_norm": 0.2893165051937103, "learning_rate": 3.3279998931282388e-06, "loss": 0.3614, "step": 5988 }, { "epoch": 3.2378806992250855, "grad_norm": 0.32114818692207336, "learning_rate": 3.326220724485651e-06, "loss": 0.3804, "step": 5989 }, { "epoch": 3.23842133717787, "grad_norm": 0.30687180161476135, "learning_rate": 3.324441794487475e-06, "loss": 0.3905, "step": 5990 }, { "epoch": 3.238961975130654, "grad_norm": 0.303936630487442, "learning_rate": 3.322663103387349e-06, "loss": 0.3863, "step": 5991 }, { "epoch": 3.2395026130834386, "grad_norm": 0.28427422046661377, "learning_rate": 3.3208846514388776e-06, "loss": 0.3948, "step": 5992 }, { "epoch": 3.240043251036223, "grad_norm": 0.2725846767425537, "learning_rate": 3.3191064388956306e-06, "loss": 0.3514, "step": 5993 }, { "epoch": 3.240583888989007, "grad_norm": 0.2954155504703522, "learning_rate": 3.317328466011137e-06, "loss": 0.3685, "step": 5994 }, { "epoch": 3.241124526941791, "grad_norm": 0.3024853467941284, "learning_rate": 3.3155507330389004e-06, "loss": 0.3712, "step": 5995 }, { "epoch": 3.2416651648945756, "grad_norm": 0.27961960434913635, "learning_rate": 3.3137732402323863e-06, "loss": 0.377, "step": 5996 }, { "epoch": 3.24220580284736, "grad_norm": 0.2657700777053833, "learning_rate": 3.3119959878450257e-06, "loss": 0.3724, "step": 5997 }, { "epoch": 3.2427464408001443, "grad_norm": 0.2811744511127472, "learning_rate": 3.3102189761302185e-06, "loss": 0.3878, "step": 5998 }, { "epoch": 3.2432870787529287, "grad_norm": 0.3270666301250458, "learning_rate": 3.3084422053413247e-06, "loss": 0.3962, "step": 5999 }, { "epoch": 3.2438277167057126, "grad_norm": 0.26997464895248413, "learning_rate": 3.306665675731674e-06, "loss": 0.3983, "step": 6000 }, { "epoch": 3.244368354658497, "grad_norm": 0.283130943775177, "learning_rate": 3.3048893875545606e-06, "loss": 0.3924, "step": 6001 }, { "epoch": 3.2449089926112813, "grad_norm": 0.29643136262893677, "learning_rate": 3.3031133410632465e-06, "loss": 0.3707, "step": 6002 }, { "epoch": 3.2454496305640657, "grad_norm": 0.27885952591896057, "learning_rate": 3.3013375365109547e-06, "loss": 0.3666, "step": 6003 }, { "epoch": 3.24599026851685, "grad_norm": 0.2765432298183441, "learning_rate": 3.2995619741508765e-06, "loss": 0.3773, "step": 6004 }, { "epoch": 3.2465309064696344, "grad_norm": 0.2908070385456085, "learning_rate": 3.297786654236169e-06, "loss": 0.356, "step": 6005 }, { "epoch": 3.2470715444224183, "grad_norm": 0.27661949396133423, "learning_rate": 3.2960115770199563e-06, "loss": 0.3748, "step": 6006 }, { "epoch": 3.2476121823752027, "grad_norm": 0.31730109453201294, "learning_rate": 3.294236742755322e-06, "loss": 0.3699, "step": 6007 }, { "epoch": 3.248152820327987, "grad_norm": 0.3065454363822937, "learning_rate": 3.2924621516953195e-06, "loss": 0.3852, "step": 6008 }, { "epoch": 3.2486934582807714, "grad_norm": 0.29832738637924194, "learning_rate": 3.2906878040929664e-06, "loss": 0.3576, "step": 6009 }, { "epoch": 3.2492340962335557, "grad_norm": 0.2941710948944092, "learning_rate": 3.28891370020125e-06, "loss": 0.356, "step": 6010 }, { "epoch": 3.2497747341863397, "grad_norm": 0.29035940766334534, "learning_rate": 3.2871398402731134e-06, "loss": 0.3717, "step": 6011 }, { "epoch": 3.250315372139124, "grad_norm": 0.3080737292766571, "learning_rate": 3.285366224561474e-06, "loss": 0.4003, "step": 6012 }, { "epoch": 3.2508560100919084, "grad_norm": 0.28215858340263367, "learning_rate": 3.2835928533192086e-06, "loss": 0.3624, "step": 6013 }, { "epoch": 3.2513966480446927, "grad_norm": 0.28796327114105225, "learning_rate": 3.2818197267991636e-06, "loss": 0.3644, "step": 6014 }, { "epoch": 3.251937285997477, "grad_norm": 0.2933882772922516, "learning_rate": 3.280046845254145e-06, "loss": 0.3685, "step": 6015 }, { "epoch": 3.2524779239502615, "grad_norm": 0.3135727643966675, "learning_rate": 3.278274208936929e-06, "loss": 0.3678, "step": 6016 }, { "epoch": 3.253018561903046, "grad_norm": 0.31397953629493713, "learning_rate": 3.276501818100255e-06, "loss": 0.3591, "step": 6017 }, { "epoch": 3.2535591998558298, "grad_norm": 0.30858469009399414, "learning_rate": 3.274729672996829e-06, "loss": 0.37, "step": 6018 }, { "epoch": 3.254099837808614, "grad_norm": 0.308296263217926, "learning_rate": 3.272957773879315e-06, "loss": 0.3973, "step": 6019 }, { "epoch": 3.2546404757613985, "grad_norm": 0.30975034832954407, "learning_rate": 3.2711861210003503e-06, "loss": 0.3685, "step": 6020 }, { "epoch": 3.255181113714183, "grad_norm": 0.31152427196502686, "learning_rate": 3.269414714612534e-06, "loss": 0.3967, "step": 6021 }, { "epoch": 3.255721751666967, "grad_norm": 0.27662646770477295, "learning_rate": 3.267643554968433e-06, "loss": 0.3754, "step": 6022 }, { "epoch": 3.256262389619751, "grad_norm": 0.28363901376724243, "learning_rate": 3.265872642320571e-06, "loss": 0.3753, "step": 6023 }, { "epoch": 3.2568030275725355, "grad_norm": 0.29461127519607544, "learning_rate": 3.2641019769214433e-06, "loss": 0.3595, "step": 6024 }, { "epoch": 3.25734366552532, "grad_norm": 0.30085429549217224, "learning_rate": 3.2623315590235076e-06, "loss": 0.3663, "step": 6025 }, { "epoch": 3.257884303478104, "grad_norm": 0.28633686900138855, "learning_rate": 3.260561388879189e-06, "loss": 0.3913, "step": 6026 }, { "epoch": 3.2584249414308886, "grad_norm": 0.30552682280540466, "learning_rate": 3.258791466740873e-06, "loss": 0.3871, "step": 6027 }, { "epoch": 3.258965579383673, "grad_norm": 0.2848190367221832, "learning_rate": 3.2570217928609126e-06, "loss": 0.4061, "step": 6028 }, { "epoch": 3.259506217336457, "grad_norm": 0.28970232605934143, "learning_rate": 3.255252367491625e-06, "loss": 0.3773, "step": 6029 }, { "epoch": 3.260046855289241, "grad_norm": 0.3319888114929199, "learning_rate": 3.2534831908852914e-06, "loss": 0.382, "step": 6030 }, { "epoch": 3.2605874932420256, "grad_norm": 0.2861935496330261, "learning_rate": 3.251714263294158e-06, "loss": 0.371, "step": 6031 }, { "epoch": 3.26112813119481, "grad_norm": 0.29303890466690063, "learning_rate": 3.2499455849704344e-06, "loss": 0.3969, "step": 6032 }, { "epoch": 3.2616687691475943, "grad_norm": 0.2763252258300781, "learning_rate": 3.2481771561662965e-06, "loss": 0.3829, "step": 6033 }, { "epoch": 3.2622094071003787, "grad_norm": 0.3113039433956146, "learning_rate": 3.2464089771338856e-06, "loss": 0.3892, "step": 6034 }, { "epoch": 3.2627500450531626, "grad_norm": 0.281794935464859, "learning_rate": 3.244641048125301e-06, "loss": 0.3798, "step": 6035 }, { "epoch": 3.263290683005947, "grad_norm": 0.28851622343063354, "learning_rate": 3.242873369392613e-06, "loss": 0.3862, "step": 6036 }, { "epoch": 3.2638313209587313, "grad_norm": 0.27319225668907166, "learning_rate": 3.241105941187854e-06, "loss": 0.3702, "step": 6037 }, { "epoch": 3.2643719589115157, "grad_norm": 0.29888665676116943, "learning_rate": 3.2393387637630223e-06, "loss": 0.3803, "step": 6038 }, { "epoch": 3.2649125968643, "grad_norm": 0.2770290672779083, "learning_rate": 3.237571837370076e-06, "loss": 0.363, "step": 6039 }, { "epoch": 3.265453234817084, "grad_norm": 0.28202933073043823, "learning_rate": 3.235805162260942e-06, "loss": 0.3668, "step": 6040 }, { "epoch": 3.2659938727698683, "grad_norm": 0.29129648208618164, "learning_rate": 3.2340387386875095e-06, "loss": 0.384, "step": 6041 }, { "epoch": 3.2665345107226527, "grad_norm": 0.27913179993629456, "learning_rate": 3.232272566901632e-06, "loss": 0.373, "step": 6042 }, { "epoch": 3.267075148675437, "grad_norm": 0.3138071298599243, "learning_rate": 3.230506647155126e-06, "loss": 0.39, "step": 6043 }, { "epoch": 3.2676157866282214, "grad_norm": 0.2757951319217682, "learning_rate": 3.228740979699774e-06, "loss": 0.3786, "step": 6044 }, { "epoch": 3.2681564245810057, "grad_norm": 0.31086909770965576, "learning_rate": 3.226975564787322e-06, "loss": 0.3838, "step": 6045 }, { "epoch": 3.26869706253379, "grad_norm": 0.29600006341934204, "learning_rate": 3.2252104026694807e-06, "loss": 0.3657, "step": 6046 }, { "epoch": 3.269237700486574, "grad_norm": 0.28875118494033813, "learning_rate": 3.223445493597921e-06, "loss": 0.3905, "step": 6047 }, { "epoch": 3.2697783384393584, "grad_norm": 0.28248870372772217, "learning_rate": 3.2216808378242802e-06, "loss": 0.3522, "step": 6048 }, { "epoch": 3.2703189763921428, "grad_norm": 0.2825542390346527, "learning_rate": 3.219916435600162e-06, "loss": 0.3729, "step": 6049 }, { "epoch": 3.270859614344927, "grad_norm": 0.2938670814037323, "learning_rate": 3.218152287177133e-06, "loss": 0.3855, "step": 6050 }, { "epoch": 3.2714002522977115, "grad_norm": 0.28813305497169495, "learning_rate": 3.216388392806719e-06, "loss": 0.3824, "step": 6051 }, { "epoch": 3.2719408902504954, "grad_norm": 0.26874107122421265, "learning_rate": 3.214624752740413e-06, "loss": 0.3724, "step": 6052 }, { "epoch": 3.2724815282032798, "grad_norm": 0.2823548913002014, "learning_rate": 3.2128613672296737e-06, "loss": 0.378, "step": 6053 }, { "epoch": 3.273022166156064, "grad_norm": 0.2911010682582855, "learning_rate": 3.2110982365259206e-06, "loss": 0.3757, "step": 6054 }, { "epoch": 3.2735628041088485, "grad_norm": 0.28966856002807617, "learning_rate": 3.2093353608805368e-06, "loss": 0.3711, "step": 6055 }, { "epoch": 3.274103442061633, "grad_norm": 0.28305694460868835, "learning_rate": 3.2075727405448707e-06, "loss": 0.3608, "step": 6056 }, { "epoch": 3.274644080014417, "grad_norm": 0.2980901300907135, "learning_rate": 3.205810375770233e-06, "loss": 0.3692, "step": 6057 }, { "epoch": 3.275184717967201, "grad_norm": 0.3049103319644928, "learning_rate": 3.204048266807901e-06, "loss": 0.3812, "step": 6058 }, { "epoch": 3.2757253559199855, "grad_norm": 0.279361367225647, "learning_rate": 3.202286413909108e-06, "loss": 0.4078, "step": 6059 }, { "epoch": 3.27626599387277, "grad_norm": 0.2576105296611786, "learning_rate": 3.2005248173250593e-06, "loss": 0.3704, "step": 6060 }, { "epoch": 3.276806631825554, "grad_norm": 0.29323723912239075, "learning_rate": 3.19876347730692e-06, "loss": 0.3791, "step": 6061 }, { "epoch": 3.2773472697783386, "grad_norm": 0.28759148716926575, "learning_rate": 3.197002394105818e-06, "loss": 0.3611, "step": 6062 }, { "epoch": 3.277887907731123, "grad_norm": 0.2939707040786743, "learning_rate": 3.195241567972848e-06, "loss": 0.392, "step": 6063 }, { "epoch": 3.278428545683907, "grad_norm": 0.2800416052341461, "learning_rate": 3.19348099915906e-06, "loss": 0.3885, "step": 6064 }, { "epoch": 3.278969183636691, "grad_norm": 0.2766898274421692, "learning_rate": 3.1917206879154762e-06, "loss": 0.3834, "step": 6065 }, { "epoch": 3.2795098215894756, "grad_norm": 0.28272634744644165, "learning_rate": 3.189960634493078e-06, "loss": 0.3836, "step": 6066 }, { "epoch": 3.28005045954226, "grad_norm": 0.2898666560649872, "learning_rate": 3.1882008391428123e-06, "loss": 0.3881, "step": 6067 }, { "epoch": 3.2805910974950443, "grad_norm": 0.2773001790046692, "learning_rate": 3.1864413021155842e-06, "loss": 0.3578, "step": 6068 }, { "epoch": 3.281131735447828, "grad_norm": 0.28953588008880615, "learning_rate": 3.184682023662268e-06, "loss": 0.3784, "step": 6069 }, { "epoch": 3.2816723734006126, "grad_norm": 0.311117023229599, "learning_rate": 3.1829230040336967e-06, "loss": 0.3625, "step": 6070 }, { "epoch": 3.282213011353397, "grad_norm": 0.272461473941803, "learning_rate": 3.18116424348067e-06, "loss": 0.3865, "step": 6071 }, { "epoch": 3.2827536493061813, "grad_norm": 0.2678007185459137, "learning_rate": 3.179405742253947e-06, "loss": 0.3823, "step": 6072 }, { "epoch": 3.2832942872589657, "grad_norm": 0.2916713356971741, "learning_rate": 3.177647500604252e-06, "loss": 0.3852, "step": 6073 }, { "epoch": 3.28383492521175, "grad_norm": 0.2645060420036316, "learning_rate": 3.1758895187822725e-06, "loss": 0.3974, "step": 6074 }, { "epoch": 3.2843755631645344, "grad_norm": 0.2962478697299957, "learning_rate": 3.1741317970386597e-06, "loss": 0.3865, "step": 6075 }, { "epoch": 3.2849162011173183, "grad_norm": 0.272658109664917, "learning_rate": 3.1723743356240232e-06, "loss": 0.3695, "step": 6076 }, { "epoch": 3.2854568390701027, "grad_norm": 0.28344181180000305, "learning_rate": 3.170617134788939e-06, "loss": 0.3651, "step": 6077 }, { "epoch": 3.285997477022887, "grad_norm": 0.29079508781433105, "learning_rate": 3.1688601947839477e-06, "loss": 0.396, "step": 6078 }, { "epoch": 3.2865381149756714, "grad_norm": 0.27090904116630554, "learning_rate": 3.167103515859552e-06, "loss": 0.3734, "step": 6079 }, { "epoch": 3.2870787529284557, "grad_norm": 0.2851702570915222, "learning_rate": 3.1653470982662114e-06, "loss": 0.3583, "step": 6080 }, { "epoch": 3.2876193908812397, "grad_norm": 0.2843466103076935, "learning_rate": 3.1635909422543556e-06, "loss": 0.3719, "step": 6081 }, { "epoch": 3.288160028834024, "grad_norm": 0.3082217574119568, "learning_rate": 3.1618350480743733e-06, "loss": 0.3906, "step": 6082 }, { "epoch": 3.2887006667868084, "grad_norm": 0.286344051361084, "learning_rate": 3.1600794159766184e-06, "loss": 0.3894, "step": 6083 }, { "epoch": 3.2892413047395928, "grad_norm": 0.2779954671859741, "learning_rate": 3.158324046211403e-06, "loss": 0.3602, "step": 6084 }, { "epoch": 3.289781942692377, "grad_norm": 0.31051650643348694, "learning_rate": 3.1565689390290067e-06, "loss": 0.3653, "step": 6085 }, { "epoch": 3.2903225806451615, "grad_norm": 0.2746830880641937, "learning_rate": 3.154814094679668e-06, "loss": 0.3704, "step": 6086 }, { "epoch": 3.2908632185979454, "grad_norm": 0.2721113860607147, "learning_rate": 3.153059513413591e-06, "loss": 0.3835, "step": 6087 }, { "epoch": 3.2914038565507298, "grad_norm": 0.2715761363506317, "learning_rate": 3.151305195480939e-06, "loss": 0.3557, "step": 6088 }, { "epoch": 3.291944494503514, "grad_norm": 0.2994410991668701, "learning_rate": 3.1495511411318402e-06, "loss": 0.3719, "step": 6089 }, { "epoch": 3.2924851324562985, "grad_norm": 0.28619715571403503, "learning_rate": 3.147797350616385e-06, "loss": 0.3666, "step": 6090 }, { "epoch": 3.293025770409083, "grad_norm": 0.2863656282424927, "learning_rate": 3.146043824184627e-06, "loss": 0.3739, "step": 6091 }, { "epoch": 3.293566408361867, "grad_norm": 0.2747470736503601, "learning_rate": 3.1442905620865773e-06, "loss": 0.3659, "step": 6092 }, { "epoch": 3.294107046314651, "grad_norm": 0.2745727002620697, "learning_rate": 3.1425375645722147e-06, "loss": 0.3479, "step": 6093 }, { "epoch": 3.2946476842674355, "grad_norm": 0.29741722345352173, "learning_rate": 3.140784831891478e-06, "loss": 0.3618, "step": 6094 }, { "epoch": 3.29518832222022, "grad_norm": 0.3148142695426941, "learning_rate": 3.139032364294271e-06, "loss": 0.3738, "step": 6095 }, { "epoch": 3.295728960173004, "grad_norm": 0.2951778173446655, "learning_rate": 3.1372801620304532e-06, "loss": 0.3362, "step": 6096 }, { "epoch": 3.2962695981257886, "grad_norm": 0.2876696288585663, "learning_rate": 3.135528225349853e-06, "loss": 0.3621, "step": 6097 }, { "epoch": 3.2968102360785725, "grad_norm": 0.29712897539138794, "learning_rate": 3.133776554502258e-06, "loss": 0.3946, "step": 6098 }, { "epoch": 3.297350874031357, "grad_norm": 0.29079294204711914, "learning_rate": 3.1320251497374187e-06, "loss": 0.371, "step": 6099 }, { "epoch": 3.297891511984141, "grad_norm": 0.2989571988582611, "learning_rate": 3.130274011305047e-06, "loss": 0.3702, "step": 6100 }, { "epoch": 3.2984321499369256, "grad_norm": 0.2969949245452881, "learning_rate": 3.1285231394548156e-06, "loss": 0.3692, "step": 6101 }, { "epoch": 3.29897278788971, "grad_norm": 0.28161007165908813, "learning_rate": 3.126772534436362e-06, "loss": 0.3687, "step": 6102 }, { "epoch": 3.2995134258424943, "grad_norm": 0.29006463289260864, "learning_rate": 3.1250221964992855e-06, "loss": 0.3796, "step": 6103 }, { "epoch": 3.3000540637952787, "grad_norm": 0.30350908637046814, "learning_rate": 3.123272125893143e-06, "loss": 0.3919, "step": 6104 }, { "epoch": 3.3005947017480626, "grad_norm": 0.28305956721305847, "learning_rate": 3.1215223228674587e-06, "loss": 0.3848, "step": 6105 }, { "epoch": 3.301135339700847, "grad_norm": 0.2784910202026367, "learning_rate": 3.1197727876717143e-06, "loss": 0.3696, "step": 6106 }, { "epoch": 3.3016759776536313, "grad_norm": 0.2670733332633972, "learning_rate": 3.11802352055536e-06, "loss": 0.3553, "step": 6107 }, { "epoch": 3.3022166156064157, "grad_norm": 0.2794773280620575, "learning_rate": 3.1162745217677976e-06, "loss": 0.3848, "step": 6108 }, { "epoch": 3.3027572535592, "grad_norm": 0.2845619022846222, "learning_rate": 3.114525791558398e-06, "loss": 0.3938, "step": 6109 }, { "epoch": 3.303297891511984, "grad_norm": 0.271085649728775, "learning_rate": 3.1127773301764935e-06, "loss": 0.3858, "step": 6110 }, { "epoch": 3.3038385294647683, "grad_norm": 0.284370481967926, "learning_rate": 3.1110291378713763e-06, "loss": 0.3717, "step": 6111 }, { "epoch": 3.3043791674175527, "grad_norm": 0.255216121673584, "learning_rate": 3.109281214892298e-06, "loss": 0.4005, "step": 6112 }, { "epoch": 3.304919805370337, "grad_norm": 0.2828678488731384, "learning_rate": 3.1075335614884767e-06, "loss": 0.3606, "step": 6113 }, { "epoch": 3.3054604433231214, "grad_norm": 0.27755460143089294, "learning_rate": 3.105786177909088e-06, "loss": 0.366, "step": 6114 }, { "epoch": 3.3060010812759058, "grad_norm": 0.2801077365875244, "learning_rate": 3.1040390644032746e-06, "loss": 0.3757, "step": 6115 }, { "epoch": 3.3065417192286897, "grad_norm": 0.2761895954608917, "learning_rate": 3.1022922212201307e-06, "loss": 0.3744, "step": 6116 }, { "epoch": 3.307082357181474, "grad_norm": 0.27318400144577026, "learning_rate": 3.1005456486087217e-06, "loss": 0.3691, "step": 6117 }, { "epoch": 3.3076229951342584, "grad_norm": 0.30086633563041687, "learning_rate": 3.0987993468180706e-06, "loss": 0.3989, "step": 6118 }, { "epoch": 3.3081636330870428, "grad_norm": 0.34112340211868286, "learning_rate": 3.097053316097163e-06, "loss": 0.4067, "step": 6119 }, { "epoch": 3.308704271039827, "grad_norm": 0.34706053137779236, "learning_rate": 3.095307556694942e-06, "loss": 0.3729, "step": 6120 }, { "epoch": 3.3092449089926115, "grad_norm": 0.27965566515922546, "learning_rate": 3.0935620688603156e-06, "loss": 0.3688, "step": 6121 }, { "epoch": 3.3097855469453954, "grad_norm": 0.2663114666938782, "learning_rate": 3.091816852842153e-06, "loss": 0.3584, "step": 6122 }, { "epoch": 3.3103261848981798, "grad_norm": 0.2900125980377197, "learning_rate": 3.090071908889285e-06, "loss": 0.395, "step": 6123 }, { "epoch": 3.310866822850964, "grad_norm": 0.2807668447494507, "learning_rate": 3.0883272372505004e-06, "loss": 0.3697, "step": 6124 }, { "epoch": 3.3114074608037485, "grad_norm": 0.28236493468284607, "learning_rate": 3.0865828381745515e-06, "loss": 0.3662, "step": 6125 }, { "epoch": 3.311948098756533, "grad_norm": 0.29553312063217163, "learning_rate": 3.084838711910153e-06, "loss": 0.378, "step": 6126 }, { "epoch": 3.3124887367093168, "grad_norm": 0.28542473912239075, "learning_rate": 3.083094858705978e-06, "loss": 0.3646, "step": 6127 }, { "epoch": 3.313029374662101, "grad_norm": 0.28612953424453735, "learning_rate": 3.081351278810664e-06, "loss": 0.4114, "step": 6128 }, { "epoch": 3.3135700126148855, "grad_norm": 0.289182186126709, "learning_rate": 3.0796079724728047e-06, "loss": 0.3918, "step": 6129 }, { "epoch": 3.31411065056767, "grad_norm": 0.2852880656719208, "learning_rate": 3.077864939940959e-06, "loss": 0.4152, "step": 6130 }, { "epoch": 3.314651288520454, "grad_norm": 0.2754029333591461, "learning_rate": 3.076122181463644e-06, "loss": 0.3622, "step": 6131 }, { "epoch": 3.3151919264732386, "grad_norm": 0.29560017585754395, "learning_rate": 3.0743796972893436e-06, "loss": 0.4024, "step": 6132 }, { "epoch": 3.315732564426023, "grad_norm": 0.28034523129463196, "learning_rate": 3.0726374876664923e-06, "loss": 0.3622, "step": 6133 }, { "epoch": 3.316273202378807, "grad_norm": 0.2680700123310089, "learning_rate": 3.0708955528434933e-06, "loss": 0.3837, "step": 6134 }, { "epoch": 3.316813840331591, "grad_norm": 0.2749667167663574, "learning_rate": 3.0691538930687076e-06, "loss": 0.3667, "step": 6135 }, { "epoch": 3.3173544782843756, "grad_norm": 0.2794339954853058, "learning_rate": 3.0674125085904617e-06, "loss": 0.3869, "step": 6136 }, { "epoch": 3.31789511623716, "grad_norm": 0.3205060064792633, "learning_rate": 3.065671399657035e-06, "loss": 0.3634, "step": 6137 }, { "epoch": 3.3184357541899443, "grad_norm": 0.2961784899234772, "learning_rate": 3.0639305665166724e-06, "loss": 0.3605, "step": 6138 }, { "epoch": 3.318976392142728, "grad_norm": 0.31420665979385376, "learning_rate": 3.0621900094175794e-06, "loss": 0.3919, "step": 6139 }, { "epoch": 3.3195170300955126, "grad_norm": 0.27574458718299866, "learning_rate": 3.0604497286079227e-06, "loss": 0.3796, "step": 6140 }, { "epoch": 3.320057668048297, "grad_norm": 0.26589855551719666, "learning_rate": 3.0587097243358254e-06, "loss": 0.3874, "step": 6141 }, { "epoch": 3.3205983060010813, "grad_norm": 0.2985301911830902, "learning_rate": 3.0569699968493764e-06, "loss": 0.3748, "step": 6142 }, { "epoch": 3.3211389439538657, "grad_norm": 0.27616506814956665, "learning_rate": 3.0552305463966224e-06, "loss": 0.3767, "step": 6143 }, { "epoch": 3.32167958190665, "grad_norm": 0.2935973107814789, "learning_rate": 3.053491373225573e-06, "loss": 0.3864, "step": 6144 }, { "epoch": 3.322220219859434, "grad_norm": 0.2736741006374359, "learning_rate": 3.051752477584191e-06, "loss": 0.3912, "step": 6145 }, { "epoch": 3.3227608578122183, "grad_norm": 0.27544134855270386, "learning_rate": 3.05001385972041e-06, "loss": 0.3766, "step": 6146 }, { "epoch": 3.3233014957650027, "grad_norm": 0.25983256101608276, "learning_rate": 3.048275519882116e-06, "loss": 0.3574, "step": 6147 }, { "epoch": 3.323842133717787, "grad_norm": 0.2896862030029297, "learning_rate": 3.0465374583171627e-06, "loss": 0.394, "step": 6148 }, { "epoch": 3.3243827716705714, "grad_norm": 0.2907646596431732, "learning_rate": 3.0447996752733543e-06, "loss": 0.3759, "step": 6149 }, { "epoch": 3.3249234096233558, "grad_norm": 0.28527727723121643, "learning_rate": 3.043062170998464e-06, "loss": 0.3691, "step": 6150 }, { "epoch": 3.3254640475761397, "grad_norm": 0.28328537940979004, "learning_rate": 3.0413249457402206e-06, "loss": 0.38, "step": 6151 }, { "epoch": 3.326004685528924, "grad_norm": 0.2843831777572632, "learning_rate": 3.0395879997463164e-06, "loss": 0.3842, "step": 6152 }, { "epoch": 3.3265453234817084, "grad_norm": 0.2758019268512726, "learning_rate": 3.037851333264399e-06, "loss": 0.3848, "step": 6153 }, { "epoch": 3.3270859614344928, "grad_norm": 0.27419313788414, "learning_rate": 3.0361149465420814e-06, "loss": 0.3772, "step": 6154 }, { "epoch": 3.327626599387277, "grad_norm": 0.2987149655818939, "learning_rate": 3.0343788398269342e-06, "loss": 0.3724, "step": 6155 }, { "epoch": 3.328167237340061, "grad_norm": 0.2917717397212982, "learning_rate": 3.0326430133664888e-06, "loss": 0.3539, "step": 6156 }, { "epoch": 3.3287078752928454, "grad_norm": 0.28162145614624023, "learning_rate": 3.030907467408235e-06, "loss": 0.3897, "step": 6157 }, { "epoch": 3.3292485132456298, "grad_norm": 0.27886053919792175, "learning_rate": 3.029172202199624e-06, "loss": 0.3509, "step": 6158 }, { "epoch": 3.329789151198414, "grad_norm": 0.3052806854248047, "learning_rate": 3.0274372179880667e-06, "loss": 0.3906, "step": 6159 }, { "epoch": 3.3303297891511985, "grad_norm": 0.2795872390270233, "learning_rate": 3.025702515020937e-06, "loss": 0.3521, "step": 6160 }, { "epoch": 3.330870427103983, "grad_norm": 0.2896803021430969, "learning_rate": 3.0239680935455607e-06, "loss": 0.3932, "step": 6161 }, { "epoch": 3.331411065056767, "grad_norm": 0.28419041633605957, "learning_rate": 3.0222339538092306e-06, "loss": 0.3749, "step": 6162 }, { "epoch": 3.331951703009551, "grad_norm": 0.2858562767505646, "learning_rate": 3.020500096059198e-06, "loss": 0.3701, "step": 6163 }, { "epoch": 3.3324923409623355, "grad_norm": 0.2957281768321991, "learning_rate": 3.018766520542673e-06, "loss": 0.3824, "step": 6164 }, { "epoch": 3.33303297891512, "grad_norm": 0.30355215072631836, "learning_rate": 3.0170332275068247e-06, "loss": 0.4036, "step": 6165 }, { "epoch": 3.333573616867904, "grad_norm": 0.3126343786716461, "learning_rate": 3.015300217198784e-06, "loss": 0.4117, "step": 6166 }, { "epoch": 3.3341142548206886, "grad_norm": 0.2678101360797882, "learning_rate": 3.0135674898656392e-06, "loss": 0.3734, "step": 6167 }, { "epoch": 3.3346548927734725, "grad_norm": 0.28154507279396057, "learning_rate": 3.011835045754441e-06, "loss": 0.3974, "step": 6168 }, { "epoch": 3.335195530726257, "grad_norm": 0.26780441403388977, "learning_rate": 3.0101028851121963e-06, "loss": 0.3576, "step": 6169 }, { "epoch": 3.335736168679041, "grad_norm": 0.28477615118026733, "learning_rate": 3.0083710081858748e-06, "loss": 0.3941, "step": 6170 }, { "epoch": 3.3362768066318256, "grad_norm": 0.2977590560913086, "learning_rate": 3.0066394152224034e-06, "loss": 0.3896, "step": 6171 }, { "epoch": 3.33681744458461, "grad_norm": 0.2784702181816101, "learning_rate": 3.004908106468672e-06, "loss": 0.396, "step": 6172 }, { "epoch": 3.3373580825373943, "grad_norm": 0.2852799892425537, "learning_rate": 3.0031770821715233e-06, "loss": 0.3602, "step": 6173 }, { "epoch": 3.337898720490178, "grad_norm": 0.2881910502910614, "learning_rate": 3.001446342577765e-06, "loss": 0.3507, "step": 6174 }, { "epoch": 3.3384393584429626, "grad_norm": 0.305225670337677, "learning_rate": 2.9997158879341647e-06, "loss": 0.3928, "step": 6175 }, { "epoch": 3.338979996395747, "grad_norm": 0.3159213066101074, "learning_rate": 2.9979857184874484e-06, "loss": 0.3838, "step": 6176 }, { "epoch": 3.3395206343485313, "grad_norm": 0.28928208351135254, "learning_rate": 2.9962558344842963e-06, "loss": 0.3992, "step": 6177 }, { "epoch": 3.3400612723013157, "grad_norm": 0.2776452302932739, "learning_rate": 2.9945262361713545e-06, "loss": 0.3805, "step": 6178 }, { "epoch": 3.3406019102541, "grad_norm": 0.2972255051136017, "learning_rate": 2.9927969237952254e-06, "loss": 0.3702, "step": 6179 }, { "epoch": 3.341142548206884, "grad_norm": 0.2896145284175873, "learning_rate": 2.9910678976024733e-06, "loss": 0.3675, "step": 6180 }, { "epoch": 3.3416831861596683, "grad_norm": 0.3609926402568817, "learning_rate": 2.989339157839616e-06, "loss": 0.4042, "step": 6181 }, { "epoch": 3.3422238241124527, "grad_norm": 0.28718194365501404, "learning_rate": 2.9876107047531367e-06, "loss": 0.367, "step": 6182 }, { "epoch": 3.342764462065237, "grad_norm": 0.2966802716255188, "learning_rate": 2.985882538589474e-06, "loss": 0.3647, "step": 6183 }, { "epoch": 3.3433051000180214, "grad_norm": 0.29517579078674316, "learning_rate": 2.984154659595028e-06, "loss": 0.369, "step": 6184 }, { "epoch": 3.3438457379708053, "grad_norm": 0.27919119596481323, "learning_rate": 2.982427068016155e-06, "loss": 0.3851, "step": 6185 }, { "epoch": 3.3443863759235897, "grad_norm": 0.2870360314846039, "learning_rate": 2.9806997640991733e-06, "loss": 0.3736, "step": 6186 }, { "epoch": 3.344927013876374, "grad_norm": 0.2866764962673187, "learning_rate": 2.9789727480903564e-06, "loss": 0.358, "step": 6187 }, { "epoch": 3.3454676518291584, "grad_norm": 0.3390553891658783, "learning_rate": 2.9772460202359437e-06, "loss": 0.3928, "step": 6188 }, { "epoch": 3.3460082897819428, "grad_norm": 0.3193557560443878, "learning_rate": 2.9755195807821236e-06, "loss": 0.3761, "step": 6189 }, { "epoch": 3.346548927734727, "grad_norm": 0.2649048864841461, "learning_rate": 2.9737934299750514e-06, "loss": 0.3747, "step": 6190 }, { "epoch": 3.3470895656875115, "grad_norm": 0.2913542687892914, "learning_rate": 2.972067568060838e-06, "loss": 0.3988, "step": 6191 }, { "epoch": 3.3476302036402954, "grad_norm": 0.27233168482780457, "learning_rate": 2.970341995285553e-06, "loss": 0.3664, "step": 6192 }, { "epoch": 3.3481708415930798, "grad_norm": 0.321533739566803, "learning_rate": 2.968616711895229e-06, "loss": 0.3597, "step": 6193 }, { "epoch": 3.348711479545864, "grad_norm": 0.27270835638046265, "learning_rate": 2.96689171813585e-06, "loss": 0.353, "step": 6194 }, { "epoch": 3.3492521174986485, "grad_norm": 0.29473403096199036, "learning_rate": 2.965167014253363e-06, "loss": 0.371, "step": 6195 }, { "epoch": 3.349792755451433, "grad_norm": 0.27694326639175415, "learning_rate": 2.9634426004936735e-06, "loss": 0.3718, "step": 6196 }, { "epoch": 3.3503333934042168, "grad_norm": 0.2827945649623871, "learning_rate": 2.9617184771026464e-06, "loss": 0.4004, "step": 6197 }, { "epoch": 3.350874031357001, "grad_norm": 0.27870264649391174, "learning_rate": 2.959994644326103e-06, "loss": 0.3717, "step": 6198 }, { "epoch": 3.3514146693097855, "grad_norm": 0.2858085036277771, "learning_rate": 2.958271102409823e-06, "loss": 0.3873, "step": 6199 }, { "epoch": 3.35195530726257, "grad_norm": 0.2923094630241394, "learning_rate": 2.956547851599548e-06, "loss": 0.3943, "step": 6200 }, { "epoch": 3.352495945215354, "grad_norm": 0.30211901664733887, "learning_rate": 2.954824892140978e-06, "loss": 0.3929, "step": 6201 }, { "epoch": 3.3530365831681386, "grad_norm": 0.2655908763408661, "learning_rate": 2.9531022242797646e-06, "loss": 0.3643, "step": 6202 }, { "epoch": 3.353577221120923, "grad_norm": 0.2907673418521881, "learning_rate": 2.951379848261523e-06, "loss": 0.3825, "step": 6203 }, { "epoch": 3.354117859073707, "grad_norm": 0.28303998708724976, "learning_rate": 2.9496577643318302e-06, "loss": 0.3639, "step": 6204 }, { "epoch": 3.354658497026491, "grad_norm": 0.30183348059654236, "learning_rate": 2.947935972736217e-06, "loss": 0.3621, "step": 6205 }, { "epoch": 3.3551991349792756, "grad_norm": 0.26821714639663696, "learning_rate": 2.946214473720171e-06, "loss": 0.3786, "step": 6206 }, { "epoch": 3.35573977293206, "grad_norm": 0.29525643587112427, "learning_rate": 2.944493267529141e-06, "loss": 0.3998, "step": 6207 }, { "epoch": 3.3562804108848443, "grad_norm": 0.27004489302635193, "learning_rate": 2.942772354408534e-06, "loss": 0.3555, "step": 6208 }, { "epoch": 3.356821048837628, "grad_norm": 0.2798943519592285, "learning_rate": 2.941051734603716e-06, "loss": 0.3649, "step": 6209 }, { "epoch": 3.3573616867904126, "grad_norm": 0.28168627619743347, "learning_rate": 2.9393314083600076e-06, "loss": 0.3624, "step": 6210 }, { "epoch": 3.357902324743197, "grad_norm": 0.30059489607810974, "learning_rate": 2.9376113759226903e-06, "loss": 0.395, "step": 6211 }, { "epoch": 3.3584429626959813, "grad_norm": 0.28024792671203613, "learning_rate": 2.935891637537004e-06, "loss": 0.3746, "step": 6212 }, { "epoch": 3.3589836006487657, "grad_norm": 0.27950024604797363, "learning_rate": 2.934172193448147e-06, "loss": 0.3576, "step": 6213 }, { "epoch": 3.3595242386015496, "grad_norm": 0.28534096479415894, "learning_rate": 2.932453043901271e-06, "loss": 0.3775, "step": 6214 }, { "epoch": 3.360064876554334, "grad_norm": 0.294626921415329, "learning_rate": 2.930734189141492e-06, "loss": 0.3291, "step": 6215 }, { "epoch": 3.3606055145071183, "grad_norm": 0.30831092596054077, "learning_rate": 2.9290156294138807e-06, "loss": 0.398, "step": 6216 }, { "epoch": 3.3611461524599027, "grad_norm": 0.31744620203971863, "learning_rate": 2.927297364963468e-06, "loss": 0.3538, "step": 6217 }, { "epoch": 3.361686790412687, "grad_norm": 0.3032221794128418, "learning_rate": 2.9255793960352364e-06, "loss": 0.3664, "step": 6218 }, { "epoch": 3.3622274283654714, "grad_norm": 0.29594576358795166, "learning_rate": 2.923861722874134e-06, "loss": 0.3809, "step": 6219 }, { "epoch": 3.3627680663182558, "grad_norm": 0.28834328055381775, "learning_rate": 2.922144345725062e-06, "loss": 0.3772, "step": 6220 }, { "epoch": 3.3633087042710397, "grad_norm": 0.3238198757171631, "learning_rate": 2.9204272648328835e-06, "loss": 0.383, "step": 6221 }, { "epoch": 3.363849342223824, "grad_norm": 0.27415868639945984, "learning_rate": 2.9187104804424138e-06, "loss": 0.3743, "step": 6222 }, { "epoch": 3.3643899801766084, "grad_norm": 0.2742730379104614, "learning_rate": 2.9169939927984293e-06, "loss": 0.391, "step": 6223 }, { "epoch": 3.3649306181293928, "grad_norm": 0.2784969210624695, "learning_rate": 2.915277802145667e-06, "loss": 0.3785, "step": 6224 }, { "epoch": 3.365471256082177, "grad_norm": 0.30221015214920044, "learning_rate": 2.9135619087288153e-06, "loss": 0.393, "step": 6225 }, { "epoch": 3.366011894034961, "grad_norm": 0.2925558090209961, "learning_rate": 2.9118463127925235e-06, "loss": 0.3772, "step": 6226 }, { "epoch": 3.3665525319877454, "grad_norm": 0.2875816822052002, "learning_rate": 2.9101310145813966e-06, "loss": 0.3817, "step": 6227 }, { "epoch": 3.3670931699405298, "grad_norm": 0.29908493161201477, "learning_rate": 2.908416014340003e-06, "loss": 0.3747, "step": 6228 }, { "epoch": 3.367633807893314, "grad_norm": 0.28526777029037476, "learning_rate": 2.906701312312861e-06, "loss": 0.3823, "step": 6229 }, { "epoch": 3.3681744458460985, "grad_norm": 0.33051398396492004, "learning_rate": 2.9049869087444493e-06, "loss": 0.4201, "step": 6230 }, { "epoch": 3.368715083798883, "grad_norm": 0.36779916286468506, "learning_rate": 2.903272803879207e-06, "loss": 0.3933, "step": 6231 }, { "epoch": 3.369255721751667, "grad_norm": 0.28889724612236023, "learning_rate": 2.9015589979615244e-06, "loss": 0.3779, "step": 6232 }, { "epoch": 3.369796359704451, "grad_norm": 0.2862876057624817, "learning_rate": 2.8998454912357578e-06, "loss": 0.4034, "step": 6233 }, { "epoch": 3.3703369976572355, "grad_norm": 0.29388126730918884, "learning_rate": 2.8981322839462135e-06, "loss": 0.3605, "step": 6234 }, { "epoch": 3.37087763561002, "grad_norm": 0.27290070056915283, "learning_rate": 2.8964193763371546e-06, "loss": 0.4089, "step": 6235 }, { "epoch": 3.371418273562804, "grad_norm": 0.30332091450691223, "learning_rate": 2.894706768652809e-06, "loss": 0.3899, "step": 6236 }, { "epoch": 3.3719589115155886, "grad_norm": 0.30067744851112366, "learning_rate": 2.8929944611373555e-06, "loss": 0.3771, "step": 6237 }, { "epoch": 3.3724995494683725, "grad_norm": 0.3083347976207733, "learning_rate": 2.8912824540349315e-06, "loss": 0.3745, "step": 6238 }, { "epoch": 3.373040187421157, "grad_norm": 0.28111353516578674, "learning_rate": 2.8895707475896295e-06, "loss": 0.3767, "step": 6239 }, { "epoch": 3.373580825373941, "grad_norm": 0.28267189860343933, "learning_rate": 2.887859342045506e-06, "loss": 0.372, "step": 6240 }, { "epoch": 3.3741214633267256, "grad_norm": 0.31190162897109985, "learning_rate": 2.8861482376465684e-06, "loss": 0.3789, "step": 6241 }, { "epoch": 3.37466210127951, "grad_norm": 0.2946929335594177, "learning_rate": 2.88443743463678e-06, "loss": 0.3609, "step": 6242 }, { "epoch": 3.375202739232294, "grad_norm": 0.27581751346588135, "learning_rate": 2.882726933260068e-06, "loss": 0.356, "step": 6243 }, { "epoch": 3.375743377185078, "grad_norm": 0.2839164435863495, "learning_rate": 2.88101673376031e-06, "loss": 0.3769, "step": 6244 }, { "epoch": 3.3762840151378626, "grad_norm": 0.2926939129829407, "learning_rate": 2.879306836381345e-06, "loss": 0.3661, "step": 6245 }, { "epoch": 3.376824653090647, "grad_norm": 0.2834808826446533, "learning_rate": 2.877597241366967e-06, "loss": 0.379, "step": 6246 }, { "epoch": 3.3773652910434313, "grad_norm": 0.29527443647384644, "learning_rate": 2.8758879489609243e-06, "loss": 0.3931, "step": 6247 }, { "epoch": 3.3779059289962157, "grad_norm": 0.2936485707759857, "learning_rate": 2.874178959406928e-06, "loss": 0.3575, "step": 6248 }, { "epoch": 3.378446566949, "grad_norm": 0.2918454706668854, "learning_rate": 2.872470272948642e-06, "loss": 0.3708, "step": 6249 }, { "epoch": 3.378987204901784, "grad_norm": 0.3029497563838959, "learning_rate": 2.8707618898296864e-06, "loss": 0.3921, "step": 6250 }, { "epoch": 3.3795278428545683, "grad_norm": 0.3112659156322479, "learning_rate": 2.869053810293638e-06, "loss": 0.3724, "step": 6251 }, { "epoch": 3.3800684808073527, "grad_norm": 0.2860032916069031, "learning_rate": 2.8673460345840343e-06, "loss": 0.3844, "step": 6252 }, { "epoch": 3.380609118760137, "grad_norm": 0.28944653272628784, "learning_rate": 2.8656385629443694e-06, "loss": 0.3831, "step": 6253 }, { "epoch": 3.3811497567129214, "grad_norm": 0.29049044847488403, "learning_rate": 2.863931395618085e-06, "loss": 0.3866, "step": 6254 }, { "epoch": 3.3816903946657053, "grad_norm": 0.29390716552734375, "learning_rate": 2.862224532848591e-06, "loss": 0.3831, "step": 6255 }, { "epoch": 3.3822310326184897, "grad_norm": 0.3249385356903076, "learning_rate": 2.860517974879245e-06, "loss": 0.3621, "step": 6256 }, { "epoch": 3.382771670571274, "grad_norm": 0.2906329929828644, "learning_rate": 2.858811721953369e-06, "loss": 0.3788, "step": 6257 }, { "epoch": 3.3833123085240584, "grad_norm": 0.28749001026153564, "learning_rate": 2.8571057743142362e-06, "loss": 0.3688, "step": 6258 }, { "epoch": 3.3838529464768428, "grad_norm": 0.285239040851593, "learning_rate": 2.855400132205074e-06, "loss": 0.3551, "step": 6259 }, { "epoch": 3.384393584429627, "grad_norm": 0.28307268023490906, "learning_rate": 2.853694795869074e-06, "loss": 0.3697, "step": 6260 }, { "epoch": 3.3849342223824115, "grad_norm": 0.26978787779808044, "learning_rate": 2.851989765549378e-06, "loss": 0.3703, "step": 6261 }, { "epoch": 3.3854748603351954, "grad_norm": 0.2827216386795044, "learning_rate": 2.850285041489087e-06, "loss": 0.367, "step": 6262 }, { "epoch": 3.3860154982879798, "grad_norm": 0.2850663959980011, "learning_rate": 2.8485806239312583e-06, "loss": 0.3843, "step": 6263 }, { "epoch": 3.386556136240764, "grad_norm": 0.31980374455451965, "learning_rate": 2.8468765131189014e-06, "loss": 0.3701, "step": 6264 }, { "epoch": 3.3870967741935485, "grad_norm": 0.2919912338256836, "learning_rate": 2.845172709294989e-06, "loss": 0.3895, "step": 6265 }, { "epoch": 3.387637412146333, "grad_norm": 0.27873820066452026, "learning_rate": 2.843469212702445e-06, "loss": 0.3873, "step": 6266 }, { "epoch": 3.3881780500991168, "grad_norm": 0.2797130048274994, "learning_rate": 2.8417660235841505e-06, "loss": 0.3817, "step": 6267 }, { "epoch": 3.388718688051901, "grad_norm": 0.27694785594940186, "learning_rate": 2.840063142182941e-06, "loss": 0.3765, "step": 6268 }, { "epoch": 3.3892593260046855, "grad_norm": 0.27863171696662903, "learning_rate": 2.838360568741613e-06, "loss": 0.3735, "step": 6269 }, { "epoch": 3.38979996395747, "grad_norm": 0.27632227540016174, "learning_rate": 2.8366583035029194e-06, "loss": 0.3637, "step": 6270 }, { "epoch": 3.390340601910254, "grad_norm": 0.26599693298339844, "learning_rate": 2.834956346709559e-06, "loss": 0.3661, "step": 6271 }, { "epoch": 3.3908812398630386, "grad_norm": 0.2922784090042114, "learning_rate": 2.8332546986041986e-06, "loss": 0.3835, "step": 6272 }, { "epoch": 3.3914218778158225, "grad_norm": 0.2666107714176178, "learning_rate": 2.831553359429453e-06, "loss": 0.3753, "step": 6273 }, { "epoch": 3.391962515768607, "grad_norm": 0.28382694721221924, "learning_rate": 2.829852329427899e-06, "loss": 0.3893, "step": 6274 }, { "epoch": 3.392503153721391, "grad_norm": 0.2854803800582886, "learning_rate": 2.8281516088420665e-06, "loss": 0.3689, "step": 6275 }, { "epoch": 3.3930437916741756, "grad_norm": 0.2664931118488312, "learning_rate": 2.826451197914437e-06, "loss": 0.3808, "step": 6276 }, { "epoch": 3.39358442962696, "grad_norm": 0.2853836715221405, "learning_rate": 2.824751096887457e-06, "loss": 0.3745, "step": 6277 }, { "epoch": 3.3941250675797443, "grad_norm": 0.2897249758243561, "learning_rate": 2.8230513060035214e-06, "loss": 0.3848, "step": 6278 }, { "epoch": 3.394665705532528, "grad_norm": 0.28952860832214355, "learning_rate": 2.821351825504984e-06, "loss": 0.3816, "step": 6279 }, { "epoch": 3.3952063434853126, "grad_norm": 0.3057219982147217, "learning_rate": 2.819652655634151e-06, "loss": 0.3585, "step": 6280 }, { "epoch": 3.395746981438097, "grad_norm": 0.29877203702926636, "learning_rate": 2.817953796633289e-06, "loss": 0.3764, "step": 6281 }, { "epoch": 3.3962876193908813, "grad_norm": 0.279131680727005, "learning_rate": 2.816255248744622e-06, "loss": 0.3687, "step": 6282 }, { "epoch": 3.3968282573436657, "grad_norm": 0.27392497658729553, "learning_rate": 2.8145570122103187e-06, "loss": 0.3576, "step": 6283 }, { "epoch": 3.3973688952964496, "grad_norm": 0.28131553530693054, "learning_rate": 2.812859087272516e-06, "loss": 0.3697, "step": 6284 }, { "epoch": 3.397909533249234, "grad_norm": 0.2834169268608093, "learning_rate": 2.8111614741732975e-06, "loss": 0.3599, "step": 6285 }, { "epoch": 3.3984501712020183, "grad_norm": 0.3012988269329071, "learning_rate": 2.8094641731547088e-06, "loss": 0.376, "step": 6286 }, { "epoch": 3.3989908091548027, "grad_norm": 0.27377527952194214, "learning_rate": 2.807767184458747e-06, "loss": 0.364, "step": 6287 }, { "epoch": 3.399531447107587, "grad_norm": 0.2820917069911957, "learning_rate": 2.8060705083273633e-06, "loss": 0.3602, "step": 6288 }, { "epoch": 3.4000720850603714, "grad_norm": 0.2810240387916565, "learning_rate": 2.8043741450024707e-06, "loss": 0.3518, "step": 6289 }, { "epoch": 3.4006127230131558, "grad_norm": 0.2800905704498291, "learning_rate": 2.802678094725931e-06, "loss": 0.3767, "step": 6290 }, { "epoch": 3.4011533609659397, "grad_norm": 0.2750107944011688, "learning_rate": 2.8009823577395633e-06, "loss": 0.3773, "step": 6291 }, { "epoch": 3.401693998918724, "grad_norm": 0.2940891981124878, "learning_rate": 2.799286934285146e-06, "loss": 0.3639, "step": 6292 }, { "epoch": 3.4022346368715084, "grad_norm": 0.2891536355018616, "learning_rate": 2.7975918246044047e-06, "loss": 0.3637, "step": 6293 }, { "epoch": 3.4027752748242928, "grad_norm": 0.289329469203949, "learning_rate": 2.7958970289390317e-06, "loss": 0.3712, "step": 6294 }, { "epoch": 3.403315912777077, "grad_norm": 0.2884690463542938, "learning_rate": 2.794202547530661e-06, "loss": 0.3755, "step": 6295 }, { "epoch": 3.403856550729861, "grad_norm": 0.2806275188922882, "learning_rate": 2.7925083806208932e-06, "loss": 0.4142, "step": 6296 }, { "epoch": 3.4043971886826454, "grad_norm": 0.2923715114593506, "learning_rate": 2.7908145284512765e-06, "loss": 0.3557, "step": 6297 }, { "epoch": 3.4049378266354298, "grad_norm": 0.303189754486084, "learning_rate": 2.78912099126332e-06, "loss": 0.3538, "step": 6298 }, { "epoch": 3.405478464588214, "grad_norm": 0.3140781819820404, "learning_rate": 2.7874277692984847e-06, "loss": 0.3787, "step": 6299 }, { "epoch": 3.4060191025409985, "grad_norm": 0.3113609552383423, "learning_rate": 2.785734862798184e-06, "loss": 0.3737, "step": 6300 }, { "epoch": 3.406559740493783, "grad_norm": 0.29418936371803284, "learning_rate": 2.7840422720037943e-06, "loss": 0.3814, "step": 6301 }, { "epoch": 3.4071003784465668, "grad_norm": 0.29379862546920776, "learning_rate": 2.7823499971566393e-06, "loss": 0.3775, "step": 6302 }, { "epoch": 3.407641016399351, "grad_norm": 0.30091267824172974, "learning_rate": 2.7806580384979986e-06, "loss": 0.3897, "step": 6303 }, { "epoch": 3.4081816543521355, "grad_norm": 0.2900888919830322, "learning_rate": 2.7789663962691134e-06, "loss": 0.3649, "step": 6304 }, { "epoch": 3.40872229230492, "grad_norm": 0.2886979579925537, "learning_rate": 2.77727507071117e-06, "loss": 0.3823, "step": 6305 }, { "epoch": 3.409262930257704, "grad_norm": 0.27417394518852234, "learning_rate": 2.7755840620653212e-06, "loss": 0.3676, "step": 6306 }, { "epoch": 3.4098035682104886, "grad_norm": 0.321492999792099, "learning_rate": 2.77389337057266e-06, "loss": 0.396, "step": 6307 }, { "epoch": 3.4103442061632725, "grad_norm": 0.29740339517593384, "learning_rate": 2.7722029964742455e-06, "loss": 0.3658, "step": 6308 }, { "epoch": 3.410884844116057, "grad_norm": 0.2698226869106293, "learning_rate": 2.77051294001109e-06, "loss": 0.3788, "step": 6309 }, { "epoch": 3.411425482068841, "grad_norm": 0.2841397523880005, "learning_rate": 2.768823201424158e-06, "loss": 0.3695, "step": 6310 }, { "epoch": 3.4119661200216256, "grad_norm": 0.30393242835998535, "learning_rate": 2.7671337809543684e-06, "loss": 0.3695, "step": 6311 }, { "epoch": 3.41250675797441, "grad_norm": 0.2840659022331238, "learning_rate": 2.7654446788425935e-06, "loss": 0.3843, "step": 6312 }, { "epoch": 3.413047395927194, "grad_norm": 0.29241564869880676, "learning_rate": 2.7637558953296672e-06, "loss": 0.3643, "step": 6313 }, { "epoch": 3.4135880338799782, "grad_norm": 0.32568466663360596, "learning_rate": 2.7620674306563705e-06, "loss": 0.3919, "step": 6314 }, { "epoch": 3.4141286718327626, "grad_norm": 0.3067713677883148, "learning_rate": 2.7603792850634402e-06, "loss": 0.3785, "step": 6315 }, { "epoch": 3.414669309785547, "grad_norm": 0.29410862922668457, "learning_rate": 2.7586914587915727e-06, "loss": 0.3765, "step": 6316 }, { "epoch": 3.4152099477383313, "grad_norm": 0.27362555265426636, "learning_rate": 2.757003952081411e-06, "loss": 0.3661, "step": 6317 }, { "epoch": 3.4157505856911157, "grad_norm": 0.280722051858902, "learning_rate": 2.7553167651735624e-06, "loss": 0.3637, "step": 6318 }, { "epoch": 3.4162912236439, "grad_norm": 0.29213541746139526, "learning_rate": 2.7536298983085762e-06, "loss": 0.3646, "step": 6319 }, { "epoch": 3.416831861596684, "grad_norm": 0.2971615195274353, "learning_rate": 2.7519433517269665e-06, "loss": 0.3644, "step": 6320 }, { "epoch": 3.4173724995494683, "grad_norm": 0.28067779541015625, "learning_rate": 2.7502571256691996e-06, "loss": 0.3767, "step": 6321 }, { "epoch": 3.4179131375022527, "grad_norm": 0.2956728935241699, "learning_rate": 2.748571220375691e-06, "loss": 0.3907, "step": 6322 }, { "epoch": 3.418453775455037, "grad_norm": 0.2931300699710846, "learning_rate": 2.746885636086819e-06, "loss": 0.3807, "step": 6323 }, { "epoch": 3.4189944134078214, "grad_norm": 0.27655330300331116, "learning_rate": 2.745200373042904e-06, "loss": 0.4191, "step": 6324 }, { "epoch": 3.4195350513606053, "grad_norm": 0.2742091119289398, "learning_rate": 2.7435154314842337e-06, "loss": 0.3764, "step": 6325 }, { "epoch": 3.4200756893133897, "grad_norm": 0.29484522342681885, "learning_rate": 2.7418308116510395e-06, "loss": 0.3795, "step": 6326 }, { "epoch": 3.420616327266174, "grad_norm": 0.28292086720466614, "learning_rate": 2.7401465137835164e-06, "loss": 0.3737, "step": 6327 }, { "epoch": 3.4211569652189584, "grad_norm": 0.2676083743572235, "learning_rate": 2.7384625381218063e-06, "loss": 0.3308, "step": 6328 }, { "epoch": 3.4216976031717428, "grad_norm": 0.30103984475135803, "learning_rate": 2.736778884906004e-06, "loss": 0.3645, "step": 6329 }, { "epoch": 3.422238241124527, "grad_norm": 0.2882280647754669, "learning_rate": 2.7350955543761682e-06, "loss": 0.3736, "step": 6330 }, { "epoch": 3.422778879077311, "grad_norm": 0.28636249899864197, "learning_rate": 2.7334125467723004e-06, "loss": 0.3717, "step": 6331 }, { "epoch": 3.4233195170300954, "grad_norm": 0.2803739309310913, "learning_rate": 2.731729862334361e-06, "loss": 0.3849, "step": 6332 }, { "epoch": 3.4238601549828798, "grad_norm": 0.2867929935455322, "learning_rate": 2.7300475013022666e-06, "loss": 0.3846, "step": 6333 }, { "epoch": 3.424400792935664, "grad_norm": 0.28699979186058044, "learning_rate": 2.7283654639158817e-06, "loss": 0.3647, "step": 6334 }, { "epoch": 3.4249414308884485, "grad_norm": 0.34597089886665344, "learning_rate": 2.7266837504150345e-06, "loss": 0.3619, "step": 6335 }, { "epoch": 3.425482068841233, "grad_norm": 0.2675110995769501, "learning_rate": 2.7250023610394926e-06, "loss": 0.3752, "step": 6336 }, { "epoch": 3.4260227067940168, "grad_norm": 0.28611132502555847, "learning_rate": 2.723321296028989e-06, "loss": 0.3931, "step": 6337 }, { "epoch": 3.426563344746801, "grad_norm": 0.2917852997779846, "learning_rate": 2.7216405556232093e-06, "loss": 0.3918, "step": 6338 }, { "epoch": 3.4271039826995855, "grad_norm": 0.2738979458808899, "learning_rate": 2.719960140061788e-06, "loss": 0.3762, "step": 6339 }, { "epoch": 3.42764462065237, "grad_norm": 0.2562112808227539, "learning_rate": 2.7182800495843166e-06, "loss": 0.389, "step": 6340 }, { "epoch": 3.428185258605154, "grad_norm": 0.28876200318336487, "learning_rate": 2.7166002844303365e-06, "loss": 0.3626, "step": 6341 }, { "epoch": 3.428725896557938, "grad_norm": 0.28462162613868713, "learning_rate": 2.7149208448393494e-06, "loss": 0.3956, "step": 6342 }, { "epoch": 3.4292665345107225, "grad_norm": 0.27688026428222656, "learning_rate": 2.713241731050805e-06, "loss": 0.3967, "step": 6343 }, { "epoch": 3.429807172463507, "grad_norm": 0.28863951563835144, "learning_rate": 2.711562943304107e-06, "loss": 0.3624, "step": 6344 }, { "epoch": 3.430347810416291, "grad_norm": 0.30096060037612915, "learning_rate": 2.7098844818386164e-06, "loss": 0.388, "step": 6345 }, { "epoch": 3.4308884483690756, "grad_norm": 0.2913476228713989, "learning_rate": 2.7082063468936427e-06, "loss": 0.3303, "step": 6346 }, { "epoch": 3.43142908632186, "grad_norm": 0.2739192843437195, "learning_rate": 2.706528538708455e-06, "loss": 0.3583, "step": 6347 }, { "epoch": 3.4319697242746443, "grad_norm": 0.2901630699634552, "learning_rate": 2.70485105752227e-06, "loss": 0.3728, "step": 6348 }, { "epoch": 3.4325103622274282, "grad_norm": 0.2617281973361969, "learning_rate": 2.7031739035742575e-06, "loss": 0.3658, "step": 6349 }, { "epoch": 3.4330510001802126, "grad_norm": 0.26667746901512146, "learning_rate": 2.7014970771035474e-06, "loss": 0.3693, "step": 6350 }, { "epoch": 3.433591638132997, "grad_norm": 0.2691449820995331, "learning_rate": 2.6998205783492167e-06, "loss": 0.3736, "step": 6351 }, { "epoch": 3.4341322760857813, "grad_norm": 0.29237818717956543, "learning_rate": 2.6981444075502973e-06, "loss": 0.3716, "step": 6352 }, { "epoch": 3.4346729140385657, "grad_norm": 0.28428447246551514, "learning_rate": 2.6964685649457727e-06, "loss": 0.3765, "step": 6353 }, { "epoch": 3.4352135519913496, "grad_norm": 0.254072368144989, "learning_rate": 2.694793050774586e-06, "loss": 0.352, "step": 6354 }, { "epoch": 3.435754189944134, "grad_norm": 0.29546475410461426, "learning_rate": 2.6931178652756262e-06, "loss": 0.3975, "step": 6355 }, { "epoch": 3.4362948278969183, "grad_norm": 0.30482378602027893, "learning_rate": 2.6914430086877365e-06, "loss": 0.3714, "step": 6356 }, { "epoch": 3.4368354658497027, "grad_norm": 0.30705294013023376, "learning_rate": 2.6897684812497193e-06, "loss": 0.3917, "step": 6357 }, { "epoch": 3.437376103802487, "grad_norm": 0.2723318636417389, "learning_rate": 2.688094283200321e-06, "loss": 0.3823, "step": 6358 }, { "epoch": 3.4379167417552714, "grad_norm": 0.2744968831539154, "learning_rate": 2.68642041477825e-06, "loss": 0.3712, "step": 6359 }, { "epoch": 3.4384573797080553, "grad_norm": 0.27377021312713623, "learning_rate": 2.6847468762221616e-06, "loss": 0.3744, "step": 6360 }, { "epoch": 3.4389980176608397, "grad_norm": 0.31088465452194214, "learning_rate": 2.6830736677706637e-06, "loss": 0.3433, "step": 6361 }, { "epoch": 3.439538655613624, "grad_norm": 0.28350019454956055, "learning_rate": 2.6814007896623235e-06, "loss": 0.3728, "step": 6362 }, { "epoch": 3.4400792935664084, "grad_norm": 0.293083131313324, "learning_rate": 2.6797282421356546e-06, "loss": 0.3854, "step": 6363 }, { "epoch": 3.4406199315191928, "grad_norm": 0.26938867568969727, "learning_rate": 2.6780560254291267e-06, "loss": 0.3763, "step": 6364 }, { "epoch": 3.441160569471977, "grad_norm": 0.523354709148407, "learning_rate": 2.6763841397811576e-06, "loss": 0.3775, "step": 6365 }, { "epoch": 3.441701207424761, "grad_norm": 0.28603750467300415, "learning_rate": 2.674712585430126e-06, "loss": 0.3551, "step": 6366 }, { "epoch": 3.4422418453775454, "grad_norm": 0.28473588824272156, "learning_rate": 2.673041362614361e-06, "loss": 0.3991, "step": 6367 }, { "epoch": 3.4427824833303298, "grad_norm": 0.2877833843231201, "learning_rate": 2.6713704715721357e-06, "loss": 0.3777, "step": 6368 }, { "epoch": 3.443323121283114, "grad_norm": 0.26864901185035706, "learning_rate": 2.6696999125416887e-06, "loss": 0.3776, "step": 6369 }, { "epoch": 3.4438637592358985, "grad_norm": 0.2873307764530182, "learning_rate": 2.668029685761201e-06, "loss": 0.3812, "step": 6370 }, { "epoch": 3.4444043971886824, "grad_norm": 0.29649487137794495, "learning_rate": 2.666359791468815e-06, "loss": 0.3738, "step": 6371 }, { "epoch": 3.4449450351414668, "grad_norm": 0.2977284789085388, "learning_rate": 2.6646902299026183e-06, "loss": 0.3693, "step": 6372 }, { "epoch": 3.445485673094251, "grad_norm": 0.28812456130981445, "learning_rate": 2.663021001300653e-06, "loss": 0.3965, "step": 6373 }, { "epoch": 3.4460263110470355, "grad_norm": 0.3119604289531708, "learning_rate": 2.6613521059009172e-06, "loss": 0.3864, "step": 6374 }, { "epoch": 3.44656694899982, "grad_norm": 0.3055315911769867, "learning_rate": 2.6596835439413584e-06, "loss": 0.3916, "step": 6375 }, { "epoch": 3.447107586952604, "grad_norm": 0.27192702889442444, "learning_rate": 2.6580153156598742e-06, "loss": 0.3581, "step": 6376 }, { "epoch": 3.4476482249053886, "grad_norm": 0.32477402687072754, "learning_rate": 2.656347421294323e-06, "loss": 0.3677, "step": 6377 }, { "epoch": 3.4481888628581725, "grad_norm": 0.30228251218795776, "learning_rate": 2.6546798610825043e-06, "loss": 0.3747, "step": 6378 }, { "epoch": 3.448729500810957, "grad_norm": 0.281707763671875, "learning_rate": 2.6530126352621834e-06, "loss": 0.3537, "step": 6379 }, { "epoch": 3.4492701387637412, "grad_norm": 0.2878420054912567, "learning_rate": 2.6513457440710612e-06, "loss": 0.3984, "step": 6380 }, { "epoch": 3.4498107767165256, "grad_norm": 0.29713553190231323, "learning_rate": 2.6496791877468063e-06, "loss": 0.3733, "step": 6381 }, { "epoch": 3.45035141466931, "grad_norm": 0.304828941822052, "learning_rate": 2.6480129665270295e-06, "loss": 0.3698, "step": 6382 }, { "epoch": 3.450892052622094, "grad_norm": 0.29700446128845215, "learning_rate": 2.6463470806493012e-06, "loss": 0.3744, "step": 6383 }, { "epoch": 3.4514326905748782, "grad_norm": 0.3028167486190796, "learning_rate": 2.644681530351139e-06, "loss": 0.3363, "step": 6384 }, { "epoch": 3.4519733285276626, "grad_norm": 0.28015580773353577, "learning_rate": 2.6430163158700116e-06, "loss": 0.3655, "step": 6385 }, { "epoch": 3.452513966480447, "grad_norm": 0.2895985543727875, "learning_rate": 2.641351437443347e-06, "loss": 0.3962, "step": 6386 }, { "epoch": 3.4530546044332313, "grad_norm": 0.28600218892097473, "learning_rate": 2.639686895308515e-06, "loss": 0.3939, "step": 6387 }, { "epoch": 3.4535952423860157, "grad_norm": 0.292057067155838, "learning_rate": 2.638022689702849e-06, "loss": 0.3628, "step": 6388 }, { "epoch": 3.4541358803387996, "grad_norm": 0.2844478487968445, "learning_rate": 2.6363588208636246e-06, "loss": 0.3835, "step": 6389 }, { "epoch": 3.454676518291584, "grad_norm": 0.2956898808479309, "learning_rate": 2.634695289028072e-06, "loss": 0.3734, "step": 6390 }, { "epoch": 3.4552171562443683, "grad_norm": 0.29271790385246277, "learning_rate": 2.6330320944333787e-06, "loss": 0.3908, "step": 6391 }, { "epoch": 3.4557577941971527, "grad_norm": 0.2900015711784363, "learning_rate": 2.6313692373166777e-06, "loss": 0.3847, "step": 6392 }, { "epoch": 3.456298432149937, "grad_norm": 0.2838262617588043, "learning_rate": 2.6297067179150566e-06, "loss": 0.3811, "step": 6393 }, { "epoch": 3.4568390701027214, "grad_norm": 0.27128735184669495, "learning_rate": 2.6280445364655516e-06, "loss": 0.3587, "step": 6394 }, { "epoch": 3.4573797080555053, "grad_norm": 0.26563310623168945, "learning_rate": 2.6263826932051562e-06, "loss": 0.4066, "step": 6395 }, { "epoch": 3.4579203460082897, "grad_norm": 0.2762858271598816, "learning_rate": 2.624721188370817e-06, "loss": 0.3763, "step": 6396 }, { "epoch": 3.458460983961074, "grad_norm": 0.2815711796283722, "learning_rate": 2.6230600221994195e-06, "loss": 0.3641, "step": 6397 }, { "epoch": 3.4590016219138584, "grad_norm": 0.28200116753578186, "learning_rate": 2.621399194927817e-06, "loss": 0.372, "step": 6398 }, { "epoch": 3.4595422598666428, "grad_norm": 0.27632999420166016, "learning_rate": 2.619738706792802e-06, "loss": 0.3757, "step": 6399 }, { "epoch": 3.4600828978194267, "grad_norm": 0.2843749523162842, "learning_rate": 2.6180785580311284e-06, "loss": 0.3813, "step": 6400 }, { "epoch": 3.460623535772211, "grad_norm": 0.2992981970310211, "learning_rate": 2.6164187488794958e-06, "loss": 0.3514, "step": 6401 }, { "epoch": 3.4611641737249954, "grad_norm": 0.27975744009017944, "learning_rate": 2.614759279574555e-06, "loss": 0.3778, "step": 6402 }, { "epoch": 3.4617048116777798, "grad_norm": 0.2758598327636719, "learning_rate": 2.613100150352912e-06, "loss": 0.3692, "step": 6403 }, { "epoch": 3.462245449630564, "grad_norm": 0.3005410134792328, "learning_rate": 2.6114413614511227e-06, "loss": 0.3693, "step": 6404 }, { "epoch": 3.4627860875833485, "grad_norm": 0.2945283055305481, "learning_rate": 2.609782913105691e-06, "loss": 0.371, "step": 6405 }, { "epoch": 3.463326725536133, "grad_norm": 0.29336419701576233, "learning_rate": 2.6081248055530796e-06, "loss": 0.3838, "step": 6406 }, { "epoch": 3.4638673634889168, "grad_norm": 0.2629316449165344, "learning_rate": 2.606467039029695e-06, "loss": 0.3449, "step": 6407 }, { "epoch": 3.464408001441701, "grad_norm": 0.2649190127849579, "learning_rate": 2.604809613771904e-06, "loss": 0.3821, "step": 6408 }, { "epoch": 3.4649486393944855, "grad_norm": 0.2621277868747711, "learning_rate": 2.603152530016012e-06, "loss": 0.3835, "step": 6409 }, { "epoch": 3.46548927734727, "grad_norm": 0.2772118151187897, "learning_rate": 2.601495787998288e-06, "loss": 0.3505, "step": 6410 }, { "epoch": 3.466029915300054, "grad_norm": 0.28124377131462097, "learning_rate": 2.5998393879549444e-06, "loss": 0.3998, "step": 6411 }, { "epoch": 3.466570553252838, "grad_norm": 0.30214518308639526, "learning_rate": 2.5981833301221505e-06, "loss": 0.37, "step": 6412 }, { "epoch": 3.4671111912056225, "grad_norm": 0.28282877802848816, "learning_rate": 2.5965276147360226e-06, "loss": 0.3745, "step": 6413 }, { "epoch": 3.467651829158407, "grad_norm": 0.301487535238266, "learning_rate": 2.594872242032628e-06, "loss": 0.3783, "step": 6414 }, { "epoch": 3.4681924671111912, "grad_norm": 0.281084805727005, "learning_rate": 2.59321721224799e-06, "loss": 0.3889, "step": 6415 }, { "epoch": 3.4687331050639756, "grad_norm": 0.29001906514167786, "learning_rate": 2.591562525618078e-06, "loss": 0.404, "step": 6416 }, { "epoch": 3.46927374301676, "grad_norm": 0.2739716172218323, "learning_rate": 2.589908182378813e-06, "loss": 0.3885, "step": 6417 }, { "epoch": 3.4698143809695443, "grad_norm": 0.26952242851257324, "learning_rate": 2.5882541827660713e-06, "loss": 0.3499, "step": 6418 }, { "epoch": 3.4703550189223282, "grad_norm": 0.30054211616516113, "learning_rate": 2.586600527015673e-06, "loss": 0.3917, "step": 6419 }, { "epoch": 3.4708956568751126, "grad_norm": 0.28374937176704407, "learning_rate": 2.5849472153634003e-06, "loss": 0.3627, "step": 6420 }, { "epoch": 3.471436294827897, "grad_norm": 0.2651611864566803, "learning_rate": 2.583294248044971e-06, "loss": 0.3922, "step": 6421 }, { "epoch": 3.4719769327806813, "grad_norm": 0.3104220926761627, "learning_rate": 2.5816416252960673e-06, "loss": 0.3828, "step": 6422 }, { "epoch": 3.4725175707334657, "grad_norm": 0.26732105016708374, "learning_rate": 2.579989347352314e-06, "loss": 0.3582, "step": 6423 }, { "epoch": 3.4730582086862496, "grad_norm": 0.285390704870224, "learning_rate": 2.5783374144492946e-06, "loss": 0.3707, "step": 6424 }, { "epoch": 3.473598846639034, "grad_norm": 0.2892547845840454, "learning_rate": 2.576685826822535e-06, "loss": 0.3966, "step": 6425 }, { "epoch": 3.4741394845918183, "grad_norm": 0.28802600502967834, "learning_rate": 2.575034584707515e-06, "loss": 0.4054, "step": 6426 }, { "epoch": 3.4746801225446027, "grad_norm": 0.2835301458835602, "learning_rate": 2.573383688339669e-06, "loss": 0.3619, "step": 6427 }, { "epoch": 3.475220760497387, "grad_norm": 0.28567731380462646, "learning_rate": 2.5717331379543775e-06, "loss": 0.3949, "step": 6428 }, { "epoch": 3.475761398450171, "grad_norm": 0.3027955889701843, "learning_rate": 2.57008293378697e-06, "loss": 0.3935, "step": 6429 }, { "epoch": 3.4763020364029553, "grad_norm": 0.2885318994522095, "learning_rate": 2.568433076072734e-06, "loss": 0.3822, "step": 6430 }, { "epoch": 3.4768426743557397, "grad_norm": 0.2797601521015167, "learning_rate": 2.566783565046899e-06, "loss": 0.3862, "step": 6431 }, { "epoch": 3.477383312308524, "grad_norm": 0.27476048469543457, "learning_rate": 2.565134400944656e-06, "loss": 0.36, "step": 6432 }, { "epoch": 3.4779239502613084, "grad_norm": 0.2893754541873932, "learning_rate": 2.563485584001132e-06, "loss": 0.3567, "step": 6433 }, { "epoch": 3.4784645882140928, "grad_norm": 0.27068889141082764, "learning_rate": 2.5618371144514147e-06, "loss": 0.4028, "step": 6434 }, { "epoch": 3.479005226166877, "grad_norm": 0.30441489815711975, "learning_rate": 2.5601889925305433e-06, "loss": 0.3997, "step": 6435 }, { "epoch": 3.479545864119661, "grad_norm": 0.291677862405777, "learning_rate": 2.558541218473502e-06, "loss": 0.3899, "step": 6436 }, { "epoch": 3.4800865020724454, "grad_norm": 0.3089935779571533, "learning_rate": 2.5568937925152272e-06, "loss": 0.3796, "step": 6437 }, { "epoch": 3.4806271400252298, "grad_norm": 0.2960951030254364, "learning_rate": 2.5552467148906034e-06, "loss": 0.3662, "step": 6438 }, { "epoch": 3.481167777978014, "grad_norm": 0.2769000232219696, "learning_rate": 2.553599985834472e-06, "loss": 0.3844, "step": 6439 }, { "epoch": 3.4817084159307985, "grad_norm": 0.2807958722114563, "learning_rate": 2.5519536055816194e-06, "loss": 0.3914, "step": 6440 }, { "epoch": 3.4822490538835824, "grad_norm": 0.2794989049434662, "learning_rate": 2.5503075743667815e-06, "loss": 0.3942, "step": 6441 }, { "epoch": 3.4827896918363668, "grad_norm": 0.2745060920715332, "learning_rate": 2.54866189242465e-06, "loss": 0.3946, "step": 6442 }, { "epoch": 3.483330329789151, "grad_norm": 0.293303906917572, "learning_rate": 2.5470165599898588e-06, "loss": 0.3793, "step": 6443 }, { "epoch": 3.4838709677419355, "grad_norm": 0.26405566930770874, "learning_rate": 2.545371577297002e-06, "loss": 0.3808, "step": 6444 }, { "epoch": 3.48441160569472, "grad_norm": 0.28149479627609253, "learning_rate": 2.5437269445806146e-06, "loss": 0.372, "step": 6445 }, { "epoch": 3.484952243647504, "grad_norm": 0.2879987359046936, "learning_rate": 2.5420826620751837e-06, "loss": 0.3932, "step": 6446 }, { "epoch": 3.4854928816002886, "grad_norm": 0.2887576222419739, "learning_rate": 2.540438730015152e-06, "loss": 0.3646, "step": 6447 }, { "epoch": 3.4860335195530725, "grad_norm": 0.2707485258579254, "learning_rate": 2.538795148634907e-06, "loss": 0.3803, "step": 6448 }, { "epoch": 3.486574157505857, "grad_norm": 0.26814743876457214, "learning_rate": 2.5371519181687877e-06, "loss": 0.385, "step": 6449 }, { "epoch": 3.4871147954586412, "grad_norm": 0.2806251645088196, "learning_rate": 2.5355090388510806e-06, "loss": 0.3794, "step": 6450 }, { "epoch": 3.4876554334114256, "grad_norm": 0.275668740272522, "learning_rate": 2.5338665109160274e-06, "loss": 0.3707, "step": 6451 }, { "epoch": 3.48819607136421, "grad_norm": 0.42301616072654724, "learning_rate": 2.5322243345978147e-06, "loss": 0.365, "step": 6452 }, { "epoch": 3.488736709316994, "grad_norm": 0.28061380982398987, "learning_rate": 2.5305825101305835e-06, "loss": 0.3672, "step": 6453 }, { "epoch": 3.4892773472697782, "grad_norm": 0.2842809855937958, "learning_rate": 2.5289410377484202e-06, "loss": 0.3786, "step": 6454 }, { "epoch": 3.4898179852225626, "grad_norm": 0.2861570715904236, "learning_rate": 2.527299917685362e-06, "loss": 0.4012, "step": 6455 }, { "epoch": 3.490358623175347, "grad_norm": 0.30325013399124146, "learning_rate": 2.5256591501754003e-06, "loss": 0.3936, "step": 6456 }, { "epoch": 3.4908992611281313, "grad_norm": 0.25236570835113525, "learning_rate": 2.5240187354524704e-06, "loss": 0.3708, "step": 6457 }, { "epoch": 3.4914398990809152, "grad_norm": 0.2806876003742218, "learning_rate": 2.5223786737504587e-06, "loss": 0.3862, "step": 6458 }, { "epoch": 3.4919805370336996, "grad_norm": 0.2911386787891388, "learning_rate": 2.5207389653032044e-06, "loss": 0.3806, "step": 6459 }, { "epoch": 3.492521174986484, "grad_norm": 0.28653475642204285, "learning_rate": 2.519099610344492e-06, "loss": 0.3868, "step": 6460 }, { "epoch": 3.4930618129392683, "grad_norm": 0.2671637535095215, "learning_rate": 2.517460609108063e-06, "loss": 0.3599, "step": 6461 }, { "epoch": 3.4936024508920527, "grad_norm": 0.2724429965019226, "learning_rate": 2.515821961827595e-06, "loss": 0.3865, "step": 6462 }, { "epoch": 3.494143088844837, "grad_norm": 0.27978870272636414, "learning_rate": 2.5141836687367273e-06, "loss": 0.385, "step": 6463 }, { "epoch": 3.4946837267976214, "grad_norm": 0.3110909163951874, "learning_rate": 2.5125457300690477e-06, "loss": 0.3856, "step": 6464 }, { "epoch": 3.4952243647504053, "grad_norm": 0.27133485674858093, "learning_rate": 2.5109081460580875e-06, "loss": 0.3717, "step": 6465 }, { "epoch": 3.4957650027031897, "grad_norm": 0.2729180157184601, "learning_rate": 2.5092709169373307e-06, "loss": 0.3655, "step": 6466 }, { "epoch": 3.496305640655974, "grad_norm": 0.31530043482780457, "learning_rate": 2.5076340429402086e-06, "loss": 0.3953, "step": 6467 }, { "epoch": 3.4968462786087584, "grad_norm": 0.2778479754924774, "learning_rate": 2.5059975243001077e-06, "loss": 0.3731, "step": 6468 }, { "epoch": 3.4973869165615428, "grad_norm": 0.28354212641716003, "learning_rate": 2.504361361250358e-06, "loss": 0.3647, "step": 6469 }, { "epoch": 3.4979275545143267, "grad_norm": 0.29331958293914795, "learning_rate": 2.502725554024239e-06, "loss": 0.3747, "step": 6470 }, { "epoch": 3.498468192467111, "grad_norm": 0.29107895493507385, "learning_rate": 2.501090102854984e-06, "loss": 0.3673, "step": 6471 }, { "epoch": 3.4990088304198954, "grad_norm": 0.3013686239719391, "learning_rate": 2.4994550079757696e-06, "loss": 0.3842, "step": 6472 }, { "epoch": 3.4995494683726798, "grad_norm": 0.2720975875854492, "learning_rate": 2.497820269619728e-06, "loss": 0.3525, "step": 6473 }, { "epoch": 3.500090106325464, "grad_norm": 0.290024071931839, "learning_rate": 2.4961858880199357e-06, "loss": 0.3963, "step": 6474 }, { "epoch": 3.5006307442782485, "grad_norm": 0.30103790760040283, "learning_rate": 2.494551863409418e-06, "loss": 0.3658, "step": 6475 }, { "epoch": 3.501171382231033, "grad_norm": 0.3023599088191986, "learning_rate": 2.4929181960211553e-06, "loss": 0.372, "step": 6476 }, { "epoch": 3.5017120201838168, "grad_norm": 0.27599939703941345, "learning_rate": 2.49128488608807e-06, "loss": 0.3597, "step": 6477 }, { "epoch": 3.502252658136601, "grad_norm": 0.2933019995689392, "learning_rate": 2.4896519338430376e-06, "loss": 0.3846, "step": 6478 }, { "epoch": 3.5027932960893855, "grad_norm": 0.2922401428222656, "learning_rate": 2.4880193395188785e-06, "loss": 0.3885, "step": 6479 }, { "epoch": 3.50333393404217, "grad_norm": 0.30111080408096313, "learning_rate": 2.4863871033483693e-06, "loss": 0.3723, "step": 6480 }, { "epoch": 3.5038745719949542, "grad_norm": 0.2843570113182068, "learning_rate": 2.48475522556423e-06, "loss": 0.3926, "step": 6481 }, { "epoch": 3.504415209947738, "grad_norm": 0.27177754044532776, "learning_rate": 2.4831237063991277e-06, "loss": 0.3886, "step": 6482 }, { "epoch": 3.5049558479005225, "grad_norm": 0.28537437319755554, "learning_rate": 2.481492546085686e-06, "loss": 0.3618, "step": 6483 }, { "epoch": 3.505496485853307, "grad_norm": 0.29277586936950684, "learning_rate": 2.4798617448564688e-06, "loss": 0.3695, "step": 6484 }, { "epoch": 3.5060371238060912, "grad_norm": 0.27317342162132263, "learning_rate": 2.478231302943997e-06, "loss": 0.3676, "step": 6485 }, { "epoch": 3.5065777617588756, "grad_norm": 0.283464640378952, "learning_rate": 2.4766012205807333e-06, "loss": 0.3673, "step": 6486 }, { "epoch": 3.5071183997116595, "grad_norm": 0.2851029336452484, "learning_rate": 2.474971497999091e-06, "loss": 0.39, "step": 6487 }, { "epoch": 3.5076590376644443, "grad_norm": 0.2650279700756073, "learning_rate": 2.4733421354314355e-06, "loss": 0.387, "step": 6488 }, { "epoch": 3.5081996756172282, "grad_norm": 0.28088340163230896, "learning_rate": 2.471713133110078e-06, "loss": 0.3983, "step": 6489 }, { "epoch": 3.5087403135700126, "grad_norm": 0.2757152020931244, "learning_rate": 2.470084491267278e-06, "loss": 0.3508, "step": 6490 }, { "epoch": 3.509280951522797, "grad_norm": 0.2685333788394928, "learning_rate": 2.4684562101352414e-06, "loss": 0.3643, "step": 6491 }, { "epoch": 3.5098215894755813, "grad_norm": 0.301870197057724, "learning_rate": 2.466828289946129e-06, "loss": 0.3753, "step": 6492 }, { "epoch": 3.5103622274283657, "grad_norm": 0.27955588698387146, "learning_rate": 2.4652007309320497e-06, "loss": 0.3644, "step": 6493 }, { "epoch": 3.5109028653811496, "grad_norm": 0.26468172669410706, "learning_rate": 2.4635735333250506e-06, "loss": 0.3469, "step": 6494 }, { "epoch": 3.511443503333934, "grad_norm": 0.3008752465248108, "learning_rate": 2.46194669735714e-06, "loss": 0.3648, "step": 6495 }, { "epoch": 3.5119841412867183, "grad_norm": 0.2742179036140442, "learning_rate": 2.460320223260266e-06, "loss": 0.3562, "step": 6496 }, { "epoch": 3.5125247792395027, "grad_norm": 0.26549032330513, "learning_rate": 2.4586941112663315e-06, "loss": 0.4081, "step": 6497 }, { "epoch": 3.513065417192287, "grad_norm": 0.2619190216064453, "learning_rate": 2.457068361607183e-06, "loss": 0.3814, "step": 6498 }, { "epoch": 3.513606055145071, "grad_norm": 0.301523357629776, "learning_rate": 2.4554429745146145e-06, "loss": 0.3753, "step": 6499 }, { "epoch": 3.5141466930978553, "grad_norm": 0.2761991620063782, "learning_rate": 2.4538179502203753e-06, "loss": 0.3595, "step": 6500 }, { "epoch": 3.5146873310506397, "grad_norm": 0.28614601492881775, "learning_rate": 2.452193288956157e-06, "loss": 0.3744, "step": 6501 }, { "epoch": 3.515227969003424, "grad_norm": 0.27287283539772034, "learning_rate": 2.4505689909535967e-06, "loss": 0.3642, "step": 6502 }, { "epoch": 3.5157686069562084, "grad_norm": 0.28784000873565674, "learning_rate": 2.4489450564442903e-06, "loss": 0.3818, "step": 6503 }, { "epoch": 3.5163092449089928, "grad_norm": 0.26401007175445557, "learning_rate": 2.44732148565977e-06, "loss": 0.3725, "step": 6504 }, { "epoch": 3.516849882861777, "grad_norm": 0.2846088111400604, "learning_rate": 2.445698278831528e-06, "loss": 0.3716, "step": 6505 }, { "epoch": 3.517390520814561, "grad_norm": 0.2900800406932831, "learning_rate": 2.44407543619099e-06, "loss": 0.3732, "step": 6506 }, { "epoch": 3.5179311587673454, "grad_norm": 0.28280070424079895, "learning_rate": 2.442452957969545e-06, "loss": 0.4047, "step": 6507 }, { "epoch": 3.5184717967201298, "grad_norm": 0.2799927890300751, "learning_rate": 2.4408308443985172e-06, "loss": 0.3684, "step": 6508 }, { "epoch": 3.519012434672914, "grad_norm": 0.30428990721702576, "learning_rate": 2.43920909570919e-06, "loss": 0.3816, "step": 6509 }, { "epoch": 3.5195530726256985, "grad_norm": 0.27521735429763794, "learning_rate": 2.437587712132787e-06, "loss": 0.3545, "step": 6510 }, { "epoch": 3.5200937105784824, "grad_norm": 0.2745533585548401, "learning_rate": 2.4359666939004793e-06, "loss": 0.3666, "step": 6511 }, { "epoch": 3.5206343485312668, "grad_norm": 0.2788096070289612, "learning_rate": 2.4343460412433947e-06, "loss": 0.378, "step": 6512 }, { "epoch": 3.521174986484051, "grad_norm": 0.28982996940612793, "learning_rate": 2.4327257543925986e-06, "loss": 0.3944, "step": 6513 }, { "epoch": 3.5217156244368355, "grad_norm": 0.2812902629375458, "learning_rate": 2.431105833579108e-06, "loss": 0.3789, "step": 6514 }, { "epoch": 3.52225626238962, "grad_norm": 0.29696497321128845, "learning_rate": 2.429486279033892e-06, "loss": 0.3819, "step": 6515 }, { "epoch": 3.522796900342404, "grad_norm": 0.25321662425994873, "learning_rate": 2.4278670909878597e-06, "loss": 0.3673, "step": 6516 }, { "epoch": 3.5233375382951886, "grad_norm": 0.2907755970954895, "learning_rate": 2.4262482696718765e-06, "loss": 0.3633, "step": 6517 }, { "epoch": 3.5238781762479725, "grad_norm": 0.30284127593040466, "learning_rate": 2.424629815316748e-06, "loss": 0.3858, "step": 6518 }, { "epoch": 3.524418814200757, "grad_norm": 0.308379590511322, "learning_rate": 2.4230117281532305e-06, "loss": 0.3742, "step": 6519 }, { "epoch": 3.5249594521535412, "grad_norm": 0.274372935295105, "learning_rate": 2.4213940084120274e-06, "loss": 0.3865, "step": 6520 }, { "epoch": 3.5255000901063256, "grad_norm": 0.301439493894577, "learning_rate": 2.4197766563237908e-06, "loss": 0.3326, "step": 6521 }, { "epoch": 3.52604072805911, "grad_norm": 0.28579795360565186, "learning_rate": 2.418159672119124e-06, "loss": 0.3484, "step": 6522 }, { "epoch": 3.526581366011894, "grad_norm": 0.28113120794296265, "learning_rate": 2.416543056028567e-06, "loss": 0.3645, "step": 6523 }, { "epoch": 3.5271220039646782, "grad_norm": 0.28649863600730896, "learning_rate": 2.414926808282618e-06, "loss": 0.3811, "step": 6524 }, { "epoch": 3.5276626419174626, "grad_norm": 0.2908145785331726, "learning_rate": 2.4133109291117156e-06, "loss": 0.3853, "step": 6525 }, { "epoch": 3.528203279870247, "grad_norm": 0.27370089292526245, "learning_rate": 2.411695418746253e-06, "loss": 0.3836, "step": 6526 }, { "epoch": 3.5287439178230313, "grad_norm": 0.3009878695011139, "learning_rate": 2.4100802774165657e-06, "loss": 0.3527, "step": 6527 }, { "epoch": 3.5292845557758152, "grad_norm": 0.28535473346710205, "learning_rate": 2.4084655053529337e-06, "loss": 0.3824, "step": 6528 }, { "epoch": 3.5298251937285996, "grad_norm": 0.2865583300590515, "learning_rate": 2.4068511027855935e-06, "loss": 0.4155, "step": 6529 }, { "epoch": 3.530365831681384, "grad_norm": 0.28438109159469604, "learning_rate": 2.405237069944721e-06, "loss": 0.3742, "step": 6530 }, { "epoch": 3.5309064696341683, "grad_norm": 0.297911137342453, "learning_rate": 2.403623407060441e-06, "loss": 0.405, "step": 6531 }, { "epoch": 3.5314471075869527, "grad_norm": 0.2770686447620392, "learning_rate": 2.40201011436283e-06, "loss": 0.3728, "step": 6532 }, { "epoch": 3.531987745539737, "grad_norm": 0.28494513034820557, "learning_rate": 2.400397192081904e-06, "loss": 0.393, "step": 6533 }, { "epoch": 3.5325283834925214, "grad_norm": 0.30899500846862793, "learning_rate": 2.3987846404476374e-06, "loss": 0.3759, "step": 6534 }, { "epoch": 3.5330690214453053, "grad_norm": 0.2798628509044647, "learning_rate": 2.397172459689936e-06, "loss": 0.3861, "step": 6535 }, { "epoch": 3.5336096593980897, "grad_norm": 0.2783905863761902, "learning_rate": 2.3955606500386685e-06, "loss": 0.3896, "step": 6536 }, { "epoch": 3.534150297350874, "grad_norm": 0.28773874044418335, "learning_rate": 2.3939492117236397e-06, "loss": 0.3626, "step": 6537 }, { "epoch": 3.5346909353036584, "grad_norm": 0.2815054953098297, "learning_rate": 2.3923381449746086e-06, "loss": 0.3915, "step": 6538 }, { "epoch": 3.5352315732564428, "grad_norm": 0.28169700503349304, "learning_rate": 2.3907274500212767e-06, "loss": 0.358, "step": 6539 }, { "epoch": 3.5357722112092267, "grad_norm": 0.2728629410266876, "learning_rate": 2.3891171270932923e-06, "loss": 0.3783, "step": 6540 }, { "epoch": 3.536312849162011, "grad_norm": 0.30166539549827576, "learning_rate": 2.387507176420256e-06, "loss": 0.3772, "step": 6541 }, { "epoch": 3.5368534871147954, "grad_norm": 0.28927499055862427, "learning_rate": 2.38589759823171e-06, "loss": 0.3843, "step": 6542 }, { "epoch": 3.5373941250675798, "grad_norm": 0.2623683512210846, "learning_rate": 2.3842883927571424e-06, "loss": 0.3501, "step": 6543 }, { "epoch": 3.537934763020364, "grad_norm": 0.3098645508289337, "learning_rate": 2.3826795602259956e-06, "loss": 0.381, "step": 6544 }, { "epoch": 3.538475400973148, "grad_norm": 0.28410932421684265, "learning_rate": 2.3810711008676495e-06, "loss": 0.3844, "step": 6545 }, { "epoch": 3.539016038925933, "grad_norm": 0.3092157542705536, "learning_rate": 2.379463014911441e-06, "loss": 0.3956, "step": 6546 }, { "epoch": 3.539556676878717, "grad_norm": 0.2804025113582611, "learning_rate": 2.3778553025866415e-06, "loss": 0.3666, "step": 6547 }, { "epoch": 3.540097314831501, "grad_norm": 0.29580822587013245, "learning_rate": 2.3762479641224794e-06, "loss": 0.3759, "step": 6548 }, { "epoch": 3.5406379527842855, "grad_norm": 0.30240368843078613, "learning_rate": 2.3746409997481248e-06, "loss": 0.3954, "step": 6549 }, { "epoch": 3.54117859073707, "grad_norm": 0.2760869860649109, "learning_rate": 2.3730344096926974e-06, "loss": 0.3554, "step": 6550 }, { "epoch": 3.5417192286898542, "grad_norm": 0.2570158541202545, "learning_rate": 2.3714281941852608e-06, "loss": 0.3456, "step": 6551 }, { "epoch": 3.542259866642638, "grad_norm": 0.29335662722587585, "learning_rate": 2.3698223534548248e-06, "loss": 0.3965, "step": 6552 }, { "epoch": 3.5428005045954225, "grad_norm": 0.3044717609882355, "learning_rate": 2.3682168877303508e-06, "loss": 0.3732, "step": 6553 }, { "epoch": 3.543341142548207, "grad_norm": 0.29319342970848083, "learning_rate": 2.366611797240741e-06, "loss": 0.4018, "step": 6554 }, { "epoch": 3.5438817805009912, "grad_norm": 0.3057367205619812, "learning_rate": 2.3650070822148447e-06, "loss": 0.3595, "step": 6555 }, { "epoch": 3.5444224184537756, "grad_norm": 0.27213531732559204, "learning_rate": 2.3634027428814632e-06, "loss": 0.3722, "step": 6556 }, { "epoch": 3.5449630564065595, "grad_norm": 0.2879851162433624, "learning_rate": 2.3617987794693358e-06, "loss": 0.3957, "step": 6557 }, { "epoch": 3.545503694359344, "grad_norm": 0.3101233243942261, "learning_rate": 2.360195192207159e-06, "loss": 0.3913, "step": 6558 }, { "epoch": 3.5460443323121282, "grad_norm": 0.27164915204048157, "learning_rate": 2.358591981323562e-06, "loss": 0.3782, "step": 6559 }, { "epoch": 3.5465849702649126, "grad_norm": 0.2786481976509094, "learning_rate": 2.3569891470471308e-06, "loss": 0.361, "step": 6560 }, { "epoch": 3.547125608217697, "grad_norm": 0.2545458674430847, "learning_rate": 2.355386689606397e-06, "loss": 0.3839, "step": 6561 }, { "epoch": 3.5476662461704813, "grad_norm": 0.29571396112442017, "learning_rate": 2.3537846092298337e-06, "loss": 0.3896, "step": 6562 }, { "epoch": 3.5482068841232657, "grad_norm": 0.31128859519958496, "learning_rate": 2.352182906145863e-06, "loss": 0.3525, "step": 6563 }, { "epoch": 3.5487475220760496, "grad_norm": 0.31172728538513184, "learning_rate": 2.3505815805828515e-06, "loss": 0.4075, "step": 6564 }, { "epoch": 3.549288160028834, "grad_norm": 0.2759602665901184, "learning_rate": 2.3489806327691156e-06, "loss": 0.378, "step": 6565 }, { "epoch": 3.5498287979816183, "grad_norm": 0.2782008647918701, "learning_rate": 2.3473800629329145e-06, "loss": 0.3883, "step": 6566 }, { "epoch": 3.5503694359344027, "grad_norm": 0.3004755675792694, "learning_rate": 2.345779871302453e-06, "loss": 0.3815, "step": 6567 }, { "epoch": 3.550910073887187, "grad_norm": 0.25604912638664246, "learning_rate": 2.344180058105887e-06, "loss": 0.3995, "step": 6568 }, { "epoch": 3.551450711839971, "grad_norm": 0.28551968932151794, "learning_rate": 2.342580623571311e-06, "loss": 0.3897, "step": 6569 }, { "epoch": 3.5519913497927553, "grad_norm": 0.2856443226337433, "learning_rate": 2.3409815679267733e-06, "loss": 0.3918, "step": 6570 }, { "epoch": 3.5525319877455397, "grad_norm": 0.2901134788990021, "learning_rate": 2.3393828914002623e-06, "loss": 0.3636, "step": 6571 }, { "epoch": 3.553072625698324, "grad_norm": 0.28899016976356506, "learning_rate": 2.3377845942197133e-06, "loss": 0.3849, "step": 6572 }, { "epoch": 3.5536132636511084, "grad_norm": 0.2661123275756836, "learning_rate": 2.3361866766130114e-06, "loss": 0.3739, "step": 6573 }, { "epoch": 3.5541539016038923, "grad_norm": 0.30045759677886963, "learning_rate": 2.3345891388079837e-06, "loss": 0.3683, "step": 6574 }, { "epoch": 3.554694539556677, "grad_norm": 0.2721780240535736, "learning_rate": 2.3329919810324036e-06, "loss": 0.3842, "step": 6575 }, { "epoch": 3.555235177509461, "grad_norm": 0.27868637442588806, "learning_rate": 2.3313952035139896e-06, "loss": 0.3904, "step": 6576 }, { "epoch": 3.5557758154622454, "grad_norm": 0.2812824249267578, "learning_rate": 2.3297988064804106e-06, "loss": 0.362, "step": 6577 }, { "epoch": 3.5563164534150298, "grad_norm": 0.2818116545677185, "learning_rate": 2.3282027901592762e-06, "loss": 0.3835, "step": 6578 }, { "epoch": 3.556857091367814, "grad_norm": 0.28291478753089905, "learning_rate": 2.3266071547781427e-06, "loss": 0.385, "step": 6579 }, { "epoch": 3.5573977293205985, "grad_norm": 0.27867481112480164, "learning_rate": 2.325011900564515e-06, "loss": 0.3833, "step": 6580 }, { "epoch": 3.5579383672733824, "grad_norm": 0.28014400601387024, "learning_rate": 2.323417027745839e-06, "loss": 0.384, "step": 6581 }, { "epoch": 3.558479005226167, "grad_norm": 0.27470117807388306, "learning_rate": 2.3218225365495117e-06, "loss": 0.3857, "step": 6582 }, { "epoch": 3.559019643178951, "grad_norm": 0.28751593828201294, "learning_rate": 2.3202284272028717e-06, "loss": 0.3762, "step": 6583 }, { "epoch": 3.5595602811317355, "grad_norm": 0.28930631279945374, "learning_rate": 2.3186346999332015e-06, "loss": 0.4086, "step": 6584 }, { "epoch": 3.56010091908452, "grad_norm": 0.29160645604133606, "learning_rate": 2.3170413549677367e-06, "loss": 0.3872, "step": 6585 }, { "epoch": 3.560641557037304, "grad_norm": 0.2774616479873657, "learning_rate": 2.3154483925336486e-06, "loss": 0.3702, "step": 6586 }, { "epoch": 3.561182194990088, "grad_norm": 0.2756505310535431, "learning_rate": 2.3138558128580653e-06, "loss": 0.4019, "step": 6587 }, { "epoch": 3.5617228329428725, "grad_norm": 0.2749829888343811, "learning_rate": 2.3122636161680454e-06, "loss": 0.3687, "step": 6588 }, { "epoch": 3.562263470895657, "grad_norm": 0.26820358633995056, "learning_rate": 2.3106718026906073e-06, "loss": 0.3785, "step": 6589 }, { "epoch": 3.5628041088484412, "grad_norm": 0.31059184670448303, "learning_rate": 2.3090803726527083e-06, "loss": 0.364, "step": 6590 }, { "epoch": 3.5633447468012256, "grad_norm": 0.3090270757675171, "learning_rate": 2.3074893262812513e-06, "loss": 0.3679, "step": 6591 }, { "epoch": 3.56388538475401, "grad_norm": 0.2815909683704376, "learning_rate": 2.305898663803084e-06, "loss": 0.4074, "step": 6592 }, { "epoch": 3.564426022706794, "grad_norm": 0.3041037321090698, "learning_rate": 2.304308385444999e-06, "loss": 0.3909, "step": 6593 }, { "epoch": 3.5649666606595782, "grad_norm": 0.2612096071243286, "learning_rate": 2.3027184914337387e-06, "loss": 0.3678, "step": 6594 }, { "epoch": 3.5655072986123626, "grad_norm": 0.27386558055877686, "learning_rate": 2.301128981995985e-06, "loss": 0.4038, "step": 6595 }, { "epoch": 3.566047936565147, "grad_norm": 0.2815191447734833, "learning_rate": 2.299539857358366e-06, "loss": 0.3886, "step": 6596 }, { "epoch": 3.5665885745179313, "grad_norm": 0.2967953681945801, "learning_rate": 2.2979511177474594e-06, "loss": 0.3516, "step": 6597 }, { "epoch": 3.5671292124707152, "grad_norm": 0.2771591544151306, "learning_rate": 2.2963627633897824e-06, "loss": 0.3541, "step": 6598 }, { "epoch": 3.5676698504234996, "grad_norm": 0.29038968682289124, "learning_rate": 2.2947747945118013e-06, "loss": 0.3862, "step": 6599 }, { "epoch": 3.568210488376284, "grad_norm": 0.7554365992546082, "learning_rate": 2.293187211339926e-06, "loss": 0.3744, "step": 6600 }, { "epoch": 3.5687511263290683, "grad_norm": 0.27392783761024475, "learning_rate": 2.2916000141005077e-06, "loss": 0.373, "step": 6601 }, { "epoch": 3.5692917642818527, "grad_norm": 0.2828829884529114, "learning_rate": 2.2900132030198513e-06, "loss": 0.3884, "step": 6602 }, { "epoch": 3.5698324022346366, "grad_norm": 0.29404449462890625, "learning_rate": 2.288426778324199e-06, "loss": 0.3958, "step": 6603 }, { "epoch": 3.5703730401874214, "grad_norm": 0.3024761378765106, "learning_rate": 2.28684074023974e-06, "loss": 0.3958, "step": 6604 }, { "epoch": 3.5709136781402053, "grad_norm": 0.2863123118877411, "learning_rate": 2.2852550889926067e-06, "loss": 0.3877, "step": 6605 }, { "epoch": 3.5714543160929897, "grad_norm": 0.2825278639793396, "learning_rate": 2.2836698248088814e-06, "loss": 0.3873, "step": 6606 }, { "epoch": 3.571994954045774, "grad_norm": 0.28320351243019104, "learning_rate": 2.282084947914591e-06, "loss": 0.3653, "step": 6607 }, { "epoch": 3.5725355919985584, "grad_norm": 0.3045535385608673, "learning_rate": 2.2805004585356964e-06, "loss": 0.3852, "step": 6608 }, { "epoch": 3.5730762299513428, "grad_norm": 0.30215948820114136, "learning_rate": 2.2789163568981183e-06, "loss": 0.3829, "step": 6609 }, { "epoch": 3.5736168679041267, "grad_norm": 0.2773192822933197, "learning_rate": 2.2773326432277097e-06, "loss": 0.3983, "step": 6610 }, { "epoch": 3.574157505856911, "grad_norm": 0.28059136867523193, "learning_rate": 2.2757493177502795e-06, "loss": 0.3567, "step": 6611 }, { "epoch": 3.5746981438096954, "grad_norm": 0.2972952723503113, "learning_rate": 2.274166380691571e-06, "loss": 0.3764, "step": 6612 }, { "epoch": 3.57523878176248, "grad_norm": 0.3074398636817932, "learning_rate": 2.2725838322772765e-06, "loss": 0.37, "step": 6613 }, { "epoch": 3.575779419715264, "grad_norm": 0.2970588505268097, "learning_rate": 2.271001672733036e-06, "loss": 0.3785, "step": 6614 }, { "epoch": 3.576320057668048, "grad_norm": 0.2987179756164551, "learning_rate": 2.2694199022844284e-06, "loss": 0.4056, "step": 6615 }, { "epoch": 3.5768606956208324, "grad_norm": 0.28840872645378113, "learning_rate": 2.26783852115698e-06, "loss": 0.385, "step": 6616 }, { "epoch": 3.577401333573617, "grad_norm": 0.3053003251552582, "learning_rate": 2.266257529576161e-06, "loss": 0.3917, "step": 6617 }, { "epoch": 3.577941971526401, "grad_norm": 0.2751826047897339, "learning_rate": 2.264676927767386e-06, "loss": 0.3795, "step": 6618 }, { "epoch": 3.5784826094791855, "grad_norm": 0.28055238723754883, "learning_rate": 2.263096715956019e-06, "loss": 0.3611, "step": 6619 }, { "epoch": 3.57902324743197, "grad_norm": 0.3108740448951721, "learning_rate": 2.261516894367356e-06, "loss": 0.3618, "step": 6620 }, { "epoch": 3.5795638853847542, "grad_norm": 0.29139307141304016, "learning_rate": 2.2599374632266514e-06, "loss": 0.3754, "step": 6621 }, { "epoch": 3.580104523337538, "grad_norm": 0.2887786626815796, "learning_rate": 2.2583584227590927e-06, "loss": 0.3881, "step": 6622 }, { "epoch": 3.5806451612903225, "grad_norm": 0.2744469940662384, "learning_rate": 2.2567797731898217e-06, "loss": 0.3749, "step": 6623 }, { "epoch": 3.581185799243107, "grad_norm": 0.27826839685440063, "learning_rate": 2.2552015147439166e-06, "loss": 0.364, "step": 6624 }, { "epoch": 3.5817264371958912, "grad_norm": 0.2944410443305969, "learning_rate": 2.2536236476464007e-06, "loss": 0.3793, "step": 6625 }, { "epoch": 3.5822670751486756, "grad_norm": 0.2951698899269104, "learning_rate": 2.252046172122248e-06, "loss": 0.3636, "step": 6626 }, { "epoch": 3.5828077131014595, "grad_norm": 0.28781840205192566, "learning_rate": 2.250469088396369e-06, "loss": 0.3786, "step": 6627 }, { "epoch": 3.583348351054244, "grad_norm": 0.29098764061927795, "learning_rate": 2.248892396693621e-06, "loss": 0.368, "step": 6628 }, { "epoch": 3.5838889890070282, "grad_norm": 0.2595488429069519, "learning_rate": 2.247316097238809e-06, "loss": 0.3861, "step": 6629 }, { "epoch": 3.5844296269598126, "grad_norm": 0.2806659936904907, "learning_rate": 2.2457401902566745e-06, "loss": 0.4064, "step": 6630 }, { "epoch": 3.584970264912597, "grad_norm": 0.2566904127597809, "learning_rate": 2.244164675971914e-06, "loss": 0.3639, "step": 6631 }, { "epoch": 3.585510902865381, "grad_norm": 0.2670801281929016, "learning_rate": 2.2425895546091534e-06, "loss": 0.3639, "step": 6632 }, { "epoch": 3.5860515408181657, "grad_norm": 0.287205308675766, "learning_rate": 2.2410148263929767e-06, "loss": 0.3775, "step": 6633 }, { "epoch": 3.5865921787709496, "grad_norm": 0.32024917006492615, "learning_rate": 2.2394404915479017e-06, "loss": 0.3577, "step": 6634 }, { "epoch": 3.587132816723734, "grad_norm": 0.2705610394477844, "learning_rate": 2.2378665502983976e-06, "loss": 0.3933, "step": 6635 }, { "epoch": 3.5876734546765183, "grad_norm": 0.2769106328487396, "learning_rate": 2.2362930028688736e-06, "loss": 0.3678, "step": 6636 }, { "epoch": 3.5882140926293027, "grad_norm": 0.27824127674102783, "learning_rate": 2.23471984948368e-06, "loss": 0.3824, "step": 6637 }, { "epoch": 3.588754730582087, "grad_norm": 0.28756049275398254, "learning_rate": 2.2331470903671183e-06, "loss": 0.3901, "step": 6638 }, { "epoch": 3.589295368534871, "grad_norm": 0.2813909351825714, "learning_rate": 2.2315747257434277e-06, "loss": 0.3771, "step": 6639 }, { "epoch": 3.5898360064876553, "grad_norm": 0.26315838098526, "learning_rate": 2.2300027558367917e-06, "loss": 0.4028, "step": 6640 }, { "epoch": 3.5903766444404397, "grad_norm": 0.29794737696647644, "learning_rate": 2.228431180871342e-06, "loss": 0.3755, "step": 6641 }, { "epoch": 3.590917282393224, "grad_norm": 0.27428027987480164, "learning_rate": 2.2268600010711477e-06, "loss": 0.3614, "step": 6642 }, { "epoch": 3.5914579203460084, "grad_norm": 0.30103349685668945, "learning_rate": 2.2252892166602304e-06, "loss": 0.3662, "step": 6643 }, { "epoch": 3.5919985582987923, "grad_norm": 0.32324516773223877, "learning_rate": 2.2237188278625415e-06, "loss": 0.3648, "step": 6644 }, { "epoch": 3.592539196251577, "grad_norm": 0.3174627125263214, "learning_rate": 2.2221488349019903e-06, "loss": 0.3693, "step": 6645 }, { "epoch": 3.593079834204361, "grad_norm": 0.26844707131385803, "learning_rate": 2.22057923800242e-06, "loss": 0.3683, "step": 6646 }, { "epoch": 3.5936204721571454, "grad_norm": 0.30172064900398254, "learning_rate": 2.2190100373876228e-06, "loss": 0.3975, "step": 6647 }, { "epoch": 3.59416111010993, "grad_norm": 0.2951805293560028, "learning_rate": 2.2174412332813353e-06, "loss": 0.39, "step": 6648 }, { "epoch": 3.594701748062714, "grad_norm": 0.28690823912620544, "learning_rate": 2.215872825907228e-06, "loss": 0.3807, "step": 6649 }, { "epoch": 3.5952423860154985, "grad_norm": 0.2679624855518341, "learning_rate": 2.2143048154889272e-06, "loss": 0.3619, "step": 6650 }, { "epoch": 3.5957830239682824, "grad_norm": 0.3021341562271118, "learning_rate": 2.212737202249994e-06, "loss": 0.3732, "step": 6651 }, { "epoch": 3.596323661921067, "grad_norm": 0.2939276695251465, "learning_rate": 2.211169986413938e-06, "loss": 0.3675, "step": 6652 }, { "epoch": 3.596864299873851, "grad_norm": 0.28871920704841614, "learning_rate": 2.209603168204209e-06, "loss": 0.3456, "step": 6653 }, { "epoch": 3.5974049378266355, "grad_norm": 0.2954472601413727, "learning_rate": 2.208036747844199e-06, "loss": 0.3724, "step": 6654 }, { "epoch": 3.59794557577942, "grad_norm": 0.29399073123931885, "learning_rate": 2.2064707255572494e-06, "loss": 0.3684, "step": 6655 }, { "epoch": 3.598486213732204, "grad_norm": 0.3921755254268646, "learning_rate": 2.2049051015666384e-06, "loss": 0.3695, "step": 6656 }, { "epoch": 3.599026851684988, "grad_norm": 0.28325673937797546, "learning_rate": 2.203339876095588e-06, "loss": 0.3729, "step": 6657 }, { "epoch": 3.5995674896377725, "grad_norm": 0.2699066996574402, "learning_rate": 2.2017750493672704e-06, "loss": 0.351, "step": 6658 }, { "epoch": 3.600108127590557, "grad_norm": 0.2776723802089691, "learning_rate": 2.2002106216047904e-06, "loss": 0.3922, "step": 6659 }, { "epoch": 3.6006487655433412, "grad_norm": 0.2835797369480133, "learning_rate": 2.1986465930312067e-06, "loss": 0.3712, "step": 6660 }, { "epoch": 3.601189403496125, "grad_norm": 0.2748779356479645, "learning_rate": 2.1970829638695096e-06, "loss": 0.3867, "step": 6661 }, { "epoch": 3.60173004144891, "grad_norm": 0.27030235528945923, "learning_rate": 2.1955197343426432e-06, "loss": 0.3745, "step": 6662 }, { "epoch": 3.602270679401694, "grad_norm": 0.27696895599365234, "learning_rate": 2.1939569046734865e-06, "loss": 0.3874, "step": 6663 }, { "epoch": 3.6028113173544782, "grad_norm": 0.270863801240921, "learning_rate": 2.192394475084868e-06, "loss": 0.3653, "step": 6664 }, { "epoch": 3.6033519553072626, "grad_norm": 0.26304101943969727, "learning_rate": 2.1908324457995556e-06, "loss": 0.3816, "step": 6665 }, { "epoch": 3.603892593260047, "grad_norm": 0.27188053727149963, "learning_rate": 2.1892708170402572e-06, "loss": 0.3514, "step": 6666 }, { "epoch": 3.6044332312128313, "grad_norm": 0.2782359719276428, "learning_rate": 2.187709589029631e-06, "loss": 0.3509, "step": 6667 }, { "epoch": 3.6049738691656152, "grad_norm": 0.2933637499809265, "learning_rate": 2.1861487619902733e-06, "loss": 0.3836, "step": 6668 }, { "epoch": 3.6055145071183996, "grad_norm": 0.2583782374858856, "learning_rate": 2.1845883361447218e-06, "loss": 0.3881, "step": 6669 }, { "epoch": 3.606055145071184, "grad_norm": 0.3089904189109802, "learning_rate": 2.1830283117154616e-06, "loss": 0.366, "step": 6670 }, { "epoch": 3.6065957830239683, "grad_norm": 0.28782257437705994, "learning_rate": 2.181468688924916e-06, "loss": 0.3873, "step": 6671 }, { "epoch": 3.6071364209767527, "grad_norm": 0.27572405338287354, "learning_rate": 2.1799094679954575e-06, "loss": 0.3646, "step": 6672 }, { "epoch": 3.6076770589295366, "grad_norm": 0.2745559811592102, "learning_rate": 2.1783506491493906e-06, "loss": 0.362, "step": 6673 }, { "epoch": 3.6082176968823214, "grad_norm": 0.27891382575035095, "learning_rate": 2.1767922326089725e-06, "loss": 0.3773, "step": 6674 }, { "epoch": 3.6087583348351053, "grad_norm": 0.2770380973815918, "learning_rate": 2.1752342185964003e-06, "loss": 0.3846, "step": 6675 }, { "epoch": 3.6092989727878897, "grad_norm": 0.30474746227264404, "learning_rate": 2.173676607333812e-06, "loss": 0.3796, "step": 6676 }, { "epoch": 3.609839610740674, "grad_norm": 0.27506735920906067, "learning_rate": 2.172119399043288e-06, "loss": 0.383, "step": 6677 }, { "epoch": 3.6103802486934584, "grad_norm": 0.2605507969856262, "learning_rate": 2.1705625939468517e-06, "loss": 0.3585, "step": 6678 }, { "epoch": 3.6109208866462428, "grad_norm": 0.27277642488479614, "learning_rate": 2.1690061922664722e-06, "loss": 0.3659, "step": 6679 }, { "epoch": 3.6114615245990267, "grad_norm": 0.26635366678237915, "learning_rate": 2.1674501942240567e-06, "loss": 0.3623, "step": 6680 }, { "epoch": 3.612002162551811, "grad_norm": 0.27611610293388367, "learning_rate": 2.1658946000414553e-06, "loss": 0.3771, "step": 6681 }, { "epoch": 3.6125428005045954, "grad_norm": 0.2707509994506836, "learning_rate": 2.1643394099404652e-06, "loss": 0.3705, "step": 6682 }, { "epoch": 3.61308343845738, "grad_norm": 0.26408860087394714, "learning_rate": 2.1627846241428186e-06, "loss": 0.3807, "step": 6683 }, { "epoch": 3.613624076410164, "grad_norm": 0.2970049977302551, "learning_rate": 2.1612302428701993e-06, "loss": 0.3864, "step": 6684 }, { "epoch": 3.614164714362948, "grad_norm": 0.2735101282596588, "learning_rate": 2.159676266344222e-06, "loss": 0.3632, "step": 6685 }, { "epoch": 3.6147053523157324, "grad_norm": 0.2762027978897095, "learning_rate": 2.1581226947864524e-06, "loss": 0.3636, "step": 6686 }, { "epoch": 3.615245990268517, "grad_norm": 0.2806672155857086, "learning_rate": 2.156569528418398e-06, "loss": 0.3651, "step": 6687 }, { "epoch": 3.615786628221301, "grad_norm": 0.28479084372520447, "learning_rate": 2.155016767461505e-06, "loss": 0.3599, "step": 6688 }, { "epoch": 3.6163272661740855, "grad_norm": 0.25960230827331543, "learning_rate": 2.1534644121371633e-06, "loss": 0.3572, "step": 6689 }, { "epoch": 3.61686790412687, "grad_norm": 0.3097879886627197, "learning_rate": 2.151912462666703e-06, "loss": 0.3768, "step": 6690 }, { "epoch": 3.6174085420796542, "grad_norm": 0.26245662569999695, "learning_rate": 2.1503609192714008e-06, "loss": 0.3711, "step": 6691 }, { "epoch": 3.617949180032438, "grad_norm": 0.27237895131111145, "learning_rate": 2.148809782172472e-06, "loss": 0.3926, "step": 6692 }, { "epoch": 3.6184898179852225, "grad_norm": 0.2845414876937866, "learning_rate": 2.147259051591074e-06, "loss": 0.3593, "step": 6693 }, { "epoch": 3.619030455938007, "grad_norm": 0.28626132011413574, "learning_rate": 2.145708727748309e-06, "loss": 0.3747, "step": 6694 }, { "epoch": 3.6195710938907912, "grad_norm": 0.29464709758758545, "learning_rate": 2.144158810865217e-06, "loss": 0.3709, "step": 6695 }, { "epoch": 3.6201117318435756, "grad_norm": 0.28694719076156616, "learning_rate": 2.142609301162786e-06, "loss": 0.3935, "step": 6696 }, { "epoch": 3.6206523697963595, "grad_norm": 0.30178123712539673, "learning_rate": 2.1410601988619394e-06, "loss": 0.3697, "step": 6697 }, { "epoch": 3.621193007749144, "grad_norm": 0.27384212613105774, "learning_rate": 2.1395115041835447e-06, "loss": 0.3738, "step": 6698 }, { "epoch": 3.6217336457019282, "grad_norm": 0.2784256637096405, "learning_rate": 2.137963217348415e-06, "loss": 0.3738, "step": 6699 }, { "epoch": 3.6222742836547126, "grad_norm": 0.2793726921081543, "learning_rate": 2.1364153385773007e-06, "loss": 0.3835, "step": 6700 }, { "epoch": 3.622814921607497, "grad_norm": 0.2873479425907135, "learning_rate": 2.134867868090895e-06, "loss": 0.3775, "step": 6701 }, { "epoch": 3.623355559560281, "grad_norm": 0.266814649105072, "learning_rate": 2.1333208061098325e-06, "loss": 0.3757, "step": 6702 }, { "epoch": 3.6238961975130657, "grad_norm": 0.27134865522384644, "learning_rate": 2.1317741528546913e-06, "loss": 0.3746, "step": 6703 }, { "epoch": 3.6244368354658496, "grad_norm": 0.2763728201389313, "learning_rate": 2.1302279085459953e-06, "loss": 0.3979, "step": 6704 }, { "epoch": 3.624977473418634, "grad_norm": 0.2741091847419739, "learning_rate": 2.128682073404197e-06, "loss": 0.3766, "step": 6705 }, { "epoch": 3.6255181113714183, "grad_norm": 0.2954896092414856, "learning_rate": 2.1271366476497048e-06, "loss": 0.3702, "step": 6706 }, { "epoch": 3.6260587493242027, "grad_norm": 0.28821152448654175, "learning_rate": 2.125591631502858e-06, "loss": 0.3909, "step": 6707 }, { "epoch": 3.626599387276987, "grad_norm": 0.29107242822647095, "learning_rate": 2.124047025183947e-06, "loss": 0.3732, "step": 6708 }, { "epoch": 3.627140025229771, "grad_norm": 0.2932164669036865, "learning_rate": 2.122502828913196e-06, "loss": 0.4049, "step": 6709 }, { "epoch": 3.6276806631825553, "grad_norm": 0.27281996607780457, "learning_rate": 2.1209590429107734e-06, "loss": 0.3443, "step": 6710 }, { "epoch": 3.6282213011353397, "grad_norm": 0.2741299271583557, "learning_rate": 2.119415667396792e-06, "loss": 0.3499, "step": 6711 }, { "epoch": 3.628761939088124, "grad_norm": 0.27361026406288147, "learning_rate": 2.1178727025913005e-06, "loss": 0.3838, "step": 6712 }, { "epoch": 3.6293025770409084, "grad_norm": 0.26464131474494934, "learning_rate": 2.1163301487142945e-06, "loss": 0.3843, "step": 6713 }, { "epoch": 3.6298432149936923, "grad_norm": 0.27855372428894043, "learning_rate": 2.114788005985708e-06, "loss": 0.3646, "step": 6714 }, { "epoch": 3.6303838529464767, "grad_norm": 0.2654821574687958, "learning_rate": 2.1132462746254147e-06, "loss": 0.3646, "step": 6715 }, { "epoch": 3.630924490899261, "grad_norm": 0.2883964776992798, "learning_rate": 2.111704954853235e-06, "loss": 0.3755, "step": 6716 }, { "epoch": 3.6314651288520454, "grad_norm": 0.28079986572265625, "learning_rate": 2.1101640468889255e-06, "loss": 0.3661, "step": 6717 }, { "epoch": 3.63200576680483, "grad_norm": 0.26388463377952576, "learning_rate": 2.1086235509521875e-06, "loss": 0.3709, "step": 6718 }, { "epoch": 3.632546404757614, "grad_norm": 0.2793663442134857, "learning_rate": 2.107083467262659e-06, "loss": 0.3662, "step": 6719 }, { "epoch": 3.6330870427103985, "grad_norm": 0.27099892497062683, "learning_rate": 2.1055437960399266e-06, "loss": 0.3662, "step": 6720 }, { "epoch": 3.6336276806631824, "grad_norm": 0.26378142833709717, "learning_rate": 2.104004537503512e-06, "loss": 0.3746, "step": 6721 }, { "epoch": 3.634168318615967, "grad_norm": 0.27467218041419983, "learning_rate": 2.102465691872877e-06, "loss": 0.3714, "step": 6722 }, { "epoch": 3.634708956568751, "grad_norm": 0.2755574882030487, "learning_rate": 2.1009272593674323e-06, "loss": 0.3444, "step": 6723 }, { "epoch": 3.6352495945215355, "grad_norm": 0.2726738750934601, "learning_rate": 2.0993892402065207e-06, "loss": 0.3741, "step": 6724 }, { "epoch": 3.63579023247432, "grad_norm": 0.25456562638282776, "learning_rate": 2.0978516346094342e-06, "loss": 0.3673, "step": 6725 }, { "epoch": 3.636330870427104, "grad_norm": 0.27345624566078186, "learning_rate": 2.0963144427953998e-06, "loss": 0.355, "step": 6726 }, { "epoch": 3.636871508379888, "grad_norm": 0.2940003275871277, "learning_rate": 2.0947776649835854e-06, "loss": 0.3673, "step": 6727 }, { "epoch": 3.6374121463326725, "grad_norm": 0.26671913266181946, "learning_rate": 2.093241301393106e-06, "loss": 0.3814, "step": 6728 }, { "epoch": 3.637952784285457, "grad_norm": 0.28704431653022766, "learning_rate": 2.0917053522430114e-06, "loss": 0.3849, "step": 6729 }, { "epoch": 3.6384934222382412, "grad_norm": 0.28842297196388245, "learning_rate": 2.0901698177522944e-06, "loss": 0.3746, "step": 6730 }, { "epoch": 3.639034060191025, "grad_norm": 0.28242138028144836, "learning_rate": 2.0886346981398876e-06, "loss": 0.3792, "step": 6731 }, { "epoch": 3.63957469814381, "grad_norm": 0.31240415573120117, "learning_rate": 2.0870999936246662e-06, "loss": 0.3831, "step": 6732 }, { "epoch": 3.640115336096594, "grad_norm": 0.279478520154953, "learning_rate": 2.0855657044254503e-06, "loss": 0.3753, "step": 6733 }, { "epoch": 3.6406559740493782, "grad_norm": 0.2873874306678772, "learning_rate": 2.0840318307609887e-06, "loss": 0.3654, "step": 6734 }, { "epoch": 3.6411966120021626, "grad_norm": 0.28576597571372986, "learning_rate": 2.082498372849983e-06, "loss": 0.4005, "step": 6735 }, { "epoch": 3.641737249954947, "grad_norm": 0.2662777304649353, "learning_rate": 2.0809653309110685e-06, "loss": 0.3665, "step": 6736 }, { "epoch": 3.6422778879077313, "grad_norm": 0.27650347352027893, "learning_rate": 2.0794327051628255e-06, "loss": 0.363, "step": 6737 }, { "epoch": 3.6428185258605152, "grad_norm": 0.29481005668640137, "learning_rate": 2.0779004958237724e-06, "loss": 0.3853, "step": 6738 }, { "epoch": 3.6433591638132996, "grad_norm": 0.26555564999580383, "learning_rate": 2.0763687031123668e-06, "loss": 0.3607, "step": 6739 }, { "epoch": 3.643899801766084, "grad_norm": 0.2699344754219055, "learning_rate": 2.074837327247012e-06, "loss": 0.356, "step": 6740 }, { "epoch": 3.6444404397188683, "grad_norm": 0.29361844062805176, "learning_rate": 2.073306368446048e-06, "loss": 0.3841, "step": 6741 }, { "epoch": 3.6449810776716527, "grad_norm": 0.2983313202857971, "learning_rate": 2.071775826927754e-06, "loss": 0.3809, "step": 6742 }, { "epoch": 3.6455217156244366, "grad_norm": 0.2917824685573578, "learning_rate": 2.0702457029103547e-06, "loss": 0.376, "step": 6743 }, { "epoch": 3.646062353577221, "grad_norm": 0.27565157413482666, "learning_rate": 2.068715996612009e-06, "loss": 0.3784, "step": 6744 }, { "epoch": 3.6466029915300053, "grad_norm": 0.2832021117210388, "learning_rate": 2.067186708250826e-06, "loss": 0.3865, "step": 6745 }, { "epoch": 3.6471436294827897, "grad_norm": 0.2892693281173706, "learning_rate": 2.0656578380448404e-06, "loss": 0.375, "step": 6746 }, { "epoch": 3.647684267435574, "grad_norm": 0.2889207899570465, "learning_rate": 2.064129386212042e-06, "loss": 0.3685, "step": 6747 }, { "epoch": 3.6482249053883584, "grad_norm": 0.2712843418121338, "learning_rate": 2.062601352970351e-06, "loss": 0.3845, "step": 6748 }, { "epoch": 3.648765543341143, "grad_norm": 0.30522647500038147, "learning_rate": 2.061073738537635e-06, "loss": 0.3758, "step": 6749 }, { "epoch": 3.6493061812939267, "grad_norm": 0.2705490291118622, "learning_rate": 2.059546543131696e-06, "loss": 0.3713, "step": 6750 }, { "epoch": 3.649846819246711, "grad_norm": 0.2821035385131836, "learning_rate": 2.058019766970279e-06, "loss": 0.3815, "step": 6751 }, { "epoch": 3.6503874571994954, "grad_norm": 0.26890015602111816, "learning_rate": 2.0564934102710706e-06, "loss": 0.3786, "step": 6752 }, { "epoch": 3.65092809515228, "grad_norm": 0.2866307199001312, "learning_rate": 2.054967473251695e-06, "loss": 0.3686, "step": 6753 }, { "epoch": 3.651468733105064, "grad_norm": 0.3068501651287079, "learning_rate": 2.0534419561297153e-06, "loss": 0.3674, "step": 6754 }, { "epoch": 3.652009371057848, "grad_norm": 0.27632516622543335, "learning_rate": 2.051916859122641e-06, "loss": 0.3744, "step": 6755 }, { "epoch": 3.6525500090106324, "grad_norm": 0.2828952670097351, "learning_rate": 2.050392182447914e-06, "loss": 0.357, "step": 6756 }, { "epoch": 3.653090646963417, "grad_norm": 0.2775524854660034, "learning_rate": 2.0488679263229257e-06, "loss": 0.353, "step": 6757 }, { "epoch": 3.653631284916201, "grad_norm": 0.290147066116333, "learning_rate": 2.0473440909649932e-06, "loss": 0.3719, "step": 6758 }, { "epoch": 3.6541719228689855, "grad_norm": 0.2628694772720337, "learning_rate": 2.045820676591389e-06, "loss": 0.358, "step": 6759 }, { "epoch": 3.6547125608217694, "grad_norm": 0.2677537798881531, "learning_rate": 2.0442976834193146e-06, "loss": 0.3977, "step": 6760 }, { "epoch": 3.6552531987745542, "grad_norm": 0.2836852967739105, "learning_rate": 2.042775111665919e-06, "loss": 0.3677, "step": 6761 }, { "epoch": 3.655793836727338, "grad_norm": 0.32650119066238403, "learning_rate": 2.0412529615482867e-06, "loss": 0.3577, "step": 6762 }, { "epoch": 3.6563344746801225, "grad_norm": 0.27181607484817505, "learning_rate": 2.0397312332834408e-06, "loss": 0.3641, "step": 6763 }, { "epoch": 3.656875112632907, "grad_norm": 0.26769402623176575, "learning_rate": 2.0382099270883493e-06, "loss": 0.3965, "step": 6764 }, { "epoch": 3.6574157505856912, "grad_norm": 0.30101466178894043, "learning_rate": 2.036689043179917e-06, "loss": 0.3786, "step": 6765 }, { "epoch": 3.6579563885384756, "grad_norm": 0.27948784828186035, "learning_rate": 2.0351685817749867e-06, "loss": 0.3748, "step": 6766 }, { "epoch": 3.6584970264912595, "grad_norm": 0.2739773988723755, "learning_rate": 2.0336485430903453e-06, "loss": 0.3867, "step": 6767 }, { "epoch": 3.659037664444044, "grad_norm": 0.2507238984107971, "learning_rate": 2.0321289273427155e-06, "loss": 0.4019, "step": 6768 }, { "epoch": 3.6595783023968282, "grad_norm": 0.26378685235977173, "learning_rate": 2.0306097347487645e-06, "loss": 0.3499, "step": 6769 }, { "epoch": 3.6601189403496126, "grad_norm": 0.2907329201698303, "learning_rate": 2.0290909655250913e-06, "loss": 0.3707, "step": 6770 }, { "epoch": 3.660659578302397, "grad_norm": 0.2970120310783386, "learning_rate": 2.0275726198882404e-06, "loss": 0.3615, "step": 6771 }, { "epoch": 3.661200216255181, "grad_norm": 0.2724264860153198, "learning_rate": 2.026054698054699e-06, "loss": 0.3852, "step": 6772 }, { "epoch": 3.6617408542079652, "grad_norm": 0.28857192397117615, "learning_rate": 2.0245372002408857e-06, "loss": 0.3767, "step": 6773 }, { "epoch": 3.6622814921607496, "grad_norm": 0.2930293083190918, "learning_rate": 2.0230201266631644e-06, "loss": 0.374, "step": 6774 }, { "epoch": 3.662822130113534, "grad_norm": 0.28278347849845886, "learning_rate": 2.0215034775378336e-06, "loss": 0.3733, "step": 6775 }, { "epoch": 3.6633627680663183, "grad_norm": 0.30604609847068787, "learning_rate": 2.019987253081138e-06, "loss": 0.3776, "step": 6776 }, { "epoch": 3.6639034060191027, "grad_norm": 0.2731034457683563, "learning_rate": 2.018471453509256e-06, "loss": 0.3715, "step": 6777 }, { "epoch": 3.664444043971887, "grad_norm": 0.27089616656303406, "learning_rate": 2.016956079038309e-06, "loss": 0.3679, "step": 6778 }, { "epoch": 3.664984681924671, "grad_norm": 0.26166582107543945, "learning_rate": 2.0154411298843564e-06, "loss": 0.3676, "step": 6779 }, { "epoch": 3.6655253198774553, "grad_norm": 0.2884708046913147, "learning_rate": 2.013926606263394e-06, "loss": 0.3587, "step": 6780 }, { "epoch": 3.6660659578302397, "grad_norm": 0.30864015221595764, "learning_rate": 2.0124125083913636e-06, "loss": 0.3716, "step": 6781 }, { "epoch": 3.666606595783024, "grad_norm": 0.24878321588039398, "learning_rate": 2.0108988364841413e-06, "loss": 0.3695, "step": 6782 }, { "epoch": 3.6671472337358084, "grad_norm": 0.25419852137565613, "learning_rate": 2.0093855907575416e-06, "loss": 0.3615, "step": 6783 }, { "epoch": 3.6676878716885923, "grad_norm": 0.2902009189128876, "learning_rate": 2.0078727714273238e-06, "loss": 0.364, "step": 6784 }, { "epoch": 3.6682285096413767, "grad_norm": 0.28060466051101685, "learning_rate": 2.0063603787091788e-06, "loss": 0.3932, "step": 6785 }, { "epoch": 3.668769147594161, "grad_norm": 0.28917181491851807, "learning_rate": 2.0048484128187473e-06, "loss": 0.3891, "step": 6786 }, { "epoch": 3.6693097855469454, "grad_norm": 0.2723115384578705, "learning_rate": 2.0033368739715953e-06, "loss": 0.3769, "step": 6787 }, { "epoch": 3.66985042349973, "grad_norm": 0.2720555365085602, "learning_rate": 2.0018257623832393e-06, "loss": 0.3799, "step": 6788 }, { "epoch": 3.6703910614525137, "grad_norm": 0.2846823036670685, "learning_rate": 2.000315078269129e-06, "loss": 0.3742, "step": 6789 }, { "epoch": 3.6709316994052985, "grad_norm": 0.273112416267395, "learning_rate": 1.9988048218446577e-06, "loss": 0.4078, "step": 6790 }, { "epoch": 3.6714723373580824, "grad_norm": 0.30613189935684204, "learning_rate": 1.9972949933251534e-06, "loss": 0.402, "step": 6791 }, { "epoch": 3.672012975310867, "grad_norm": 0.26700952649116516, "learning_rate": 1.995785592925883e-06, "loss": 0.3789, "step": 6792 }, { "epoch": 3.672553613263651, "grad_norm": 0.28727391362190247, "learning_rate": 1.994276620862057e-06, "loss": 0.3957, "step": 6793 }, { "epoch": 3.6730942512164355, "grad_norm": 0.2805801331996918, "learning_rate": 1.9927680773488216e-06, "loss": 0.3882, "step": 6794 }, { "epoch": 3.67363488916922, "grad_norm": 0.2893593907356262, "learning_rate": 1.9912599626012593e-06, "loss": 0.3681, "step": 6795 }, { "epoch": 3.674175527122004, "grad_norm": 0.2825118899345398, "learning_rate": 1.9897522768343974e-06, "loss": 0.3632, "step": 6796 }, { "epoch": 3.674716165074788, "grad_norm": 0.2712789475917816, "learning_rate": 1.988245020263197e-06, "loss": 0.3782, "step": 6797 }, { "epoch": 3.6752568030275725, "grad_norm": 0.3012026250362396, "learning_rate": 1.9867381931025637e-06, "loss": 0.3926, "step": 6798 }, { "epoch": 3.675797440980357, "grad_norm": 0.2728487253189087, "learning_rate": 1.9852317955673324e-06, "loss": 0.3742, "step": 6799 }, { "epoch": 3.6763380789331412, "grad_norm": 0.2810962498188019, "learning_rate": 1.9837258278722855e-06, "loss": 0.3523, "step": 6800 }, { "epoch": 3.676878716885925, "grad_norm": 0.28678956627845764, "learning_rate": 1.982220290232143e-06, "loss": 0.3617, "step": 6801 }, { "epoch": 3.6774193548387095, "grad_norm": 0.27895835041999817, "learning_rate": 1.98071518286156e-06, "loss": 0.3712, "step": 6802 }, { "epoch": 3.677959992791494, "grad_norm": 0.281220406293869, "learning_rate": 1.9792105059751314e-06, "loss": 0.3925, "step": 6803 }, { "epoch": 3.6785006307442782, "grad_norm": 0.29164451360702515, "learning_rate": 1.977706259787391e-06, "loss": 0.3867, "step": 6804 }, { "epoch": 3.6790412686970626, "grad_norm": 0.2751726806163788, "learning_rate": 1.976202444512813e-06, "loss": 0.3948, "step": 6805 }, { "epoch": 3.679581906649847, "grad_norm": 0.27914339303970337, "learning_rate": 1.974699060365809e-06, "loss": 0.3877, "step": 6806 }, { "epoch": 3.6801225446026313, "grad_norm": 0.2867169976234436, "learning_rate": 1.973196107560725e-06, "loss": 0.3653, "step": 6807 }, { "epoch": 3.6806631825554152, "grad_norm": 0.2664088010787964, "learning_rate": 1.9716935863118546e-06, "loss": 0.3901, "step": 6808 }, { "epoch": 3.6812038205081996, "grad_norm": 0.25899437069892883, "learning_rate": 1.9701914968334197e-06, "loss": 0.3688, "step": 6809 }, { "epoch": 3.681744458460984, "grad_norm": 0.2655908167362213, "learning_rate": 1.96868983933959e-06, "loss": 0.378, "step": 6810 }, { "epoch": 3.6822850964137683, "grad_norm": 0.30866947770118713, "learning_rate": 1.9671886140444667e-06, "loss": 0.3471, "step": 6811 }, { "epoch": 3.6828257343665527, "grad_norm": 0.2657456696033478, "learning_rate": 1.96568782116209e-06, "loss": 0.3515, "step": 6812 }, { "epoch": 3.6833663723193366, "grad_norm": 0.27322810888290405, "learning_rate": 1.9641874609064443e-06, "loss": 0.3968, "step": 6813 }, { "epoch": 3.683907010272121, "grad_norm": 0.27966296672821045, "learning_rate": 1.962687533491446e-06, "loss": 0.3661, "step": 6814 }, { "epoch": 3.6844476482249053, "grad_norm": 0.2770301401615143, "learning_rate": 1.9611880391309524e-06, "loss": 0.3653, "step": 6815 }, { "epoch": 3.6849882861776897, "grad_norm": 0.27900561690330505, "learning_rate": 1.959688978038756e-06, "loss": 0.3933, "step": 6816 }, { "epoch": 3.685528924130474, "grad_norm": 0.29549020528793335, "learning_rate": 1.958190350428595e-06, "loss": 0.4131, "step": 6817 }, { "epoch": 3.686069562083258, "grad_norm": 0.31528905034065247, "learning_rate": 1.956692156514139e-06, "loss": 0.3985, "step": 6818 }, { "epoch": 3.686610200036043, "grad_norm": 0.28341177105903625, "learning_rate": 1.9551943965089947e-06, "loss": 0.3719, "step": 6819 }, { "epoch": 3.6871508379888267, "grad_norm": 0.2914537489414215, "learning_rate": 1.9536970706267156e-06, "loss": 0.3659, "step": 6820 }, { "epoch": 3.687691475941611, "grad_norm": 0.2779093384742737, "learning_rate": 1.952200179080783e-06, "loss": 0.3936, "step": 6821 }, { "epoch": 3.6882321138943954, "grad_norm": 0.3025447130203247, "learning_rate": 1.9507037220846236e-06, "loss": 0.3983, "step": 6822 }, { "epoch": 3.68877275184718, "grad_norm": 0.2952757775783539, "learning_rate": 1.9492076998515997e-06, "loss": 0.38, "step": 6823 }, { "epoch": 3.689313389799964, "grad_norm": 0.27697935700416565, "learning_rate": 1.9477121125950084e-06, "loss": 0.3702, "step": 6824 }, { "epoch": 3.689854027752748, "grad_norm": 0.2632052004337311, "learning_rate": 1.946216960528092e-06, "loss": 0.3746, "step": 6825 }, { "epoch": 3.6903946657055324, "grad_norm": 0.277616947889328, "learning_rate": 1.944722243864024e-06, "loss": 0.3638, "step": 6826 }, { "epoch": 3.690935303658317, "grad_norm": 0.30968624353408813, "learning_rate": 1.9432279628159188e-06, "loss": 0.3812, "step": 6827 }, { "epoch": 3.691475941611101, "grad_norm": 0.27993831038475037, "learning_rate": 1.9417341175968274e-06, "loss": 0.3606, "step": 6828 }, { "epoch": 3.6920165795638855, "grad_norm": 0.27457091212272644, "learning_rate": 1.94024070841974e-06, "loss": 0.35, "step": 6829 }, { "epoch": 3.6925572175166694, "grad_norm": 0.2656506299972534, "learning_rate": 1.9387477354975885e-06, "loss": 0.3716, "step": 6830 }, { "epoch": 3.693097855469454, "grad_norm": 0.2725432813167572, "learning_rate": 1.9372551990432307e-06, "loss": 0.39, "step": 6831 }, { "epoch": 3.693638493422238, "grad_norm": 0.27034181356430054, "learning_rate": 1.9357630992694753e-06, "loss": 0.3845, "step": 6832 }, { "epoch": 3.6941791313750225, "grad_norm": 0.28433382511138916, "learning_rate": 1.9342714363890596e-06, "loss": 0.3649, "step": 6833 }, { "epoch": 3.694719769327807, "grad_norm": 0.28269556164741516, "learning_rate": 1.932780210614666e-06, "loss": 0.3721, "step": 6834 }, { "epoch": 3.6952604072805912, "grad_norm": 0.26568377017974854, "learning_rate": 1.9312894221589085e-06, "loss": 0.4029, "step": 6835 }, { "epoch": 3.6958010452333756, "grad_norm": 0.2716286778450012, "learning_rate": 1.9297990712343396e-06, "loss": 0.378, "step": 6836 }, { "epoch": 3.6963416831861595, "grad_norm": 0.26363158226013184, "learning_rate": 1.9283091580534548e-06, "loss": 0.3699, "step": 6837 }, { "epoch": 3.696882321138944, "grad_norm": 0.27918702363967896, "learning_rate": 1.9268196828286802e-06, "loss": 0.3727, "step": 6838 }, { "epoch": 3.6974229590917282, "grad_norm": 0.2825523912906647, "learning_rate": 1.9253306457723815e-06, "loss": 0.3949, "step": 6839 }, { "epoch": 3.6979635970445126, "grad_norm": 0.2709399461746216, "learning_rate": 1.9238420470968665e-06, "loss": 0.3762, "step": 6840 }, { "epoch": 3.698504234997297, "grad_norm": 0.26949992775917053, "learning_rate": 1.922353887014373e-06, "loss": 0.364, "step": 6841 }, { "epoch": 3.699044872950081, "grad_norm": 0.28536126017570496, "learning_rate": 1.9208661657370843e-06, "loss": 0.366, "step": 6842 }, { "epoch": 3.6995855109028652, "grad_norm": 0.27294227480888367, "learning_rate": 1.919378883477114e-06, "loss": 0.3949, "step": 6843 }, { "epoch": 3.7001261488556496, "grad_norm": 0.2738019526004791, "learning_rate": 1.9178920404465183e-06, "loss": 0.3863, "step": 6844 }, { "epoch": 3.700666786808434, "grad_norm": 0.2529360353946686, "learning_rate": 1.9164056368572847e-06, "loss": 0.361, "step": 6845 }, { "epoch": 3.7012074247612183, "grad_norm": 0.2742631137371063, "learning_rate": 1.9149196729213464e-06, "loss": 0.3908, "step": 6846 }, { "epoch": 3.7017480627140023, "grad_norm": 0.2806893587112427, "learning_rate": 1.9134341488505676e-06, "loss": 0.3768, "step": 6847 }, { "epoch": 3.702288700666787, "grad_norm": 0.28769466280937195, "learning_rate": 1.9119490648567496e-06, "loss": 0.3811, "step": 6848 }, { "epoch": 3.702829338619571, "grad_norm": 0.2752440869808197, "learning_rate": 1.9104644211516373e-06, "loss": 0.4065, "step": 6849 }, { "epoch": 3.7033699765723553, "grad_norm": 0.2697683572769165, "learning_rate": 1.9089802179469036e-06, "loss": 0.3683, "step": 6850 }, { "epoch": 3.7039106145251397, "grad_norm": 0.2717171609401703, "learning_rate": 1.907496455454168e-06, "loss": 0.3905, "step": 6851 }, { "epoch": 3.704451252477924, "grad_norm": 0.28985825181007385, "learning_rate": 1.9060131338849808e-06, "loss": 0.3945, "step": 6852 }, { "epoch": 3.7049918904307084, "grad_norm": 0.2849089503288269, "learning_rate": 1.9045302534508298e-06, "loss": 0.3923, "step": 6853 }, { "epoch": 3.7055325283834923, "grad_norm": 0.2646790146827698, "learning_rate": 1.9030478143631442e-06, "loss": 0.4143, "step": 6854 }, { "epoch": 3.7060731663362767, "grad_norm": 0.2752073407173157, "learning_rate": 1.9015658168332863e-06, "loss": 0.3596, "step": 6855 }, { "epoch": 3.706613804289061, "grad_norm": 0.28556573390960693, "learning_rate": 1.9000842610725562e-06, "loss": 0.3937, "step": 6856 }, { "epoch": 3.7071544422418454, "grad_norm": 0.27374550700187683, "learning_rate": 1.8986031472921902e-06, "loss": 0.3855, "step": 6857 }, { "epoch": 3.70769508019463, "grad_norm": 0.2739173173904419, "learning_rate": 1.897122475703364e-06, "loss": 0.3713, "step": 6858 }, { "epoch": 3.7082357181474137, "grad_norm": 0.28062182664871216, "learning_rate": 1.8956422465171924e-06, "loss": 0.3718, "step": 6859 }, { "epoch": 3.708776356100198, "grad_norm": 0.27734068036079407, "learning_rate": 1.8941624599447178e-06, "loss": 0.3702, "step": 6860 }, { "epoch": 3.7093169940529824, "grad_norm": 0.31852149963378906, "learning_rate": 1.8926831161969306e-06, "loss": 0.3747, "step": 6861 }, { "epoch": 3.709857632005767, "grad_norm": 0.2986151874065399, "learning_rate": 1.8912042154847482e-06, "loss": 0.3909, "step": 6862 }, { "epoch": 3.710398269958551, "grad_norm": 0.2765163481235504, "learning_rate": 1.8897257580190342e-06, "loss": 0.3761, "step": 6863 }, { "epoch": 3.7109389079113355, "grad_norm": 0.279621958732605, "learning_rate": 1.8882477440105824e-06, "loss": 0.3787, "step": 6864 }, { "epoch": 3.71147954586412, "grad_norm": 0.27998921275138855, "learning_rate": 1.8867701736701238e-06, "loss": 0.3677, "step": 6865 }, { "epoch": 3.712020183816904, "grad_norm": 0.2997812330722809, "learning_rate": 1.8852930472083304e-06, "loss": 0.3716, "step": 6866 }, { "epoch": 3.712560821769688, "grad_norm": 0.25540295243263245, "learning_rate": 1.8838163648358071e-06, "loss": 0.3752, "step": 6867 }, { "epoch": 3.7131014597224725, "grad_norm": 0.2944501042366028, "learning_rate": 1.8823401267630952e-06, "loss": 0.3693, "step": 6868 }, { "epoch": 3.713642097675257, "grad_norm": 0.26505914330482483, "learning_rate": 1.8808643332006765e-06, "loss": 0.3775, "step": 6869 }, { "epoch": 3.7141827356280412, "grad_norm": 0.26692885160446167, "learning_rate": 1.8793889843589647e-06, "loss": 0.3777, "step": 6870 }, { "epoch": 3.714723373580825, "grad_norm": 0.26996320486068726, "learning_rate": 1.8779140804483164e-06, "loss": 0.3844, "step": 6871 }, { "epoch": 3.7152640115336095, "grad_norm": 0.29633286595344543, "learning_rate": 1.8764396216790148e-06, "loss": 0.3702, "step": 6872 }, { "epoch": 3.715804649486394, "grad_norm": 0.29292699694633484, "learning_rate": 1.87496560826129e-06, "loss": 0.3902, "step": 6873 }, { "epoch": 3.7163452874391782, "grad_norm": 0.2757956385612488, "learning_rate": 1.8734920404053013e-06, "loss": 0.3812, "step": 6874 }, { "epoch": 3.7168859253919626, "grad_norm": 0.26927411556243896, "learning_rate": 1.87201891832115e-06, "loss": 0.3752, "step": 6875 }, { "epoch": 3.7174265633447465, "grad_norm": 0.2939903140068054, "learning_rate": 1.8705462422188703e-06, "loss": 0.3712, "step": 6876 }, { "epoch": 3.7179672012975313, "grad_norm": 0.3061712980270386, "learning_rate": 1.8690740123084316e-06, "loss": 0.3908, "step": 6877 }, { "epoch": 3.7185078392503153, "grad_norm": 0.283538281917572, "learning_rate": 1.8676022287997454e-06, "loss": 0.3942, "step": 6878 }, { "epoch": 3.7190484772030996, "grad_norm": 0.25709348917007446, "learning_rate": 1.8661308919026533e-06, "loss": 0.3735, "step": 6879 }, { "epoch": 3.719589115155884, "grad_norm": 0.287073016166687, "learning_rate": 1.8646600018269356e-06, "loss": 0.3779, "step": 6880 }, { "epoch": 3.7201297531086683, "grad_norm": 0.27224376797676086, "learning_rate": 1.8631895587823112e-06, "loss": 0.381, "step": 6881 }, { "epoch": 3.7206703910614527, "grad_norm": 0.31198450922966003, "learning_rate": 1.8617195629784308e-06, "loss": 0.4011, "step": 6882 }, { "epoch": 3.7212110290142366, "grad_norm": 0.27526232600212097, "learning_rate": 1.8602500146248885e-06, "loss": 0.3786, "step": 6883 }, { "epoch": 3.721751666967021, "grad_norm": 0.2905593812465668, "learning_rate": 1.858780913931203e-06, "loss": 0.3625, "step": 6884 }, { "epoch": 3.7222923049198053, "grad_norm": 0.2826231122016907, "learning_rate": 1.8573122611068406e-06, "loss": 0.3787, "step": 6885 }, { "epoch": 3.7228329428725897, "grad_norm": 0.29400330781936646, "learning_rate": 1.855844056361197e-06, "loss": 0.3737, "step": 6886 }, { "epoch": 3.723373580825374, "grad_norm": 0.25836947560310364, "learning_rate": 1.854376299903608e-06, "loss": 0.369, "step": 6887 }, { "epoch": 3.723914218778158, "grad_norm": 0.2573744058609009, "learning_rate": 1.8529089919433435e-06, "loss": 0.3723, "step": 6888 }, { "epoch": 3.724454856730943, "grad_norm": 0.28248631954193115, "learning_rate": 1.8514421326896071e-06, "loss": 0.3772, "step": 6889 }, { "epoch": 3.7249954946837267, "grad_norm": 0.2774384319782257, "learning_rate": 1.8499757223515442e-06, "loss": 0.3886, "step": 6890 }, { "epoch": 3.725536132636511, "grad_norm": 0.27647051215171814, "learning_rate": 1.8485097611382312e-06, "loss": 0.3926, "step": 6891 }, { "epoch": 3.7260767705892954, "grad_norm": 0.2677972614765167, "learning_rate": 1.847044249258681e-06, "loss": 0.3624, "step": 6892 }, { "epoch": 3.72661740854208, "grad_norm": 0.28188949823379517, "learning_rate": 1.845579186921847e-06, "loss": 0.3951, "step": 6893 }, { "epoch": 3.727158046494864, "grad_norm": 0.27737295627593994, "learning_rate": 1.8441145743366113e-06, "loss": 0.404, "step": 6894 }, { "epoch": 3.727698684447648, "grad_norm": 0.29896578192710876, "learning_rate": 1.8426504117118011e-06, "loss": 0.3942, "step": 6895 }, { "epoch": 3.7282393224004324, "grad_norm": 0.2849474251270294, "learning_rate": 1.8411866992561667e-06, "loss": 0.3693, "step": 6896 }, { "epoch": 3.728779960353217, "grad_norm": 0.28557971119880676, "learning_rate": 1.8397234371784062e-06, "loss": 0.3778, "step": 6897 }, { "epoch": 3.729320598306001, "grad_norm": 0.2757243812084198, "learning_rate": 1.8382606256871494e-06, "loss": 0.3872, "step": 6898 }, { "epoch": 3.7298612362587855, "grad_norm": 0.27281227707862854, "learning_rate": 1.83679826499096e-06, "loss": 0.3908, "step": 6899 }, { "epoch": 3.7304018742115694, "grad_norm": 0.26603302359580994, "learning_rate": 1.8353363552983382e-06, "loss": 0.3461, "step": 6900 }, { "epoch": 3.730942512164354, "grad_norm": 0.267747700214386, "learning_rate": 1.83387489681772e-06, "loss": 0.3675, "step": 6901 }, { "epoch": 3.731483150117138, "grad_norm": 0.2858692705631256, "learning_rate": 1.83241388975748e-06, "loss": 0.3683, "step": 6902 }, { "epoch": 3.7320237880699225, "grad_norm": 0.27688363194465637, "learning_rate": 1.8309533343259246e-06, "loss": 0.373, "step": 6903 }, { "epoch": 3.732564426022707, "grad_norm": 0.26876163482666016, "learning_rate": 1.8294932307312946e-06, "loss": 0.3468, "step": 6904 }, { "epoch": 3.7331050639754912, "grad_norm": 0.28595924377441406, "learning_rate": 1.8280335791817733e-06, "loss": 0.3799, "step": 6905 }, { "epoch": 3.7336457019282756, "grad_norm": 0.2635115087032318, "learning_rate": 1.8265743798854719e-06, "loss": 0.3904, "step": 6906 }, { "epoch": 3.7341863398810595, "grad_norm": 0.2736213803291321, "learning_rate": 1.8251156330504427e-06, "loss": 0.3771, "step": 6907 }, { "epoch": 3.734726977833844, "grad_norm": 0.29771164059638977, "learning_rate": 1.82365733888467e-06, "loss": 0.3865, "step": 6908 }, { "epoch": 3.7352676157866282, "grad_norm": 0.30484992265701294, "learning_rate": 1.8221994975960739e-06, "loss": 0.4009, "step": 6909 }, { "epoch": 3.7358082537394126, "grad_norm": 0.29641515016555786, "learning_rate": 1.8207421093925127e-06, "loss": 0.3815, "step": 6910 }, { "epoch": 3.736348891692197, "grad_norm": 0.275765597820282, "learning_rate": 1.8192851744817757e-06, "loss": 0.3752, "step": 6911 }, { "epoch": 3.736889529644981, "grad_norm": 0.29648923873901367, "learning_rate": 1.817828693071595e-06, "loss": 0.3919, "step": 6912 }, { "epoch": 3.7374301675977653, "grad_norm": 0.27310508489608765, "learning_rate": 1.8163726653696263e-06, "loss": 0.367, "step": 6913 }, { "epoch": 3.7379708055505496, "grad_norm": 0.28955864906311035, "learning_rate": 1.8149170915834723e-06, "loss": 0.3819, "step": 6914 }, { "epoch": 3.738511443503334, "grad_norm": 0.26339635252952576, "learning_rate": 1.8134619719206624e-06, "loss": 0.3803, "step": 6915 }, { "epoch": 3.7390520814561183, "grad_norm": 0.2843658924102783, "learning_rate": 1.8120073065886695e-06, "loss": 0.3706, "step": 6916 }, { "epoch": 3.7395927194089023, "grad_norm": 0.27032530307769775, "learning_rate": 1.8105530957948941e-06, "loss": 0.3872, "step": 6917 }, { "epoch": 3.740133357361687, "grad_norm": 0.26732128858566284, "learning_rate": 1.809099339746674e-06, "loss": 0.3417, "step": 6918 }, { "epoch": 3.740673995314471, "grad_norm": 0.2693036198616028, "learning_rate": 1.8076460386512855e-06, "loss": 0.389, "step": 6919 }, { "epoch": 3.7412146332672553, "grad_norm": 0.27490392327308655, "learning_rate": 1.8061931927159377e-06, "loss": 0.3756, "step": 6920 }, { "epoch": 3.7417552712200397, "grad_norm": 0.2863559424877167, "learning_rate": 1.8047408021477713e-06, "loss": 0.3897, "step": 6921 }, { "epoch": 3.742295909172824, "grad_norm": 0.2561712861061096, "learning_rate": 1.80328886715387e-06, "loss": 0.3495, "step": 6922 }, { "epoch": 3.7428365471256084, "grad_norm": 0.27250537276268005, "learning_rate": 1.8018373879412442e-06, "loss": 0.3602, "step": 6923 }, { "epoch": 3.7433771850783923, "grad_norm": 0.26523685455322266, "learning_rate": 1.800386364716849e-06, "loss": 0.3701, "step": 6924 }, { "epoch": 3.7439178230311767, "grad_norm": 0.28787243366241455, "learning_rate": 1.7989357976875603e-06, "loss": 0.384, "step": 6925 }, { "epoch": 3.744458460983961, "grad_norm": 0.2766694128513336, "learning_rate": 1.7974856870602025e-06, "loss": 0.3885, "step": 6926 }, { "epoch": 3.7449990989367454, "grad_norm": 0.2729831635951996, "learning_rate": 1.79603603304153e-06, "loss": 0.3803, "step": 6927 }, { "epoch": 3.74553973688953, "grad_norm": 0.27197712659835815, "learning_rate": 1.7945868358382311e-06, "loss": 0.3531, "step": 6928 }, { "epoch": 3.7460803748423137, "grad_norm": 0.28468289971351624, "learning_rate": 1.7931380956569294e-06, "loss": 0.3855, "step": 6929 }, { "epoch": 3.746621012795098, "grad_norm": 0.2688024342060089, "learning_rate": 1.7916898127041815e-06, "loss": 0.3631, "step": 6930 }, { "epoch": 3.7471616507478824, "grad_norm": 0.27505674958229065, "learning_rate": 1.790241987186485e-06, "loss": 0.3652, "step": 6931 }, { "epoch": 3.747702288700667, "grad_norm": 0.25617873668670654, "learning_rate": 1.7887946193102663e-06, "loss": 0.3393, "step": 6932 }, { "epoch": 3.748242926653451, "grad_norm": 0.2912514805793762, "learning_rate": 1.787347709281887e-06, "loss": 0.3834, "step": 6933 }, { "epoch": 3.7487835646062355, "grad_norm": 0.2743835151195526, "learning_rate": 1.7859012573076478e-06, "loss": 0.3702, "step": 6934 }, { "epoch": 3.74932420255902, "grad_norm": 0.2596570551395416, "learning_rate": 1.7844552635937784e-06, "loss": 0.3882, "step": 6935 }, { "epoch": 3.749864840511804, "grad_norm": 0.28975582122802734, "learning_rate": 1.7830097283464486e-06, "loss": 0.3903, "step": 6936 }, { "epoch": 3.750405478464588, "grad_norm": 0.2721347510814667, "learning_rate": 1.7815646517717595e-06, "loss": 0.3865, "step": 6937 }, { "epoch": 3.7509461164173725, "grad_norm": 0.2891277074813843, "learning_rate": 1.7801200340757452e-06, "loss": 0.3686, "step": 6938 }, { "epoch": 3.751486754370157, "grad_norm": 0.2961825132369995, "learning_rate": 1.7786758754643795e-06, "loss": 0.3852, "step": 6939 }, { "epoch": 3.7520273923229412, "grad_norm": 0.25924432277679443, "learning_rate": 1.7772321761435674e-06, "loss": 0.3807, "step": 6940 }, { "epoch": 3.752568030275725, "grad_norm": 0.2748173475265503, "learning_rate": 1.7757889363191484e-06, "loss": 0.3799, "step": 6941 }, { "epoch": 3.7531086682285095, "grad_norm": 0.2915605902671814, "learning_rate": 1.774346156196895e-06, "loss": 0.3723, "step": 6942 }, { "epoch": 3.753649306181294, "grad_norm": 0.2920202314853668, "learning_rate": 1.7729038359825201e-06, "loss": 0.3893, "step": 6943 }, { "epoch": 3.7541899441340782, "grad_norm": 0.2847879230976105, "learning_rate": 1.7714619758816649e-06, "loss": 0.3809, "step": 6944 }, { "epoch": 3.7547305820868626, "grad_norm": 0.28939902782440186, "learning_rate": 1.7700205760999061e-06, "loss": 0.4034, "step": 6945 }, { "epoch": 3.7552712200396465, "grad_norm": 0.26719948649406433, "learning_rate": 1.7685796368427587e-06, "loss": 0.383, "step": 6946 }, { "epoch": 3.7558118579924313, "grad_norm": 0.2696559429168701, "learning_rate": 1.767139158315666e-06, "loss": 0.3681, "step": 6947 }, { "epoch": 3.7563524959452153, "grad_norm": 0.278033584356308, "learning_rate": 1.7656991407240126e-06, "loss": 0.4143, "step": 6948 }, { "epoch": 3.7568931338979996, "grad_norm": 0.2916466295719147, "learning_rate": 1.7642595842731113e-06, "loss": 0.3837, "step": 6949 }, { "epoch": 3.757433771850784, "grad_norm": 0.2705329358577728, "learning_rate": 1.76282048916821e-06, "loss": 0.3577, "step": 6950 }, { "epoch": 3.7579744098035683, "grad_norm": 0.27100685238838196, "learning_rate": 1.7613818556144956e-06, "loss": 0.3997, "step": 6951 }, { "epoch": 3.7585150477563527, "grad_norm": 0.27594810724258423, "learning_rate": 1.7599436838170847e-06, "loss": 0.3903, "step": 6952 }, { "epoch": 3.7590556857091366, "grad_norm": 0.28989678621292114, "learning_rate": 1.7585059739810284e-06, "loss": 0.3876, "step": 6953 }, { "epoch": 3.759596323661921, "grad_norm": 0.27664127945899963, "learning_rate": 1.7570687263113112e-06, "loss": 0.3886, "step": 6954 }, { "epoch": 3.7601369616147053, "grad_norm": 0.2947249710559845, "learning_rate": 1.7556319410128557e-06, "loss": 0.3829, "step": 6955 }, { "epoch": 3.7606775995674897, "grad_norm": 0.27407416701316833, "learning_rate": 1.754195618290519e-06, "loss": 0.3877, "step": 6956 }, { "epoch": 3.761218237520274, "grad_norm": 0.31988996267318726, "learning_rate": 1.7527597583490825e-06, "loss": 0.3944, "step": 6957 }, { "epoch": 3.761758875473058, "grad_norm": 0.25325557589530945, "learning_rate": 1.7513243613932734e-06, "loss": 0.3541, "step": 6958 }, { "epoch": 3.7622995134258423, "grad_norm": 0.2626549303531647, "learning_rate": 1.749889427627745e-06, "loss": 0.3802, "step": 6959 }, { "epoch": 3.7628401513786267, "grad_norm": 0.2787989377975464, "learning_rate": 1.7484549572570913e-06, "loss": 0.3694, "step": 6960 }, { "epoch": 3.763380789331411, "grad_norm": 0.2660752236843109, "learning_rate": 1.7470209504858343e-06, "loss": 0.3541, "step": 6961 }, { "epoch": 3.7639214272841954, "grad_norm": 0.26545456051826477, "learning_rate": 1.7455874075184297e-06, "loss": 0.3999, "step": 6962 }, { "epoch": 3.76446206523698, "grad_norm": 0.27897003293037415, "learning_rate": 1.7441543285592743e-06, "loss": 0.3667, "step": 6963 }, { "epoch": 3.765002703189764, "grad_norm": 0.27769017219543457, "learning_rate": 1.7427217138126916e-06, "loss": 0.3749, "step": 6964 }, { "epoch": 3.765543341142548, "grad_norm": 0.2526131570339203, "learning_rate": 1.7412895634829391e-06, "loss": 0.3932, "step": 6965 }, { "epoch": 3.7660839790953324, "grad_norm": 0.26294299960136414, "learning_rate": 1.7398578777742142e-06, "loss": 0.3746, "step": 6966 }, { "epoch": 3.766624617048117, "grad_norm": 0.26726728677749634, "learning_rate": 1.7384266568906404e-06, "loss": 0.3586, "step": 6967 }, { "epoch": 3.767165255000901, "grad_norm": 0.29011353850364685, "learning_rate": 1.7369959010362836e-06, "loss": 0.3668, "step": 6968 }, { "epoch": 3.7677058929536855, "grad_norm": 0.27279070019721985, "learning_rate": 1.7355656104151314e-06, "loss": 0.3779, "step": 6969 }, { "epoch": 3.7682465309064694, "grad_norm": 0.2736707031726837, "learning_rate": 1.7341357852311175e-06, "loss": 0.4005, "step": 6970 }, { "epoch": 3.768787168859254, "grad_norm": 0.2550220787525177, "learning_rate": 1.7327064256881e-06, "loss": 0.3768, "step": 6971 }, { "epoch": 3.769327806812038, "grad_norm": 0.2679320275783539, "learning_rate": 1.7312775319898768e-06, "loss": 0.3571, "step": 6972 }, { "epoch": 3.7698684447648225, "grad_norm": 0.27891236543655396, "learning_rate": 1.7298491043401794e-06, "loss": 0.3807, "step": 6973 }, { "epoch": 3.770409082717607, "grad_norm": 0.28013134002685547, "learning_rate": 1.7284211429426645e-06, "loss": 0.3754, "step": 6974 }, { "epoch": 3.770949720670391, "grad_norm": 0.26919159293174744, "learning_rate": 1.726993648000933e-06, "loss": 0.3819, "step": 6975 }, { "epoch": 3.7714903586231756, "grad_norm": 0.2816323935985565, "learning_rate": 1.7255666197185111e-06, "loss": 0.3647, "step": 6976 }, { "epoch": 3.7720309965759595, "grad_norm": 0.27056410908699036, "learning_rate": 1.7241400582988654e-06, "loss": 0.389, "step": 6977 }, { "epoch": 3.772571634528744, "grad_norm": 0.26639676094055176, "learning_rate": 1.7227139639453904e-06, "loss": 0.3745, "step": 6978 }, { "epoch": 3.7731122724815283, "grad_norm": 0.27519285678863525, "learning_rate": 1.7212883368614153e-06, "loss": 0.3664, "step": 6979 }, { "epoch": 3.7736529104343126, "grad_norm": 0.2709251046180725, "learning_rate": 1.7198631772502057e-06, "loss": 0.3697, "step": 6980 }, { "epoch": 3.774193548387097, "grad_norm": 0.2782084047794342, "learning_rate": 1.7184384853149566e-06, "loss": 0.3985, "step": 6981 }, { "epoch": 3.774734186339881, "grad_norm": 0.27686184644699097, "learning_rate": 1.7170142612587986e-06, "loss": 0.3673, "step": 6982 }, { "epoch": 3.7752748242926653, "grad_norm": 0.2701174318790436, "learning_rate": 1.7155905052847938e-06, "loss": 0.3876, "step": 6983 }, { "epoch": 3.7758154622454496, "grad_norm": 0.27983951568603516, "learning_rate": 1.714167217595939e-06, "loss": 0.3854, "step": 6984 }, { "epoch": 3.776356100198234, "grad_norm": 0.26138070225715637, "learning_rate": 1.7127443983951687e-06, "loss": 0.3786, "step": 6985 }, { "epoch": 3.7768967381510183, "grad_norm": 0.27046772837638855, "learning_rate": 1.7113220478853375e-06, "loss": 0.369, "step": 6986 }, { "epoch": 3.7774373761038023, "grad_norm": 0.2825208902359009, "learning_rate": 1.7099001662692488e-06, "loss": 0.3846, "step": 6987 }, { "epoch": 3.7779780140565866, "grad_norm": 0.29027751088142395, "learning_rate": 1.7084787537496266e-06, "loss": 0.3749, "step": 6988 }, { "epoch": 3.778518652009371, "grad_norm": 0.2870740294456482, "learning_rate": 1.707057810529138e-06, "loss": 0.4073, "step": 6989 }, { "epoch": 3.7790592899621553, "grad_norm": 0.276230126619339, "learning_rate": 1.7056373368103756e-06, "loss": 0.3773, "step": 6990 }, { "epoch": 3.7795999279149397, "grad_norm": 0.24382857978343964, "learning_rate": 1.7042173327958678e-06, "loss": 0.3642, "step": 6991 }, { "epoch": 3.780140565867724, "grad_norm": 0.26477882266044617, "learning_rate": 1.7027977986880784e-06, "loss": 0.3497, "step": 6992 }, { "epoch": 3.7806812038205084, "grad_norm": 0.27764496207237244, "learning_rate": 1.7013787346894006e-06, "loss": 0.3573, "step": 6993 }, { "epoch": 3.7812218417732923, "grad_norm": 0.2853281795978546, "learning_rate": 1.6999601410021605e-06, "loss": 0.3737, "step": 6994 }, { "epoch": 3.7817624797260767, "grad_norm": 0.2836633026599884, "learning_rate": 1.6985420178286216e-06, "loss": 0.3883, "step": 6995 }, { "epoch": 3.782303117678861, "grad_norm": 0.2709035277366638, "learning_rate": 1.697124365370974e-06, "loss": 0.3629, "step": 6996 }, { "epoch": 3.7828437556316454, "grad_norm": 0.2787929177284241, "learning_rate": 1.69570718383135e-06, "loss": 0.3654, "step": 6997 }, { "epoch": 3.78338439358443, "grad_norm": 0.2762378752231598, "learning_rate": 1.6942904734118004e-06, "loss": 0.3936, "step": 6998 }, { "epoch": 3.7839250315372137, "grad_norm": 0.24872547388076782, "learning_rate": 1.692874234314324e-06, "loss": 0.3461, "step": 6999 }, { "epoch": 3.784465669489998, "grad_norm": 0.2528185546398163, "learning_rate": 1.6914584667408408e-06, "loss": 0.374, "step": 7000 }, { "epoch": 3.7850063074427824, "grad_norm": 0.2733992040157318, "learning_rate": 1.6900431708932124e-06, "loss": 0.3708, "step": 7001 }, { "epoch": 3.785546945395567, "grad_norm": 0.30628329515457153, "learning_rate": 1.6886283469732279e-06, "loss": 0.3975, "step": 7002 }, { "epoch": 3.786087583348351, "grad_norm": 0.26259320974349976, "learning_rate": 1.6872139951826078e-06, "loss": 0.3767, "step": 7003 }, { "epoch": 3.786628221301135, "grad_norm": 0.2805614769458771, "learning_rate": 1.685800115723011e-06, "loss": 0.3974, "step": 7004 }, { "epoch": 3.78716885925392, "grad_norm": 0.2630981504917145, "learning_rate": 1.6843867087960252e-06, "loss": 0.3703, "step": 7005 }, { "epoch": 3.787709497206704, "grad_norm": 0.27293825149536133, "learning_rate": 1.6829737746031687e-06, "loss": 0.3829, "step": 7006 }, { "epoch": 3.788250135159488, "grad_norm": 0.2765561044216156, "learning_rate": 1.6815613133458998e-06, "loss": 0.3638, "step": 7007 }, { "epoch": 3.7887907731122725, "grad_norm": 0.2968769371509552, "learning_rate": 1.6801493252255995e-06, "loss": 0.3745, "step": 7008 }, { "epoch": 3.789331411065057, "grad_norm": 0.2884106934070587, "learning_rate": 1.6787378104435931e-06, "loss": 0.3857, "step": 7009 }, { "epoch": 3.7898720490178412, "grad_norm": 0.2724694013595581, "learning_rate": 1.6773267692011242e-06, "loss": 0.3548, "step": 7010 }, { "epoch": 3.790412686970625, "grad_norm": 0.26447156071662903, "learning_rate": 1.6759162016993808e-06, "loss": 0.3886, "step": 7011 }, { "epoch": 3.7909533249234095, "grad_norm": 0.2667076885700226, "learning_rate": 1.6745061081394792e-06, "loss": 0.3839, "step": 7012 }, { "epoch": 3.791493962876194, "grad_norm": 0.27531537413597107, "learning_rate": 1.6730964887224677e-06, "loss": 0.3818, "step": 7013 }, { "epoch": 3.7920346008289783, "grad_norm": 0.2675216495990753, "learning_rate": 1.6716873436493263e-06, "loss": 0.3761, "step": 7014 }, { "epoch": 3.7925752387817626, "grad_norm": 0.30674728751182556, "learning_rate": 1.6702786731209681e-06, "loss": 0.3599, "step": 7015 }, { "epoch": 3.7931158767345465, "grad_norm": 0.2617681324481964, "learning_rate": 1.6688704773382403e-06, "loss": 0.3785, "step": 7016 }, { "epoch": 3.793656514687331, "grad_norm": 0.2874067425727844, "learning_rate": 1.6674627565019203e-06, "loss": 0.3601, "step": 7017 }, { "epoch": 3.7941971526401153, "grad_norm": 0.2876586318016052, "learning_rate": 1.6660555108127169e-06, "loss": 0.4128, "step": 7018 }, { "epoch": 3.7947377905928996, "grad_norm": 0.28677424788475037, "learning_rate": 1.6646487404712753e-06, "loss": 0.3905, "step": 7019 }, { "epoch": 3.795278428545684, "grad_norm": 0.26501747965812683, "learning_rate": 1.6632424456781675e-06, "loss": 0.3728, "step": 7020 }, { "epoch": 3.7958190664984683, "grad_norm": 0.2984306514263153, "learning_rate": 1.6618366266339048e-06, "loss": 0.3871, "step": 7021 }, { "epoch": 3.7963597044512527, "grad_norm": 0.2869909405708313, "learning_rate": 1.6604312835389202e-06, "loss": 0.3847, "step": 7022 }, { "epoch": 3.7969003424040366, "grad_norm": 0.29960310459136963, "learning_rate": 1.6590264165935882e-06, "loss": 0.3771, "step": 7023 }, { "epoch": 3.797440980356821, "grad_norm": 0.25678813457489014, "learning_rate": 1.657622025998214e-06, "loss": 0.3744, "step": 7024 }, { "epoch": 3.7979816183096053, "grad_norm": 0.26842546463012695, "learning_rate": 1.6562181119530314e-06, "loss": 0.3958, "step": 7025 }, { "epoch": 3.7985222562623897, "grad_norm": 0.28335508704185486, "learning_rate": 1.6548146746582072e-06, "loss": 0.3817, "step": 7026 }, { "epoch": 3.799062894215174, "grad_norm": 0.268964558839798, "learning_rate": 1.6534117143138402e-06, "loss": 0.3837, "step": 7027 }, { "epoch": 3.799603532167958, "grad_norm": 0.2672850489616394, "learning_rate": 1.6520092311199648e-06, "loss": 0.3691, "step": 7028 }, { "epoch": 3.8001441701207423, "grad_norm": 0.26871156692504883, "learning_rate": 1.6506072252765427e-06, "loss": 0.4003, "step": 7029 }, { "epoch": 3.8006848080735267, "grad_norm": 0.28091421723365784, "learning_rate": 1.649205696983468e-06, "loss": 0.3917, "step": 7030 }, { "epoch": 3.801225446026311, "grad_norm": 0.28011780977249146, "learning_rate": 1.6478046464405717e-06, "loss": 0.3744, "step": 7031 }, { "epoch": 3.8017660839790954, "grad_norm": 0.26686403155326843, "learning_rate": 1.646404073847609e-06, "loss": 0.3762, "step": 7032 }, { "epoch": 3.8023067219318794, "grad_norm": 0.28495582938194275, "learning_rate": 1.6450039794042743e-06, "loss": 0.3852, "step": 7033 }, { "epoch": 3.802847359884664, "grad_norm": 0.25214681029319763, "learning_rate": 1.6436043633101901e-06, "loss": 0.3713, "step": 7034 }, { "epoch": 3.803387997837448, "grad_norm": 0.30977126955986023, "learning_rate": 1.642205225764908e-06, "loss": 0.3836, "step": 7035 }, { "epoch": 3.8039286357902324, "grad_norm": 0.27312731742858887, "learning_rate": 1.6408065669679184e-06, "loss": 0.3908, "step": 7036 }, { "epoch": 3.804469273743017, "grad_norm": 0.25544628500938416, "learning_rate": 1.6394083871186362e-06, "loss": 0.3818, "step": 7037 }, { "epoch": 3.805009911695801, "grad_norm": 0.27852949500083923, "learning_rate": 1.6380106864164163e-06, "loss": 0.3981, "step": 7038 }, { "epoch": 3.8055505496485855, "grad_norm": 0.26494163274765015, "learning_rate": 1.6366134650605342e-06, "loss": 0.3477, "step": 7039 }, { "epoch": 3.8060911876013694, "grad_norm": 0.2715262472629547, "learning_rate": 1.635216723250206e-06, "loss": 0.3625, "step": 7040 }, { "epoch": 3.806631825554154, "grad_norm": 0.26712551712989807, "learning_rate": 1.6338204611845775e-06, "loss": 0.3679, "step": 7041 }, { "epoch": 3.807172463506938, "grad_norm": 0.2706104815006256, "learning_rate": 1.6324246790627252e-06, "loss": 0.3953, "step": 7042 }, { "epoch": 3.8077131014597225, "grad_norm": 0.25851455330848694, "learning_rate": 1.631029377083656e-06, "loss": 0.3784, "step": 7043 }, { "epoch": 3.808253739412507, "grad_norm": 0.29127827286720276, "learning_rate": 1.6296345554463084e-06, "loss": 0.3729, "step": 7044 }, { "epoch": 3.808794377365291, "grad_norm": 0.2785845100879669, "learning_rate": 1.6282402143495568e-06, "loss": 0.3614, "step": 7045 }, { "epoch": 3.809335015318075, "grad_norm": 0.28293830156326294, "learning_rate": 1.6268463539922018e-06, "loss": 0.3811, "step": 7046 }, { "epoch": 3.8098756532708595, "grad_norm": 0.2674233019351959, "learning_rate": 1.6254529745729759e-06, "loss": 0.3838, "step": 7047 }, { "epoch": 3.810416291223644, "grad_norm": 0.26191818714141846, "learning_rate": 1.6240600762905485e-06, "loss": 0.3617, "step": 7048 }, { "epoch": 3.8109569291764283, "grad_norm": 0.2639159858226776, "learning_rate": 1.6226676593435126e-06, "loss": 0.3626, "step": 7049 }, { "epoch": 3.8114975671292126, "grad_norm": 0.2967204451560974, "learning_rate": 1.6212757239304e-06, "loss": 0.3813, "step": 7050 }, { "epoch": 3.812038205081997, "grad_norm": 0.257874995470047, "learning_rate": 1.6198842702496687e-06, "loss": 0.3817, "step": 7051 }, { "epoch": 3.812578843034781, "grad_norm": 0.26568150520324707, "learning_rate": 1.6184932984997082e-06, "loss": 0.3399, "step": 7052 }, { "epoch": 3.8131194809875653, "grad_norm": 0.2827441990375519, "learning_rate": 1.6171028088788432e-06, "loss": 0.3653, "step": 7053 }, { "epoch": 3.8136601189403496, "grad_norm": 0.27088630199432373, "learning_rate": 1.6157128015853269e-06, "loss": 0.3645, "step": 7054 }, { "epoch": 3.814200756893134, "grad_norm": 0.2815869152545929, "learning_rate": 1.6143232768173428e-06, "loss": 0.3761, "step": 7055 }, { "epoch": 3.8147413948459183, "grad_norm": 0.2995468080043793, "learning_rate": 1.6129342347730054e-06, "loss": 0.3815, "step": 7056 }, { "epoch": 3.8152820327987023, "grad_norm": 0.28593334555625916, "learning_rate": 1.6115456756503656e-06, "loss": 0.3745, "step": 7057 }, { "epoch": 3.8158226707514866, "grad_norm": 0.26630958914756775, "learning_rate": 1.6101575996473994e-06, "loss": 0.3604, "step": 7058 }, { "epoch": 3.816363308704271, "grad_norm": 0.27000048756599426, "learning_rate": 1.6087700069620155e-06, "loss": 0.3569, "step": 7059 }, { "epoch": 3.8169039466570553, "grad_norm": 0.25779932737350464, "learning_rate": 1.6073828977920564e-06, "loss": 0.3878, "step": 7060 }, { "epoch": 3.8174445846098397, "grad_norm": 0.2818188965320587, "learning_rate": 1.6059962723352912e-06, "loss": 0.3803, "step": 7061 }, { "epoch": 3.8179852225626236, "grad_norm": 0.30023640394210815, "learning_rate": 1.6046101307894251e-06, "loss": 0.4007, "step": 7062 }, { "epoch": 3.8185258605154084, "grad_norm": 0.2842724919319153, "learning_rate": 1.6032244733520901e-06, "loss": 0.3988, "step": 7063 }, { "epoch": 3.8190664984681923, "grad_norm": 0.2938917577266693, "learning_rate": 1.60183930022085e-06, "loss": 0.3605, "step": 7064 }, { "epoch": 3.8196071364209767, "grad_norm": 0.28421473503112793, "learning_rate": 1.6004546115932023e-06, "loss": 0.3936, "step": 7065 }, { "epoch": 3.820147774373761, "grad_norm": 0.2783248722553253, "learning_rate": 1.5990704076665726e-06, "loss": 0.3705, "step": 7066 }, { "epoch": 3.8206884123265454, "grad_norm": 0.2888815999031067, "learning_rate": 1.597686688638318e-06, "loss": 0.3707, "step": 7067 }, { "epoch": 3.82122905027933, "grad_norm": 0.27927088737487793, "learning_rate": 1.5963034547057249e-06, "loss": 0.4013, "step": 7068 }, { "epoch": 3.8217696882321137, "grad_norm": 0.2906007170677185, "learning_rate": 1.5949207060660138e-06, "loss": 0.38, "step": 7069 }, { "epoch": 3.822310326184898, "grad_norm": 0.2735014855861664, "learning_rate": 1.5935384429163376e-06, "loss": 0.3678, "step": 7070 }, { "epoch": 3.8228509641376824, "grad_norm": 0.27961209416389465, "learning_rate": 1.5921566654537706e-06, "loss": 0.4182, "step": 7071 }, { "epoch": 3.823391602090467, "grad_norm": 0.26644429564476013, "learning_rate": 1.5907753738753296e-06, "loss": 0.3791, "step": 7072 }, { "epoch": 3.823932240043251, "grad_norm": 0.2603045403957367, "learning_rate": 1.5893945683779526e-06, "loss": 0.3727, "step": 7073 }, { "epoch": 3.824472877996035, "grad_norm": 0.2659344971179962, "learning_rate": 1.588014249158516e-06, "loss": 0.3748, "step": 7074 }, { "epoch": 3.8250135159488194, "grad_norm": 0.2584983706474304, "learning_rate": 1.5866344164138214e-06, "loss": 0.3872, "step": 7075 }, { "epoch": 3.825554153901604, "grad_norm": 0.2933692932128906, "learning_rate": 1.585255070340601e-06, "loss": 0.3687, "step": 7076 }, { "epoch": 3.826094791854388, "grad_norm": 0.27587223052978516, "learning_rate": 1.5838762111355234e-06, "loss": 0.398, "step": 7077 }, { "epoch": 3.8266354298071725, "grad_norm": 0.2780817747116089, "learning_rate": 1.5824978389951812e-06, "loss": 0.3907, "step": 7078 }, { "epoch": 3.827176067759957, "grad_norm": 0.28126052021980286, "learning_rate": 1.5811199541160994e-06, "loss": 0.3804, "step": 7079 }, { "epoch": 3.8277167057127413, "grad_norm": 0.2787818908691406, "learning_rate": 1.5797425566947378e-06, "loss": 0.3908, "step": 7080 }, { "epoch": 3.828257343665525, "grad_norm": 0.2890182137489319, "learning_rate": 1.578365646927479e-06, "loss": 0.3868, "step": 7081 }, { "epoch": 3.8287979816183095, "grad_norm": 0.2658842206001282, "learning_rate": 1.5769892250106456e-06, "loss": 0.3922, "step": 7082 }, { "epoch": 3.829338619571094, "grad_norm": 0.27497076988220215, "learning_rate": 1.5756132911404792e-06, "loss": 0.3867, "step": 7083 }, { "epoch": 3.8298792575238783, "grad_norm": 0.26899388432502747, "learning_rate": 1.574237845513163e-06, "loss": 0.3635, "step": 7084 }, { "epoch": 3.8304198954766626, "grad_norm": 0.27387529611587524, "learning_rate": 1.572862888324801e-06, "loss": 0.3923, "step": 7085 }, { "epoch": 3.8309605334294465, "grad_norm": 0.2729697823524475, "learning_rate": 1.5714884197714369e-06, "loss": 0.3887, "step": 7086 }, { "epoch": 3.831501171382231, "grad_norm": 0.2589680552482605, "learning_rate": 1.570114440049037e-06, "loss": 0.3986, "step": 7087 }, { "epoch": 3.8320418093350153, "grad_norm": 0.2655174136161804, "learning_rate": 1.5687409493535004e-06, "loss": 0.3948, "step": 7088 }, { "epoch": 3.8325824472877996, "grad_norm": 0.2721310555934906, "learning_rate": 1.5673679478806592e-06, "loss": 0.3716, "step": 7089 }, { "epoch": 3.833123085240584, "grad_norm": 0.273525208234787, "learning_rate": 1.5659954358262724e-06, "loss": 0.369, "step": 7090 }, { "epoch": 3.833663723193368, "grad_norm": 0.25570881366729736, "learning_rate": 1.5646234133860288e-06, "loss": 0.3807, "step": 7091 }, { "epoch": 3.8342043611461527, "grad_norm": 0.27765756845474243, "learning_rate": 1.5632518807555513e-06, "loss": 0.3923, "step": 7092 }, { "epoch": 3.8347449990989366, "grad_norm": 0.26038357615470886, "learning_rate": 1.561880838130388e-06, "loss": 0.3872, "step": 7093 }, { "epoch": 3.835285637051721, "grad_norm": 0.278969407081604, "learning_rate": 1.5605102857060245e-06, "loss": 0.3861, "step": 7094 }, { "epoch": 3.8358262750045053, "grad_norm": 0.2753232717514038, "learning_rate": 1.5591402236778647e-06, "loss": 0.3883, "step": 7095 }, { "epoch": 3.8363669129572897, "grad_norm": 0.31216442584991455, "learning_rate": 1.557770652241255e-06, "loss": 0.3798, "step": 7096 }, { "epoch": 3.836907550910074, "grad_norm": 0.2636694610118866, "learning_rate": 1.5564015715914627e-06, "loss": 0.3922, "step": 7097 }, { "epoch": 3.837448188862858, "grad_norm": 0.2814835011959076, "learning_rate": 1.5550329819236926e-06, "loss": 0.3713, "step": 7098 }, { "epoch": 3.8379888268156424, "grad_norm": 0.29137900471687317, "learning_rate": 1.5536648834330736e-06, "loss": 0.3797, "step": 7099 }, { "epoch": 3.8385294647684267, "grad_norm": 0.2699742615222931, "learning_rate": 1.5522972763146653e-06, "loss": 0.3661, "step": 7100 }, { "epoch": 3.839070102721211, "grad_norm": 0.2807996869087219, "learning_rate": 1.550930160763462e-06, "loss": 0.3548, "step": 7101 }, { "epoch": 3.8396107406739954, "grad_norm": 0.30388930439949036, "learning_rate": 1.5495635369743812e-06, "loss": 0.385, "step": 7102 }, { "epoch": 3.8401513786267794, "grad_norm": 0.2817404866218567, "learning_rate": 1.548197405142277e-06, "loss": 0.3692, "step": 7103 }, { "epoch": 3.840692016579564, "grad_norm": 0.26451969146728516, "learning_rate": 1.546831765461928e-06, "loss": 0.3753, "step": 7104 }, { "epoch": 3.841232654532348, "grad_norm": 0.27831169962882996, "learning_rate": 1.5454666181280437e-06, "loss": 0.3756, "step": 7105 }, { "epoch": 3.8417732924851324, "grad_norm": 0.2777852416038513, "learning_rate": 1.5441019633352666e-06, "loss": 0.3823, "step": 7106 }, { "epoch": 3.842313930437917, "grad_norm": 0.2742907702922821, "learning_rate": 1.5427378012781657e-06, "loss": 0.3715, "step": 7107 }, { "epoch": 3.842854568390701, "grad_norm": 0.2902372479438782, "learning_rate": 1.5413741321512394e-06, "loss": 0.412, "step": 7108 }, { "epoch": 3.8433952063434855, "grad_norm": 0.27315741777420044, "learning_rate": 1.5400109561489196e-06, "loss": 0.3911, "step": 7109 }, { "epoch": 3.8439358442962694, "grad_norm": 0.251729816198349, "learning_rate": 1.5386482734655633e-06, "loss": 0.3626, "step": 7110 }, { "epoch": 3.844476482249054, "grad_norm": 0.27802354097366333, "learning_rate": 1.5372860842954629e-06, "loss": 0.4074, "step": 7111 }, { "epoch": 3.845017120201838, "grad_norm": 0.28890305757522583, "learning_rate": 1.5359243888328317e-06, "loss": 0.3614, "step": 7112 }, { "epoch": 3.8455577581546225, "grad_norm": 0.25736257433891296, "learning_rate": 1.5345631872718214e-06, "loss": 0.377, "step": 7113 }, { "epoch": 3.846098396107407, "grad_norm": 0.24798278510570526, "learning_rate": 1.5332024798065077e-06, "loss": 0.3553, "step": 7114 }, { "epoch": 3.846639034060191, "grad_norm": 0.25928983092308044, "learning_rate": 1.5318422666308997e-06, "loss": 0.3716, "step": 7115 }, { "epoch": 3.847179672012975, "grad_norm": 0.2826147973537445, "learning_rate": 1.5304825479389334e-06, "loss": 0.3605, "step": 7116 }, { "epoch": 3.8477203099657595, "grad_norm": 0.2706470787525177, "learning_rate": 1.5291233239244728e-06, "loss": 0.3676, "step": 7117 }, { "epoch": 3.848260947918544, "grad_norm": 0.25623151659965515, "learning_rate": 1.527764594781318e-06, "loss": 0.3903, "step": 7118 }, { "epoch": 3.8488015858713283, "grad_norm": 0.30254796147346497, "learning_rate": 1.526406360703191e-06, "loss": 0.3538, "step": 7119 }, { "epoch": 3.8493422238241126, "grad_norm": 0.2859158515930176, "learning_rate": 1.5250486218837458e-06, "loss": 0.3651, "step": 7120 }, { "epoch": 3.849882861776897, "grad_norm": 0.2832469940185547, "learning_rate": 1.5236913785165692e-06, "loss": 0.4027, "step": 7121 }, { "epoch": 3.850423499729681, "grad_norm": 0.25426462292671204, "learning_rate": 1.5223346307951713e-06, "loss": 0.3634, "step": 7122 }, { "epoch": 3.8509641376824653, "grad_norm": 0.27135756611824036, "learning_rate": 1.5209783789129995e-06, "loss": 0.3682, "step": 7123 }, { "epoch": 3.8515047756352496, "grad_norm": 0.2907152473926544, "learning_rate": 1.5196226230634193e-06, "loss": 0.3905, "step": 7124 }, { "epoch": 3.852045413588034, "grad_norm": 0.2654511630535126, "learning_rate": 1.5182673634397365e-06, "loss": 0.3814, "step": 7125 }, { "epoch": 3.8525860515408183, "grad_norm": 0.26027148962020874, "learning_rate": 1.5169126002351791e-06, "loss": 0.3711, "step": 7126 }, { "epoch": 3.8531266894936023, "grad_norm": 0.27235719561576843, "learning_rate": 1.5155583336429097e-06, "loss": 0.3989, "step": 7127 }, { "epoch": 3.8536673274463866, "grad_norm": 0.27417096495628357, "learning_rate": 1.5142045638560149e-06, "loss": 0.3799, "step": 7128 }, { "epoch": 3.854207965399171, "grad_norm": 0.28646403551101685, "learning_rate": 1.5128512910675119e-06, "loss": 0.3887, "step": 7129 }, { "epoch": 3.8547486033519553, "grad_norm": 0.2741946876049042, "learning_rate": 1.5114985154703505e-06, "loss": 0.3782, "step": 7130 }, { "epoch": 3.8552892413047397, "grad_norm": 0.28979212045669556, "learning_rate": 1.510146237257406e-06, "loss": 0.3976, "step": 7131 }, { "epoch": 3.8558298792575236, "grad_norm": 0.26713863015174866, "learning_rate": 1.508794456621482e-06, "loss": 0.3836, "step": 7132 }, { "epoch": 3.8563705172103084, "grad_norm": 0.2656858265399933, "learning_rate": 1.5074431737553158e-06, "loss": 0.3975, "step": 7133 }, { "epoch": 3.8569111551630924, "grad_norm": 0.2840319573879242, "learning_rate": 1.5060923888515677e-06, "loss": 0.3587, "step": 7134 }, { "epoch": 3.8574517931158767, "grad_norm": 0.2595888376235962, "learning_rate": 1.5047421021028353e-06, "loss": 0.3604, "step": 7135 }, { "epoch": 3.857992431068661, "grad_norm": 0.2776036858558655, "learning_rate": 1.5033923137016336e-06, "loss": 0.3752, "step": 7136 }, { "epoch": 3.8585330690214454, "grad_norm": 0.27982330322265625, "learning_rate": 1.502043023840416e-06, "loss": 0.3837, "step": 7137 }, { "epoch": 3.85907370697423, "grad_norm": 0.2833450138568878, "learning_rate": 1.5006942327115637e-06, "loss": 0.3706, "step": 7138 }, { "epoch": 3.8596143449270137, "grad_norm": 0.2692069411277771, "learning_rate": 1.4993459405073825e-06, "loss": 0.365, "step": 7139 }, { "epoch": 3.860154982879798, "grad_norm": 0.26060062646865845, "learning_rate": 1.4979981474201106e-06, "loss": 0.3778, "step": 7140 }, { "epoch": 3.8606956208325824, "grad_norm": 0.2728719711303711, "learning_rate": 1.4966508536419111e-06, "loss": 0.3705, "step": 7141 }, { "epoch": 3.861236258785367, "grad_norm": 0.2779045104980469, "learning_rate": 1.4953040593648833e-06, "loss": 0.3998, "step": 7142 }, { "epoch": 3.861776896738151, "grad_norm": 0.2656700313091278, "learning_rate": 1.4939577647810477e-06, "loss": 0.3637, "step": 7143 }, { "epoch": 3.862317534690935, "grad_norm": 0.28458893299102783, "learning_rate": 1.4926119700823554e-06, "loss": 0.3571, "step": 7144 }, { "epoch": 3.8628581726437194, "grad_norm": 0.2771691679954529, "learning_rate": 1.4912666754606914e-06, "loss": 0.3818, "step": 7145 }, { "epoch": 3.863398810596504, "grad_norm": 0.2585548460483551, "learning_rate": 1.489921881107861e-06, "loss": 0.3797, "step": 7146 }, { "epoch": 3.863939448549288, "grad_norm": 0.2747470736503601, "learning_rate": 1.488577587215606e-06, "loss": 0.395, "step": 7147 }, { "epoch": 3.8644800865020725, "grad_norm": 0.27049556374549866, "learning_rate": 1.4872337939755926e-06, "loss": 0.3557, "step": 7148 }, { "epoch": 3.865020724454857, "grad_norm": 0.27144232392311096, "learning_rate": 1.485890501579414e-06, "loss": 0.408, "step": 7149 }, { "epoch": 3.8655613624076413, "grad_norm": 0.26395317912101746, "learning_rate": 1.4845477102185974e-06, "loss": 0.3795, "step": 7150 }, { "epoch": 3.866102000360425, "grad_norm": 0.2912643551826477, "learning_rate": 1.4832054200845947e-06, "loss": 0.386, "step": 7151 }, { "epoch": 3.8666426383132095, "grad_norm": 0.2866555452346802, "learning_rate": 1.4818636313687868e-06, "loss": 0.3854, "step": 7152 }, { "epoch": 3.867183276265994, "grad_norm": 0.2535575330257416, "learning_rate": 1.4805223442624818e-06, "loss": 0.3741, "step": 7153 }, { "epoch": 3.8677239142187783, "grad_norm": 0.2779158353805542, "learning_rate": 1.4791815589569215e-06, "loss": 0.3796, "step": 7154 }, { "epoch": 3.8682645521715626, "grad_norm": 0.29194313287734985, "learning_rate": 1.4778412756432709e-06, "loss": 0.3818, "step": 7155 }, { "epoch": 3.8688051901243465, "grad_norm": 0.29119977355003357, "learning_rate": 1.4765014945126232e-06, "loss": 0.3792, "step": 7156 }, { "epoch": 3.869345828077131, "grad_norm": 0.27721136808395386, "learning_rate": 1.4751622157560065e-06, "loss": 0.4054, "step": 7157 }, { "epoch": 3.8698864660299153, "grad_norm": 0.2895916998386383, "learning_rate": 1.4738234395643674e-06, "loss": 0.392, "step": 7158 }, { "epoch": 3.8704271039826996, "grad_norm": 0.2725016474723816, "learning_rate": 1.472485166128591e-06, "loss": 0.3713, "step": 7159 }, { "epoch": 3.870967741935484, "grad_norm": 0.2660664916038513, "learning_rate": 1.471147395639484e-06, "loss": 0.3855, "step": 7160 }, { "epoch": 3.871508379888268, "grad_norm": 0.27325960993766785, "learning_rate": 1.4698101282877813e-06, "loss": 0.3721, "step": 7161 }, { "epoch": 3.8720490178410527, "grad_norm": 0.2715884745121002, "learning_rate": 1.4684733642641514e-06, "loss": 0.3776, "step": 7162 }, { "epoch": 3.8725896557938366, "grad_norm": 0.2671349048614502, "learning_rate": 1.4671371037591864e-06, "loss": 0.3739, "step": 7163 }, { "epoch": 3.873130293746621, "grad_norm": 0.287344753742218, "learning_rate": 1.4658013469634075e-06, "loss": 0.3593, "step": 7164 }, { "epoch": 3.8736709316994054, "grad_norm": 0.2602589428424835, "learning_rate": 1.4644660940672628e-06, "loss": 0.3675, "step": 7165 }, { "epoch": 3.8742115696521897, "grad_norm": 0.2793913781642914, "learning_rate": 1.4631313452611323e-06, "loss": 0.3722, "step": 7166 }, { "epoch": 3.874752207604974, "grad_norm": 0.2809554636478424, "learning_rate": 1.4617971007353237e-06, "loss": 0.3817, "step": 7167 }, { "epoch": 3.875292845557758, "grad_norm": 0.282959908246994, "learning_rate": 1.4604633606800689e-06, "loss": 0.378, "step": 7168 }, { "epoch": 3.8758334835105424, "grad_norm": 0.2529660165309906, "learning_rate": 1.4591301252855306e-06, "loss": 0.3772, "step": 7169 }, { "epoch": 3.8763741214633267, "grad_norm": 0.2665364444255829, "learning_rate": 1.457797394741798e-06, "loss": 0.3603, "step": 7170 }, { "epoch": 3.876914759416111, "grad_norm": 0.28510573506355286, "learning_rate": 1.4564651692388916e-06, "loss": 0.3705, "step": 7171 }, { "epoch": 3.8774553973688954, "grad_norm": 0.26954007148742676, "learning_rate": 1.4551334489667562e-06, "loss": 0.3949, "step": 7172 }, { "epoch": 3.8779960353216794, "grad_norm": 0.28254538774490356, "learning_rate": 1.4538022341152653e-06, "loss": 0.3858, "step": 7173 }, { "epoch": 3.8785366732744637, "grad_norm": 0.27030500769615173, "learning_rate": 1.4524715248742232e-06, "loss": 0.3921, "step": 7174 }, { "epoch": 3.879077311227248, "grad_norm": 0.2663464844226837, "learning_rate": 1.451141321433358e-06, "loss": 0.3831, "step": 7175 }, { "epoch": 3.8796179491800324, "grad_norm": 0.28364238142967224, "learning_rate": 1.4498116239823301e-06, "loss": 0.3794, "step": 7176 }, { "epoch": 3.880158587132817, "grad_norm": 0.2636932134628296, "learning_rate": 1.448482432710724e-06, "loss": 0.3896, "step": 7177 }, { "epoch": 3.880699225085601, "grad_norm": 0.2695843577384949, "learning_rate": 1.4471537478080516e-06, "loss": 0.3908, "step": 7178 }, { "epoch": 3.8812398630383855, "grad_norm": 0.30649521946907043, "learning_rate": 1.445825569463758e-06, "loss": 0.3775, "step": 7179 }, { "epoch": 3.8817805009911694, "grad_norm": 0.28447046875953674, "learning_rate": 1.4444978978672103e-06, "loss": 0.3681, "step": 7180 }, { "epoch": 3.882321138943954, "grad_norm": 0.271747350692749, "learning_rate": 1.443170733207706e-06, "loss": 0.3762, "step": 7181 }, { "epoch": 3.882861776896738, "grad_norm": 0.26996535062789917, "learning_rate": 1.4418440756744678e-06, "loss": 0.3616, "step": 7182 }, { "epoch": 3.8834024148495225, "grad_norm": 0.25783035159111023, "learning_rate": 1.4405179254566515e-06, "loss": 0.3727, "step": 7183 }, { "epoch": 3.883943052802307, "grad_norm": 0.2718077600002289, "learning_rate": 1.4391922827433359e-06, "loss": 0.3654, "step": 7184 }, { "epoch": 3.884483690755091, "grad_norm": 0.2731360197067261, "learning_rate": 1.4378671477235268e-06, "loss": 0.3733, "step": 7185 }, { "epoch": 3.885024328707875, "grad_norm": 0.2789834439754486, "learning_rate": 1.4365425205861627e-06, "loss": 0.3751, "step": 7186 }, { "epoch": 3.8855649666606595, "grad_norm": 0.2919517755508423, "learning_rate": 1.4352184015201036e-06, "loss": 0.364, "step": 7187 }, { "epoch": 3.886105604613444, "grad_norm": 0.2683340609073639, "learning_rate": 1.4338947907141431e-06, "loss": 0.3696, "step": 7188 }, { "epoch": 3.8866462425662283, "grad_norm": 0.267026424407959, "learning_rate": 1.4325716883569973e-06, "loss": 0.3792, "step": 7189 }, { "epoch": 3.887186880519012, "grad_norm": 0.26800042390823364, "learning_rate": 1.431249094637311e-06, "loss": 0.3877, "step": 7190 }, { "epoch": 3.887727518471797, "grad_norm": 0.29322460293769836, "learning_rate": 1.429927009743659e-06, "loss": 0.4035, "step": 7191 }, { "epoch": 3.888268156424581, "grad_norm": 0.2647303640842438, "learning_rate": 1.4286054338645416e-06, "loss": 0.3989, "step": 7192 }, { "epoch": 3.8888087943773653, "grad_norm": 0.28105291724205017, "learning_rate": 1.4272843671883857e-06, "loss": 0.3661, "step": 7193 }, { "epoch": 3.8893494323301496, "grad_norm": 0.2671509385108948, "learning_rate": 1.4259638099035456e-06, "loss": 0.3713, "step": 7194 }, { "epoch": 3.889890070282934, "grad_norm": 0.286563515663147, "learning_rate": 1.4246437621983057e-06, "loss": 0.3491, "step": 7195 }, { "epoch": 3.8904307082357183, "grad_norm": 0.3068971335887909, "learning_rate": 1.423324224260878e-06, "loss": 0.3804, "step": 7196 }, { "epoch": 3.8909713461885023, "grad_norm": 0.258400559425354, "learning_rate": 1.4220051962793952e-06, "loss": 0.3483, "step": 7197 }, { "epoch": 3.8915119841412866, "grad_norm": 0.25736966729164124, "learning_rate": 1.4206866784419248e-06, "loss": 0.3508, "step": 7198 }, { "epoch": 3.892052622094071, "grad_norm": 0.27688178420066833, "learning_rate": 1.4193686709364574e-06, "loss": 0.4045, "step": 7199 }, { "epoch": 3.8925932600468554, "grad_norm": 0.2596736252307892, "learning_rate": 1.418051173950914e-06, "loss": 0.384, "step": 7200 }, { "epoch": 3.8931338979996397, "grad_norm": 0.29413244128227234, "learning_rate": 1.41673418767314e-06, "loss": 0.3846, "step": 7201 }, { "epoch": 3.8936745359524236, "grad_norm": 0.29847484827041626, "learning_rate": 1.4154177122909068e-06, "loss": 0.3643, "step": 7202 }, { "epoch": 3.894215173905208, "grad_norm": 0.2602514326572418, "learning_rate": 1.4141017479919184e-06, "loss": 0.3579, "step": 7203 }, { "epoch": 3.8947558118579924, "grad_norm": 0.27241677045822144, "learning_rate": 1.412786294963801e-06, "loss": 0.3588, "step": 7204 }, { "epoch": 3.8952964498107767, "grad_norm": 0.26186132431030273, "learning_rate": 1.4114713533941082e-06, "loss": 0.3478, "step": 7205 }, { "epoch": 3.895837087763561, "grad_norm": 0.29094818234443665, "learning_rate": 1.4101569234703256e-06, "loss": 0.4012, "step": 7206 }, { "epoch": 3.8963777257163454, "grad_norm": 0.2835518717765808, "learning_rate": 1.408843005379858e-06, "loss": 0.3631, "step": 7207 }, { "epoch": 3.89691836366913, "grad_norm": 0.28836387395858765, "learning_rate": 1.4075295993100462e-06, "loss": 0.3722, "step": 7208 }, { "epoch": 3.8974590016219137, "grad_norm": 0.27291521430015564, "learning_rate": 1.4062167054481479e-06, "loss": 0.3616, "step": 7209 }, { "epoch": 3.897999639574698, "grad_norm": 0.2893563210964203, "learning_rate": 1.4049043239813575e-06, "loss": 0.3706, "step": 7210 }, { "epoch": 3.8985402775274824, "grad_norm": 0.26893335580825806, "learning_rate": 1.4035924550967888e-06, "loss": 0.3485, "step": 7211 }, { "epoch": 3.899080915480267, "grad_norm": 0.28393498063087463, "learning_rate": 1.4022810989814884e-06, "loss": 0.3735, "step": 7212 }, { "epoch": 3.899621553433051, "grad_norm": 0.27619096636772156, "learning_rate": 1.4009702558224258e-06, "loss": 0.3656, "step": 7213 }, { "epoch": 3.900162191385835, "grad_norm": 0.256229043006897, "learning_rate": 1.3996599258064968e-06, "loss": 0.3792, "step": 7214 }, { "epoch": 3.9007028293386194, "grad_norm": 0.2728364169597626, "learning_rate": 1.3983501091205298e-06, "loss": 0.3768, "step": 7215 }, { "epoch": 3.901243467291404, "grad_norm": 0.27944228053092957, "learning_rate": 1.3970408059512741e-06, "loss": 0.3658, "step": 7216 }, { "epoch": 3.901784105244188, "grad_norm": 0.2501264214515686, "learning_rate": 1.395732016485406e-06, "loss": 0.3527, "step": 7217 }, { "epoch": 3.9023247431969725, "grad_norm": 0.26919788122177124, "learning_rate": 1.3944237409095335e-06, "loss": 0.3738, "step": 7218 }, { "epoch": 3.9028653811497565, "grad_norm": 0.2669914662837982, "learning_rate": 1.3931159794101855e-06, "loss": 0.3725, "step": 7219 }, { "epoch": 3.9034060191025413, "grad_norm": 0.27003178000450134, "learning_rate": 1.3918087321738244e-06, "loss": 0.3713, "step": 7220 }, { "epoch": 3.903946657055325, "grad_norm": 0.2867528200149536, "learning_rate": 1.3905019993868285e-06, "loss": 0.3562, "step": 7221 }, { "epoch": 3.9044872950081095, "grad_norm": 0.2855518162250519, "learning_rate": 1.3891957812355156e-06, "loss": 0.3729, "step": 7222 }, { "epoch": 3.905027932960894, "grad_norm": 0.2666948437690735, "learning_rate": 1.3878900779061194e-06, "loss": 0.3712, "step": 7223 }, { "epoch": 3.9055685709136783, "grad_norm": 0.27920854091644287, "learning_rate": 1.3865848895848078e-06, "loss": 0.3908, "step": 7224 }, { "epoch": 3.9061092088664626, "grad_norm": 0.28267914056777954, "learning_rate": 1.3852802164576717e-06, "loss": 0.3775, "step": 7225 }, { "epoch": 3.9066498468192465, "grad_norm": 0.2642427086830139, "learning_rate": 1.3839760587107271e-06, "loss": 0.3928, "step": 7226 }, { "epoch": 3.907190484772031, "grad_norm": 0.2898047864437103, "learning_rate": 1.3826724165299205e-06, "loss": 0.3631, "step": 7227 }, { "epoch": 3.9077311227248153, "grad_norm": 0.2983902394771576, "learning_rate": 1.3813692901011228e-06, "loss": 0.4023, "step": 7228 }, { "epoch": 3.9082717606775996, "grad_norm": 0.2528410255908966, "learning_rate": 1.3800666796101291e-06, "loss": 0.379, "step": 7229 }, { "epoch": 3.908812398630384, "grad_norm": 0.2616298794746399, "learning_rate": 1.3787645852426663e-06, "loss": 0.3869, "step": 7230 }, { "epoch": 3.909353036583168, "grad_norm": 0.26782476902008057, "learning_rate": 1.3774630071843814e-06, "loss": 0.401, "step": 7231 }, { "epoch": 3.9098936745359523, "grad_norm": 0.26978132128715515, "learning_rate": 1.3761619456208548e-06, "loss": 0.364, "step": 7232 }, { "epoch": 3.9104343124887366, "grad_norm": 0.27226102352142334, "learning_rate": 1.374861400737587e-06, "loss": 0.3954, "step": 7233 }, { "epoch": 3.910974950441521, "grad_norm": 0.27029043436050415, "learning_rate": 1.373561372720007e-06, "loss": 0.3548, "step": 7234 }, { "epoch": 3.9115155883943054, "grad_norm": 0.2666410207748413, "learning_rate": 1.3722618617534727e-06, "loss": 0.3621, "step": 7235 }, { "epoch": 3.9120562263470897, "grad_norm": 0.2646237015724182, "learning_rate": 1.3709628680232628e-06, "loss": 0.3803, "step": 7236 }, { "epoch": 3.912596864299874, "grad_norm": 0.29291343688964844, "learning_rate": 1.3696643917145908e-06, "loss": 0.3845, "step": 7237 }, { "epoch": 3.913137502252658, "grad_norm": 0.2636338174343109, "learning_rate": 1.3683664330125846e-06, "loss": 0.3754, "step": 7238 }, { "epoch": 3.9136781402054424, "grad_norm": 0.25800561904907227, "learning_rate": 1.3670689921023088e-06, "loss": 0.37, "step": 7239 }, { "epoch": 3.9142187781582267, "grad_norm": 0.2600545883178711, "learning_rate": 1.3657720691687481e-06, "loss": 0.386, "step": 7240 }, { "epoch": 3.914759416111011, "grad_norm": 0.2758975327014923, "learning_rate": 1.3644756643968183e-06, "loss": 0.3843, "step": 7241 }, { "epoch": 3.9153000540637954, "grad_norm": 0.2715599536895752, "learning_rate": 1.3631797779713557e-06, "loss": 0.3434, "step": 7242 }, { "epoch": 3.9158406920165794, "grad_norm": 0.261996328830719, "learning_rate": 1.3618844100771256e-06, "loss": 0.3837, "step": 7243 }, { "epoch": 3.9163813299693637, "grad_norm": 0.28563106060028076, "learning_rate": 1.3605895608988212e-06, "loss": 0.3657, "step": 7244 }, { "epoch": 3.916921967922148, "grad_norm": 0.29061299562454224, "learning_rate": 1.3592952306210589e-06, "loss": 0.362, "step": 7245 }, { "epoch": 3.9174626058749324, "grad_norm": 0.2673431634902954, "learning_rate": 1.3580014194283796e-06, "loss": 0.3625, "step": 7246 }, { "epoch": 3.918003243827717, "grad_norm": 0.2761494815349579, "learning_rate": 1.3567081275052562e-06, "loss": 0.3673, "step": 7247 }, { "epoch": 3.9185438817805007, "grad_norm": 0.29554659128189087, "learning_rate": 1.355415355036081e-06, "loss": 0.3734, "step": 7248 }, { "epoch": 3.9190845197332855, "grad_norm": 0.2864658534526825, "learning_rate": 1.3541231022051794e-06, "loss": 0.3463, "step": 7249 }, { "epoch": 3.9196251576860694, "grad_norm": 0.276905357837677, "learning_rate": 1.3528313691967926e-06, "loss": 0.38, "step": 7250 }, { "epoch": 3.920165795638854, "grad_norm": 0.2607681155204773, "learning_rate": 1.3515401561950974e-06, "loss": 0.3925, "step": 7251 }, { "epoch": 3.920706433591638, "grad_norm": 0.2601732015609741, "learning_rate": 1.3502494633841906e-06, "loss": 0.3958, "step": 7252 }, { "epoch": 3.9212470715444225, "grad_norm": 0.25898584723472595, "learning_rate": 1.3489592909480993e-06, "loss": 0.3639, "step": 7253 }, { "epoch": 3.921787709497207, "grad_norm": 0.2667955458164215, "learning_rate": 1.3476696390707727e-06, "loss": 0.4018, "step": 7254 }, { "epoch": 3.922328347449991, "grad_norm": 0.2603578567504883, "learning_rate": 1.3463805079360854e-06, "loss": 0.3357, "step": 7255 }, { "epoch": 3.922868985402775, "grad_norm": 0.28907307982444763, "learning_rate": 1.345091897727842e-06, "loss": 0.398, "step": 7256 }, { "epoch": 3.9234096233555595, "grad_norm": 0.28409260511398315, "learning_rate": 1.343803808629769e-06, "loss": 0.3638, "step": 7257 }, { "epoch": 3.923950261308344, "grad_norm": 0.26542195677757263, "learning_rate": 1.3425162408255188e-06, "loss": 0.3674, "step": 7258 }, { "epoch": 3.9244908992611283, "grad_norm": 0.26268041133880615, "learning_rate": 1.3412291944986726e-06, "loss": 0.3769, "step": 7259 }, { "epoch": 3.925031537213912, "grad_norm": 0.2736770808696747, "learning_rate": 1.3399426698327329e-06, "loss": 0.3651, "step": 7260 }, { "epoch": 3.9255721751666965, "grad_norm": 0.27217063307762146, "learning_rate": 1.3386566670111339e-06, "loss": 0.36, "step": 7261 }, { "epoch": 3.926112813119481, "grad_norm": 0.2648680508136749, "learning_rate": 1.3373711862172262e-06, "loss": 0.3919, "step": 7262 }, { "epoch": 3.9266534510722653, "grad_norm": 0.2955801486968994, "learning_rate": 1.336086227634294e-06, "loss": 0.3488, "step": 7263 }, { "epoch": 3.9271940890250496, "grad_norm": 0.2573787569999695, "learning_rate": 1.3348017914455458e-06, "loss": 0.3637, "step": 7264 }, { "epoch": 3.927734726977834, "grad_norm": 0.25642362236976624, "learning_rate": 1.3335178778341123e-06, "loss": 0.3641, "step": 7265 }, { "epoch": 3.9282753649306184, "grad_norm": 0.2628449499607086, "learning_rate": 1.3322344869830528e-06, "loss": 0.3773, "step": 7266 }, { "epoch": 3.9288160028834023, "grad_norm": 0.277749627828598, "learning_rate": 1.330951619075348e-06, "loss": 0.3815, "step": 7267 }, { "epoch": 3.9293566408361866, "grad_norm": 0.27905476093292236, "learning_rate": 1.3296692742939104e-06, "loss": 0.3664, "step": 7268 }, { "epoch": 3.929897278788971, "grad_norm": 0.25372523069381714, "learning_rate": 1.3283874528215735e-06, "loss": 0.3882, "step": 7269 }, { "epoch": 3.9304379167417554, "grad_norm": 0.2904640734195709, "learning_rate": 1.3271061548410947e-06, "loss": 0.3734, "step": 7270 }, { "epoch": 3.9309785546945397, "grad_norm": 0.2711356282234192, "learning_rate": 1.3258253805351622e-06, "loss": 0.3669, "step": 7271 }, { "epoch": 3.9315191926473236, "grad_norm": 0.2662886679172516, "learning_rate": 1.3245451300863842e-06, "loss": 0.371, "step": 7272 }, { "epoch": 3.932059830600108, "grad_norm": 0.2688289284706116, "learning_rate": 1.323265403677299e-06, "loss": 0.3582, "step": 7273 }, { "epoch": 3.9326004685528924, "grad_norm": 0.26735720038414, "learning_rate": 1.3219862014903663e-06, "loss": 0.373, "step": 7274 }, { "epoch": 3.9331411065056767, "grad_norm": 0.2805106043815613, "learning_rate": 1.3207075237079702e-06, "loss": 0.3777, "step": 7275 }, { "epoch": 3.933681744458461, "grad_norm": 0.2979760766029358, "learning_rate": 1.3194293705124262e-06, "loss": 0.3886, "step": 7276 }, { "epoch": 3.934222382411245, "grad_norm": 0.28045132756233215, "learning_rate": 1.3181517420859696e-06, "loss": 0.3878, "step": 7277 }, { "epoch": 3.93476302036403, "grad_norm": 0.2650907635688782, "learning_rate": 1.3168746386107617e-06, "loss": 0.3503, "step": 7278 }, { "epoch": 3.9353036583168137, "grad_norm": 0.27181705832481384, "learning_rate": 1.3155980602688884e-06, "loss": 0.3728, "step": 7279 }, { "epoch": 3.935844296269598, "grad_norm": 0.2796936333179474, "learning_rate": 1.3143220072423647e-06, "loss": 0.3669, "step": 7280 }, { "epoch": 3.9363849342223824, "grad_norm": 0.2801571190357208, "learning_rate": 1.313046479713127e-06, "loss": 0.3595, "step": 7281 }, { "epoch": 3.936925572175167, "grad_norm": 0.25436931848526, "learning_rate": 1.3117714778630358e-06, "loss": 0.3857, "step": 7282 }, { "epoch": 3.937466210127951, "grad_norm": 0.2766454815864563, "learning_rate": 1.3104970018738812e-06, "loss": 0.3805, "step": 7283 }, { "epoch": 3.938006848080735, "grad_norm": 0.27439621090888977, "learning_rate": 1.3092230519273736e-06, "loss": 0.367, "step": 7284 }, { "epoch": 3.9385474860335195, "grad_norm": 0.2701241970062256, "learning_rate": 1.307949628205153e-06, "loss": 0.3837, "step": 7285 }, { "epoch": 3.939088123986304, "grad_norm": 0.26668402552604675, "learning_rate": 1.3066767308887796e-06, "loss": 0.3855, "step": 7286 }, { "epoch": 3.939628761939088, "grad_norm": 0.27629777789115906, "learning_rate": 1.3054043601597404e-06, "loss": 0.3773, "step": 7287 }, { "epoch": 3.9401693998918725, "grad_norm": 0.28795719146728516, "learning_rate": 1.3041325161994506e-06, "loss": 0.3897, "step": 7288 }, { "epoch": 3.9407100378446565, "grad_norm": 0.2713862657546997, "learning_rate": 1.3028611991892454e-06, "loss": 0.3592, "step": 7289 }, { "epoch": 3.941250675797441, "grad_norm": 0.25722599029541016, "learning_rate": 1.301590409310387e-06, "loss": 0.3506, "step": 7290 }, { "epoch": 3.941791313750225, "grad_norm": 0.2745075821876526, "learning_rate": 1.3003201467440607e-06, "loss": 0.3731, "step": 7291 }, { "epoch": 3.9423319517030095, "grad_norm": 0.27006328105926514, "learning_rate": 1.2990504116713803e-06, "loss": 0.3795, "step": 7292 }, { "epoch": 3.942872589655794, "grad_norm": 0.2963608503341675, "learning_rate": 1.297781204273385e-06, "loss": 0.3752, "step": 7293 }, { "epoch": 3.9434132276085783, "grad_norm": 0.2789985239505768, "learning_rate": 1.2965125247310296e-06, "loss": 0.3512, "step": 7294 }, { "epoch": 3.9439538655613626, "grad_norm": 0.2877563536167145, "learning_rate": 1.2952443732252058e-06, "loss": 0.3747, "step": 7295 }, { "epoch": 3.9444945035141465, "grad_norm": 0.3294397294521332, "learning_rate": 1.29397674993672e-06, "loss": 0.3789, "step": 7296 }, { "epoch": 3.945035141466931, "grad_norm": 0.2651817500591278, "learning_rate": 1.2927096550463114e-06, "loss": 0.3737, "step": 7297 }, { "epoch": 3.9455757794197153, "grad_norm": 0.26083993911743164, "learning_rate": 1.2914430887346385e-06, "loss": 0.3838, "step": 7298 }, { "epoch": 3.9461164173724996, "grad_norm": 0.27897948026657104, "learning_rate": 1.2901770511822843e-06, "loss": 0.3788, "step": 7299 }, { "epoch": 3.946657055325284, "grad_norm": 0.29251763224601746, "learning_rate": 1.2889115425697612e-06, "loss": 0.3824, "step": 7300 }, { "epoch": 3.947197693278068, "grad_norm": 0.2713179290294647, "learning_rate": 1.287646563077501e-06, "loss": 0.372, "step": 7301 }, { "epoch": 3.9477383312308523, "grad_norm": 0.2765255272388458, "learning_rate": 1.2863821128858633e-06, "loss": 0.4016, "step": 7302 }, { "epoch": 3.9482789691836366, "grad_norm": 0.26739639043807983, "learning_rate": 1.2851181921751316e-06, "loss": 0.3687, "step": 7303 }, { "epoch": 3.948819607136421, "grad_norm": 0.29228445887565613, "learning_rate": 1.283854801125511e-06, "loss": 0.3475, "step": 7304 }, { "epoch": 3.9493602450892054, "grad_norm": 0.26782652735710144, "learning_rate": 1.282591939917136e-06, "loss": 0.4014, "step": 7305 }, { "epoch": 3.9499008830419893, "grad_norm": 0.25561168789863586, "learning_rate": 1.2813296087300625e-06, "loss": 0.3914, "step": 7306 }, { "epoch": 3.950441520994774, "grad_norm": 0.29365113377571106, "learning_rate": 1.2800678077442707e-06, "loss": 0.3918, "step": 7307 }, { "epoch": 3.950982158947558, "grad_norm": 0.2888663113117218, "learning_rate": 1.2788065371396652e-06, "loss": 0.374, "step": 7308 }, { "epoch": 3.9515227969003424, "grad_norm": 0.2578252851963043, "learning_rate": 1.2775457970960765e-06, "loss": 0.3657, "step": 7309 }, { "epoch": 3.9520634348531267, "grad_norm": 0.2820899188518524, "learning_rate": 1.2762855877932617e-06, "loss": 0.3671, "step": 7310 }, { "epoch": 3.952604072805911, "grad_norm": 0.26194140315055847, "learning_rate": 1.275025909410893e-06, "loss": 0.3794, "step": 7311 }, { "epoch": 3.9531447107586954, "grad_norm": 0.27738311886787415, "learning_rate": 1.2737667621285782e-06, "loss": 0.4047, "step": 7312 }, { "epoch": 3.9536853487114794, "grad_norm": 0.268296480178833, "learning_rate": 1.272508146125841e-06, "loss": 0.3875, "step": 7313 }, { "epoch": 3.9542259866642637, "grad_norm": 0.27902549505233765, "learning_rate": 1.2712500615821348e-06, "loss": 0.3637, "step": 7314 }, { "epoch": 3.954766624617048, "grad_norm": 0.2829354703426361, "learning_rate": 1.269992508676835e-06, "loss": 0.345, "step": 7315 }, { "epoch": 3.9553072625698324, "grad_norm": 0.26511573791503906, "learning_rate": 1.2687354875892382e-06, "loss": 0.3737, "step": 7316 }, { "epoch": 3.955847900522617, "grad_norm": 0.2595377266407013, "learning_rate": 1.2674789984985725e-06, "loss": 0.3678, "step": 7317 }, { "epoch": 3.9563885384754007, "grad_norm": 0.27837511897087097, "learning_rate": 1.2662230415839831e-06, "loss": 0.3793, "step": 7318 }, { "epoch": 3.9569291764281855, "grad_norm": 0.2738182246685028, "learning_rate": 1.2649676170245433e-06, "loss": 0.3769, "step": 7319 }, { "epoch": 3.9574698143809695, "grad_norm": 0.2933426797389984, "learning_rate": 1.2637127249992465e-06, "loss": 0.4089, "step": 7320 }, { "epoch": 3.958010452333754, "grad_norm": 0.2945869266986847, "learning_rate": 1.2624583656870153e-06, "loss": 0.3705, "step": 7321 }, { "epoch": 3.958551090286538, "grad_norm": 0.2781412899494171, "learning_rate": 1.2612045392666965e-06, "loss": 0.3952, "step": 7322 }, { "epoch": 3.9590917282393225, "grad_norm": 0.28177520632743835, "learning_rate": 1.2599512459170532e-06, "loss": 0.3698, "step": 7323 }, { "epoch": 3.959632366192107, "grad_norm": 0.257065087556839, "learning_rate": 1.2586984858167812e-06, "loss": 0.3961, "step": 7324 }, { "epoch": 3.960173004144891, "grad_norm": 0.27955007553100586, "learning_rate": 1.257446259144494e-06, "loss": 0.3787, "step": 7325 }, { "epoch": 3.960713642097675, "grad_norm": 0.27634406089782715, "learning_rate": 1.2561945660787357e-06, "loss": 0.3677, "step": 7326 }, { "epoch": 3.9612542800504595, "grad_norm": 0.2737906575202942, "learning_rate": 1.2549434067979677e-06, "loss": 0.4032, "step": 7327 }, { "epoch": 3.961794918003244, "grad_norm": 0.2561657428741455, "learning_rate": 1.2536927814805772e-06, "loss": 0.3641, "step": 7328 }, { "epoch": 3.9623355559560283, "grad_norm": 0.2667194604873657, "learning_rate": 1.2524426903048786e-06, "loss": 0.3719, "step": 7329 }, { "epoch": 3.962876193908812, "grad_norm": 0.27609458565711975, "learning_rate": 1.2511931334491068e-06, "loss": 0.36, "step": 7330 }, { "epoch": 3.9634168318615965, "grad_norm": 0.27734148502349854, "learning_rate": 1.2499441110914195e-06, "loss": 0.3823, "step": 7331 }, { "epoch": 3.963957469814381, "grad_norm": 0.28398409485816956, "learning_rate": 1.2486956234099029e-06, "loss": 0.367, "step": 7332 }, { "epoch": 3.9644981077671653, "grad_norm": 0.27384549379348755, "learning_rate": 1.2474476705825611e-06, "loss": 0.3743, "step": 7333 }, { "epoch": 3.9650387457199496, "grad_norm": 0.29215607047080994, "learning_rate": 1.2462002527873301e-06, "loss": 0.3868, "step": 7334 }, { "epoch": 3.9655793836727335, "grad_norm": 0.25008103251457214, "learning_rate": 1.2449533702020578e-06, "loss": 0.3705, "step": 7335 }, { "epoch": 3.9661200216255184, "grad_norm": 0.2669306695461273, "learning_rate": 1.2437070230045272e-06, "loss": 0.3718, "step": 7336 }, { "epoch": 3.9666606595783023, "grad_norm": 0.30136021971702576, "learning_rate": 1.2424612113724372e-06, "loss": 0.3762, "step": 7337 }, { "epoch": 3.9672012975310866, "grad_norm": 0.28893977403640747, "learning_rate": 1.2412159354834159e-06, "loss": 0.3498, "step": 7338 }, { "epoch": 3.967741935483871, "grad_norm": 0.2777029573917389, "learning_rate": 1.2399711955150117e-06, "loss": 0.4018, "step": 7339 }, { "epoch": 3.9682825734366554, "grad_norm": 0.2783864140510559, "learning_rate": 1.238726991644696e-06, "loss": 0.3786, "step": 7340 }, { "epoch": 3.9688232113894397, "grad_norm": 0.2676764726638794, "learning_rate": 1.2374833240498668e-06, "loss": 0.3614, "step": 7341 }, { "epoch": 3.9693638493422236, "grad_norm": 0.2902882695198059, "learning_rate": 1.2362401929078438e-06, "loss": 0.3802, "step": 7342 }, { "epoch": 3.969904487295008, "grad_norm": 0.2899143099784851, "learning_rate": 1.234997598395869e-06, "loss": 0.3983, "step": 7343 }, { "epoch": 3.9704451252477924, "grad_norm": 0.26067546010017395, "learning_rate": 1.2337555406911111e-06, "loss": 0.3695, "step": 7344 }, { "epoch": 3.9709857632005767, "grad_norm": 0.2756107747554779, "learning_rate": 1.232514019970658e-06, "loss": 0.3934, "step": 7345 }, { "epoch": 3.971526401153361, "grad_norm": 0.27496638894081116, "learning_rate": 1.2312730364115282e-06, "loss": 0.4033, "step": 7346 }, { "epoch": 3.972067039106145, "grad_norm": 0.2726435959339142, "learning_rate": 1.2300325901906529e-06, "loss": 0.3791, "step": 7347 }, { "epoch": 3.97260767705893, "grad_norm": 0.26076987385749817, "learning_rate": 1.2287926814848955e-06, "loss": 0.376, "step": 7348 }, { "epoch": 3.9731483150117137, "grad_norm": 0.2784844934940338, "learning_rate": 1.2275533104710413e-06, "loss": 0.3805, "step": 7349 }, { "epoch": 3.973688952964498, "grad_norm": 0.2745589017868042, "learning_rate": 1.2263144773257967e-06, "loss": 0.3862, "step": 7350 }, { "epoch": 3.9742295909172825, "grad_norm": 0.2718479335308075, "learning_rate": 1.2250761822257912e-06, "loss": 0.36, "step": 7351 }, { "epoch": 3.974770228870067, "grad_norm": 0.2668786644935608, "learning_rate": 1.2238384253475783e-06, "loss": 0.3851, "step": 7352 }, { "epoch": 3.975310866822851, "grad_norm": 0.2801748216152191, "learning_rate": 1.222601206867637e-06, "loss": 0.3639, "step": 7353 }, { "epoch": 3.975851504775635, "grad_norm": 0.2815357446670532, "learning_rate": 1.221364526962367e-06, "loss": 0.3723, "step": 7354 }, { "epoch": 3.9763921427284195, "grad_norm": 0.2539142668247223, "learning_rate": 1.2201283858080903e-06, "loss": 0.3728, "step": 7355 }, { "epoch": 3.976932780681204, "grad_norm": 0.27295762300491333, "learning_rate": 1.218892783581056e-06, "loss": 0.3844, "step": 7356 }, { "epoch": 3.977473418633988, "grad_norm": 0.2792266011238098, "learning_rate": 1.2176577204574318e-06, "loss": 0.3751, "step": 7357 }, { "epoch": 3.9780140565867725, "grad_norm": 0.2757304608821869, "learning_rate": 1.2164231966133156e-06, "loss": 0.3813, "step": 7358 }, { "epoch": 3.9785546945395565, "grad_norm": 0.2661166489124298, "learning_rate": 1.215189212224716e-06, "loss": 0.3848, "step": 7359 }, { "epoch": 3.979095332492341, "grad_norm": 0.26561239361763, "learning_rate": 1.2139557674675773e-06, "loss": 0.3671, "step": 7360 }, { "epoch": 3.979635970445125, "grad_norm": 0.2694830596446991, "learning_rate": 1.2127228625177611e-06, "loss": 0.3756, "step": 7361 }, { "epoch": 3.9801766083979095, "grad_norm": 0.27653172612190247, "learning_rate": 1.2114904975510516e-06, "loss": 0.3554, "step": 7362 }, { "epoch": 3.980717246350694, "grad_norm": 0.2755624055862427, "learning_rate": 1.210258672743161e-06, "loss": 0.3605, "step": 7363 }, { "epoch": 3.9812578843034783, "grad_norm": 0.27337151765823364, "learning_rate": 1.209027388269714e-06, "loss": 0.3826, "step": 7364 }, { "epoch": 3.9817985222562626, "grad_norm": 0.2733648419380188, "learning_rate": 1.2077966443062706e-06, "loss": 0.3659, "step": 7365 }, { "epoch": 3.9823391602090465, "grad_norm": 0.26981082558631897, "learning_rate": 1.2065664410283046e-06, "loss": 0.3739, "step": 7366 }, { "epoch": 3.982879798161831, "grad_norm": 0.2549477815628052, "learning_rate": 1.2053367786112185e-06, "loss": 0.3737, "step": 7367 }, { "epoch": 3.9834204361146153, "grad_norm": 0.2778071463108063, "learning_rate": 1.2041076572303345e-06, "loss": 0.3902, "step": 7368 }, { "epoch": 3.9839610740673996, "grad_norm": 0.30516862869262695, "learning_rate": 1.2028790770608968e-06, "loss": 0.385, "step": 7369 }, { "epoch": 3.984501712020184, "grad_norm": 0.2869141101837158, "learning_rate": 1.2016510382780772e-06, "loss": 0.3667, "step": 7370 }, { "epoch": 3.985042349972968, "grad_norm": 0.2690754234790802, "learning_rate": 1.2004235410569659e-06, "loss": 0.3881, "step": 7371 }, { "epoch": 3.9855829879257523, "grad_norm": 0.2717999517917633, "learning_rate": 1.199196585572575e-06, "loss": 0.3745, "step": 7372 }, { "epoch": 3.9861236258785366, "grad_norm": 0.2818255126476288, "learning_rate": 1.1979701719998454e-06, "loss": 0.3766, "step": 7373 }, { "epoch": 3.986664263831321, "grad_norm": 0.28154563903808594, "learning_rate": 1.1967443005136343e-06, "loss": 0.3848, "step": 7374 }, { "epoch": 3.9872049017841054, "grad_norm": 0.2657877504825592, "learning_rate": 1.1955189712887272e-06, "loss": 0.3894, "step": 7375 }, { "epoch": 3.9877455397368893, "grad_norm": 0.29316869378089905, "learning_rate": 1.1942941844998246e-06, "loss": 0.3719, "step": 7376 }, { "epoch": 3.988286177689674, "grad_norm": 0.26492777466773987, "learning_rate": 1.1930699403215573e-06, "loss": 0.3673, "step": 7377 }, { "epoch": 3.988826815642458, "grad_norm": 0.2546434700489044, "learning_rate": 1.1918462389284762e-06, "loss": 0.3582, "step": 7378 }, { "epoch": 3.9893674535952424, "grad_norm": 0.2561786472797394, "learning_rate": 1.1906230804950547e-06, "loss": 0.3791, "step": 7379 }, { "epoch": 3.9899080915480267, "grad_norm": 0.2614768147468567, "learning_rate": 1.189400465195687e-06, "loss": 0.365, "step": 7380 }, { "epoch": 3.990448729500811, "grad_norm": 0.2605297267436981, "learning_rate": 1.1881783932046904e-06, "loss": 0.3858, "step": 7381 }, { "epoch": 3.9909893674535954, "grad_norm": 0.259212851524353, "learning_rate": 1.1869568646963086e-06, "loss": 0.3705, "step": 7382 }, { "epoch": 3.9915300054063794, "grad_norm": 0.2636473476886749, "learning_rate": 1.1857358798447038e-06, "loss": 0.3624, "step": 7383 }, { "epoch": 3.9920706433591637, "grad_norm": 0.2585376799106598, "learning_rate": 1.18451543882396e-06, "loss": 0.378, "step": 7384 }, { "epoch": 3.992611281311948, "grad_norm": 0.2568807899951935, "learning_rate": 1.183295541808089e-06, "loss": 0.387, "step": 7385 }, { "epoch": 3.9931519192647325, "grad_norm": 0.2846553325653076, "learning_rate": 1.1820761889710175e-06, "loss": 0.3925, "step": 7386 }, { "epoch": 3.993692557217517, "grad_norm": 0.30891963839530945, "learning_rate": 1.180857380486602e-06, "loss": 0.3711, "step": 7387 }, { "epoch": 3.9942331951703007, "grad_norm": 0.25803038477897644, "learning_rate": 1.1796391165286169e-06, "loss": 0.3926, "step": 7388 }, { "epoch": 3.994773833123085, "grad_norm": 0.27402520179748535, "learning_rate": 1.1784213972707581e-06, "loss": 0.3925, "step": 7389 }, { "epoch": 3.9953144710758695, "grad_norm": 0.2832760810852051, "learning_rate": 1.1772042228866493e-06, "loss": 0.3906, "step": 7390 }, { "epoch": 3.995855109028654, "grad_norm": 0.28747400641441345, "learning_rate": 1.1759875935498311e-06, "loss": 0.3872, "step": 7391 }, { "epoch": 3.996395746981438, "grad_norm": 0.2541361451148987, "learning_rate": 1.174771509433768e-06, "loss": 0.3697, "step": 7392 }, { "epoch": 3.9969363849342225, "grad_norm": 0.25962769985198975, "learning_rate": 1.1735559707118465e-06, "loss": 0.3778, "step": 7393 }, { "epoch": 3.997477022887007, "grad_norm": 0.26915955543518066, "learning_rate": 1.1723409775573785e-06, "loss": 0.3741, "step": 7394 }, { "epoch": 3.998017660839791, "grad_norm": 0.2751399874687195, "learning_rate": 1.1711265301435937e-06, "loss": 0.3754, "step": 7395 }, { "epoch": 3.998558298792575, "grad_norm": 0.28093770146369934, "learning_rate": 1.1699126286436445e-06, "loss": 0.3397, "step": 7396 }, { "epoch": 3.9990989367453595, "grad_norm": 0.2902621626853943, "learning_rate": 1.1686992732306102e-06, "loss": 0.3721, "step": 7397 }, { "epoch": 3.999639574698144, "grad_norm": 0.27061375975608826, "learning_rate": 1.1674864640774852e-06, "loss": 0.3708, "step": 7398 }, { "epoch": 4.000180212650928, "grad_norm": 0.3631877303123474, "learning_rate": 1.1662742013571926e-06, "loss": 0.4688, "step": 7399 }, { "epoch": 4.000720850603712, "grad_norm": 0.30023327469825745, "learning_rate": 1.165062485242574e-06, "loss": 0.3554, "step": 7400 }, { "epoch": 4.001261488556497, "grad_norm": 0.29913586378097534, "learning_rate": 1.1638513159063914e-06, "loss": 0.4124, "step": 7401 }, { "epoch": 4.001802126509281, "grad_norm": 0.26184019446372986, "learning_rate": 1.1626406935213335e-06, "loss": 0.3153, "step": 7402 }, { "epoch": 4.002342764462065, "grad_norm": 0.2806876301765442, "learning_rate": 1.1614306182600087e-06, "loss": 0.3657, "step": 7403 }, { "epoch": 4.00288340241485, "grad_norm": 0.27725282311439514, "learning_rate": 1.1602210902949462e-06, "loss": 0.3403, "step": 7404 }, { "epoch": 4.0034240403676336, "grad_norm": 0.2558286786079407, "learning_rate": 1.159012109798598e-06, "loss": 0.3563, "step": 7405 }, { "epoch": 4.003964678320418, "grad_norm": 0.29177579283714294, "learning_rate": 1.1578036769433382e-06, "loss": 0.353, "step": 7406 }, { "epoch": 4.004505316273202, "grad_norm": 0.27496418356895447, "learning_rate": 1.156595791901467e-06, "loss": 0.3708, "step": 7407 }, { "epoch": 4.005045954225987, "grad_norm": 0.2852287292480469, "learning_rate": 1.155388454845196e-06, "loss": 0.3692, "step": 7408 }, { "epoch": 4.005586592178771, "grad_norm": 0.2811695337295532, "learning_rate": 1.1541816659466703e-06, "loss": 0.3662, "step": 7409 }, { "epoch": 4.006127230131555, "grad_norm": 0.26302269101142883, "learning_rate": 1.1529754253779486e-06, "loss": 0.3651, "step": 7410 }, { "epoch": 4.00666786808434, "grad_norm": 0.2623359262943268, "learning_rate": 1.1517697333110162e-06, "loss": 0.3767, "step": 7411 }, { "epoch": 4.007208506037124, "grad_norm": 0.26116177439689636, "learning_rate": 1.1505645899177786e-06, "loss": 0.3774, "step": 7412 }, { "epoch": 4.0077491439899084, "grad_norm": 0.2673705816268921, "learning_rate": 1.1493599953700606e-06, "loss": 0.325, "step": 7413 }, { "epoch": 4.008289781942692, "grad_norm": 0.3017926812171936, "learning_rate": 1.1481559498396145e-06, "loss": 0.42, "step": 7414 }, { "epoch": 4.008830419895476, "grad_norm": 0.2560037672519684, "learning_rate": 1.1469524534981091e-06, "loss": 0.3439, "step": 7415 }, { "epoch": 4.009371057848261, "grad_norm": 0.2601366341114044, "learning_rate": 1.1457495065171353e-06, "loss": 0.3643, "step": 7416 }, { "epoch": 4.009911695801045, "grad_norm": 0.27450212836265564, "learning_rate": 1.1445471090682104e-06, "loss": 0.3472, "step": 7417 }, { "epoch": 4.01045233375383, "grad_norm": 0.27664631605148315, "learning_rate": 1.1433452613227664e-06, "loss": 0.3931, "step": 7418 }, { "epoch": 4.010992971706614, "grad_norm": 0.2645692527294159, "learning_rate": 1.1421439634521652e-06, "loss": 0.3292, "step": 7419 }, { "epoch": 4.011533609659398, "grad_norm": 0.25841808319091797, "learning_rate": 1.1409432156276805e-06, "loss": 0.3447, "step": 7420 }, { "epoch": 4.0120742476121825, "grad_norm": 0.26279181241989136, "learning_rate": 1.139743018020517e-06, "loss": 0.3644, "step": 7421 }, { "epoch": 4.012614885564966, "grad_norm": 0.2843421399593353, "learning_rate": 1.1385433708017929e-06, "loss": 0.3647, "step": 7422 }, { "epoch": 4.013155523517751, "grad_norm": 0.3022117614746094, "learning_rate": 1.1373442741425556e-06, "loss": 0.3708, "step": 7423 }, { "epoch": 4.013696161470535, "grad_norm": 0.25190797448158264, "learning_rate": 1.1361457282137677e-06, "loss": 0.335, "step": 7424 }, { "epoch": 4.01423679942332, "grad_norm": 0.2671329081058502, "learning_rate": 1.134947733186315e-06, "loss": 0.3778, "step": 7425 }, { "epoch": 4.014777437376104, "grad_norm": 0.2704976201057434, "learning_rate": 1.1337502892310088e-06, "loss": 0.3754, "step": 7426 }, { "epoch": 4.015318075328888, "grad_norm": 0.2797398865222931, "learning_rate": 1.1325533965185742e-06, "loss": 0.4209, "step": 7427 }, { "epoch": 4.0158587132816725, "grad_norm": 0.23628182709217072, "learning_rate": 1.1313570552196656e-06, "loss": 0.3442, "step": 7428 }, { "epoch": 4.0163993512344565, "grad_norm": 0.271152138710022, "learning_rate": 1.1301612655048545e-06, "loss": 0.3381, "step": 7429 }, { "epoch": 4.016939989187241, "grad_norm": 0.28068703413009644, "learning_rate": 1.1289660275446318e-06, "loss": 0.3767, "step": 7430 }, { "epoch": 4.017480627140025, "grad_norm": 0.2739368677139282, "learning_rate": 1.1277713415094155e-06, "loss": 0.3539, "step": 7431 }, { "epoch": 4.018021265092809, "grad_norm": 0.28415143489837646, "learning_rate": 1.1265772075695409e-06, "loss": 0.3613, "step": 7432 }, { "epoch": 4.018561903045594, "grad_norm": 0.2874448299407959, "learning_rate": 1.125383625895265e-06, "loss": 0.3758, "step": 7433 }, { "epoch": 4.019102540998378, "grad_norm": 0.25903433561325073, "learning_rate": 1.1241905966567652e-06, "loss": 0.3356, "step": 7434 }, { "epoch": 4.019643178951163, "grad_norm": 0.2529353201389313, "learning_rate": 1.1229981200241424e-06, "loss": 0.3417, "step": 7435 }, { "epoch": 4.0201838169039465, "grad_norm": 0.26373714208602905, "learning_rate": 1.1218061961674214e-06, "loss": 0.3887, "step": 7436 }, { "epoch": 4.020724454856731, "grad_norm": 0.2680737376213074, "learning_rate": 1.1206148252565385e-06, "loss": 0.3858, "step": 7437 }, { "epoch": 4.021265092809515, "grad_norm": 0.2669246196746826, "learning_rate": 1.1194240074613617e-06, "loss": 0.3806, "step": 7438 }, { "epoch": 4.021805730762299, "grad_norm": 0.258058100938797, "learning_rate": 1.1182337429516722e-06, "loss": 0.356, "step": 7439 }, { "epoch": 4.022346368715084, "grad_norm": 0.2669731378555298, "learning_rate": 1.1170440318971788e-06, "loss": 0.3808, "step": 7440 }, { "epoch": 4.022887006667868, "grad_norm": 0.27213218808174133, "learning_rate": 1.1158548744675073e-06, "loss": 0.3606, "step": 7441 }, { "epoch": 4.023427644620653, "grad_norm": 0.26563283801078796, "learning_rate": 1.1146662708322043e-06, "loss": 0.3278, "step": 7442 }, { "epoch": 4.023968282573437, "grad_norm": 0.28348419070243835, "learning_rate": 1.113478221160741e-06, "loss": 0.3874, "step": 7443 }, { "epoch": 4.024508920526221, "grad_norm": 0.25104525685310364, "learning_rate": 1.1122907256225064e-06, "loss": 0.3303, "step": 7444 }, { "epoch": 4.025049558479005, "grad_norm": 0.26987916231155396, "learning_rate": 1.1111037843868095e-06, "loss": 0.3581, "step": 7445 }, { "epoch": 4.025590196431789, "grad_norm": 0.2615717351436615, "learning_rate": 1.1099173976228854e-06, "loss": 0.345, "step": 7446 }, { "epoch": 4.026130834384574, "grad_norm": 0.2686591148376465, "learning_rate": 1.1087315654998842e-06, "loss": 0.3736, "step": 7447 }, { "epoch": 4.026671472337358, "grad_norm": 0.2937152683734894, "learning_rate": 1.1075462881868842e-06, "loss": 0.3502, "step": 7448 }, { "epoch": 4.027212110290143, "grad_norm": 0.3030618131160736, "learning_rate": 1.1063615658528742e-06, "loss": 0.381, "step": 7449 }, { "epoch": 4.027752748242927, "grad_norm": 0.2527458965778351, "learning_rate": 1.1051773986667735e-06, "loss": 0.3131, "step": 7450 }, { "epoch": 4.028293386195711, "grad_norm": 0.29645195603370667, "learning_rate": 1.1039937867974166e-06, "loss": 0.4008, "step": 7451 }, { "epoch": 4.0288340241484955, "grad_norm": 0.299660325050354, "learning_rate": 1.1028107304135626e-06, "loss": 0.3426, "step": 7452 }, { "epoch": 4.029374662101279, "grad_norm": 0.26119139790534973, "learning_rate": 1.1016282296838887e-06, "loss": 0.3553, "step": 7453 }, { "epoch": 4.029915300054064, "grad_norm": 0.3071519136428833, "learning_rate": 1.1004462847769925e-06, "loss": 0.4091, "step": 7454 }, { "epoch": 4.030455938006848, "grad_norm": 0.262119859457016, "learning_rate": 1.0992648958613961e-06, "loss": 0.3263, "step": 7455 }, { "epoch": 4.030996575959632, "grad_norm": 0.2911357283592224, "learning_rate": 1.0980840631055378e-06, "loss": 0.3556, "step": 7456 }, { "epoch": 4.031537213912417, "grad_norm": 0.2899060845375061, "learning_rate": 1.0969037866777782e-06, "loss": 0.3751, "step": 7457 }, { "epoch": 4.032077851865201, "grad_norm": 0.25036853551864624, "learning_rate": 1.0957240667464014e-06, "loss": 0.3484, "step": 7458 }, { "epoch": 4.0326184898179855, "grad_norm": 0.2819577157497406, "learning_rate": 1.0945449034796068e-06, "loss": 0.381, "step": 7459 }, { "epoch": 4.0331591277707695, "grad_norm": 0.2584560811519623, "learning_rate": 1.0933662970455217e-06, "loss": 0.3426, "step": 7460 }, { "epoch": 4.033699765723553, "grad_norm": 0.28365328907966614, "learning_rate": 1.0921882476121837e-06, "loss": 0.3842, "step": 7461 }, { "epoch": 4.034240403676338, "grad_norm": 0.28638550639152527, "learning_rate": 1.091010755347562e-06, "loss": 0.3934, "step": 7462 }, { "epoch": 4.034781041629122, "grad_norm": 0.28289759159088135, "learning_rate": 1.0898338204195375e-06, "loss": 0.3702, "step": 7463 }, { "epoch": 4.035321679581907, "grad_norm": 0.2768246829509735, "learning_rate": 1.0886574429959185e-06, "loss": 0.3442, "step": 7464 }, { "epoch": 4.035862317534691, "grad_norm": 0.27342909574508667, "learning_rate": 1.0874816232444297e-06, "loss": 0.417, "step": 7465 }, { "epoch": 4.036402955487476, "grad_norm": 0.27252939343452454, "learning_rate": 1.0863063613327162e-06, "loss": 0.3747, "step": 7466 }, { "epoch": 4.0369435934402595, "grad_norm": 0.2616770267486572, "learning_rate": 1.0851316574283466e-06, "loss": 0.347, "step": 7467 }, { "epoch": 4.0374842313930435, "grad_norm": 0.26894411444664, "learning_rate": 1.0839575116988077e-06, "loss": 0.3305, "step": 7468 }, { "epoch": 4.038024869345828, "grad_norm": 0.2860458195209503, "learning_rate": 1.0827839243115046e-06, "loss": 0.364, "step": 7469 }, { "epoch": 4.038565507298612, "grad_norm": 0.2837781012058258, "learning_rate": 1.081610895433769e-06, "loss": 0.3886, "step": 7470 }, { "epoch": 4.039106145251397, "grad_norm": 0.27345579862594604, "learning_rate": 1.080438425232846e-06, "loss": 0.3965, "step": 7471 }, { "epoch": 4.039646783204181, "grad_norm": 0.25032445788383484, "learning_rate": 1.0792665138759085e-06, "loss": 0.3554, "step": 7472 }, { "epoch": 4.040187421156965, "grad_norm": 0.25677117705345154, "learning_rate": 1.07809516153004e-06, "loss": 0.3647, "step": 7473 }, { "epoch": 4.04072805910975, "grad_norm": 0.29125097393989563, "learning_rate": 1.0769243683622522e-06, "loss": 0.3889, "step": 7474 }, { "epoch": 4.0412686970625336, "grad_norm": 0.2656898498535156, "learning_rate": 1.0757541345394768e-06, "loss": 0.3758, "step": 7475 }, { "epoch": 4.041809335015318, "grad_norm": 0.2595039904117584, "learning_rate": 1.0745844602285615e-06, "loss": 0.3711, "step": 7476 }, { "epoch": 4.042349972968102, "grad_norm": 0.277091920375824, "learning_rate": 1.0734153455962765e-06, "loss": 0.3832, "step": 7477 }, { "epoch": 4.042890610920887, "grad_norm": 0.25206366181373596, "learning_rate": 1.072246790809311e-06, "loss": 0.3557, "step": 7478 }, { "epoch": 4.043431248873671, "grad_norm": 0.2778056263923645, "learning_rate": 1.0710787960342777e-06, "loss": 0.3675, "step": 7479 }, { "epoch": 4.043971886826455, "grad_norm": 0.2501332461833954, "learning_rate": 1.0699113614377065e-06, "loss": 0.3222, "step": 7480 }, { "epoch": 4.04451252477924, "grad_norm": 0.2865346372127533, "learning_rate": 1.0687444871860459e-06, "loss": 0.3606, "step": 7481 }, { "epoch": 4.045053162732024, "grad_norm": 0.25500455498695374, "learning_rate": 1.0675781734456703e-06, "loss": 0.3517, "step": 7482 }, { "epoch": 4.0455938006848084, "grad_norm": 0.26119256019592285, "learning_rate": 1.0664124203828667e-06, "loss": 0.3413, "step": 7483 }, { "epoch": 4.046134438637592, "grad_norm": 0.271000474691391, "learning_rate": 1.0652472281638505e-06, "loss": 0.3477, "step": 7484 }, { "epoch": 4.046675076590376, "grad_norm": 0.2632492780685425, "learning_rate": 1.0640825969547498e-06, "loss": 0.3189, "step": 7485 }, { "epoch": 4.047215714543161, "grad_norm": 0.29628267884254456, "learning_rate": 1.0629185269216147e-06, "loss": 0.3924, "step": 7486 }, { "epoch": 4.047756352495945, "grad_norm": 0.25676894187927246, "learning_rate": 1.0617550182304193e-06, "loss": 0.3379, "step": 7487 }, { "epoch": 4.04829699044873, "grad_norm": 0.2630210518836975, "learning_rate": 1.0605920710470529e-06, "loss": 0.3425, "step": 7488 }, { "epoch": 4.048837628401514, "grad_norm": 0.2740321755409241, "learning_rate": 1.0594296855373265e-06, "loss": 0.3959, "step": 7489 }, { "epoch": 4.049378266354298, "grad_norm": 0.2768412232398987, "learning_rate": 1.058267861866969e-06, "loss": 0.376, "step": 7490 }, { "epoch": 4.0499189043070825, "grad_norm": 0.2641516923904419, "learning_rate": 1.0571066002016345e-06, "loss": 0.3771, "step": 7491 }, { "epoch": 4.050459542259866, "grad_norm": 0.2585950493812561, "learning_rate": 1.0559459007068907e-06, "loss": 0.3135, "step": 7492 }, { "epoch": 4.051000180212651, "grad_norm": 0.27136123180389404, "learning_rate": 1.0547857635482306e-06, "loss": 0.3591, "step": 7493 }, { "epoch": 4.051540818165435, "grad_norm": 0.2911006212234497, "learning_rate": 1.0536261888910637e-06, "loss": 0.3604, "step": 7494 }, { "epoch": 4.05208145611822, "grad_norm": 0.2715640366077423, "learning_rate": 1.0524671769007177e-06, "loss": 0.3647, "step": 7495 }, { "epoch": 4.052622094071004, "grad_norm": 0.2566773295402527, "learning_rate": 1.051308727742446e-06, "loss": 0.334, "step": 7496 }, { "epoch": 4.053162732023788, "grad_norm": 0.2693837583065033, "learning_rate": 1.050150841581416e-06, "loss": 0.3632, "step": 7497 }, { "epoch": 4.0537033699765725, "grad_norm": 0.267478346824646, "learning_rate": 1.0489935185827166e-06, "loss": 0.3364, "step": 7498 }, { "epoch": 4.0542440079293565, "grad_norm": 0.263049453496933, "learning_rate": 1.0478367589113586e-06, "loss": 0.3546, "step": 7499 }, { "epoch": 4.054784645882141, "grad_norm": 0.28416845202445984, "learning_rate": 1.0466805627322685e-06, "loss": 0.3441, "step": 7500 }, { "epoch": 4.055325283834925, "grad_norm": 0.27740010619163513, "learning_rate": 1.0455249302102994e-06, "loss": 0.319, "step": 7501 }, { "epoch": 4.055865921787709, "grad_norm": 0.2731799781322479, "learning_rate": 1.0443698615102121e-06, "loss": 0.3694, "step": 7502 }, { "epoch": 4.056406559740494, "grad_norm": 0.2924058735370636, "learning_rate": 1.0432153567966985e-06, "loss": 0.3872, "step": 7503 }, { "epoch": 4.056947197693278, "grad_norm": 0.2668353319168091, "learning_rate": 1.0420614162343661e-06, "loss": 0.3557, "step": 7504 }, { "epoch": 4.057487835646063, "grad_norm": 0.27553117275238037, "learning_rate": 1.0409080399877413e-06, "loss": 0.4105, "step": 7505 }, { "epoch": 4.0580284735988466, "grad_norm": 0.25538763403892517, "learning_rate": 1.0397552282212698e-06, "loss": 0.3556, "step": 7506 }, { "epoch": 4.058569111551631, "grad_norm": 0.2744927406311035, "learning_rate": 1.0386029810993159e-06, "loss": 0.331, "step": 7507 }, { "epoch": 4.059109749504415, "grad_norm": 0.2575755715370178, "learning_rate": 1.0374512987861679e-06, "loss": 0.3515, "step": 7508 }, { "epoch": 4.059650387457199, "grad_norm": 0.27319324016571045, "learning_rate": 1.0363001814460294e-06, "loss": 0.3937, "step": 7509 }, { "epoch": 4.060191025409984, "grad_norm": 0.24155323207378387, "learning_rate": 1.035149629243023e-06, "loss": 0.3334, "step": 7510 }, { "epoch": 4.060731663362768, "grad_norm": 0.32312268018722534, "learning_rate": 1.0339996423411946e-06, "loss": 0.4033, "step": 7511 }, { "epoch": 4.061272301315553, "grad_norm": 0.265438973903656, "learning_rate": 1.0328502209045056e-06, "loss": 0.3488, "step": 7512 }, { "epoch": 4.061812939268337, "grad_norm": 0.2738054692745209, "learning_rate": 1.0317013650968404e-06, "loss": 0.3697, "step": 7513 }, { "epoch": 4.062353577221121, "grad_norm": 0.2512156367301941, "learning_rate": 1.0305530750819992e-06, "loss": 0.3097, "step": 7514 }, { "epoch": 4.062894215173905, "grad_norm": 0.2932851016521454, "learning_rate": 1.0294053510237028e-06, "loss": 0.3783, "step": 7515 }, { "epoch": 4.063434853126689, "grad_norm": 0.2610452473163605, "learning_rate": 1.0282581930855933e-06, "loss": 0.3499, "step": 7516 }, { "epoch": 4.063975491079474, "grad_norm": 0.2715596556663513, "learning_rate": 1.0271116014312293e-06, "loss": 0.3466, "step": 7517 }, { "epoch": 4.064516129032258, "grad_norm": 0.26254555583000183, "learning_rate": 1.02596557622409e-06, "loss": 0.3654, "step": 7518 }, { "epoch": 4.065056766985042, "grad_norm": 0.28570741415023804, "learning_rate": 1.0248201176275717e-06, "loss": 0.3774, "step": 7519 }, { "epoch": 4.065597404937827, "grad_norm": 0.263133704662323, "learning_rate": 1.0236752258049954e-06, "loss": 0.3464, "step": 7520 }, { "epoch": 4.066138042890611, "grad_norm": 0.2521113157272339, "learning_rate": 1.0225309009195962e-06, "loss": 0.3394, "step": 7521 }, { "epoch": 4.0666786808433955, "grad_norm": 0.2839896082878113, "learning_rate": 1.0213871431345274e-06, "loss": 0.3802, "step": 7522 }, { "epoch": 4.067219318796179, "grad_norm": 0.2782779932022095, "learning_rate": 1.0202439526128677e-06, "loss": 0.3373, "step": 7523 }, { "epoch": 4.067759956748964, "grad_norm": 0.30508559942245483, "learning_rate": 1.0191013295176082e-06, "loss": 0.3945, "step": 7524 }, { "epoch": 4.068300594701748, "grad_norm": 0.2754221260547638, "learning_rate": 1.017959274011665e-06, "loss": 0.3205, "step": 7525 }, { "epoch": 4.068841232654532, "grad_norm": 0.26116254925727844, "learning_rate": 1.0168177862578683e-06, "loss": 0.3332, "step": 7526 }, { "epoch": 4.069381870607317, "grad_norm": 0.27342498302459717, "learning_rate": 1.0156768664189681e-06, "loss": 0.3979, "step": 7527 }, { "epoch": 4.069922508560101, "grad_norm": 0.27583229541778564, "learning_rate": 1.0145365146576375e-06, "loss": 0.3334, "step": 7528 }, { "epoch": 4.0704631465128855, "grad_norm": 0.28629687428474426, "learning_rate": 1.013396731136465e-06, "loss": 0.3668, "step": 7529 }, { "epoch": 4.0710037844656695, "grad_norm": 0.2998204529285431, "learning_rate": 1.0122575160179582e-06, "loss": 0.3859, "step": 7530 }, { "epoch": 4.071544422418453, "grad_norm": 0.2818513810634613, "learning_rate": 1.0111188694645435e-06, "loss": 0.3498, "step": 7531 }, { "epoch": 4.072085060371238, "grad_norm": 0.26502078771591187, "learning_rate": 1.0099807916385673e-06, "loss": 0.3501, "step": 7532 }, { "epoch": 4.072625698324022, "grad_norm": 0.2796849012374878, "learning_rate": 1.0088432827022986e-06, "loss": 0.3642, "step": 7533 }, { "epoch": 4.073166336276807, "grad_norm": 0.284131795167923, "learning_rate": 1.0077063428179156e-06, "loss": 0.375, "step": 7534 }, { "epoch": 4.073706974229591, "grad_norm": 0.28391698002815247, "learning_rate": 1.0065699721475253e-06, "loss": 0.3576, "step": 7535 }, { "epoch": 4.074247612182376, "grad_norm": 0.2852577865123749, "learning_rate": 1.0054341708531462e-06, "loss": 0.3574, "step": 7536 }, { "epoch": 4.0747882501351596, "grad_norm": 0.2701577842235565, "learning_rate": 1.0042989390967218e-06, "loss": 0.3599, "step": 7537 }, { "epoch": 4.0753288880879435, "grad_norm": 0.2770707607269287, "learning_rate": 1.00316427704011e-06, "loss": 0.3553, "step": 7538 }, { "epoch": 4.075869526040728, "grad_norm": 0.28347349166870117, "learning_rate": 1.0020301848450874e-06, "loss": 0.3752, "step": 7539 }, { "epoch": 4.076410163993512, "grad_norm": 0.28405776619911194, "learning_rate": 1.0008966626733541e-06, "loss": 0.3656, "step": 7540 }, { "epoch": 4.076950801946297, "grad_norm": 0.27447542548179626, "learning_rate": 9.997637106865232e-07, "loss": 0.3806, "step": 7541 }, { "epoch": 4.077491439899081, "grad_norm": 0.2700732946395874, "learning_rate": 9.986313290461287e-07, "loss": 0.3215, "step": 7542 }, { "epoch": 4.078032077851865, "grad_norm": 0.279715359210968, "learning_rate": 9.974995179136254e-07, "loss": 0.3964, "step": 7543 }, { "epoch": 4.07857271580465, "grad_norm": 0.2824874520301819, "learning_rate": 9.963682774503824e-07, "loss": 0.3763, "step": 7544 }, { "epoch": 4.079113353757434, "grad_norm": 0.2741328775882721, "learning_rate": 9.95237607817694e-07, "loss": 0.3209, "step": 7545 }, { "epoch": 4.079653991710218, "grad_norm": 0.2551615238189697, "learning_rate": 9.941075091767643e-07, "loss": 0.3558, "step": 7546 }, { "epoch": 4.080194629663002, "grad_norm": 0.281745046377182, "learning_rate": 9.929779816887237e-07, "loss": 0.3688, "step": 7547 }, { "epoch": 4.080735267615786, "grad_norm": 0.2685527503490448, "learning_rate": 9.918490255146158e-07, "loss": 0.3682, "step": 7548 }, { "epoch": 4.081275905568571, "grad_norm": 0.2721521556377411, "learning_rate": 9.90720640815408e-07, "loss": 0.3678, "step": 7549 }, { "epoch": 4.081816543521355, "grad_norm": 0.27016156911849976, "learning_rate": 9.895928277519822e-07, "loss": 0.3401, "step": 7550 }, { "epoch": 4.08235718147414, "grad_norm": 0.27403244376182556, "learning_rate": 9.884655864851384e-07, "loss": 0.3382, "step": 7551 }, { "epoch": 4.082897819426924, "grad_norm": 0.2650195062160492, "learning_rate": 9.873389171755987e-07, "loss": 0.3412, "step": 7552 }, { "epoch": 4.0834384573797085, "grad_norm": 0.28170421719551086, "learning_rate": 9.86212819984001e-07, "loss": 0.3962, "step": 7553 }, { "epoch": 4.083979095332492, "grad_norm": 0.28326892852783203, "learning_rate": 9.850872950709012e-07, "loss": 0.3798, "step": 7554 }, { "epoch": 4.084519733285276, "grad_norm": 0.2657798230648041, "learning_rate": 9.83962342596776e-07, "loss": 0.3543, "step": 7555 }, { "epoch": 4.085060371238061, "grad_norm": 0.2940162718296051, "learning_rate": 9.828379627220174e-07, "loss": 0.3665, "step": 7556 }, { "epoch": 4.085601009190845, "grad_norm": 0.25168517231941223, "learning_rate": 9.817141556069398e-07, "loss": 0.3127, "step": 7557 }, { "epoch": 4.08614164714363, "grad_norm": 0.26851770281791687, "learning_rate": 9.805909214117721e-07, "loss": 0.3598, "step": 7558 }, { "epoch": 4.086682285096414, "grad_norm": 0.27158963680267334, "learning_rate": 9.794682602966637e-07, "loss": 0.3913, "step": 7559 }, { "epoch": 4.087222923049198, "grad_norm": 0.26728105545043945, "learning_rate": 9.783461724216793e-07, "loss": 0.3428, "step": 7560 }, { "epoch": 4.0877635610019825, "grad_norm": 0.29941266775131226, "learning_rate": 9.77224657946806e-07, "loss": 0.3893, "step": 7561 }, { "epoch": 4.088304198954766, "grad_norm": 0.27292317152023315, "learning_rate": 9.761037170319498e-07, "loss": 0.3579, "step": 7562 }, { "epoch": 4.088844836907551, "grad_norm": 0.28188255429267883, "learning_rate": 9.74983349836927e-07, "loss": 0.3684, "step": 7563 }, { "epoch": 4.089385474860335, "grad_norm": 0.26834139227867126, "learning_rate": 9.73863556521482e-07, "loss": 0.3837, "step": 7564 }, { "epoch": 4.08992611281312, "grad_norm": 0.27812516689300537, "learning_rate": 9.7274433724527e-07, "loss": 0.342, "step": 7565 }, { "epoch": 4.090466750765904, "grad_norm": 0.2742692828178406, "learning_rate": 9.716256921678696e-07, "loss": 0.3735, "step": 7566 }, { "epoch": 4.091007388718688, "grad_norm": 0.27141088247299194, "learning_rate": 9.705076214487747e-07, "loss": 0.3504, "step": 7567 }, { "epoch": 4.0915480266714725, "grad_norm": 0.2742808163166046, "learning_rate": 9.693901252473953e-07, "loss": 0.3575, "step": 7568 }, { "epoch": 4.0920886646242565, "grad_norm": 0.27946266531944275, "learning_rate": 9.682732037230652e-07, "loss": 0.3518, "step": 7569 }, { "epoch": 4.092629302577041, "grad_norm": 0.3077234923839569, "learning_rate": 9.671568570350321e-07, "loss": 0.412, "step": 7570 }, { "epoch": 4.093169940529825, "grad_norm": 0.25089436769485474, "learning_rate": 9.660410853424607e-07, "loss": 0.2997, "step": 7571 }, { "epoch": 4.093710578482609, "grad_norm": 0.28145653009414673, "learning_rate": 9.649258888044384e-07, "loss": 0.406, "step": 7572 }, { "epoch": 4.094251216435394, "grad_norm": 0.2497064620256424, "learning_rate": 9.63811267579966e-07, "loss": 0.3202, "step": 7573 }, { "epoch": 4.094791854388178, "grad_norm": 0.2823668122291565, "learning_rate": 9.626972218279674e-07, "loss": 0.3966, "step": 7574 }, { "epoch": 4.095332492340963, "grad_norm": 0.2755703032016754, "learning_rate": 9.615837517072758e-07, "loss": 0.3865, "step": 7575 }, { "epoch": 4.0958731302937466, "grad_norm": 0.2881629765033722, "learning_rate": 9.604708573766525e-07, "loss": 0.3609, "step": 7576 }, { "epoch": 4.0964137682465305, "grad_norm": 0.27658626437187195, "learning_rate": 9.59358538994769e-07, "loss": 0.3351, "step": 7577 }, { "epoch": 4.096954406199315, "grad_norm": 0.2800852954387665, "learning_rate": 9.582467967202202e-07, "loss": 0.3609, "step": 7578 }, { "epoch": 4.097495044152099, "grad_norm": 0.28268930315971375, "learning_rate": 9.571356307115149e-07, "loss": 0.3765, "step": 7579 }, { "epoch": 4.098035682104884, "grad_norm": 0.27372539043426514, "learning_rate": 9.560250411270794e-07, "loss": 0.3326, "step": 7580 }, { "epoch": 4.098576320057668, "grad_norm": 0.29689791798591614, "learning_rate": 9.549150281252633e-07, "loss": 0.3613, "step": 7581 }, { "epoch": 4.099116958010453, "grad_norm": 0.30527156591415405, "learning_rate": 9.53805591864328e-07, "loss": 0.3653, "step": 7582 }, { "epoch": 4.099657595963237, "grad_norm": 0.27635329961776733, "learning_rate": 9.526967325024539e-07, "loss": 0.3771, "step": 7583 }, { "epoch": 4.100198233916021, "grad_norm": 0.2942197918891907, "learning_rate": 9.51588450197743e-07, "loss": 0.3516, "step": 7584 }, { "epoch": 4.100738871868805, "grad_norm": 0.2760907709598541, "learning_rate": 9.504807451082088e-07, "loss": 0.3433, "step": 7585 }, { "epoch": 4.101279509821589, "grad_norm": 0.27573254704475403, "learning_rate": 9.493736173917906e-07, "loss": 0.3617, "step": 7586 }, { "epoch": 4.101820147774374, "grad_norm": 0.292228639125824, "learning_rate": 9.482670672063354e-07, "loss": 0.3543, "step": 7587 }, { "epoch": 4.102360785727158, "grad_norm": 0.2821485996246338, "learning_rate": 9.471610947096166e-07, "loss": 0.3438, "step": 7588 }, { "epoch": 4.102901423679942, "grad_norm": 0.2708372473716736, "learning_rate": 9.460557000593196e-07, "loss": 0.3772, "step": 7589 }, { "epoch": 4.103442061632727, "grad_norm": 0.2668057978153229, "learning_rate": 9.449508834130517e-07, "loss": 0.3831, "step": 7590 }, { "epoch": 4.103982699585511, "grad_norm": 0.28821754455566406, "learning_rate": 9.438466449283345e-07, "loss": 0.3845, "step": 7591 }, { "epoch": 4.1045233375382955, "grad_norm": 0.24543441832065582, "learning_rate": 9.427429847626068e-07, "loss": 0.3344, "step": 7592 }, { "epoch": 4.105063975491079, "grad_norm": 0.283571720123291, "learning_rate": 9.416399030732298e-07, "loss": 0.3689, "step": 7593 }, { "epoch": 4.105604613443864, "grad_norm": 0.28722450137138367, "learning_rate": 9.405374000174772e-07, "loss": 0.3789, "step": 7594 }, { "epoch": 4.106145251396648, "grad_norm": 0.271992564201355, "learning_rate": 9.394354757525404e-07, "loss": 0.3537, "step": 7595 }, { "epoch": 4.106685889349432, "grad_norm": 0.2753427028656006, "learning_rate": 9.383341304355326e-07, "loss": 0.3799, "step": 7596 }, { "epoch": 4.107226527302217, "grad_norm": 0.26946526765823364, "learning_rate": 9.372333642234787e-07, "loss": 0.363, "step": 7597 }, { "epoch": 4.107767165255001, "grad_norm": 0.27203747630119324, "learning_rate": 9.361331772733284e-07, "loss": 0.3729, "step": 7598 }, { "epoch": 4.1083078032077855, "grad_norm": 0.24915803968906403, "learning_rate": 9.350335697419382e-07, "loss": 0.368, "step": 7599 }, { "epoch": 4.1088484411605695, "grad_norm": 0.30128878355026245, "learning_rate": 9.339345417860918e-07, "loss": 0.4005, "step": 7600 }, { "epoch": 4.109389079113353, "grad_norm": 0.2533441483974457, "learning_rate": 9.328360935624875e-07, "loss": 0.3328, "step": 7601 }, { "epoch": 4.109929717066138, "grad_norm": 0.2531205713748932, "learning_rate": 9.317382252277391e-07, "loss": 0.3785, "step": 7602 }, { "epoch": 4.110470355018922, "grad_norm": 0.27615755796432495, "learning_rate": 9.306409369383779e-07, "loss": 0.3252, "step": 7603 }, { "epoch": 4.111010992971707, "grad_norm": 0.27425599098205566, "learning_rate": 9.295442288508522e-07, "loss": 0.3868, "step": 7604 }, { "epoch": 4.111551630924491, "grad_norm": 0.2784785032272339, "learning_rate": 9.284481011215318e-07, "loss": 0.3574, "step": 7605 }, { "epoch": 4.112092268877275, "grad_norm": 0.2659832239151001, "learning_rate": 9.273525539066985e-07, "loss": 0.38, "step": 7606 }, { "epoch": 4.1126329068300596, "grad_norm": 0.25805604457855225, "learning_rate": 9.262575873625529e-07, "loss": 0.3279, "step": 7607 }, { "epoch": 4.1131735447828435, "grad_norm": 0.2785314917564392, "learning_rate": 9.251632016452156e-07, "loss": 0.4154, "step": 7608 }, { "epoch": 4.113714182735628, "grad_norm": 0.2850290834903717, "learning_rate": 9.240693969107195e-07, "loss": 0.355, "step": 7609 }, { "epoch": 4.114254820688412, "grad_norm": 0.2698410153388977, "learning_rate": 9.229761733150205e-07, "loss": 0.35, "step": 7610 }, { "epoch": 4.114795458641197, "grad_norm": 0.3033217489719391, "learning_rate": 9.218835310139862e-07, "loss": 0.407, "step": 7611 }, { "epoch": 4.115336096593981, "grad_norm": 0.2788592576980591, "learning_rate": 9.207914701634024e-07, "loss": 0.3572, "step": 7612 }, { "epoch": 4.115876734546765, "grad_norm": 0.2555527985095978, "learning_rate": 9.196999909189764e-07, "loss": 0.3346, "step": 7613 }, { "epoch": 4.11641737249955, "grad_norm": 0.2944771647453308, "learning_rate": 9.186090934363274e-07, "loss": 0.3987, "step": 7614 }, { "epoch": 4.116958010452334, "grad_norm": 0.28323036432266235, "learning_rate": 9.175187778709937e-07, "loss": 0.3669, "step": 7615 }, { "epoch": 4.117498648405118, "grad_norm": 0.26645544171333313, "learning_rate": 9.164290443784296e-07, "loss": 0.353, "step": 7616 }, { "epoch": 4.118039286357902, "grad_norm": 0.27114954590797424, "learning_rate": 9.153398931140095e-07, "loss": 0.3511, "step": 7617 }, { "epoch": 4.118579924310686, "grad_norm": 0.2679999768733978, "learning_rate": 9.142513242330214e-07, "loss": 0.3677, "step": 7618 }, { "epoch": 4.119120562263471, "grad_norm": 0.2583546042442322, "learning_rate": 9.131633378906707e-07, "loss": 0.3517, "step": 7619 }, { "epoch": 4.119661200216255, "grad_norm": 0.272354394197464, "learning_rate": 9.120759342420821e-07, "loss": 0.4086, "step": 7620 }, { "epoch": 4.12020183816904, "grad_norm": 0.246135875582695, "learning_rate": 9.109891134422944e-07, "loss": 0.3132, "step": 7621 }, { "epoch": 4.120742476121824, "grad_norm": 0.2872975468635559, "learning_rate": 9.099028756462658e-07, "loss": 0.3891, "step": 7622 }, { "epoch": 4.1212831140746085, "grad_norm": 0.27051842212677, "learning_rate": 9.088172210088692e-07, "loss": 0.3507, "step": 7623 }, { "epoch": 4.121823752027392, "grad_norm": 0.26585105061531067, "learning_rate": 9.077321496848945e-07, "loss": 0.3237, "step": 7624 }, { "epoch": 4.122364389980176, "grad_norm": 0.2786099314689636, "learning_rate": 9.066476618290515e-07, "loss": 0.3655, "step": 7625 }, { "epoch": 4.122905027932961, "grad_norm": 0.2532130181789398, "learning_rate": 9.055637575959614e-07, "loss": 0.3422, "step": 7626 }, { "epoch": 4.123445665885745, "grad_norm": 0.2793992757797241, "learning_rate": 9.044804371401699e-07, "loss": 0.3775, "step": 7627 }, { "epoch": 4.12398630383853, "grad_norm": 0.26155126094818115, "learning_rate": 9.033977006161299e-07, "loss": 0.3372, "step": 7628 }, { "epoch": 4.124526941791314, "grad_norm": 0.28930824995040894, "learning_rate": 9.023155481782175e-07, "loss": 0.3846, "step": 7629 }, { "epoch": 4.125067579744098, "grad_norm": 0.27423110604286194, "learning_rate": 9.012339799807263e-07, "loss": 0.3555, "step": 7630 }, { "epoch": 4.1256082176968825, "grad_norm": 0.28901660442352295, "learning_rate": 9.001529961778627e-07, "loss": 0.3563, "step": 7631 }, { "epoch": 4.126148855649666, "grad_norm": 0.2584855258464813, "learning_rate": 8.990725969237513e-07, "loss": 0.336, "step": 7632 }, { "epoch": 4.126689493602451, "grad_norm": 0.28122976422309875, "learning_rate": 8.979927823724321e-07, "loss": 0.3897, "step": 7633 }, { "epoch": 4.127230131555235, "grad_norm": 0.25875017046928406, "learning_rate": 8.96913552677866e-07, "loss": 0.3741, "step": 7634 }, { "epoch": 4.12777076950802, "grad_norm": 0.24487367272377014, "learning_rate": 8.95834907993926e-07, "loss": 0.3215, "step": 7635 }, { "epoch": 4.128311407460804, "grad_norm": 0.27708467841148376, "learning_rate": 8.947568484744029e-07, "loss": 0.3661, "step": 7636 }, { "epoch": 4.128852045413588, "grad_norm": 0.2652656137943268, "learning_rate": 8.936793742730054e-07, "loss": 0.3541, "step": 7637 }, { "epoch": 4.1293926833663726, "grad_norm": 0.2697584331035614, "learning_rate": 8.926024855433569e-07, "loss": 0.3531, "step": 7638 }, { "epoch": 4.1299333213191565, "grad_norm": 0.2621810734272003, "learning_rate": 8.915261824389998e-07, "loss": 0.3563, "step": 7639 }, { "epoch": 4.130473959271941, "grad_norm": 0.2676509916782379, "learning_rate": 8.904504651133905e-07, "loss": 0.4032, "step": 7640 }, { "epoch": 4.131014597224725, "grad_norm": 0.25960397720336914, "learning_rate": 8.893753337199018e-07, "loss": 0.3208, "step": 7641 }, { "epoch": 4.131555235177509, "grad_norm": 0.2807498574256897, "learning_rate": 8.883007884118261e-07, "loss": 0.3703, "step": 7642 }, { "epoch": 4.132095873130294, "grad_norm": 0.28550735116004944, "learning_rate": 8.872268293423691e-07, "loss": 0.3879, "step": 7643 }, { "epoch": 4.132636511083078, "grad_norm": 0.2682303488254547, "learning_rate": 8.861534566646534e-07, "loss": 0.3524, "step": 7644 }, { "epoch": 4.133177149035863, "grad_norm": 0.28757935762405396, "learning_rate": 8.850806705317183e-07, "loss": 0.3955, "step": 7645 }, { "epoch": 4.133717786988647, "grad_norm": 0.28239282965660095, "learning_rate": 8.840084710965202e-07, "loss": 0.3573, "step": 7646 }, { "epoch": 4.1342584249414305, "grad_norm": 0.2718372344970703, "learning_rate": 8.829368585119335e-07, "loss": 0.3518, "step": 7647 }, { "epoch": 4.134799062894215, "grad_norm": 0.2815796434879303, "learning_rate": 8.818658329307428e-07, "loss": 0.3709, "step": 7648 }, { "epoch": 4.135339700846999, "grad_norm": 0.2764212489128113, "learning_rate": 8.807953945056563e-07, "loss": 0.3715, "step": 7649 }, { "epoch": 4.135880338799784, "grad_norm": 0.2787606120109558, "learning_rate": 8.797255433892926e-07, "loss": 0.3421, "step": 7650 }, { "epoch": 4.136420976752568, "grad_norm": 0.2819206118583679, "learning_rate": 8.786562797341913e-07, "loss": 0.383, "step": 7651 }, { "epoch": 4.136961614705353, "grad_norm": 0.2716692090034485, "learning_rate": 8.775876036928055e-07, "loss": 0.35, "step": 7652 }, { "epoch": 4.137502252658137, "grad_norm": 0.27148786187171936, "learning_rate": 8.765195154175032e-07, "loss": 0.3499, "step": 7653 }, { "epoch": 4.138042890610921, "grad_norm": 0.27956753969192505, "learning_rate": 8.754520150605739e-07, "loss": 0.3918, "step": 7654 }, { "epoch": 4.138583528563705, "grad_norm": 0.2726864516735077, "learning_rate": 8.743851027742172e-07, "loss": 0.3444, "step": 7655 }, { "epoch": 4.139124166516489, "grad_norm": 0.2886405289173126, "learning_rate": 8.73318778710553e-07, "loss": 0.3778, "step": 7656 }, { "epoch": 4.139664804469274, "grad_norm": 0.2725919783115387, "learning_rate": 8.722530430216137e-07, "loss": 0.3448, "step": 7657 }, { "epoch": 4.140205442422058, "grad_norm": 0.2917782962322235, "learning_rate": 8.711878958593512e-07, "loss": 0.4016, "step": 7658 }, { "epoch": 4.140746080374842, "grad_norm": 0.252810001373291, "learning_rate": 8.701233373756352e-07, "loss": 0.3422, "step": 7659 }, { "epoch": 4.141286718327627, "grad_norm": 0.2579265236854553, "learning_rate": 8.690593677222431e-07, "loss": 0.3366, "step": 7660 }, { "epoch": 4.141827356280411, "grad_norm": 0.2721611559391022, "learning_rate": 8.67995987050878e-07, "loss": 0.3984, "step": 7661 }, { "epoch": 4.1423679942331955, "grad_norm": 0.276642382144928, "learning_rate": 8.669331955131521e-07, "loss": 0.3421, "step": 7662 }, { "epoch": 4.142908632185979, "grad_norm": 0.2617895007133484, "learning_rate": 8.658709932605985e-07, "loss": 0.3139, "step": 7663 }, { "epoch": 4.143449270138763, "grad_norm": 0.2905745804309845, "learning_rate": 8.648093804446633e-07, "loss": 0.3975, "step": 7664 }, { "epoch": 4.143989908091548, "grad_norm": 0.26618894934654236, "learning_rate": 8.637483572167077e-07, "loss": 0.3516, "step": 7665 }, { "epoch": 4.144530546044332, "grad_norm": 0.24945282936096191, "learning_rate": 8.626879237280128e-07, "loss": 0.352, "step": 7666 }, { "epoch": 4.145071183997117, "grad_norm": 0.2615450918674469, "learning_rate": 8.616280801297727e-07, "loss": 0.3153, "step": 7667 }, { "epoch": 4.145611821949901, "grad_norm": 0.2893877923488617, "learning_rate": 8.60568826573096e-07, "loss": 0.4022, "step": 7668 }, { "epoch": 4.1461524599026855, "grad_norm": 0.25844526290893555, "learning_rate": 8.59510163209012e-07, "loss": 0.371, "step": 7669 }, { "epoch": 4.1466930978554695, "grad_norm": 0.25491437315940857, "learning_rate": 8.584520901884608e-07, "loss": 0.3228, "step": 7670 }, { "epoch": 4.147233735808253, "grad_norm": 0.2766035795211792, "learning_rate": 8.573946076623035e-07, "loss": 0.3799, "step": 7671 }, { "epoch": 4.147774373761038, "grad_norm": 0.2754434049129486, "learning_rate": 8.563377157813102e-07, "loss": 0.3537, "step": 7672 }, { "epoch": 4.148315011713822, "grad_norm": 0.26777127385139465, "learning_rate": 8.55281414696173e-07, "loss": 0.3135, "step": 7673 }, { "epoch": 4.148855649666607, "grad_norm": 0.2970055341720581, "learning_rate": 8.542257045574959e-07, "loss": 0.3784, "step": 7674 }, { "epoch": 4.149396287619391, "grad_norm": 0.25933367013931274, "learning_rate": 8.531705855158024e-07, "loss": 0.3665, "step": 7675 }, { "epoch": 4.149936925572175, "grad_norm": 0.2712830603122711, "learning_rate": 8.521160577215282e-07, "loss": 0.3862, "step": 7676 }, { "epoch": 4.15047756352496, "grad_norm": 0.26192939281463623, "learning_rate": 8.510621213250248e-07, "loss": 0.3636, "step": 7677 }, { "epoch": 4.1510182014777435, "grad_norm": 0.27634140849113464, "learning_rate": 8.500087764765624e-07, "loss": 0.3897, "step": 7678 }, { "epoch": 4.151558839430528, "grad_norm": 0.2597748339176178, "learning_rate": 8.489560233263244e-07, "loss": 0.3558, "step": 7679 }, { "epoch": 4.152099477383312, "grad_norm": 0.26447004079818726, "learning_rate": 8.479038620244089e-07, "loss": 0.3543, "step": 7680 }, { "epoch": 4.152640115336097, "grad_norm": 0.261905312538147, "learning_rate": 8.468522927208345e-07, "loss": 0.3448, "step": 7681 }, { "epoch": 4.153180753288881, "grad_norm": 0.27034062147140503, "learning_rate": 8.458013155655281e-07, "loss": 0.3575, "step": 7682 }, { "epoch": 4.153721391241665, "grad_norm": 0.27242255210876465, "learning_rate": 8.447509307083406e-07, "loss": 0.37, "step": 7683 }, { "epoch": 4.15426202919445, "grad_norm": 0.25309810042381287, "learning_rate": 8.43701138299029e-07, "loss": 0.327, "step": 7684 }, { "epoch": 4.154802667147234, "grad_norm": 0.2773438096046448, "learning_rate": 8.426519384872733e-07, "loss": 0.4048, "step": 7685 }, { "epoch": 4.155343305100018, "grad_norm": 0.2672390937805176, "learning_rate": 8.416033314226679e-07, "loss": 0.3757, "step": 7686 }, { "epoch": 4.155883943052802, "grad_norm": 0.2610105574131012, "learning_rate": 8.405553172547188e-07, "loss": 0.341, "step": 7687 }, { "epoch": 4.156424581005586, "grad_norm": 0.25678926706314087, "learning_rate": 8.395078961328529e-07, "loss": 0.3319, "step": 7688 }, { "epoch": 4.156965218958371, "grad_norm": 0.2516123354434967, "learning_rate": 8.384610682064054e-07, "loss": 0.3575, "step": 7689 }, { "epoch": 4.157505856911155, "grad_norm": 0.26414674520492554, "learning_rate": 8.374148336246352e-07, "loss": 0.3365, "step": 7690 }, { "epoch": 4.15804649486394, "grad_norm": 0.28988468647003174, "learning_rate": 8.36369192536709e-07, "loss": 0.3601, "step": 7691 }, { "epoch": 4.158587132816724, "grad_norm": 0.2789464294910431, "learning_rate": 8.353241450917154e-07, "loss": 0.4034, "step": 7692 }, { "epoch": 4.1591277707695085, "grad_norm": 0.2817886173725128, "learning_rate": 8.342796914386548e-07, "loss": 0.371, "step": 7693 }, { "epoch": 4.159668408722292, "grad_norm": 0.2487310916185379, "learning_rate": 8.332358317264411e-07, "loss": 0.3197, "step": 7694 }, { "epoch": 4.160209046675076, "grad_norm": 0.2966874837875366, "learning_rate": 8.321925661039088e-07, "loss": 0.3647, "step": 7695 }, { "epoch": 4.160749684627861, "grad_norm": 0.2805984616279602, "learning_rate": 8.311498947198037e-07, "loss": 0.3694, "step": 7696 }, { "epoch": 4.161290322580645, "grad_norm": 0.2664489150047302, "learning_rate": 8.301078177227873e-07, "loss": 0.3814, "step": 7697 }, { "epoch": 4.16183096053343, "grad_norm": 0.2719545066356659, "learning_rate": 8.290663352614386e-07, "loss": 0.3699, "step": 7698 }, { "epoch": 4.162371598486214, "grad_norm": 0.2809935510158539, "learning_rate": 8.28025447484248e-07, "loss": 0.3504, "step": 7699 }, { "epoch": 4.162912236438998, "grad_norm": 0.284708708524704, "learning_rate": 8.269851545396279e-07, "loss": 0.4177, "step": 7700 }, { "epoch": 4.1634528743917825, "grad_norm": 0.27322468161582947, "learning_rate": 8.259454565758951e-07, "loss": 0.3687, "step": 7701 }, { "epoch": 4.163993512344566, "grad_norm": 0.27562010288238525, "learning_rate": 8.249063537412926e-07, "loss": 0.3645, "step": 7702 }, { "epoch": 4.164534150297351, "grad_norm": 0.27685800194740295, "learning_rate": 8.238678461839711e-07, "loss": 0.3691, "step": 7703 }, { "epoch": 4.165074788250135, "grad_norm": 0.28065788745880127, "learning_rate": 8.228299340520018e-07, "loss": 0.3663, "step": 7704 }, { "epoch": 4.165615426202919, "grad_norm": 0.25994250178337097, "learning_rate": 8.217926174933665e-07, "loss": 0.3384, "step": 7705 }, { "epoch": 4.166156064155704, "grad_norm": 0.29021185636520386, "learning_rate": 8.207558966559631e-07, "loss": 0.3729, "step": 7706 }, { "epoch": 4.166696702108488, "grad_norm": 0.27554744482040405, "learning_rate": 8.197197716876076e-07, "loss": 0.3611, "step": 7707 }, { "epoch": 4.1672373400612726, "grad_norm": 0.2929653525352478, "learning_rate": 8.186842427360275e-07, "loss": 0.3552, "step": 7708 }, { "epoch": 4.1677779780140565, "grad_norm": 0.2645328640937805, "learning_rate": 8.176493099488664e-07, "loss": 0.3384, "step": 7709 }, { "epoch": 4.168318615966841, "grad_norm": 0.2650566101074219, "learning_rate": 8.166149734736845e-07, "loss": 0.3365, "step": 7710 }, { "epoch": 4.168859253919625, "grad_norm": 0.27955883741378784, "learning_rate": 8.155812334579532e-07, "loss": 0.3455, "step": 7711 }, { "epoch": 4.169399891872409, "grad_norm": 0.2708788812160492, "learning_rate": 8.145480900490654e-07, "loss": 0.3581, "step": 7712 }, { "epoch": 4.169940529825194, "grad_norm": 0.27975592017173767, "learning_rate": 8.135155433943199e-07, "loss": 0.3926, "step": 7713 }, { "epoch": 4.170481167777978, "grad_norm": 0.26336678862571716, "learning_rate": 8.124835936409376e-07, "loss": 0.3385, "step": 7714 }, { "epoch": 4.171021805730763, "grad_norm": 0.24862268567085266, "learning_rate": 8.114522409360531e-07, "loss": 0.3613, "step": 7715 }, { "epoch": 4.171562443683547, "grad_norm": 0.2469266802072525, "learning_rate": 8.104214854267134e-07, "loss": 0.37, "step": 7716 }, { "epoch": 4.1721030816363305, "grad_norm": 0.2724321484565735, "learning_rate": 8.09391327259883e-07, "loss": 0.4061, "step": 7717 }, { "epoch": 4.172643719589115, "grad_norm": 0.2582729458808899, "learning_rate": 8.083617665824373e-07, "loss": 0.3288, "step": 7718 }, { "epoch": 4.173184357541899, "grad_norm": 0.26399657130241394, "learning_rate": 8.073328035411726e-07, "loss": 0.3592, "step": 7719 }, { "epoch": 4.173724995494684, "grad_norm": 0.267774760723114, "learning_rate": 8.063044382827945e-07, "loss": 0.38, "step": 7720 }, { "epoch": 4.174265633447468, "grad_norm": 0.29686951637268066, "learning_rate": 8.05276670953925e-07, "loss": 0.3831, "step": 7721 }, { "epoch": 4.174806271400252, "grad_norm": 0.27032238245010376, "learning_rate": 8.042495017011037e-07, "loss": 0.3454, "step": 7722 }, { "epoch": 4.175346909353037, "grad_norm": 0.27771714329719543, "learning_rate": 8.032229306707795e-07, "loss": 0.3752, "step": 7723 }, { "epoch": 4.175887547305821, "grad_norm": 0.3001413643360138, "learning_rate": 8.021969580093231e-07, "loss": 0.3948, "step": 7724 }, { "epoch": 4.176428185258605, "grad_norm": 0.26011037826538086, "learning_rate": 8.011715838630107e-07, "loss": 0.3121, "step": 7725 }, { "epoch": 4.176968823211389, "grad_norm": 0.2860434055328369, "learning_rate": 8.001468083780418e-07, "loss": 0.3841, "step": 7726 }, { "epoch": 4.177509461164174, "grad_norm": 0.28014615178108215, "learning_rate": 7.991226317005263e-07, "loss": 0.3785, "step": 7727 }, { "epoch": 4.178050099116958, "grad_norm": 0.27071434259414673, "learning_rate": 7.980990539764898e-07, "loss": 0.3471, "step": 7728 }, { "epoch": 4.178590737069742, "grad_norm": 0.2784453332424164, "learning_rate": 7.970760753518713e-07, "loss": 0.3494, "step": 7729 }, { "epoch": 4.179131375022527, "grad_norm": 0.2652834355831146, "learning_rate": 7.960536959725252e-07, "loss": 0.3575, "step": 7730 }, { "epoch": 4.179672012975311, "grad_norm": 0.2955927848815918, "learning_rate": 7.950319159842212e-07, "loss": 0.3861, "step": 7731 }, { "epoch": 4.1802126509280955, "grad_norm": 0.2936646640300751, "learning_rate": 7.940107355326431e-07, "loss": 0.3866, "step": 7732 }, { "epoch": 4.180753288880879, "grad_norm": 0.2582504153251648, "learning_rate": 7.929901547633867e-07, "loss": 0.3455, "step": 7733 }, { "epoch": 4.181293926833663, "grad_norm": 0.29754340648651123, "learning_rate": 7.919701738219677e-07, "loss": 0.3326, "step": 7734 }, { "epoch": 4.181834564786448, "grad_norm": 0.27368760108947754, "learning_rate": 7.909507928538107e-07, "loss": 0.3812, "step": 7735 }, { "epoch": 4.182375202739232, "grad_norm": 0.2942587733268738, "learning_rate": 7.899320120042592e-07, "loss": 0.3581, "step": 7736 }, { "epoch": 4.182915840692017, "grad_norm": 0.27439722418785095, "learning_rate": 7.88913831418568e-07, "loss": 0.3367, "step": 7737 }, { "epoch": 4.183456478644801, "grad_norm": 0.2757376730442047, "learning_rate": 7.878962512419064e-07, "loss": 0.3921, "step": 7738 }, { "epoch": 4.1839971165975856, "grad_norm": 0.25311020016670227, "learning_rate": 7.868792716193613e-07, "loss": 0.3511, "step": 7739 }, { "epoch": 4.1845377545503695, "grad_norm": 0.2917262017726898, "learning_rate": 7.858628926959311e-07, "loss": 0.408, "step": 7740 }, { "epoch": 4.185078392503153, "grad_norm": 0.26574715971946716, "learning_rate": 7.848471146165287e-07, "loss": 0.3321, "step": 7741 }, { "epoch": 4.185619030455938, "grad_norm": 0.2669033110141754, "learning_rate": 7.838319375259806e-07, "loss": 0.3482, "step": 7742 }, { "epoch": 4.186159668408722, "grad_norm": 0.29009807109832764, "learning_rate": 7.828173615690309e-07, "loss": 0.3863, "step": 7743 }, { "epoch": 4.186700306361507, "grad_norm": 0.278519868850708, "learning_rate": 7.81803386890338e-07, "loss": 0.3781, "step": 7744 }, { "epoch": 4.187240944314291, "grad_norm": 0.2703883945941925, "learning_rate": 7.807900136344676e-07, "loss": 0.3731, "step": 7745 }, { "epoch": 4.187781582267075, "grad_norm": 0.2684577405452728, "learning_rate": 7.797772419459082e-07, "loss": 0.3341, "step": 7746 }, { "epoch": 4.18832222021986, "grad_norm": 0.2858303189277649, "learning_rate": 7.78765071969057e-07, "loss": 0.3782, "step": 7747 }, { "epoch": 4.1888628581726435, "grad_norm": 0.24063695967197418, "learning_rate": 7.777535038482293e-07, "loss": 0.3238, "step": 7748 }, { "epoch": 4.189403496125428, "grad_norm": 0.2742394506931305, "learning_rate": 7.767425377276516e-07, "loss": 0.3743, "step": 7749 }, { "epoch": 4.189944134078212, "grad_norm": 0.26466435194015503, "learning_rate": 7.757321737514645e-07, "loss": 0.3632, "step": 7750 }, { "epoch": 4.190484772030997, "grad_norm": 0.2548874616622925, "learning_rate": 7.747224120637265e-07, "loss": 0.3337, "step": 7751 }, { "epoch": 4.191025409983781, "grad_norm": 0.2508438229560852, "learning_rate": 7.73713252808404e-07, "loss": 0.3469, "step": 7752 }, { "epoch": 4.191566047936565, "grad_norm": 0.274068146944046, "learning_rate": 7.727046961293849e-07, "loss": 0.4007, "step": 7753 }, { "epoch": 4.19210668588935, "grad_norm": 0.2605467140674591, "learning_rate": 7.716967421704658e-07, "loss": 0.3373, "step": 7754 }, { "epoch": 4.192647323842134, "grad_norm": 0.269639253616333, "learning_rate": 7.706893910753571e-07, "loss": 0.3881, "step": 7755 }, { "epoch": 4.193187961794918, "grad_norm": 0.26445725560188293, "learning_rate": 7.696826429876885e-07, "loss": 0.3579, "step": 7756 }, { "epoch": 4.193728599747702, "grad_norm": 0.2784506380558014, "learning_rate": 7.686764980509986e-07, "loss": 0.3451, "step": 7757 }, { "epoch": 4.194269237700486, "grad_norm": 0.29687944054603577, "learning_rate": 7.676709564087414e-07, "loss": 0.3371, "step": 7758 }, { "epoch": 4.194809875653271, "grad_norm": 0.25223809480667114, "learning_rate": 7.666660182042845e-07, "loss": 0.3639, "step": 7759 }, { "epoch": 4.195350513606055, "grad_norm": 0.25930145382881165, "learning_rate": 7.656616835809122e-07, "loss": 0.383, "step": 7760 }, { "epoch": 4.19589115155884, "grad_norm": 0.2787495255470276, "learning_rate": 7.646579526818198e-07, "loss": 0.3904, "step": 7761 }, { "epoch": 4.196431789511624, "grad_norm": 0.25165069103240967, "learning_rate": 7.636548256501164e-07, "loss": 0.3399, "step": 7762 }, { "epoch": 4.196972427464408, "grad_norm": 0.2803820073604584, "learning_rate": 7.626523026288279e-07, "loss": 0.369, "step": 7763 }, { "epoch": 4.197513065417192, "grad_norm": 0.2803477942943573, "learning_rate": 7.616503837608907e-07, "loss": 0.3955, "step": 7764 }, { "epoch": 4.198053703369976, "grad_norm": 0.25558480620384216, "learning_rate": 7.606490691891577e-07, "loss": 0.3223, "step": 7765 }, { "epoch": 4.198594341322761, "grad_norm": 0.26520413160324097, "learning_rate": 7.596483590563942e-07, "loss": 0.3362, "step": 7766 }, { "epoch": 4.199134979275545, "grad_norm": 0.2912404537200928, "learning_rate": 7.586482535052781e-07, "loss": 0.4001, "step": 7767 }, { "epoch": 4.19967561722833, "grad_norm": 0.2695380747318268, "learning_rate": 7.576487526784054e-07, "loss": 0.3304, "step": 7768 }, { "epoch": 4.200216255181114, "grad_norm": 0.280534029006958, "learning_rate": 7.566498567182812e-07, "loss": 0.381, "step": 7769 }, { "epoch": 4.200756893133898, "grad_norm": 0.24744150042533875, "learning_rate": 7.556515657673274e-07, "loss": 0.3198, "step": 7770 }, { "epoch": 4.2012975310866825, "grad_norm": 0.2521040439605713, "learning_rate": 7.54653879967876e-07, "loss": 0.3515, "step": 7771 }, { "epoch": 4.201838169039466, "grad_norm": 0.2577064037322998, "learning_rate": 7.536567994621774e-07, "loss": 0.3666, "step": 7772 }, { "epoch": 4.202378806992251, "grad_norm": 0.26439905166625977, "learning_rate": 7.526603243923958e-07, "loss": 0.3363, "step": 7773 }, { "epoch": 4.202919444945035, "grad_norm": 0.2696458399295807, "learning_rate": 7.516644549006019e-07, "loss": 0.3617, "step": 7774 }, { "epoch": 4.203460082897819, "grad_norm": 0.3011111915111542, "learning_rate": 7.506691911287883e-07, "loss": 0.3981, "step": 7775 }, { "epoch": 4.204000720850604, "grad_norm": 0.26315832138061523, "learning_rate": 7.496745332188555e-07, "loss": 0.3306, "step": 7776 }, { "epoch": 4.204541358803388, "grad_norm": 0.27317333221435547, "learning_rate": 7.486804813126224e-07, "loss": 0.3573, "step": 7777 }, { "epoch": 4.205081996756173, "grad_norm": 0.26644495129585266, "learning_rate": 7.47687035551819e-07, "loss": 0.322, "step": 7778 }, { "epoch": 4.2056226347089565, "grad_norm": 0.2756754159927368, "learning_rate": 7.466941960780866e-07, "loss": 0.3571, "step": 7779 }, { "epoch": 4.206163272661741, "grad_norm": 0.2685154676437378, "learning_rate": 7.457019630329848e-07, "loss": 0.3729, "step": 7780 }, { "epoch": 4.206703910614525, "grad_norm": 0.25748974084854126, "learning_rate": 7.447103365579839e-07, "loss": 0.3518, "step": 7781 }, { "epoch": 4.207244548567309, "grad_norm": 0.28093305230140686, "learning_rate": 7.437193167944668e-07, "loss": 0.367, "step": 7782 }, { "epoch": 4.207785186520094, "grad_norm": 0.26227885484695435, "learning_rate": 7.427289038837332e-07, "loss": 0.3396, "step": 7783 }, { "epoch": 4.208325824472878, "grad_norm": 0.28405529260635376, "learning_rate": 7.417390979669925e-07, "loss": 0.3752, "step": 7784 }, { "epoch": 4.208866462425663, "grad_norm": 0.27451229095458984, "learning_rate": 7.407498991853729e-07, "loss": 0.3739, "step": 7785 }, { "epoch": 4.209407100378447, "grad_norm": 0.26741325855255127, "learning_rate": 7.397613076799082e-07, "loss": 0.3477, "step": 7786 }, { "epoch": 4.2099477383312305, "grad_norm": 0.26035529375076294, "learning_rate": 7.387733235915528e-07, "loss": 0.3541, "step": 7787 }, { "epoch": 4.210488376284015, "grad_norm": 0.30314940214157104, "learning_rate": 7.377859470611692e-07, "loss": 0.405, "step": 7788 }, { "epoch": 4.211029014236799, "grad_norm": 0.26151275634765625, "learning_rate": 7.367991782295392e-07, "loss": 0.3248, "step": 7789 }, { "epoch": 4.211569652189584, "grad_norm": 0.2777373492717743, "learning_rate": 7.358130172373523e-07, "loss": 0.3692, "step": 7790 }, { "epoch": 4.212110290142368, "grad_norm": 0.2807091176509857, "learning_rate": 7.348274642252129e-07, "loss": 0.3795, "step": 7791 }, { "epoch": 4.212650928095153, "grad_norm": 0.2608586251735687, "learning_rate": 7.338425193336418e-07, "loss": 0.334, "step": 7792 }, { "epoch": 4.213191566047937, "grad_norm": 0.27377480268478394, "learning_rate": 7.328581827030689e-07, "loss": 0.3557, "step": 7793 }, { "epoch": 4.213732204000721, "grad_norm": 0.2679743766784668, "learning_rate": 7.318744544738387e-07, "loss": 0.3732, "step": 7794 }, { "epoch": 4.214272841953505, "grad_norm": 0.2861184775829315, "learning_rate": 7.308913347862112e-07, "loss": 0.3551, "step": 7795 }, { "epoch": 4.214813479906289, "grad_norm": 0.2709786891937256, "learning_rate": 7.299088237803559e-07, "loss": 0.3327, "step": 7796 }, { "epoch": 4.215354117859074, "grad_norm": 0.2617168426513672, "learning_rate": 7.289269215963602e-07, "loss": 0.3601, "step": 7797 }, { "epoch": 4.215894755811858, "grad_norm": 0.24455086886882782, "learning_rate": 7.279456283742175e-07, "loss": 0.356, "step": 7798 }, { "epoch": 4.216435393764642, "grad_norm": 0.261747807264328, "learning_rate": 7.269649442538435e-07, "loss": 0.3554, "step": 7799 }, { "epoch": 4.216976031717427, "grad_norm": 0.2773091197013855, "learning_rate": 7.259848693750582e-07, "loss": 0.3502, "step": 7800 }, { "epoch": 4.217516669670211, "grad_norm": 0.29389703273773193, "learning_rate": 7.250054038776022e-07, "loss": 0.4092, "step": 7801 }, { "epoch": 4.2180573076229955, "grad_norm": 0.26419511437416077, "learning_rate": 7.240265479011249e-07, "loss": 0.3365, "step": 7802 }, { "epoch": 4.218597945575779, "grad_norm": 0.2726242244243622, "learning_rate": 7.230483015851886e-07, "loss": 0.375, "step": 7803 }, { "epoch": 4.219138583528563, "grad_norm": 0.2631964087486267, "learning_rate": 7.220706650692716e-07, "loss": 0.3767, "step": 7804 }, { "epoch": 4.219679221481348, "grad_norm": 0.2645455002784729, "learning_rate": 7.210936384927631e-07, "loss": 0.3748, "step": 7805 }, { "epoch": 4.220219859434132, "grad_norm": 0.2717190086841583, "learning_rate": 7.201172219949643e-07, "loss": 0.363, "step": 7806 }, { "epoch": 4.220760497386917, "grad_norm": 0.2631307542324066, "learning_rate": 7.191414157150933e-07, "loss": 0.3387, "step": 7807 }, { "epoch": 4.221301135339701, "grad_norm": 0.27695712447166443, "learning_rate": 7.181662197922762e-07, "loss": 0.3628, "step": 7808 }, { "epoch": 4.2218417732924856, "grad_norm": 0.2771584689617157, "learning_rate": 7.171916343655583e-07, "loss": 0.3686, "step": 7809 }, { "epoch": 4.2223824112452695, "grad_norm": 0.29694145917892456, "learning_rate": 7.162176595738895e-07, "loss": 0.4222, "step": 7810 }, { "epoch": 4.222923049198053, "grad_norm": 0.26398229598999023, "learning_rate": 7.1524429555614e-07, "loss": 0.3479, "step": 7811 }, { "epoch": 4.223463687150838, "grad_norm": 0.288509726524353, "learning_rate": 7.142715424510915e-07, "loss": 0.397, "step": 7812 }, { "epoch": 4.224004325103622, "grad_norm": 0.2654188275337219, "learning_rate": 7.132994003974359e-07, "loss": 0.32, "step": 7813 }, { "epoch": 4.224544963056407, "grad_norm": 0.28310319781303406, "learning_rate": 7.123278695337793e-07, "loss": 0.3652, "step": 7814 }, { "epoch": 4.225085601009191, "grad_norm": 0.2651018500328064, "learning_rate": 7.113569499986401e-07, "loss": 0.3697, "step": 7815 }, { "epoch": 4.225626238961975, "grad_norm": 0.27345407009124756, "learning_rate": 7.103866419304517e-07, "loss": 0.3935, "step": 7816 }, { "epoch": 4.22616687691476, "grad_norm": 0.24579817056655884, "learning_rate": 7.094169454675575e-07, "loss": 0.3125, "step": 7817 }, { "epoch": 4.2267075148675435, "grad_norm": 0.284580796957016, "learning_rate": 7.084478607482176e-07, "loss": 0.3887, "step": 7818 }, { "epoch": 4.227248152820328, "grad_norm": 0.2559194564819336, "learning_rate": 7.074793879106001e-07, "loss": 0.3079, "step": 7819 }, { "epoch": 4.227788790773112, "grad_norm": 0.28715476393699646, "learning_rate": 7.065115270927875e-07, "loss": 0.3845, "step": 7820 }, { "epoch": 4.228329428725896, "grad_norm": 0.26812219619750977, "learning_rate": 7.055442784327782e-07, "loss": 0.3495, "step": 7821 }, { "epoch": 4.228870066678681, "grad_norm": 0.2592289447784424, "learning_rate": 7.045776420684791e-07, "loss": 0.3553, "step": 7822 }, { "epoch": 4.229410704631465, "grad_norm": 0.24718797206878662, "learning_rate": 7.036116181377106e-07, "loss": 0.341, "step": 7823 }, { "epoch": 4.22995134258425, "grad_norm": 0.26759886741638184, "learning_rate": 7.026462067782086e-07, "loss": 0.3739, "step": 7824 }, { "epoch": 4.230491980537034, "grad_norm": 0.25544220209121704, "learning_rate": 7.01681408127618e-07, "loss": 0.3363, "step": 7825 }, { "epoch": 4.231032618489818, "grad_norm": 0.2759501039981842, "learning_rate": 7.00717222323501e-07, "loss": 0.4003, "step": 7826 }, { "epoch": 4.231573256442602, "grad_norm": 0.26685962080955505, "learning_rate": 6.997536495033252e-07, "loss": 0.3538, "step": 7827 }, { "epoch": 4.232113894395386, "grad_norm": 0.28557097911834717, "learning_rate": 6.987906898044783e-07, "loss": 0.3911, "step": 7828 }, { "epoch": 4.232654532348171, "grad_norm": 0.25702571868896484, "learning_rate": 6.978283433642552e-07, "loss": 0.3415, "step": 7829 }, { "epoch": 4.233195170300955, "grad_norm": 0.2851904630661011, "learning_rate": 6.968666103198679e-07, "loss": 0.3892, "step": 7830 }, { "epoch": 4.23373580825374, "grad_norm": 0.27647873759269714, "learning_rate": 6.959054908084367e-07, "loss": 0.3471, "step": 7831 }, { "epoch": 4.234276446206524, "grad_norm": 0.2786784768104553, "learning_rate": 6.949449849669965e-07, "loss": 0.3326, "step": 7832 }, { "epoch": 4.234817084159308, "grad_norm": 0.2770426571369171, "learning_rate": 6.939850929324954e-07, "loss": 0.3848, "step": 7833 }, { "epoch": 4.235357722112092, "grad_norm": 0.2511410117149353, "learning_rate": 6.930258148417924e-07, "loss": 0.3682, "step": 7834 }, { "epoch": 4.235898360064876, "grad_norm": 0.26557570695877075, "learning_rate": 6.920671508316584e-07, "loss": 0.379, "step": 7835 }, { "epoch": 4.236438998017661, "grad_norm": 0.26921749114990234, "learning_rate": 6.911091010387805e-07, "loss": 0.347, "step": 7836 }, { "epoch": 4.236979635970445, "grad_norm": 0.2844080924987793, "learning_rate": 6.901516655997536e-07, "loss": 0.3849, "step": 7837 }, { "epoch": 4.23752027392323, "grad_norm": 0.27364280819892883, "learning_rate": 6.891948446510899e-07, "loss": 0.3396, "step": 7838 }, { "epoch": 4.238060911876014, "grad_norm": 0.29257693886756897, "learning_rate": 6.882386383292072e-07, "loss": 0.4091, "step": 7839 }, { "epoch": 4.238601549828798, "grad_norm": 0.26295551657676697, "learning_rate": 6.872830467704417e-07, "loss": 0.3373, "step": 7840 }, { "epoch": 4.2391421877815825, "grad_norm": 0.2602781653404236, "learning_rate": 6.863280701110409e-07, "loss": 0.359, "step": 7841 }, { "epoch": 4.239682825734366, "grad_norm": 0.2525506019592285, "learning_rate": 6.853737084871631e-07, "loss": 0.3564, "step": 7842 }, { "epoch": 4.240223463687151, "grad_norm": 0.2838027775287628, "learning_rate": 6.844199620348784e-07, "loss": 0.386, "step": 7843 }, { "epoch": 4.240764101639935, "grad_norm": 0.2779183089733124, "learning_rate": 6.834668308901704e-07, "loss": 0.3735, "step": 7844 }, { "epoch": 4.241304739592719, "grad_norm": 0.26235607266426086, "learning_rate": 6.825143151889358e-07, "loss": 0.3564, "step": 7845 }, { "epoch": 4.241845377545504, "grad_norm": 0.25980979204177856, "learning_rate": 6.815624150669825e-07, "loss": 0.3438, "step": 7846 }, { "epoch": 4.242386015498288, "grad_norm": 0.2772977948188782, "learning_rate": 6.806111306600289e-07, "loss": 0.379, "step": 7847 }, { "epoch": 4.242926653451073, "grad_norm": 0.2637321650981903, "learning_rate": 6.7966046210371e-07, "loss": 0.3652, "step": 7848 }, { "epoch": 4.2434672914038565, "grad_norm": 0.28105807304382324, "learning_rate": 6.787104095335678e-07, "loss": 0.3663, "step": 7849 }, { "epoch": 4.244007929356641, "grad_norm": 0.28423410654067993, "learning_rate": 6.777609730850615e-07, "loss": 0.3369, "step": 7850 }, { "epoch": 4.244548567309425, "grad_norm": 0.2776162326335907, "learning_rate": 6.768121528935595e-07, "loss": 0.4145, "step": 7851 }, { "epoch": 4.245089205262209, "grad_norm": 0.2545382082462311, "learning_rate": 6.758639490943408e-07, "loss": 0.3385, "step": 7852 }, { "epoch": 4.245629843214994, "grad_norm": 0.273418664932251, "learning_rate": 6.749163618226006e-07, "loss": 0.37, "step": 7853 }, { "epoch": 4.246170481167778, "grad_norm": 0.2679308354854584, "learning_rate": 6.739693912134448e-07, "loss": 0.3502, "step": 7854 }, { "epoch": 4.246711119120563, "grad_norm": 0.29377102851867676, "learning_rate": 6.730230374018886e-07, "loss": 0.3969, "step": 7855 }, { "epoch": 4.247251757073347, "grad_norm": 0.2562815546989441, "learning_rate": 6.72077300522862e-07, "loss": 0.3387, "step": 7856 }, { "epoch": 4.2477923950261305, "grad_norm": 0.2699720859527588, "learning_rate": 6.711321807112076e-07, "loss": 0.3437, "step": 7857 }, { "epoch": 4.248333032978915, "grad_norm": 0.27340906858444214, "learning_rate": 6.701876781016786e-07, "loss": 0.3707, "step": 7858 }, { "epoch": 4.248873670931699, "grad_norm": 0.2673344314098358, "learning_rate": 6.692437928289385e-07, "loss": 0.3436, "step": 7859 }, { "epoch": 4.249414308884484, "grad_norm": 0.2760732173919678, "learning_rate": 6.683005250275676e-07, "loss": 0.3901, "step": 7860 }, { "epoch": 4.249954946837268, "grad_norm": 0.262892484664917, "learning_rate": 6.673578748320525e-07, "loss": 0.3256, "step": 7861 }, { "epoch": 4.250495584790052, "grad_norm": 0.26716530323028564, "learning_rate": 6.664158423767975e-07, "loss": 0.341, "step": 7862 }, { "epoch": 4.251036222742837, "grad_norm": 0.2554490864276886, "learning_rate": 6.654744277961139e-07, "loss": 0.3437, "step": 7863 }, { "epoch": 4.251576860695621, "grad_norm": 0.2965814471244812, "learning_rate": 6.645336312242267e-07, "loss": 0.4271, "step": 7864 }, { "epoch": 4.252117498648405, "grad_norm": 0.2557962238788605, "learning_rate": 6.635934527952747e-07, "loss": 0.3366, "step": 7865 }, { "epoch": 4.252658136601189, "grad_norm": 0.27538374066352844, "learning_rate": 6.626538926433057e-07, "loss": 0.3835, "step": 7866 }, { "epoch": 4.253198774553974, "grad_norm": 0.27699223160743713, "learning_rate": 6.617149509022807e-07, "loss": 0.3508, "step": 7867 }, { "epoch": 4.253739412506758, "grad_norm": 0.28138530254364014, "learning_rate": 6.607766277060712e-07, "loss": 0.3594, "step": 7868 }, { "epoch": 4.254280050459542, "grad_norm": 0.2613767385482788, "learning_rate": 6.598389231884628e-07, "loss": 0.3442, "step": 7869 }, { "epoch": 4.254820688412327, "grad_norm": 0.2648288607597351, "learning_rate": 6.589018374831529e-07, "loss": 0.3515, "step": 7870 }, { "epoch": 4.255361326365111, "grad_norm": 0.2746146321296692, "learning_rate": 6.579653707237465e-07, "loss": 0.3661, "step": 7871 }, { "epoch": 4.2559019643178955, "grad_norm": 0.2693077623844147, "learning_rate": 6.570295230437663e-07, "loss": 0.3794, "step": 7872 }, { "epoch": 4.256442602270679, "grad_norm": 0.265910267829895, "learning_rate": 6.560942945766408e-07, "loss": 0.3581, "step": 7873 }, { "epoch": 4.256983240223463, "grad_norm": 0.27036747336387634, "learning_rate": 6.551596854557158e-07, "loss": 0.3578, "step": 7874 }, { "epoch": 4.257523878176248, "grad_norm": 0.2775130569934845, "learning_rate": 6.542256958142456e-07, "loss": 0.3833, "step": 7875 }, { "epoch": 4.258064516129032, "grad_norm": 0.2686364948749542, "learning_rate": 6.532923257853952e-07, "loss": 0.3435, "step": 7876 }, { "epoch": 4.258605154081817, "grad_norm": 0.27295759320259094, "learning_rate": 6.523595755022444e-07, "loss": 0.3478, "step": 7877 }, { "epoch": 4.259145792034601, "grad_norm": 0.28856146335601807, "learning_rate": 6.514274450977831e-07, "loss": 0.3973, "step": 7878 }, { "epoch": 4.259686429987385, "grad_norm": 0.26978960633277893, "learning_rate": 6.504959347049111e-07, "loss": 0.3632, "step": 7879 }, { "epoch": 4.2602270679401695, "grad_norm": 0.28080758452415466, "learning_rate": 6.495650444564433e-07, "loss": 0.3511, "step": 7880 }, { "epoch": 4.260767705892953, "grad_norm": 0.2930925786495209, "learning_rate": 6.486347744851029e-07, "loss": 0.3753, "step": 7881 }, { "epoch": 4.261308343845738, "grad_norm": 0.26841792464256287, "learning_rate": 6.477051249235278e-07, "loss": 0.3594, "step": 7882 }, { "epoch": 4.261848981798522, "grad_norm": 0.2728954553604126, "learning_rate": 6.467760959042646e-07, "loss": 0.3729, "step": 7883 }, { "epoch": 4.262389619751307, "grad_norm": 0.2672972083091736, "learning_rate": 6.458476875597731e-07, "loss": 0.357, "step": 7884 }, { "epoch": 4.262930257704091, "grad_norm": 0.2825981378555298, "learning_rate": 6.449199000224221e-07, "loss": 0.3432, "step": 7885 }, { "epoch": 4.263470895656875, "grad_norm": 0.2652915120124817, "learning_rate": 6.439927334244972e-07, "loss": 0.3693, "step": 7886 }, { "epoch": 4.26401153360966, "grad_norm": 0.25142279267311096, "learning_rate": 6.430661878981898e-07, "loss": 0.3296, "step": 7887 }, { "epoch": 4.2645521715624435, "grad_norm": 0.27410030364990234, "learning_rate": 6.421402635756053e-07, "loss": 0.3734, "step": 7888 }, { "epoch": 4.265092809515228, "grad_norm": 0.2869071662425995, "learning_rate": 6.412149605887613e-07, "loss": 0.3995, "step": 7889 }, { "epoch": 4.265633447468012, "grad_norm": 0.25402575731277466, "learning_rate": 6.402902790695842e-07, "loss": 0.3128, "step": 7890 }, { "epoch": 4.266174085420796, "grad_norm": 0.27300745248794556, "learning_rate": 6.393662191499156e-07, "loss": 0.3782, "step": 7891 }, { "epoch": 4.266714723373581, "grad_norm": 0.26714909076690674, "learning_rate": 6.384427809615052e-07, "loss": 0.3728, "step": 7892 }, { "epoch": 4.267255361326365, "grad_norm": 0.27628183364868164, "learning_rate": 6.375199646360142e-07, "loss": 0.375, "step": 7893 }, { "epoch": 4.26779599927915, "grad_norm": 0.26573359966278076, "learning_rate": 6.36597770305018e-07, "loss": 0.3544, "step": 7894 }, { "epoch": 4.268336637231934, "grad_norm": 0.2471429854631424, "learning_rate": 6.356761980999998e-07, "loss": 0.3516, "step": 7895 }, { "epoch": 4.268877275184718, "grad_norm": 0.2759144604206085, "learning_rate": 6.347552481523567e-07, "loss": 0.3688, "step": 7896 }, { "epoch": 4.269417913137502, "grad_norm": 0.2551169991493225, "learning_rate": 6.338349205933947e-07, "loss": 0.3372, "step": 7897 }, { "epoch": 4.269958551090286, "grad_norm": 0.2697770893573761, "learning_rate": 6.329152155543333e-07, "loss": 0.3766, "step": 7898 }, { "epoch": 4.270499189043071, "grad_norm": 0.25746408104896545, "learning_rate": 6.319961331663043e-07, "loss": 0.3511, "step": 7899 }, { "epoch": 4.271039826995855, "grad_norm": 0.2764779031276703, "learning_rate": 6.310776735603452e-07, "loss": 0.3884, "step": 7900 }, { "epoch": 4.27158046494864, "grad_norm": 0.25703078508377075, "learning_rate": 6.301598368674106e-07, "loss": 0.3577, "step": 7901 }, { "epoch": 4.272121102901424, "grad_norm": 0.2684788107872009, "learning_rate": 6.29242623218363e-07, "loss": 0.3632, "step": 7902 }, { "epoch": 4.272661740854208, "grad_norm": 0.2789716422557831, "learning_rate": 6.283260327439777e-07, "loss": 0.3806, "step": 7903 }, { "epoch": 4.273202378806992, "grad_norm": 0.2688591182231903, "learning_rate": 6.27410065574941e-07, "loss": 0.3551, "step": 7904 }, { "epoch": 4.273743016759776, "grad_norm": 0.29315346479415894, "learning_rate": 6.264947218418482e-07, "loss": 0.3705, "step": 7905 }, { "epoch": 4.274283654712561, "grad_norm": 0.2573510408401489, "learning_rate": 6.255800016752089e-07, "loss": 0.3632, "step": 7906 }, { "epoch": 4.274824292665345, "grad_norm": 0.2660392224788666, "learning_rate": 6.246659052054416e-07, "loss": 0.3503, "step": 7907 }, { "epoch": 4.27536493061813, "grad_norm": 0.26365071535110474, "learning_rate": 6.237524325628757e-07, "loss": 0.3484, "step": 7908 }, { "epoch": 4.275905568570914, "grad_norm": 0.2605839669704437, "learning_rate": 6.228395838777545e-07, "loss": 0.3662, "step": 7909 }, { "epoch": 4.276446206523698, "grad_norm": 0.2649060785770416, "learning_rate": 6.219273592802278e-07, "loss": 0.3571, "step": 7910 }, { "epoch": 4.2769868444764825, "grad_norm": 0.26063892245292664, "learning_rate": 6.210157589003624e-07, "loss": 0.3796, "step": 7911 }, { "epoch": 4.277527482429266, "grad_norm": 0.24683474004268646, "learning_rate": 6.201047828681289e-07, "loss": 0.3394, "step": 7912 }, { "epoch": 4.278068120382051, "grad_norm": 0.26715049147605896, "learning_rate": 6.191944313134146e-07, "loss": 0.3765, "step": 7913 }, { "epoch": 4.278608758334835, "grad_norm": 0.27711889147758484, "learning_rate": 6.182847043660145e-07, "loss": 0.3673, "step": 7914 }, { "epoch": 4.279149396287619, "grad_norm": 0.2727544605731964, "learning_rate": 6.173756021556377e-07, "loss": 0.3727, "step": 7915 }, { "epoch": 4.279690034240404, "grad_norm": 0.26233989000320435, "learning_rate": 6.164671248119014e-07, "loss": 0.3434, "step": 7916 }, { "epoch": 4.280230672193188, "grad_norm": 0.2919680178165436, "learning_rate": 6.155592724643339e-07, "loss": 0.3797, "step": 7917 }, { "epoch": 4.280771310145973, "grad_norm": 0.26716017723083496, "learning_rate": 6.146520452423765e-07, "loss": 0.3695, "step": 7918 }, { "epoch": 4.2813119480987565, "grad_norm": 0.2504497766494751, "learning_rate": 6.137454432753798e-07, "loss": 0.3341, "step": 7919 }, { "epoch": 4.28185258605154, "grad_norm": 0.2768787741661072, "learning_rate": 6.128394666926035e-07, "loss": 0.3859, "step": 7920 }, { "epoch": 4.282393224004325, "grad_norm": 0.2738492488861084, "learning_rate": 6.119341156232228e-07, "loss": 0.4069, "step": 7921 }, { "epoch": 4.282933861957109, "grad_norm": 0.2559797167778015, "learning_rate": 6.110293901963188e-07, "loss": 0.3368, "step": 7922 }, { "epoch": 4.283474499909894, "grad_norm": 0.26948344707489014, "learning_rate": 6.101252905408883e-07, "loss": 0.3714, "step": 7923 }, { "epoch": 4.284015137862678, "grad_norm": 0.2513277530670166, "learning_rate": 6.092218167858327e-07, "loss": 0.3311, "step": 7924 }, { "epoch": 4.284555775815463, "grad_norm": 0.27394694089889526, "learning_rate": 6.083189690599712e-07, "loss": 0.3546, "step": 7925 }, { "epoch": 4.285096413768247, "grad_norm": 0.27439451217651367, "learning_rate": 6.074167474920267e-07, "loss": 0.369, "step": 7926 }, { "epoch": 4.2856370517210305, "grad_norm": 0.27608034014701843, "learning_rate": 6.065151522106394e-07, "loss": 0.411, "step": 7927 }, { "epoch": 4.286177689673815, "grad_norm": 0.24546043574810028, "learning_rate": 6.056141833443552e-07, "loss": 0.3314, "step": 7928 }, { "epoch": 4.286718327626599, "grad_norm": 0.28318652510643005, "learning_rate": 6.04713841021633e-07, "loss": 0.371, "step": 7929 }, { "epoch": 4.287258965579384, "grad_norm": 0.2628489136695862, "learning_rate": 6.038141253708429e-07, "loss": 0.3466, "step": 7930 }, { "epoch": 4.287799603532168, "grad_norm": 0.2661830186843872, "learning_rate": 6.02915036520264e-07, "loss": 0.3781, "step": 7931 }, { "epoch": 4.288340241484952, "grad_norm": 0.25960102677345276, "learning_rate": 6.020165745980855e-07, "loss": 0.3274, "step": 7932 }, { "epoch": 4.288880879437737, "grad_norm": 0.262113094329834, "learning_rate": 6.011187397324114e-07, "loss": 0.3639, "step": 7933 }, { "epoch": 4.289421517390521, "grad_norm": 0.28189143538475037, "learning_rate": 6.0022153205125e-07, "loss": 0.3905, "step": 7934 }, { "epoch": 4.289962155343305, "grad_norm": 0.273446649312973, "learning_rate": 5.993249516825278e-07, "loss": 0.3563, "step": 7935 }, { "epoch": 4.290502793296089, "grad_norm": 0.2756039798259735, "learning_rate": 5.984289987540726e-07, "loss": 0.3835, "step": 7936 }, { "epoch": 4.291043431248873, "grad_norm": 0.2741425931453705, "learning_rate": 5.975336733936305e-07, "loss": 0.3448, "step": 7937 }, { "epoch": 4.291584069201658, "grad_norm": 0.28097325563430786, "learning_rate": 5.96638975728856e-07, "loss": 0.3627, "step": 7938 }, { "epoch": 4.292124707154442, "grad_norm": 0.26080629229545593, "learning_rate": 5.957449058873127e-07, "loss": 0.3403, "step": 7939 }, { "epoch": 4.292665345107227, "grad_norm": 0.29398655891418457, "learning_rate": 5.948514639964748e-07, "loss": 0.3888, "step": 7940 }, { "epoch": 4.293205983060011, "grad_norm": 0.2571951746940613, "learning_rate": 5.939586501837275e-07, "loss": 0.3038, "step": 7941 }, { "epoch": 4.2937466210127955, "grad_norm": 0.2912275195121765, "learning_rate": 5.930664645763684e-07, "loss": 0.3928, "step": 7942 }, { "epoch": 4.294287258965579, "grad_norm": 0.2838453948497772, "learning_rate": 5.92174907301602e-07, "loss": 0.4175, "step": 7943 }, { "epoch": 4.294827896918363, "grad_norm": 0.26570606231689453, "learning_rate": 5.912839784865448e-07, "loss": 0.3613, "step": 7944 }, { "epoch": 4.295368534871148, "grad_norm": 0.25844183564186096, "learning_rate": 5.903936782582253e-07, "loss": 0.3463, "step": 7945 }, { "epoch": 4.295909172823932, "grad_norm": 0.2600902020931244, "learning_rate": 5.895040067435793e-07, "loss": 0.342, "step": 7946 }, { "epoch": 4.296449810776717, "grad_norm": 0.2728821635246277, "learning_rate": 5.886149640694561e-07, "loss": 0.3715, "step": 7947 }, { "epoch": 4.296990448729501, "grad_norm": 0.2795557975769043, "learning_rate": 5.877265503626129e-07, "loss": 0.3523, "step": 7948 }, { "epoch": 4.297531086682286, "grad_norm": 0.2914360463619232, "learning_rate": 5.868387657497171e-07, "loss": 0.3669, "step": 7949 }, { "epoch": 4.2980717246350695, "grad_norm": 0.2712486684322357, "learning_rate": 5.859516103573492e-07, "loss": 0.3495, "step": 7950 }, { "epoch": 4.298612362587853, "grad_norm": 0.26411083340644836, "learning_rate": 5.850650843119971e-07, "loss": 0.3447, "step": 7951 }, { "epoch": 4.299153000540638, "grad_norm": 0.2834503948688507, "learning_rate": 5.841791877400627e-07, "loss": 0.3628, "step": 7952 }, { "epoch": 4.299693638493422, "grad_norm": 0.2847490906715393, "learning_rate": 5.832939207678507e-07, "loss": 0.343, "step": 7953 }, { "epoch": 4.300234276446207, "grad_norm": 0.2924007773399353, "learning_rate": 5.82409283521585e-07, "loss": 0.3697, "step": 7954 }, { "epoch": 4.300774914398991, "grad_norm": 0.28452688455581665, "learning_rate": 5.815252761273927e-07, "loss": 0.4201, "step": 7955 }, { "epoch": 4.301315552351775, "grad_norm": 0.2323017120361328, "learning_rate": 5.806418987113161e-07, "loss": 0.2984, "step": 7956 }, { "epoch": 4.30185619030456, "grad_norm": 0.2853470742702484, "learning_rate": 5.797591513993051e-07, "loss": 0.3815, "step": 7957 }, { "epoch": 4.3023968282573435, "grad_norm": 0.2698294222354889, "learning_rate": 5.78877034317219e-07, "loss": 0.3643, "step": 7958 }, { "epoch": 4.302937466210128, "grad_norm": 0.25463762879371643, "learning_rate": 5.7799554759083e-07, "loss": 0.3744, "step": 7959 }, { "epoch": 4.303478104162912, "grad_norm": 0.2504331171512604, "learning_rate": 5.771146913458187e-07, "loss": 0.3385, "step": 7960 }, { "epoch": 4.304018742115696, "grad_norm": 0.2713220715522766, "learning_rate": 5.76234465707774e-07, "loss": 0.3943, "step": 7961 }, { "epoch": 4.304559380068481, "grad_norm": 0.2801530957221985, "learning_rate": 5.753548708022e-07, "loss": 0.3833, "step": 7962 }, { "epoch": 4.305100018021265, "grad_norm": 0.25731515884399414, "learning_rate": 5.744759067545047e-07, "loss": 0.329, "step": 7963 }, { "epoch": 4.30564065597405, "grad_norm": 0.2828752100467682, "learning_rate": 5.735975736900123e-07, "loss": 0.3945, "step": 7964 }, { "epoch": 4.306181293926834, "grad_norm": 0.24314965307712555, "learning_rate": 5.727198717339511e-07, "loss": 0.3121, "step": 7965 }, { "epoch": 4.306721931879618, "grad_norm": 0.26475682854652405, "learning_rate": 5.718428010114629e-07, "loss": 0.3674, "step": 7966 }, { "epoch": 4.307262569832402, "grad_norm": 0.2800459563732147, "learning_rate": 5.709663616476002e-07, "loss": 0.3533, "step": 7967 }, { "epoch": 4.307803207785186, "grad_norm": 0.2647830843925476, "learning_rate": 5.700905537673234e-07, "loss": 0.3776, "step": 7968 }, { "epoch": 4.308343845737971, "grad_norm": 0.2571731209754944, "learning_rate": 5.69215377495504e-07, "loss": 0.3346, "step": 7969 }, { "epoch": 4.308884483690755, "grad_norm": 0.31464195251464844, "learning_rate": 5.683408329569212e-07, "loss": 0.41, "step": 7970 }, { "epoch": 4.30942512164354, "grad_norm": 0.2723197638988495, "learning_rate": 5.674669202762684e-07, "loss": 0.343, "step": 7971 }, { "epoch": 4.309965759596324, "grad_norm": 0.2688208818435669, "learning_rate": 5.665936395781452e-07, "loss": 0.3383, "step": 7972 }, { "epoch": 4.310506397549108, "grad_norm": 0.26611441373825073, "learning_rate": 5.657209909870621e-07, "loss": 0.3676, "step": 7973 }, { "epoch": 4.311047035501892, "grad_norm": 0.2646256387233734, "learning_rate": 5.648489746274405e-07, "loss": 0.3511, "step": 7974 }, { "epoch": 4.311587673454676, "grad_norm": 0.2690008580684662, "learning_rate": 5.6397759062361e-07, "loss": 0.3677, "step": 7975 }, { "epoch": 4.312128311407461, "grad_norm": 0.25940191745758057, "learning_rate": 5.631068390998129e-07, "loss": 0.3556, "step": 7976 }, { "epoch": 4.312668949360245, "grad_norm": 0.2780297100543976, "learning_rate": 5.622367201801976e-07, "loss": 0.3874, "step": 7977 }, { "epoch": 4.313209587313029, "grad_norm": 0.2555844187736511, "learning_rate": 5.613672339888238e-07, "loss": 0.3506, "step": 7978 }, { "epoch": 4.313750225265814, "grad_norm": 0.2518936097621918, "learning_rate": 5.604983806496633e-07, "loss": 0.3615, "step": 7979 }, { "epoch": 4.314290863218598, "grad_norm": 0.2545207142829895, "learning_rate": 5.596301602865938e-07, "loss": 0.3641, "step": 7980 }, { "epoch": 4.3148315011713825, "grad_norm": 0.26856258511543274, "learning_rate": 5.587625730234059e-07, "loss": 0.3988, "step": 7981 }, { "epoch": 4.315372139124166, "grad_norm": 0.26470619440078735, "learning_rate": 5.578956189837964e-07, "loss": 0.3908, "step": 7982 }, { "epoch": 4.315912777076951, "grad_norm": 0.25664377212524414, "learning_rate": 5.57029298291376e-07, "loss": 0.3184, "step": 7983 }, { "epoch": 4.316453415029735, "grad_norm": 0.2958470284938812, "learning_rate": 5.561636110696634e-07, "loss": 0.3678, "step": 7984 }, { "epoch": 4.316994052982519, "grad_norm": 0.2789238393306732, "learning_rate": 5.55298557442085e-07, "loss": 0.3753, "step": 7985 }, { "epoch": 4.317534690935304, "grad_norm": 0.2565482258796692, "learning_rate": 5.544341375319801e-07, "loss": 0.3489, "step": 7986 }, { "epoch": 4.318075328888088, "grad_norm": 0.30428025126457214, "learning_rate": 5.535703514625946e-07, "loss": 0.3777, "step": 7987 }, { "epoch": 4.318615966840873, "grad_norm": 0.28930214047431946, "learning_rate": 5.527071993570876e-07, "loss": 0.3816, "step": 7988 }, { "epoch": 4.3191566047936565, "grad_norm": 0.2668907344341278, "learning_rate": 5.518446813385248e-07, "loss": 0.3654, "step": 7989 }, { "epoch": 4.31969724274644, "grad_norm": 0.2620943486690521, "learning_rate": 5.509827975298809e-07, "loss": 0.3649, "step": 7990 }, { "epoch": 4.320237880699225, "grad_norm": 0.2591368556022644, "learning_rate": 5.501215480540445e-07, "loss": 0.3546, "step": 7991 }, { "epoch": 4.320778518652009, "grad_norm": 0.2721455693244934, "learning_rate": 5.492609330338095e-07, "loss": 0.4084, "step": 7992 }, { "epoch": 4.321319156604794, "grad_norm": 0.24927014112472534, "learning_rate": 5.48400952591881e-07, "loss": 0.3344, "step": 7993 }, { "epoch": 4.321859794557578, "grad_norm": 0.265815794467926, "learning_rate": 5.475416068508721e-07, "loss": 0.3558, "step": 7994 }, { "epoch": 4.322400432510362, "grad_norm": 0.25444939732551575, "learning_rate": 5.466828959333087e-07, "loss": 0.329, "step": 7995 }, { "epoch": 4.322941070463147, "grad_norm": 0.29459238052368164, "learning_rate": 5.45824819961625e-07, "loss": 0.397, "step": 7996 }, { "epoch": 4.3234817084159305, "grad_norm": 0.2681017220020294, "learning_rate": 5.449673790581611e-07, "loss": 0.3831, "step": 7997 }, { "epoch": 4.324022346368715, "grad_norm": 0.2532649338245392, "learning_rate": 5.441105733451713e-07, "loss": 0.3515, "step": 7998 }, { "epoch": 4.324562984321499, "grad_norm": 0.2653908431529999, "learning_rate": 5.432544029448162e-07, "loss": 0.3999, "step": 7999 }, { "epoch": 4.325103622274284, "grad_norm": 0.24886465072631836, "learning_rate": 5.423988679791686e-07, "loss": 0.3504, "step": 8000 }, { "epoch": 4.325644260227068, "grad_norm": 0.24687694013118744, "learning_rate": 5.415439685702085e-07, "loss": 0.3281, "step": 8001 }, { "epoch": 4.326184898179852, "grad_norm": 0.2668905556201935, "learning_rate": 5.406897048398247e-07, "loss": 0.3535, "step": 8002 }, { "epoch": 4.326725536132637, "grad_norm": 0.2891635000705719, "learning_rate": 5.398360769098182e-07, "loss": 0.3809, "step": 8003 }, { "epoch": 4.327266174085421, "grad_norm": 0.2738891541957855, "learning_rate": 5.389830849018973e-07, "loss": 0.3764, "step": 8004 }, { "epoch": 4.327806812038205, "grad_norm": 0.27342963218688965, "learning_rate": 5.381307289376786e-07, "loss": 0.3664, "step": 8005 }, { "epoch": 4.328347449990989, "grad_norm": 0.27071672677993774, "learning_rate": 5.37279009138692e-07, "loss": 0.3812, "step": 8006 }, { "epoch": 4.328888087943774, "grad_norm": 0.256654292345047, "learning_rate": 5.364279256263716e-07, "loss": 0.3596, "step": 8007 }, { "epoch": 4.329428725896558, "grad_norm": 0.2899309992790222, "learning_rate": 5.355774785220669e-07, "loss": 0.385, "step": 8008 }, { "epoch": 4.329969363849342, "grad_norm": 0.2631729245185852, "learning_rate": 5.347276679470281e-07, "loss": 0.349, "step": 8009 }, { "epoch": 4.330510001802127, "grad_norm": 0.26797574758529663, "learning_rate": 5.338784940224239e-07, "loss": 0.3836, "step": 8010 }, { "epoch": 4.331050639754911, "grad_norm": 0.28088507056236267, "learning_rate": 5.330299568693253e-07, "loss": 0.3902, "step": 8011 }, { "epoch": 4.3315912777076955, "grad_norm": 0.25558122992515564, "learning_rate": 5.321820566087166e-07, "loss": 0.3424, "step": 8012 }, { "epoch": 4.332131915660479, "grad_norm": 0.26682865619659424, "learning_rate": 5.313347933614915e-07, "loss": 0.3561, "step": 8013 }, { "epoch": 4.332672553613263, "grad_norm": 0.2649161219596863, "learning_rate": 5.304881672484475e-07, "loss": 0.3951, "step": 8014 }, { "epoch": 4.333213191566048, "grad_norm": 0.2671195864677429, "learning_rate": 5.296421783902972e-07, "loss": 0.3678, "step": 8015 }, { "epoch": 4.333753829518832, "grad_norm": 0.26547911763191223, "learning_rate": 5.287968269076593e-07, "loss": 0.3428, "step": 8016 }, { "epoch": 4.334294467471617, "grad_norm": 0.25527921319007874, "learning_rate": 5.27952112921064e-07, "loss": 0.3552, "step": 8017 }, { "epoch": 4.334835105424401, "grad_norm": 0.27284345030784607, "learning_rate": 5.271080365509479e-07, "loss": 0.3538, "step": 8018 }, { "epoch": 4.335375743377185, "grad_norm": 0.2744116485118866, "learning_rate": 5.262645979176572e-07, "loss": 0.3986, "step": 8019 }, { "epoch": 4.3359163813299695, "grad_norm": 0.260938823223114, "learning_rate": 5.254217971414499e-07, "loss": 0.3283, "step": 8020 }, { "epoch": 4.336457019282753, "grad_norm": 0.2803042232990265, "learning_rate": 5.245796343424897e-07, "loss": 0.371, "step": 8021 }, { "epoch": 4.336997657235538, "grad_norm": 0.2794663906097412, "learning_rate": 5.237381096408512e-07, "loss": 0.3735, "step": 8022 }, { "epoch": 4.337538295188322, "grad_norm": 0.2702719569206238, "learning_rate": 5.228972231565155e-07, "loss": 0.3818, "step": 8023 }, { "epoch": 4.338078933141107, "grad_norm": 0.2615105211734772, "learning_rate": 5.220569750093763e-07, "loss": 0.3623, "step": 8024 }, { "epoch": 4.338619571093891, "grad_norm": 0.2775976061820984, "learning_rate": 5.212173653192365e-07, "loss": 0.3621, "step": 8025 }, { "epoch": 4.339160209046675, "grad_norm": 0.2725214660167694, "learning_rate": 5.203783942058021e-07, "loss": 0.3462, "step": 8026 }, { "epoch": 4.33970084699946, "grad_norm": 0.2889401316642761, "learning_rate": 5.195400617886959e-07, "loss": 0.3699, "step": 8027 }, { "epoch": 4.3402414849522435, "grad_norm": 0.28472912311553955, "learning_rate": 5.187023681874426e-07, "loss": 0.3433, "step": 8028 }, { "epoch": 4.340782122905028, "grad_norm": 0.2755737900733948, "learning_rate": 5.178653135214811e-07, "loss": 0.3711, "step": 8029 }, { "epoch": 4.341322760857812, "grad_norm": 0.260037899017334, "learning_rate": 5.170288979101573e-07, "loss": 0.372, "step": 8030 }, { "epoch": 4.341863398810596, "grad_norm": 0.25487974286079407, "learning_rate": 5.16193121472724e-07, "loss": 0.3784, "step": 8031 }, { "epoch": 4.342404036763381, "grad_norm": 0.253598690032959, "learning_rate": 5.153579843283463e-07, "loss": 0.3414, "step": 8032 }, { "epoch": 4.342944674716165, "grad_norm": 0.27968794107437134, "learning_rate": 5.145234865960963e-07, "loss": 0.3731, "step": 8033 }, { "epoch": 4.34348531266895, "grad_norm": 0.2652742266654968, "learning_rate": 5.136896283949544e-07, "loss": 0.3334, "step": 8034 }, { "epoch": 4.344025950621734, "grad_norm": 0.2636103630065918, "learning_rate": 5.128564098438116e-07, "loss": 0.3793, "step": 8035 }, { "epoch": 4.3445665885745175, "grad_norm": 0.24500229954719543, "learning_rate": 5.12023831061465e-07, "loss": 0.3394, "step": 8036 }, { "epoch": 4.345107226527302, "grad_norm": 0.28855642676353455, "learning_rate": 5.111918921666254e-07, "loss": 0.4037, "step": 8037 }, { "epoch": 4.345647864480086, "grad_norm": 0.27344831824302673, "learning_rate": 5.103605932779055e-07, "loss": 0.3424, "step": 8038 }, { "epoch": 4.346188502432871, "grad_norm": 0.2778145670890808, "learning_rate": 5.095299345138327e-07, "loss": 0.3836, "step": 8039 }, { "epoch": 4.346729140385655, "grad_norm": 0.2843970060348511, "learning_rate": 5.086999159928391e-07, "loss": 0.3607, "step": 8040 }, { "epoch": 4.34726977833844, "grad_norm": 0.25553035736083984, "learning_rate": 5.078705378332693e-07, "loss": 0.3075, "step": 8041 }, { "epoch": 4.347810416291224, "grad_norm": 0.2807932496070862, "learning_rate": 5.070418001533733e-07, "loss": 0.3858, "step": 8042 }, { "epoch": 4.348351054244008, "grad_norm": 0.2711091637611389, "learning_rate": 5.062137030713105e-07, "loss": 0.3428, "step": 8043 }, { "epoch": 4.348891692196792, "grad_norm": 0.2752896547317505, "learning_rate": 5.053862467051507e-07, "loss": 0.3883, "step": 8044 }, { "epoch": 4.349432330149576, "grad_norm": 0.25309717655181885, "learning_rate": 5.045594311728708e-07, "loss": 0.346, "step": 8045 }, { "epoch": 4.349972968102361, "grad_norm": 0.2608318328857422, "learning_rate": 5.037332565923558e-07, "loss": 0.3685, "step": 8046 }, { "epoch": 4.350513606055145, "grad_norm": 0.2767925262451172, "learning_rate": 5.029077230814011e-07, "loss": 0.3693, "step": 8047 }, { "epoch": 4.351054244007929, "grad_norm": 0.2608930170536041, "learning_rate": 5.020828307577091e-07, "loss": 0.3341, "step": 8048 }, { "epoch": 4.351594881960714, "grad_norm": 0.2874591648578644, "learning_rate": 5.012585797388936e-07, "loss": 0.3364, "step": 8049 }, { "epoch": 4.352135519913498, "grad_norm": 0.2939295470714569, "learning_rate": 5.00434970142471e-07, "loss": 0.3745, "step": 8050 }, { "epoch": 4.3526761578662825, "grad_norm": 0.2753792107105255, "learning_rate": 4.996120020858725e-07, "loss": 0.3452, "step": 8051 }, { "epoch": 4.353216795819066, "grad_norm": 0.25311657786369324, "learning_rate": 4.987896756864357e-07, "loss": 0.3241, "step": 8052 }, { "epoch": 4.35375743377185, "grad_norm": 0.2743209898471832, "learning_rate": 4.97967991061406e-07, "loss": 0.358, "step": 8053 }, { "epoch": 4.354298071724635, "grad_norm": 0.2603088617324829, "learning_rate": 4.971469483279373e-07, "loss": 0.3443, "step": 8054 }, { "epoch": 4.354838709677419, "grad_norm": 0.26635169982910156, "learning_rate": 4.963265476030916e-07, "loss": 0.355, "step": 8055 }, { "epoch": 4.355379347630204, "grad_norm": 0.265351265668869, "learning_rate": 4.955067890038417e-07, "loss": 0.3395, "step": 8056 }, { "epoch": 4.355919985582988, "grad_norm": 0.2658833861351013, "learning_rate": 4.946876726470667e-07, "loss": 0.3568, "step": 8057 }, { "epoch": 4.356460623535773, "grad_norm": 0.2775501608848572, "learning_rate": 4.938691986495542e-07, "loss": 0.3966, "step": 8058 }, { "epoch": 4.3570012614885565, "grad_norm": 0.2574467957019806, "learning_rate": 4.930513671280018e-07, "loss": 0.3336, "step": 8059 }, { "epoch": 4.35754189944134, "grad_norm": 0.2852720320224762, "learning_rate": 4.922341781990131e-07, "loss": 0.3811, "step": 8060 }, { "epoch": 4.358082537394125, "grad_norm": 0.2721101641654968, "learning_rate": 4.914176319791037e-07, "loss": 0.4074, "step": 8061 }, { "epoch": 4.358623175346909, "grad_norm": 0.2626602351665497, "learning_rate": 4.906017285846921e-07, "loss": 0.3849, "step": 8062 }, { "epoch": 4.359163813299694, "grad_norm": 0.2770666182041168, "learning_rate": 4.897864681321101e-07, "loss": 0.3448, "step": 8063 }, { "epoch": 4.359704451252478, "grad_norm": 0.26337531208992004, "learning_rate": 4.889718507375968e-07, "loss": 0.3607, "step": 8064 }, { "epoch": 4.360245089205263, "grad_norm": 0.2588321268558502, "learning_rate": 4.881578765172979e-07, "loss": 0.3656, "step": 8065 }, { "epoch": 4.360785727158047, "grad_norm": 0.27049943804740906, "learning_rate": 4.873445455872689e-07, "loss": 0.3831, "step": 8066 }, { "epoch": 4.3613263651108305, "grad_norm": 0.2564168870449066, "learning_rate": 4.865318580634714e-07, "loss": 0.3848, "step": 8067 }, { "epoch": 4.361867003063615, "grad_norm": 0.2557253837585449, "learning_rate": 4.857198140617786e-07, "loss": 0.3286, "step": 8068 }, { "epoch": 4.362407641016399, "grad_norm": 0.2757374048233032, "learning_rate": 4.849084136979703e-07, "loss": 0.3498, "step": 8069 }, { "epoch": 4.362948278969184, "grad_norm": 0.2650987505912781, "learning_rate": 4.840976570877332e-07, "loss": 0.3872, "step": 8070 }, { "epoch": 4.363488916921968, "grad_norm": 0.2564825415611267, "learning_rate": 4.83287544346665e-07, "loss": 0.3634, "step": 8071 }, { "epoch": 4.364029554874752, "grad_norm": 0.2613122761249542, "learning_rate": 4.824780755902686e-07, "loss": 0.3629, "step": 8072 }, { "epoch": 4.364570192827537, "grad_norm": 0.27393218874931335, "learning_rate": 4.816692509339583e-07, "loss": 0.4092, "step": 8073 }, { "epoch": 4.365110830780321, "grad_norm": 0.25998541712760925, "learning_rate": 4.808610704930539e-07, "loss": 0.339, "step": 8074 }, { "epoch": 4.365651468733105, "grad_norm": 0.2728840112686157, "learning_rate": 4.800535343827834e-07, "loss": 0.3727, "step": 8075 }, { "epoch": 4.366192106685889, "grad_norm": 0.25305524468421936, "learning_rate": 4.792466427182857e-07, "loss": 0.3337, "step": 8076 }, { "epoch": 4.366732744638673, "grad_norm": 0.27281883358955383, "learning_rate": 4.784403956146039e-07, "loss": 0.3623, "step": 8077 }, { "epoch": 4.367273382591458, "grad_norm": 0.26225027441978455, "learning_rate": 4.776347931866948e-07, "loss": 0.3333, "step": 8078 }, { "epoch": 4.367814020544242, "grad_norm": 0.25581321120262146, "learning_rate": 4.7682983554941495e-07, "loss": 0.3641, "step": 8079 }, { "epoch": 4.368354658497027, "grad_norm": 0.2649233043193817, "learning_rate": 4.7602552281753647e-07, "loss": 0.3971, "step": 8080 }, { "epoch": 4.368895296449811, "grad_norm": 0.2630901336669922, "learning_rate": 4.752218551057369e-07, "loss": 0.3278, "step": 8081 }, { "epoch": 4.3694359344025955, "grad_norm": 0.2889975607395172, "learning_rate": 4.7441883252860143e-07, "loss": 0.3677, "step": 8082 }, { "epoch": 4.369976572355379, "grad_norm": 0.2752499282360077, "learning_rate": 4.736164552006239e-07, "loss": 0.3918, "step": 8083 }, { "epoch": 4.370517210308163, "grad_norm": 0.24661561846733093, "learning_rate": 4.72814723236204e-07, "loss": 0.3262, "step": 8084 }, { "epoch": 4.371057848260948, "grad_norm": 0.2726038694381714, "learning_rate": 4.720136367496536e-07, "loss": 0.3611, "step": 8085 }, { "epoch": 4.371598486213732, "grad_norm": 0.25609326362609863, "learning_rate": 4.7121319585518907e-07, "loss": 0.3769, "step": 8086 }, { "epoch": 4.372139124166517, "grad_norm": 0.25862959027290344, "learning_rate": 4.704134006669347e-07, "loss": 0.3496, "step": 8087 }, { "epoch": 4.372679762119301, "grad_norm": 0.25048795342445374, "learning_rate": 4.6961425129892655e-07, "loss": 0.3117, "step": 8088 }, { "epoch": 4.373220400072085, "grad_norm": 0.2924381196498871, "learning_rate": 4.688157478651029e-07, "loss": 0.3875, "step": 8089 }, { "epoch": 4.3737610380248695, "grad_norm": 0.26332539319992065, "learning_rate": 4.6801789047931535e-07, "loss": 0.368, "step": 8090 }, { "epoch": 4.374301675977653, "grad_norm": 0.25561949610710144, "learning_rate": 4.6722067925532024e-07, "loss": 0.3496, "step": 8091 }, { "epoch": 4.374842313930438, "grad_norm": 0.2899666130542755, "learning_rate": 4.6642411430678105e-07, "loss": 0.3957, "step": 8092 }, { "epoch": 4.375382951883222, "grad_norm": 0.2681226134300232, "learning_rate": 4.6562819574727304e-07, "loss": 0.3366, "step": 8093 }, { "epoch": 4.375923589836006, "grad_norm": 0.2914165258407593, "learning_rate": 4.6483292369027487e-07, "loss": 0.3676, "step": 8094 }, { "epoch": 4.376464227788791, "grad_norm": 0.26949185132980347, "learning_rate": 4.6403829824917643e-07, "loss": 0.3305, "step": 8095 }, { "epoch": 4.377004865741575, "grad_norm": 0.2690637409687042, "learning_rate": 4.632443195372716e-07, "loss": 0.3927, "step": 8096 }, { "epoch": 4.37754550369436, "grad_norm": 0.2764582931995392, "learning_rate": 4.624509876677674e-07, "loss": 0.3597, "step": 8097 }, { "epoch": 4.3780861416471435, "grad_norm": 0.2755921483039856, "learning_rate": 4.616583027537741e-07, "loss": 0.3582, "step": 8098 }, { "epoch": 4.378626779599928, "grad_norm": 0.27690377831459045, "learning_rate": 4.6086626490831067e-07, "loss": 0.3525, "step": 8099 }, { "epoch": 4.379167417552712, "grad_norm": 0.26605695486068726, "learning_rate": 4.6007487424430565e-07, "loss": 0.3481, "step": 8100 }, { "epoch": 4.379708055505496, "grad_norm": 0.2655012905597687, "learning_rate": 4.5928413087459325e-07, "loss": 0.3713, "step": 8101 }, { "epoch": 4.380248693458281, "grad_norm": 0.2613704204559326, "learning_rate": 4.584940349119177e-07, "loss": 0.349, "step": 8102 }, { "epoch": 4.380789331411065, "grad_norm": 0.26111432909965515, "learning_rate": 4.577045864689278e-07, "loss": 0.3666, "step": 8103 }, { "epoch": 4.38132996936385, "grad_norm": 0.30547577142715454, "learning_rate": 4.569157856581818e-07, "loss": 0.413, "step": 8104 }, { "epoch": 4.381870607316634, "grad_norm": 0.2722497582435608, "learning_rate": 4.5612763259214653e-07, "loss": 0.3337, "step": 8105 }, { "epoch": 4.3824112452694175, "grad_norm": 0.24953995645046234, "learning_rate": 4.553401273831948e-07, "loss": 0.36, "step": 8106 }, { "epoch": 4.382951883222202, "grad_norm": 0.2641351521015167, "learning_rate": 4.545532701436084e-07, "loss": 0.34, "step": 8107 }, { "epoch": 4.383492521174986, "grad_norm": 0.2744714319705963, "learning_rate": 4.5376706098557376e-07, "loss": 0.3453, "step": 8108 }, { "epoch": 4.384033159127771, "grad_norm": 0.26822155714035034, "learning_rate": 4.52981500021189e-07, "loss": 0.3623, "step": 8109 }, { "epoch": 4.384573797080555, "grad_norm": 0.27691131830215454, "learning_rate": 4.52196587362459e-07, "loss": 0.3635, "step": 8110 }, { "epoch": 4.38511443503334, "grad_norm": 0.25168412923812866, "learning_rate": 4.5141232312129247e-07, "loss": 0.3559, "step": 8111 }, { "epoch": 4.385655072986124, "grad_norm": 0.2736569046974182, "learning_rate": 4.5062870740951113e-07, "loss": 0.3815, "step": 8112 }, { "epoch": 4.386195710938908, "grad_norm": 0.2856607139110565, "learning_rate": 4.4984574033883846e-07, "loss": 0.355, "step": 8113 }, { "epoch": 4.386736348891692, "grad_norm": 0.2555479109287262, "learning_rate": 4.490634220209117e-07, "loss": 0.342, "step": 8114 }, { "epoch": 4.387276986844476, "grad_norm": 0.26296502351760864, "learning_rate": 4.4828175256727056e-07, "loss": 0.3566, "step": 8115 }, { "epoch": 4.387817624797261, "grad_norm": 0.25987017154693604, "learning_rate": 4.4750073208936373e-07, "loss": 0.3644, "step": 8116 }, { "epoch": 4.388358262750045, "grad_norm": 0.28312379121780396, "learning_rate": 4.4672036069854876e-07, "loss": 0.3795, "step": 8117 }, { "epoch": 4.388898900702829, "grad_norm": 0.2769124507904053, "learning_rate": 4.459406385060894e-07, "loss": 0.3512, "step": 8118 }, { "epoch": 4.389439538655614, "grad_norm": 0.2513204514980316, "learning_rate": 4.451615656231556e-07, "loss": 0.3399, "step": 8119 }, { "epoch": 4.389980176608398, "grad_norm": 0.28434517979621887, "learning_rate": 4.4438314216082856e-07, "loss": 0.3707, "step": 8120 }, { "epoch": 4.3905208145611825, "grad_norm": 0.2877276539802551, "learning_rate": 4.436053682300923e-07, "loss": 0.3706, "step": 8121 }, { "epoch": 4.391061452513966, "grad_norm": 0.2597291171550751, "learning_rate": 4.4282824394184297e-07, "loss": 0.341, "step": 8122 }, { "epoch": 4.391602090466751, "grad_norm": 0.2784497141838074, "learning_rate": 4.4205176940687823e-07, "loss": 0.3641, "step": 8123 }, { "epoch": 4.392142728419535, "grad_norm": 0.28118282556533813, "learning_rate": 4.412759447359094e-07, "loss": 0.3819, "step": 8124 }, { "epoch": 4.392683366372319, "grad_norm": 0.2607361078262329, "learning_rate": 4.405007700395497e-07, "loss": 0.3425, "step": 8125 }, { "epoch": 4.393224004325104, "grad_norm": 0.2748037576675415, "learning_rate": 4.397262454283241e-07, "loss": 0.4068, "step": 8126 }, { "epoch": 4.393764642277888, "grad_norm": 0.2521860599517822, "learning_rate": 4.3895237101266195e-07, "loss": 0.3411, "step": 8127 }, { "epoch": 4.394305280230673, "grad_norm": 0.2599768340587616, "learning_rate": 4.3817914690290064e-07, "loss": 0.3614, "step": 8128 }, { "epoch": 4.3948459181834565, "grad_norm": 0.2581174969673157, "learning_rate": 4.374065732092858e-07, "loss": 0.3733, "step": 8129 }, { "epoch": 4.39538655613624, "grad_norm": 0.24220918118953705, "learning_rate": 4.3663465004196995e-07, "loss": 0.3331, "step": 8130 }, { "epoch": 4.395927194089025, "grad_norm": 0.26094678044319153, "learning_rate": 4.358633775110105e-07, "loss": 0.3831, "step": 8131 }, { "epoch": 4.396467832041809, "grad_norm": 0.25512203574180603, "learning_rate": 4.3509275572637623e-07, "loss": 0.3708, "step": 8132 }, { "epoch": 4.397008469994594, "grad_norm": 0.27119317650794983, "learning_rate": 4.343227847979392e-07, "loss": 0.3969, "step": 8133 }, { "epoch": 4.397549107947378, "grad_norm": 0.26803910732269287, "learning_rate": 4.335534648354833e-07, "loss": 0.3357, "step": 8134 }, { "epoch": 4.398089745900162, "grad_norm": 0.2746613621711731, "learning_rate": 4.3278479594869307e-07, "loss": 0.3577, "step": 8135 }, { "epoch": 4.398630383852947, "grad_norm": 0.2638416588306427, "learning_rate": 4.320167782471668e-07, "loss": 0.3924, "step": 8136 }, { "epoch": 4.3991710218057305, "grad_norm": 0.27367305755615234, "learning_rate": 4.312494118404048e-07, "loss": 0.3774, "step": 8137 }, { "epoch": 4.399711659758515, "grad_norm": 0.2653174102306366, "learning_rate": 4.3048269683781894e-07, "loss": 0.3545, "step": 8138 }, { "epoch": 4.400252297711299, "grad_norm": 0.25145334005355835, "learning_rate": 4.297166333487257e-07, "loss": 0.3486, "step": 8139 }, { "epoch": 4.400792935664084, "grad_norm": 0.25891542434692383, "learning_rate": 4.289512214823466e-07, "loss": 0.3622, "step": 8140 }, { "epoch": 4.401333573616868, "grad_norm": 0.23026682436466217, "learning_rate": 4.281864613478159e-07, "loss": 0.3058, "step": 8141 }, { "epoch": 4.401874211569652, "grad_norm": 0.2723310589790344, "learning_rate": 4.2742235305416936e-07, "loss": 0.3836, "step": 8142 }, { "epoch": 4.402414849522437, "grad_norm": 0.2739313542842865, "learning_rate": 4.2665889671035407e-07, "loss": 0.3952, "step": 8143 }, { "epoch": 4.402955487475221, "grad_norm": 0.2878687381744385, "learning_rate": 4.258960924252215e-07, "loss": 0.3775, "step": 8144 }, { "epoch": 4.403496125428005, "grad_norm": 0.2662462592124939, "learning_rate": 4.251339403075294e-07, "loss": 0.3382, "step": 8145 }, { "epoch": 4.404036763380789, "grad_norm": 0.2829868793487549, "learning_rate": 4.243724404659466e-07, "loss": 0.371, "step": 8146 }, { "epoch": 4.404577401333573, "grad_norm": 0.28387123346328735, "learning_rate": 4.2361159300904454e-07, "loss": 0.3885, "step": 8147 }, { "epoch": 4.405118039286358, "grad_norm": 0.28340548276901245, "learning_rate": 4.228513980453036e-07, "loss": 0.3648, "step": 8148 }, { "epoch": 4.405658677239142, "grad_norm": 0.2605474889278412, "learning_rate": 4.2209185568311216e-07, "loss": 0.3234, "step": 8149 }, { "epoch": 4.406199315191927, "grad_norm": 0.2815400958061218, "learning_rate": 4.21332966030763e-07, "loss": 0.3625, "step": 8150 }, { "epoch": 4.406739953144711, "grad_norm": 0.265581876039505, "learning_rate": 4.2057472919645957e-07, "loss": 0.3703, "step": 8151 }, { "epoch": 4.407280591097495, "grad_norm": 0.26388537883758545, "learning_rate": 4.1981714528830596e-07, "loss": 0.3737, "step": 8152 }, { "epoch": 4.407821229050279, "grad_norm": 0.2655022144317627, "learning_rate": 4.1906021441432074e-07, "loss": 0.3557, "step": 8153 }, { "epoch": 4.408361867003063, "grad_norm": 0.26379454135894775, "learning_rate": 4.1830393668242376e-07, "loss": 0.3495, "step": 8154 }, { "epoch": 4.408902504955848, "grad_norm": 0.2671830952167511, "learning_rate": 4.175483122004448e-07, "loss": 0.3895, "step": 8155 }, { "epoch": 4.409443142908632, "grad_norm": 0.267318457365036, "learning_rate": 4.167933410761188e-07, "loss": 0.3803, "step": 8156 }, { "epoch": 4.409983780861417, "grad_norm": 0.3021106421947479, "learning_rate": 4.1603902341708804e-07, "loss": 0.3878, "step": 8157 }, { "epoch": 4.410524418814201, "grad_norm": 0.2709572911262512, "learning_rate": 4.1528535933090253e-07, "loss": 0.3403, "step": 8158 }, { "epoch": 4.411065056766985, "grad_norm": 0.26195964217185974, "learning_rate": 4.1453234892501804e-07, "loss": 0.3541, "step": 8159 }, { "epoch": 4.4116056947197695, "grad_norm": 0.27845486998558044, "learning_rate": 4.1377999230679646e-07, "loss": 0.3818, "step": 8160 }, { "epoch": 4.412146332672553, "grad_norm": 0.2635180652141571, "learning_rate": 4.130282895835086e-07, "loss": 0.299, "step": 8161 }, { "epoch": 4.412686970625338, "grad_norm": 0.2581978738307953, "learning_rate": 4.1227724086233045e-07, "loss": 0.3304, "step": 8162 }, { "epoch": 4.413227608578122, "grad_norm": 0.2778031826019287, "learning_rate": 4.1152684625034633e-07, "loss": 0.3798, "step": 8163 }, { "epoch": 4.413768246530907, "grad_norm": 0.25137409567832947, "learning_rate": 4.1077710585454344e-07, "loss": 0.3231, "step": 8164 }, { "epoch": 4.414308884483691, "grad_norm": 0.2556723654270172, "learning_rate": 4.100280197818207e-07, "loss": 0.388, "step": 8165 }, { "epoch": 4.414849522436475, "grad_norm": 0.2570623755455017, "learning_rate": 4.092795881389805e-07, "loss": 0.3387, "step": 8166 }, { "epoch": 4.41539016038926, "grad_norm": 0.27952271699905396, "learning_rate": 4.0853181103273356e-07, "loss": 0.3874, "step": 8167 }, { "epoch": 4.4159307983420435, "grad_norm": 0.26863715052604675, "learning_rate": 4.0778468856969623e-07, "loss": 0.3947, "step": 8168 }, { "epoch": 4.416471436294828, "grad_norm": 0.2620304226875305, "learning_rate": 4.0703822085639057e-07, "loss": 0.3569, "step": 8169 }, { "epoch": 4.417012074247612, "grad_norm": 0.2710581421852112, "learning_rate": 4.062924079992492e-07, "loss": 0.3922, "step": 8170 }, { "epoch": 4.417552712200396, "grad_norm": 0.25852853059768677, "learning_rate": 4.0554725010460704e-07, "loss": 0.3544, "step": 8171 }, { "epoch": 4.418093350153181, "grad_norm": 0.2708817720413208, "learning_rate": 4.0480274727870696e-07, "loss": 0.3567, "step": 8172 }, { "epoch": 4.418633988105965, "grad_norm": 0.27214178442955017, "learning_rate": 4.040588996277006e-07, "loss": 0.3465, "step": 8173 }, { "epoch": 4.41917462605875, "grad_norm": 0.28624093532562256, "learning_rate": 4.0331570725764215e-07, "loss": 0.3512, "step": 8174 }, { "epoch": 4.419715264011534, "grad_norm": 0.2977980375289917, "learning_rate": 4.025731702744978e-07, "loss": 0.388, "step": 8175 }, { "epoch": 4.4202559019643175, "grad_norm": 0.26138395071029663, "learning_rate": 4.0183128878413356e-07, "loss": 0.3167, "step": 8176 }, { "epoch": 4.420796539917102, "grad_norm": 0.277291476726532, "learning_rate": 4.0109006289232646e-07, "loss": 0.3832, "step": 8177 }, { "epoch": 4.421337177869886, "grad_norm": 0.24840909242630005, "learning_rate": 4.003494927047613e-07, "loss": 0.3348, "step": 8178 }, { "epoch": 4.421877815822671, "grad_norm": 0.2633357644081116, "learning_rate": 3.9960957832702594e-07, "loss": 0.3663, "step": 8179 }, { "epoch": 4.422418453775455, "grad_norm": 0.28666359186172485, "learning_rate": 3.9887031986461546e-07, "loss": 0.3985, "step": 8180 }, { "epoch": 4.42295909172824, "grad_norm": 0.2765139639377594, "learning_rate": 3.9813171742293156e-07, "loss": 0.3887, "step": 8181 }, { "epoch": 4.423499729681024, "grad_norm": 0.2654871642589569, "learning_rate": 3.9739377110728504e-07, "loss": 0.3288, "step": 8182 }, { "epoch": 4.424040367633808, "grad_norm": 0.26870468258857727, "learning_rate": 3.96656481022889e-07, "loss": 0.3754, "step": 8183 }, { "epoch": 4.424581005586592, "grad_norm": 0.2566099762916565, "learning_rate": 3.959198472748649e-07, "loss": 0.3629, "step": 8184 }, { "epoch": 4.425121643539376, "grad_norm": 0.28181445598602295, "learning_rate": 3.9518386996824196e-07, "loss": 0.3706, "step": 8185 }, { "epoch": 4.425662281492161, "grad_norm": 0.26720869541168213, "learning_rate": 3.9444854920795307e-07, "loss": 0.3453, "step": 8186 }, { "epoch": 4.426202919444945, "grad_norm": 0.26364749670028687, "learning_rate": 3.9371388509884033e-07, "loss": 0.3735, "step": 8187 }, { "epoch": 4.426743557397729, "grad_norm": 0.2517613470554352, "learning_rate": 3.9297987774565003e-07, "loss": 0.3587, "step": 8188 }, { "epoch": 4.427284195350514, "grad_norm": 0.2620115578174591, "learning_rate": 3.9224652725303514e-07, "loss": 0.3773, "step": 8189 }, { "epoch": 4.427824833303298, "grad_norm": 0.2648788094520569, "learning_rate": 3.9151383372555696e-07, "loss": 0.3507, "step": 8190 }, { "epoch": 4.4283654712560825, "grad_norm": 0.2630835175514221, "learning_rate": 3.9078179726768027e-07, "loss": 0.3661, "step": 8191 }, { "epoch": 4.428906109208866, "grad_norm": 0.26218724250793457, "learning_rate": 3.9005041798377827e-07, "loss": 0.3777, "step": 8192 }, { "epoch": 4.42944674716165, "grad_norm": 0.265948086977005, "learning_rate": 3.8931969597812813e-07, "loss": 0.3542, "step": 8193 }, { "epoch": 4.429987385114435, "grad_norm": 0.2752498984336853, "learning_rate": 3.88589631354917e-07, "loss": 0.3637, "step": 8194 }, { "epoch": 4.430528023067219, "grad_norm": 0.2578594982624054, "learning_rate": 3.8786022421823497e-07, "loss": 0.3829, "step": 8195 }, { "epoch": 4.431068661020004, "grad_norm": 0.25675004720687866, "learning_rate": 3.8713147467207946e-07, "loss": 0.3783, "step": 8196 }, { "epoch": 4.431609298972788, "grad_norm": 0.28650611639022827, "learning_rate": 3.8640338282035507e-07, "loss": 0.3796, "step": 8197 }, { "epoch": 4.432149936925573, "grad_norm": 0.2919948399066925, "learning_rate": 3.85675948766871e-07, "loss": 0.3345, "step": 8198 }, { "epoch": 4.4326905748783565, "grad_norm": 0.3023063838481903, "learning_rate": 3.8494917261534427e-07, "loss": 0.3488, "step": 8199 }, { "epoch": 4.43323121283114, "grad_norm": 0.2518389821052551, "learning_rate": 3.84223054469397e-07, "loss": 0.3614, "step": 8200 }, { "epoch": 4.433771850783925, "grad_norm": 0.2588181793689728, "learning_rate": 3.83497594432557e-07, "loss": 0.3692, "step": 8201 }, { "epoch": 4.434312488736709, "grad_norm": 0.25840917229652405, "learning_rate": 3.827727926082603e-07, "loss": 0.3659, "step": 8202 }, { "epoch": 4.434853126689494, "grad_norm": 0.25818681716918945, "learning_rate": 3.8204864909984764e-07, "loss": 0.3509, "step": 8203 }, { "epoch": 4.435393764642278, "grad_norm": 0.2708577513694763, "learning_rate": 3.813251640105653e-07, "loss": 0.3754, "step": 8204 }, { "epoch": 4.435934402595062, "grad_norm": 0.2632346451282501, "learning_rate": 3.8060233744356634e-07, "loss": 0.3706, "step": 8205 }, { "epoch": 4.436475040547847, "grad_norm": 0.27098697423934937, "learning_rate": 3.7988016950191055e-07, "loss": 0.3473, "step": 8206 }, { "epoch": 4.4370156785006305, "grad_norm": 0.2811775803565979, "learning_rate": 3.791586602885644e-07, "loss": 0.3416, "step": 8207 }, { "epoch": 4.437556316453415, "grad_norm": 0.2631094753742218, "learning_rate": 3.7843780990639787e-07, "loss": 0.3689, "step": 8208 }, { "epoch": 4.438096954406199, "grad_norm": 0.2584248483181, "learning_rate": 3.777176184581893e-07, "loss": 0.3532, "step": 8209 }, { "epoch": 4.438637592358983, "grad_norm": 0.2771499454975128, "learning_rate": 3.76998086046621e-07, "loss": 0.3666, "step": 8210 }, { "epoch": 4.439178230311768, "grad_norm": 0.281101793050766, "learning_rate": 3.762792127742848e-07, "loss": 0.418, "step": 8211 }, { "epoch": 4.439718868264552, "grad_norm": 0.2672911286354065, "learning_rate": 3.755609987436748e-07, "loss": 0.3356, "step": 8212 }, { "epoch": 4.440259506217337, "grad_norm": 0.2672131359577179, "learning_rate": 3.7484344405719186e-07, "loss": 0.3506, "step": 8213 }, { "epoch": 4.440800144170121, "grad_norm": 0.2638314664363861, "learning_rate": 3.741265488171458e-07, "loss": 0.3783, "step": 8214 }, { "epoch": 4.441340782122905, "grad_norm": 0.26125532388687134, "learning_rate": 3.7341031312574827e-07, "loss": 0.365, "step": 8215 }, { "epoch": 4.441881420075689, "grad_norm": 0.25414150953292847, "learning_rate": 3.7269473708512084e-07, "loss": 0.3544, "step": 8216 }, { "epoch": 4.442422058028473, "grad_norm": 0.2596067190170288, "learning_rate": 3.7197982079728745e-07, "loss": 0.3733, "step": 8217 }, { "epoch": 4.442962695981258, "grad_norm": 0.2612897455692291, "learning_rate": 3.7126556436417993e-07, "loss": 0.3468, "step": 8218 }, { "epoch": 4.443503333934042, "grad_norm": 0.26086223125457764, "learning_rate": 3.7055196788763625e-07, "loss": 0.3606, "step": 8219 }, { "epoch": 4.444043971886827, "grad_norm": 0.26433905959129333, "learning_rate": 3.6983903146939894e-07, "loss": 0.3635, "step": 8220 }, { "epoch": 4.444584609839611, "grad_norm": 0.29702532291412354, "learning_rate": 3.691267552111183e-07, "loss": 0.3817, "step": 8221 }, { "epoch": 4.4451252477923955, "grad_norm": 0.2539384961128235, "learning_rate": 3.6841513921434704e-07, "loss": 0.3267, "step": 8222 }, { "epoch": 4.445665885745179, "grad_norm": 0.2569662630558014, "learning_rate": 3.6770418358054894e-07, "loss": 0.3415, "step": 8223 }, { "epoch": 4.446206523697963, "grad_norm": 0.26443180441856384, "learning_rate": 3.6699388841108907e-07, "loss": 0.3618, "step": 8224 }, { "epoch": 4.446747161650748, "grad_norm": 0.26646578311920166, "learning_rate": 3.6628425380723975e-07, "loss": 0.3675, "step": 8225 }, { "epoch": 4.447287799603532, "grad_norm": 0.2674654424190521, "learning_rate": 3.6557527987018114e-07, "loss": 0.3586, "step": 8226 }, { "epoch": 4.447828437556317, "grad_norm": 0.271610289812088, "learning_rate": 3.648669667009952e-07, "loss": 0.3737, "step": 8227 }, { "epoch": 4.448369075509101, "grad_norm": 0.2587081789970398, "learning_rate": 3.6415931440067443e-07, "loss": 0.3455, "step": 8228 }, { "epoch": 4.448909713461885, "grad_norm": 0.2835042476654053, "learning_rate": 3.6345232307011257e-07, "loss": 0.4038, "step": 8229 }, { "epoch": 4.4494503514146695, "grad_norm": 0.26958781480789185, "learning_rate": 3.627459928101118e-07, "loss": 0.3582, "step": 8230 }, { "epoch": 4.449990989367453, "grad_norm": 0.2784334123134613, "learning_rate": 3.620403237213799e-07, "loss": 0.3644, "step": 8231 }, { "epoch": 4.450531627320238, "grad_norm": 0.26102492213249207, "learning_rate": 3.6133531590452963e-07, "loss": 0.3449, "step": 8232 }, { "epoch": 4.451072265273022, "grad_norm": 0.27513423562049866, "learning_rate": 3.606309694600796e-07, "loss": 0.3646, "step": 8233 }, { "epoch": 4.451612903225806, "grad_norm": 0.27059316635131836, "learning_rate": 3.5992728448845326e-07, "loss": 0.3599, "step": 8234 }, { "epoch": 4.452153541178591, "grad_norm": 0.27595481276512146, "learning_rate": 3.5922426108998154e-07, "loss": 0.362, "step": 8235 }, { "epoch": 4.452694179131375, "grad_norm": 0.28539323806762695, "learning_rate": 3.5852189936490255e-07, "loss": 0.367, "step": 8236 }, { "epoch": 4.45323481708416, "grad_norm": 0.2783656418323517, "learning_rate": 3.5782019941335345e-07, "loss": 0.3888, "step": 8237 }, { "epoch": 4.4537754550369435, "grad_norm": 0.2655050754547119, "learning_rate": 3.571191613353847e-07, "loss": 0.3494, "step": 8238 }, { "epoch": 4.454316092989728, "grad_norm": 0.26864656805992126, "learning_rate": 3.5641878523094697e-07, "loss": 0.3511, "step": 8239 }, { "epoch": 4.454856730942512, "grad_norm": 0.2563180923461914, "learning_rate": 3.5571907119990033e-07, "loss": 0.3507, "step": 8240 }, { "epoch": 4.455397368895296, "grad_norm": 0.26620420813560486, "learning_rate": 3.550200193420078e-07, "loss": 0.3501, "step": 8241 }, { "epoch": 4.455938006848081, "grad_norm": 0.2963588535785675, "learning_rate": 3.543216297569385e-07, "loss": 0.3868, "step": 8242 }, { "epoch": 4.456478644800865, "grad_norm": 0.2520950138568878, "learning_rate": 3.5362390254426836e-07, "loss": 0.367, "step": 8243 }, { "epoch": 4.45701928275365, "grad_norm": 0.24793481826782227, "learning_rate": 3.5292683780347834e-07, "loss": 0.3411, "step": 8244 }, { "epoch": 4.457559920706434, "grad_norm": 0.280124306678772, "learning_rate": 3.522304356339529e-07, "loss": 0.3872, "step": 8245 }, { "epoch": 4.4581005586592175, "grad_norm": 0.2688871920108795, "learning_rate": 3.5153469613498583e-07, "loss": 0.3764, "step": 8246 }, { "epoch": 4.458641196612002, "grad_norm": 0.26355719566345215, "learning_rate": 3.508396194057728e-07, "loss": 0.3501, "step": 8247 }, { "epoch": 4.459181834564786, "grad_norm": 0.27029556035995483, "learning_rate": 3.501452055454191e-07, "loss": 0.3733, "step": 8248 }, { "epoch": 4.459722472517571, "grad_norm": 0.26230961084365845, "learning_rate": 3.4945145465292987e-07, "loss": 0.3632, "step": 8249 }, { "epoch": 4.460263110470355, "grad_norm": 0.2691025137901306, "learning_rate": 3.4875836682722096e-07, "loss": 0.3772, "step": 8250 }, { "epoch": 4.460803748423139, "grad_norm": 0.26911598443984985, "learning_rate": 3.4806594216710956e-07, "loss": 0.3758, "step": 8251 }, { "epoch": 4.461344386375924, "grad_norm": 0.2633644938468933, "learning_rate": 3.473741807713232e-07, "loss": 0.3691, "step": 8252 }, { "epoch": 4.461885024328708, "grad_norm": 0.25110191106796265, "learning_rate": 3.4668308273848985e-07, "loss": 0.3279, "step": 8253 }, { "epoch": 4.462425662281492, "grad_norm": 0.2690924108028412, "learning_rate": 3.4599264816714497e-07, "loss": 0.3688, "step": 8254 }, { "epoch": 4.462966300234276, "grad_norm": 0.2681609094142914, "learning_rate": 3.45302877155731e-07, "loss": 0.3894, "step": 8255 }, { "epoch": 4.463506938187061, "grad_norm": 0.25030145049095154, "learning_rate": 3.4461376980259307e-07, "loss": 0.3546, "step": 8256 }, { "epoch": 4.464047576139845, "grad_norm": 0.261446475982666, "learning_rate": 3.439253262059822e-07, "loss": 0.389, "step": 8257 }, { "epoch": 4.464588214092629, "grad_norm": 0.2674567401409149, "learning_rate": 3.4323754646405747e-07, "loss": 0.3604, "step": 8258 }, { "epoch": 4.465128852045414, "grad_norm": 0.2563493251800537, "learning_rate": 3.4255043067487893e-07, "loss": 0.338, "step": 8259 }, { "epoch": 4.465669489998198, "grad_norm": 0.27313074469566345, "learning_rate": 3.418639789364175e-07, "loss": 0.3599, "step": 8260 }, { "epoch": 4.4662101279509825, "grad_norm": 0.26698416471481323, "learning_rate": 3.411781913465423e-07, "loss": 0.3919, "step": 8261 }, { "epoch": 4.466750765903766, "grad_norm": 0.2576042115688324, "learning_rate": 3.404930680030344e-07, "loss": 0.369, "step": 8262 }, { "epoch": 4.46729140385655, "grad_norm": 0.2721457779407501, "learning_rate": 3.398086090035757e-07, "loss": 0.3669, "step": 8263 }, { "epoch": 4.467832041809335, "grad_norm": 0.265586256980896, "learning_rate": 3.3912481444575763e-07, "loss": 0.3697, "step": 8264 }, { "epoch": 4.468372679762119, "grad_norm": 0.2615722417831421, "learning_rate": 3.3844168442707213e-07, "loss": 0.3671, "step": 8265 }, { "epoch": 4.468913317714904, "grad_norm": 0.26328277587890625, "learning_rate": 3.377592190449186e-07, "loss": 0.3512, "step": 8266 }, { "epoch": 4.469453955667688, "grad_norm": 0.31026870012283325, "learning_rate": 3.370774183966036e-07, "loss": 0.3721, "step": 8267 }, { "epoch": 4.469994593620472, "grad_norm": 0.26836591958999634, "learning_rate": 3.363962825793354e-07, "loss": 0.3935, "step": 8268 }, { "epoch": 4.4705352315732565, "grad_norm": 0.25650352239608765, "learning_rate": 3.357158116902287e-07, "loss": 0.3358, "step": 8269 }, { "epoch": 4.47107586952604, "grad_norm": 0.2633151113986969, "learning_rate": 3.350360058263058e-07, "loss": 0.3537, "step": 8270 }, { "epoch": 4.471616507478825, "grad_norm": 0.2551864683628082, "learning_rate": 3.3435686508449026e-07, "loss": 0.3431, "step": 8271 }, { "epoch": 4.472157145431609, "grad_norm": 0.2526355981826782, "learning_rate": 3.336783895616147e-07, "loss": 0.3347, "step": 8272 }, { "epoch": 4.472697783384394, "grad_norm": 0.2782466411590576, "learning_rate": 3.330005793544133e-07, "loss": 0.3964, "step": 8273 }, { "epoch": 4.473238421337178, "grad_norm": 0.27180591225624084, "learning_rate": 3.3232343455952664e-07, "loss": 0.3659, "step": 8274 }, { "epoch": 4.473779059289962, "grad_norm": 0.2725878059864044, "learning_rate": 3.3164695527350244e-07, "loss": 0.3364, "step": 8275 }, { "epoch": 4.474319697242747, "grad_norm": 0.2825901508331299, "learning_rate": 3.309711415927908e-07, "loss": 0.3589, "step": 8276 }, { "epoch": 4.4748603351955305, "grad_norm": 0.26633691787719727, "learning_rate": 3.3029599361374955e-07, "loss": 0.3908, "step": 8277 }, { "epoch": 4.475400973148315, "grad_norm": 0.34270578622817993, "learning_rate": 3.296215114326368e-07, "loss": 0.3681, "step": 8278 }, { "epoch": 4.475941611101099, "grad_norm": 0.2730425298213959, "learning_rate": 3.289476951456222e-07, "loss": 0.3264, "step": 8279 }, { "epoch": 4.476482249053884, "grad_norm": 0.2668716311454773, "learning_rate": 3.2827454484877564e-07, "loss": 0.3531, "step": 8280 }, { "epoch": 4.477022887006668, "grad_norm": 0.2741301655769348, "learning_rate": 3.276020606380742e-07, "loss": 0.3679, "step": 8281 }, { "epoch": 4.477563524959452, "grad_norm": 0.2575318217277527, "learning_rate": 3.269302426094001e-07, "loss": 0.3535, "step": 8282 }, { "epoch": 4.478104162912237, "grad_norm": 0.2573346197605133, "learning_rate": 3.262590908585378e-07, "loss": 0.3599, "step": 8283 }, { "epoch": 4.478644800865021, "grad_norm": 0.27236178517341614, "learning_rate": 3.255886054811813e-07, "loss": 0.395, "step": 8284 }, { "epoch": 4.479185438817805, "grad_norm": 0.2652297019958496, "learning_rate": 3.2491878657292643e-07, "loss": 0.3193, "step": 8285 }, { "epoch": 4.479726076770589, "grad_norm": 0.2728855609893799, "learning_rate": 3.2424963422927335e-07, "loss": 0.3745, "step": 8286 }, { "epoch": 4.480266714723373, "grad_norm": 0.26942577958106995, "learning_rate": 3.2358114854563086e-07, "loss": 0.3725, "step": 8287 }, { "epoch": 4.480807352676158, "grad_norm": 0.2845991849899292, "learning_rate": 3.2291332961730817e-07, "loss": 0.3882, "step": 8288 }, { "epoch": 4.481347990628942, "grad_norm": 0.27013665437698364, "learning_rate": 3.222461775395247e-07, "loss": 0.3673, "step": 8289 }, { "epoch": 4.481888628581727, "grad_norm": 0.2531822919845581, "learning_rate": 3.215796924073983e-07, "loss": 0.3679, "step": 8290 }, { "epoch": 4.482429266534511, "grad_norm": 0.2829582989215851, "learning_rate": 3.209138743159573e-07, "loss": 0.3566, "step": 8291 }, { "epoch": 4.482969904487295, "grad_norm": 0.29151779413223267, "learning_rate": 3.2024872336013204e-07, "loss": 0.3817, "step": 8292 }, { "epoch": 4.483510542440079, "grad_norm": 0.2873472273349762, "learning_rate": 3.195842396347598e-07, "loss": 0.3305, "step": 8293 }, { "epoch": 4.484051180392863, "grad_norm": 0.24851979315280914, "learning_rate": 3.1892042323457995e-07, "loss": 0.3635, "step": 8294 }, { "epoch": 4.484591818345648, "grad_norm": 0.2865833640098572, "learning_rate": 3.1825727425423837e-07, "loss": 0.3742, "step": 8295 }, { "epoch": 4.485132456298432, "grad_norm": 0.2495800107717514, "learning_rate": 3.1759479278828665e-07, "loss": 0.3423, "step": 8296 }, { "epoch": 4.485673094251217, "grad_norm": 0.29070883989334106, "learning_rate": 3.169329789311798e-07, "loss": 0.4068, "step": 8297 }, { "epoch": 4.486213732204001, "grad_norm": 0.24659143388271332, "learning_rate": 3.1627183277727734e-07, "loss": 0.3373, "step": 8298 }, { "epoch": 4.486754370156785, "grad_norm": 0.28675511479377747, "learning_rate": 3.1561135442084556e-07, "loss": 0.3762, "step": 8299 }, { "epoch": 4.4872950081095695, "grad_norm": 0.271820604801178, "learning_rate": 3.149515439560524e-07, "loss": 0.3444, "step": 8300 }, { "epoch": 4.487835646062353, "grad_norm": 0.25091075897216797, "learning_rate": 3.142924014769755e-07, "loss": 0.3268, "step": 8301 }, { "epoch": 4.488376284015138, "grad_norm": 0.2675650715827942, "learning_rate": 3.136339270775901e-07, "loss": 0.3969, "step": 8302 }, { "epoch": 4.488916921967922, "grad_norm": 0.2635056674480438, "learning_rate": 3.1297612085178284e-07, "loss": 0.3673, "step": 8303 }, { "epoch": 4.489457559920706, "grad_norm": 0.25954359769821167, "learning_rate": 3.123189828933432e-07, "loss": 0.3568, "step": 8304 }, { "epoch": 4.489998197873491, "grad_norm": 0.26797178387641907, "learning_rate": 3.116625132959633e-07, "loss": 0.384, "step": 8305 }, { "epoch": 4.490538835826275, "grad_norm": 0.2877870202064514, "learning_rate": 3.110067121532417e-07, "loss": 0.3672, "step": 8306 }, { "epoch": 4.49107947377906, "grad_norm": 0.2584758400917053, "learning_rate": 3.103515795586809e-07, "loss": 0.361, "step": 8307 }, { "epoch": 4.4916201117318435, "grad_norm": 0.27120763063430786, "learning_rate": 3.0969711560568996e-07, "loss": 0.3548, "step": 8308 }, { "epoch": 4.492160749684627, "grad_norm": 0.281546026468277, "learning_rate": 3.0904332038757977e-07, "loss": 0.3869, "step": 8309 }, { "epoch": 4.492701387637412, "grad_norm": 0.2806074619293213, "learning_rate": 3.083901939975675e-07, "loss": 0.361, "step": 8310 }, { "epoch": 4.493242025590196, "grad_norm": 0.26624199748039246, "learning_rate": 3.0773773652877537e-07, "loss": 0.3682, "step": 8311 }, { "epoch": 4.493782663542981, "grad_norm": 0.24980990588665009, "learning_rate": 3.070859480742283e-07, "loss": 0.3591, "step": 8312 }, { "epoch": 4.494323301495765, "grad_norm": 0.28086620569229126, "learning_rate": 3.064348287268587e-07, "loss": 0.3963, "step": 8313 }, { "epoch": 4.49486393944855, "grad_norm": 0.2712896466255188, "learning_rate": 3.0578437857950117e-07, "loss": 0.3577, "step": 8314 }, { "epoch": 4.495404577401334, "grad_norm": 0.24350707232952118, "learning_rate": 3.051345977248954e-07, "loss": 0.3017, "step": 8315 }, { "epoch": 4.4959452153541175, "grad_norm": 0.26775676012039185, "learning_rate": 3.044854862556867e-07, "loss": 0.3622, "step": 8316 }, { "epoch": 4.496485853306902, "grad_norm": 0.29231777787208557, "learning_rate": 3.0383704426442396e-07, "loss": 0.3987, "step": 8317 }, { "epoch": 4.497026491259686, "grad_norm": 0.26771900057792664, "learning_rate": 3.0318927184356086e-07, "loss": 0.3696, "step": 8318 }, { "epoch": 4.497567129212471, "grad_norm": 0.2685577869415283, "learning_rate": 3.025421690854552e-07, "loss": 0.3372, "step": 8319 }, { "epoch": 4.498107767165255, "grad_norm": 0.2681006193161011, "learning_rate": 3.018957360823699e-07, "loss": 0.3474, "step": 8320 }, { "epoch": 4.498648405118039, "grad_norm": 0.26560941338539124, "learning_rate": 3.0124997292647286e-07, "loss": 0.3773, "step": 8321 }, { "epoch": 4.499189043070824, "grad_norm": 0.271924763917923, "learning_rate": 3.006048797098349e-07, "loss": 0.376, "step": 8322 }, { "epoch": 4.499729681023608, "grad_norm": 0.2682948410511017, "learning_rate": 2.9996045652443294e-07, "loss": 0.3559, "step": 8323 }, { "epoch": 4.500270318976392, "grad_norm": 0.2738342583179474, "learning_rate": 2.993167034621464e-07, "loss": 0.3634, "step": 8324 }, { "epoch": 4.500810956929176, "grad_norm": 0.24666568636894226, "learning_rate": 2.986736206147628e-07, "loss": 0.3307, "step": 8325 }, { "epoch": 4.50135159488196, "grad_norm": 0.26715728640556335, "learning_rate": 2.9803120807397003e-07, "loss": 0.3704, "step": 8326 }, { "epoch": 4.501892232834745, "grad_norm": 0.25745439529418945, "learning_rate": 2.9738946593136144e-07, "loss": 0.3563, "step": 8327 }, { "epoch": 4.502432870787529, "grad_norm": 0.27403581142425537, "learning_rate": 2.9674839427843715e-07, "loss": 0.3414, "step": 8328 }, { "epoch": 4.502973508740314, "grad_norm": 0.28739896416664124, "learning_rate": 2.9610799320659964e-07, "loss": 0.3801, "step": 8329 }, { "epoch": 4.503514146693098, "grad_norm": 0.27232155203819275, "learning_rate": 2.9546826280715536e-07, "loss": 0.3586, "step": 8330 }, { "epoch": 4.5040547846458825, "grad_norm": 0.28253716230392456, "learning_rate": 2.948292031713157e-07, "loss": 0.3742, "step": 8331 }, { "epoch": 4.504595422598666, "grad_norm": 0.2904265522956848, "learning_rate": 2.9419081439019727e-07, "loss": 0.3555, "step": 8332 }, { "epoch": 4.50513606055145, "grad_norm": 0.2432306855916977, "learning_rate": 2.9355309655482224e-07, "loss": 0.3587, "step": 8333 }, { "epoch": 4.505676698504235, "grad_norm": 0.27025797963142395, "learning_rate": 2.9291604975611123e-07, "loss": 0.3758, "step": 8334 }, { "epoch": 4.506217336457019, "grad_norm": 0.2538128197193146, "learning_rate": 2.9227967408489653e-07, "loss": 0.3542, "step": 8335 }, { "epoch": 4.506757974409804, "grad_norm": 0.2916772663593292, "learning_rate": 2.9164396963190954e-07, "loss": 0.4006, "step": 8336 }, { "epoch": 4.507298612362588, "grad_norm": 0.26848316192626953, "learning_rate": 2.910089364877888e-07, "loss": 0.3406, "step": 8337 }, { "epoch": 4.507839250315373, "grad_norm": 0.2920156419277191, "learning_rate": 2.903745747430764e-07, "loss": 0.4109, "step": 8338 }, { "epoch": 4.5083798882681565, "grad_norm": 0.26490744948387146, "learning_rate": 2.897408844882171e-07, "loss": 0.3825, "step": 8339 }, { "epoch": 4.50892052622094, "grad_norm": 0.2634319067001343, "learning_rate": 2.891078658135632e-07, "loss": 0.3604, "step": 8340 }, { "epoch": 4.509461164173725, "grad_norm": 0.25874748826026917, "learning_rate": 2.88475518809368e-07, "loss": 0.3843, "step": 8341 }, { "epoch": 4.510001802126509, "grad_norm": 0.2628515958786011, "learning_rate": 2.87843843565791e-07, "loss": 0.3653, "step": 8342 }, { "epoch": 4.510542440079294, "grad_norm": 0.25855329632759094, "learning_rate": 2.8721284017289517e-07, "loss": 0.3514, "step": 8343 }, { "epoch": 4.511083078032078, "grad_norm": 0.27656733989715576, "learning_rate": 2.8658250872064696e-07, "loss": 0.3682, "step": 8344 }, { "epoch": 4.511623715984862, "grad_norm": 0.27417272329330444, "learning_rate": 2.859528492989194e-07, "loss": 0.3484, "step": 8345 }, { "epoch": 4.512164353937647, "grad_norm": 0.2899097502231598, "learning_rate": 2.853238619974874e-07, "loss": 0.3567, "step": 8346 }, { "epoch": 4.5127049918904305, "grad_norm": 0.2767457365989685, "learning_rate": 2.8469554690603143e-07, "loss": 0.3438, "step": 8347 }, { "epoch": 4.513245629843215, "grad_norm": 0.2730007767677307, "learning_rate": 2.8406790411413366e-07, "loss": 0.3362, "step": 8348 }, { "epoch": 4.513786267795999, "grad_norm": 0.25781869888305664, "learning_rate": 2.834409337112842e-07, "loss": 0.3599, "step": 8349 }, { "epoch": 4.514326905748783, "grad_norm": 0.27375370264053345, "learning_rate": 2.828146357868755e-07, "loss": 0.3511, "step": 8350 }, { "epoch": 4.514867543701568, "grad_norm": 0.27994829416275024, "learning_rate": 2.821890104302022e-07, "loss": 0.3468, "step": 8351 }, { "epoch": 4.515408181654352, "grad_norm": 0.2658286988735199, "learning_rate": 2.815640577304668e-07, "loss": 0.3529, "step": 8352 }, { "epoch": 4.515948819607137, "grad_norm": 0.2623860239982605, "learning_rate": 2.8093977777677195e-07, "loss": 0.3604, "step": 8353 }, { "epoch": 4.516489457559921, "grad_norm": 0.2674957513809204, "learning_rate": 2.803161706581281e-07, "loss": 0.3431, "step": 8354 }, { "epoch": 4.517030095512705, "grad_norm": 0.27279454469680786, "learning_rate": 2.796932364634475e-07, "loss": 0.3777, "step": 8355 }, { "epoch": 4.517570733465489, "grad_norm": 0.24342961609363556, "learning_rate": 2.790709752815457e-07, "loss": 0.3348, "step": 8356 }, { "epoch": 4.518111371418273, "grad_norm": 0.2706700563430786, "learning_rate": 2.7844938720114566e-07, "loss": 0.3639, "step": 8357 }, { "epoch": 4.518652009371058, "grad_norm": 0.27748116850852966, "learning_rate": 2.7782847231087095e-07, "loss": 0.3534, "step": 8358 }, { "epoch": 4.519192647323842, "grad_norm": 0.2530544698238373, "learning_rate": 2.772082306992513e-07, "loss": 0.357, "step": 8359 }, { "epoch": 4.519733285276627, "grad_norm": 0.2769469618797302, "learning_rate": 2.765886624547182e-07, "loss": 0.4058, "step": 8360 }, { "epoch": 4.520273923229411, "grad_norm": 0.277963250875473, "learning_rate": 2.7596976766560977e-07, "loss": 0.359, "step": 8361 }, { "epoch": 4.520814561182195, "grad_norm": 0.2735162675380707, "learning_rate": 2.753515464201678e-07, "loss": 0.3487, "step": 8362 }, { "epoch": 4.521355199134979, "grad_norm": 0.26233893632888794, "learning_rate": 2.747339988065345e-07, "loss": 0.3617, "step": 8363 }, { "epoch": 4.521895837087763, "grad_norm": 0.2705513536930084, "learning_rate": 2.741171249127611e-07, "loss": 0.3567, "step": 8364 }, { "epoch": 4.522436475040548, "grad_norm": 0.24441170692443848, "learning_rate": 2.7350092482679836e-07, "loss": 0.3372, "step": 8365 }, { "epoch": 4.522977112993332, "grad_norm": 0.2987588942050934, "learning_rate": 2.7288539863650544e-07, "loss": 0.3831, "step": 8366 }, { "epoch": 4.523517750946116, "grad_norm": 0.2871190309524536, "learning_rate": 2.72270546429641e-07, "loss": 0.3495, "step": 8367 }, { "epoch": 4.524058388898901, "grad_norm": 0.2715509533882141, "learning_rate": 2.7165636829387e-07, "loss": 0.3692, "step": 8368 }, { "epoch": 4.524599026851685, "grad_norm": 0.26629409193992615, "learning_rate": 2.710428643167612e-07, "loss": 0.3899, "step": 8369 }, { "epoch": 4.5251396648044695, "grad_norm": 0.2310323566198349, "learning_rate": 2.7043003458578685e-07, "loss": 0.309, "step": 8370 }, { "epoch": 4.525680302757253, "grad_norm": 0.2734542489051819, "learning_rate": 2.6981787918832216e-07, "loss": 0.3796, "step": 8371 }, { "epoch": 4.526220940710038, "grad_norm": 0.2826656103134155, "learning_rate": 2.6920639821164883e-07, "loss": 0.3614, "step": 8372 }, { "epoch": 4.526761578662822, "grad_norm": 0.2604270577430725, "learning_rate": 2.685955917429489e-07, "loss": 0.3586, "step": 8373 }, { "epoch": 4.527302216615606, "grad_norm": 0.255300909280777, "learning_rate": 2.6798545986931214e-07, "loss": 0.3541, "step": 8374 }, { "epoch": 4.527842854568391, "grad_norm": 0.273477703332901, "learning_rate": 2.673760026777272e-07, "loss": 0.3721, "step": 8375 }, { "epoch": 4.528383492521175, "grad_norm": 0.2807256281375885, "learning_rate": 2.667672202550925e-07, "loss": 0.4015, "step": 8376 }, { "epoch": 4.52892413047396, "grad_norm": 0.2528318464756012, "learning_rate": 2.661591126882046e-07, "loss": 0.3233, "step": 8377 }, { "epoch": 4.5294647684267435, "grad_norm": 0.2718980312347412, "learning_rate": 2.6555168006376755e-07, "loss": 0.3847, "step": 8378 }, { "epoch": 4.530005406379528, "grad_norm": 0.26820477843284607, "learning_rate": 2.6494492246838863e-07, "loss": 0.3554, "step": 8379 }, { "epoch": 4.530546044332312, "grad_norm": 0.25228914618492126, "learning_rate": 2.6433883998857657e-07, "loss": 0.3264, "step": 8380 }, { "epoch": 4.531086682285096, "grad_norm": 0.308725506067276, "learning_rate": 2.6373343271074657e-07, "loss": 0.4118, "step": 8381 }, { "epoch": 4.531627320237881, "grad_norm": 0.2523513436317444, "learning_rate": 2.631287007212169e-07, "loss": 0.3539, "step": 8382 }, { "epoch": 4.532167958190665, "grad_norm": 0.2601075768470764, "learning_rate": 2.6252464410620793e-07, "loss": 0.3166, "step": 8383 }, { "epoch": 4.532708596143449, "grad_norm": 0.2990708351135254, "learning_rate": 2.6192126295184584e-07, "loss": 0.3905, "step": 8384 }, { "epoch": 4.533249234096234, "grad_norm": 0.2582680881023407, "learning_rate": 2.61318557344159e-07, "loss": 0.3348, "step": 8385 }, { "epoch": 4.5337898720490175, "grad_norm": 0.25455453991889954, "learning_rate": 2.6071652736908203e-07, "loss": 0.3461, "step": 8386 }, { "epoch": 4.534330510001802, "grad_norm": 0.272830069065094, "learning_rate": 2.601151731124485e-07, "loss": 0.3809, "step": 8387 }, { "epoch": 4.534871147954586, "grad_norm": 0.282148540019989, "learning_rate": 2.5951449465999925e-07, "loss": 0.3666, "step": 8388 }, { "epoch": 4.535411785907371, "grad_norm": 0.26278114318847656, "learning_rate": 2.5891449209737906e-07, "loss": 0.3652, "step": 8389 }, { "epoch": 4.535952423860155, "grad_norm": 0.2743205726146698, "learning_rate": 2.5831516551013405e-07, "loss": 0.355, "step": 8390 }, { "epoch": 4.536493061812939, "grad_norm": 0.2829950749874115, "learning_rate": 2.577165149837163e-07, "loss": 0.41, "step": 8391 }, { "epoch": 4.537033699765724, "grad_norm": 0.2564539313316345, "learning_rate": 2.5711854060347817e-07, "loss": 0.3283, "step": 8392 }, { "epoch": 4.537574337718508, "grad_norm": 0.2629248797893524, "learning_rate": 2.5652124245468033e-07, "loss": 0.3654, "step": 8393 }, { "epoch": 4.538114975671292, "grad_norm": 0.2670019865036011, "learning_rate": 2.5592462062248304e-07, "loss": 0.3524, "step": 8394 }, { "epoch": 4.538655613624076, "grad_norm": 0.27533864974975586, "learning_rate": 2.553286751919509e-07, "loss": 0.3658, "step": 8395 }, { "epoch": 4.539196251576861, "grad_norm": 0.25868937373161316, "learning_rate": 2.547334062480544e-07, "loss": 0.3483, "step": 8396 }, { "epoch": 4.539736889529645, "grad_norm": 0.27601099014282227, "learning_rate": 2.541388138756645e-07, "loss": 0.3938, "step": 8397 }, { "epoch": 4.540277527482429, "grad_norm": 0.26688718795776367, "learning_rate": 2.535448981595595e-07, "loss": 0.3755, "step": 8398 }, { "epoch": 4.540818165435214, "grad_norm": 0.27354729175567627, "learning_rate": 2.52951659184415e-07, "loss": 0.3688, "step": 8399 }, { "epoch": 4.541358803387998, "grad_norm": 0.27443838119506836, "learning_rate": 2.523590970348166e-07, "loss": 0.3485, "step": 8400 }, { "epoch": 4.5418994413407825, "grad_norm": 0.26458561420440674, "learning_rate": 2.517672117952502e-07, "loss": 0.3582, "step": 8401 }, { "epoch": 4.542440079293566, "grad_norm": 0.2567475140094757, "learning_rate": 2.511760035501054e-07, "loss": 0.3792, "step": 8402 }, { "epoch": 4.54298071724635, "grad_norm": 0.259311705827713, "learning_rate": 2.5058547238367703e-07, "loss": 0.3247, "step": 8403 }, { "epoch": 4.543521355199135, "grad_norm": 0.2569766342639923, "learning_rate": 2.4999561838015996e-07, "loss": 0.3825, "step": 8404 }, { "epoch": 4.544061993151919, "grad_norm": 0.2629188895225525, "learning_rate": 2.4940644162365523e-07, "loss": 0.3596, "step": 8405 }, { "epoch": 4.544602631104704, "grad_norm": 0.2798813581466675, "learning_rate": 2.4881794219816624e-07, "loss": 0.36, "step": 8406 }, { "epoch": 4.545143269057488, "grad_norm": 0.2585632801055908, "learning_rate": 2.482301201876014e-07, "loss": 0.3513, "step": 8407 }, { "epoch": 4.545683907010272, "grad_norm": 0.26448342204093933, "learning_rate": 2.4764297567577035e-07, "loss": 0.3891, "step": 8408 }, { "epoch": 4.5462245449630565, "grad_norm": 0.2770906686782837, "learning_rate": 2.4705650874638667e-07, "loss": 0.3563, "step": 8409 }, { "epoch": 4.54676518291584, "grad_norm": 0.2554166913032532, "learning_rate": 2.46470719483069e-07, "loss": 0.3375, "step": 8410 }, { "epoch": 4.547305820868625, "grad_norm": 0.23727665841579437, "learning_rate": 2.458856079693378e-07, "loss": 0.3559, "step": 8411 }, { "epoch": 4.547846458821409, "grad_norm": 0.25934919714927673, "learning_rate": 2.4530117428861576e-07, "loss": 0.3787, "step": 8412 }, { "epoch": 4.548387096774194, "grad_norm": 0.2681872844696045, "learning_rate": 2.447174185242324e-07, "loss": 0.3744, "step": 8413 }, { "epoch": 4.548927734726978, "grad_norm": 0.26082801818847656, "learning_rate": 2.4413434075941657e-07, "loss": 0.336, "step": 8414 }, { "epoch": 4.549468372679762, "grad_norm": 0.2786238193511963, "learning_rate": 2.435519410773052e-07, "loss": 0.3871, "step": 8415 }, { "epoch": 4.550009010632547, "grad_norm": 0.281931608915329, "learning_rate": 2.429702195609329e-07, "loss": 0.404, "step": 8416 }, { "epoch": 4.5505496485853305, "grad_norm": 0.2451360821723938, "learning_rate": 2.4238917629324124e-07, "loss": 0.329, "step": 8417 }, { "epoch": 4.551090286538115, "grad_norm": 0.27641189098358154, "learning_rate": 2.4180881135707547e-07, "loss": 0.4183, "step": 8418 }, { "epoch": 4.551630924490899, "grad_norm": 0.24302369356155396, "learning_rate": 2.4122912483518226e-07, "loss": 0.3248, "step": 8419 }, { "epoch": 4.552171562443684, "grad_norm": 0.2982977628707886, "learning_rate": 2.4065011681021266e-07, "loss": 0.3694, "step": 8420 }, { "epoch": 4.552712200396468, "grad_norm": 0.25425615906715393, "learning_rate": 2.40071787364719e-07, "loss": 0.3448, "step": 8421 }, { "epoch": 4.553252838349252, "grad_norm": 0.2629137933254242, "learning_rate": 2.3949413658116037e-07, "loss": 0.3513, "step": 8422 }, { "epoch": 4.553793476302037, "grad_norm": 0.2689550817012787, "learning_rate": 2.389171645418964e-07, "loss": 0.381, "step": 8423 }, { "epoch": 4.554334114254821, "grad_norm": 0.282644659280777, "learning_rate": 2.3834087132919016e-07, "loss": 0.4088, "step": 8424 }, { "epoch": 4.5548747522076045, "grad_norm": 0.2677907645702362, "learning_rate": 2.3776525702520925e-07, "loss": 0.3203, "step": 8425 }, { "epoch": 4.555415390160389, "grad_norm": 0.2817721962928772, "learning_rate": 2.3719032171202362e-07, "loss": 0.3726, "step": 8426 }, { "epoch": 4.555956028113173, "grad_norm": 0.2728714346885681, "learning_rate": 2.3661606547160653e-07, "loss": 0.3582, "step": 8427 }, { "epoch": 4.556496666065958, "grad_norm": 0.2762387692928314, "learning_rate": 2.3604248838583421e-07, "loss": 0.3635, "step": 8428 }, { "epoch": 4.557037304018742, "grad_norm": 0.26854419708251953, "learning_rate": 2.3546959053648565e-07, "loss": 0.3672, "step": 8429 }, { "epoch": 4.557577941971527, "grad_norm": 0.25551918148994446, "learning_rate": 2.3489737200524498e-07, "loss": 0.3367, "step": 8430 }, { "epoch": 4.558118579924311, "grad_norm": 0.27728936076164246, "learning_rate": 2.3432583287369747e-07, "loss": 0.3621, "step": 8431 }, { "epoch": 4.558659217877095, "grad_norm": 0.26189514994621277, "learning_rate": 2.3375497322333186e-07, "loss": 0.329, "step": 8432 }, { "epoch": 4.559199855829879, "grad_norm": 0.27320173382759094, "learning_rate": 2.3318479313554022e-07, "loss": 0.3576, "step": 8433 }, { "epoch": 4.559740493782663, "grad_norm": 0.26311782002449036, "learning_rate": 2.326152926916181e-07, "loss": 0.3691, "step": 8434 }, { "epoch": 4.560281131735448, "grad_norm": 0.25582045316696167, "learning_rate": 2.3204647197276387e-07, "loss": 0.3302, "step": 8435 }, { "epoch": 4.560821769688232, "grad_norm": 0.26089537143707275, "learning_rate": 2.3147833106007823e-07, "loss": 0.346, "step": 8436 }, { "epoch": 4.561362407641017, "grad_norm": 0.27061784267425537, "learning_rate": 2.309108700345669e-07, "loss": 0.4184, "step": 8437 }, { "epoch": 4.561903045593801, "grad_norm": 0.27126196026802063, "learning_rate": 2.303440889771358e-07, "loss": 0.3583, "step": 8438 }, { "epoch": 4.562443683546585, "grad_norm": 0.2695308029651642, "learning_rate": 2.2977798796859796e-07, "loss": 0.3792, "step": 8439 }, { "epoch": 4.5629843214993695, "grad_norm": 0.2534771263599396, "learning_rate": 2.29212567089665e-07, "loss": 0.3195, "step": 8440 }, { "epoch": 4.563524959452153, "grad_norm": 0.2534823715686798, "learning_rate": 2.2864782642095407e-07, "loss": 0.3346, "step": 8441 }, { "epoch": 4.564065597404937, "grad_norm": 0.28253212571144104, "learning_rate": 2.2808376604298522e-07, "loss": 0.3789, "step": 8442 }, { "epoch": 4.564606235357722, "grad_norm": 0.2741342782974243, "learning_rate": 2.2752038603618132e-07, "loss": 0.3627, "step": 8443 }, { "epoch": 4.565146873310506, "grad_norm": 0.2669365406036377, "learning_rate": 2.2695768648086758e-07, "loss": 0.3652, "step": 8444 }, { "epoch": 4.565687511263291, "grad_norm": 0.27536723017692566, "learning_rate": 2.2639566745727203e-07, "loss": 0.3726, "step": 8445 }, { "epoch": 4.566228149216075, "grad_norm": 0.2741495668888092, "learning_rate": 2.2583432904552726e-07, "loss": 0.3858, "step": 8446 }, { "epoch": 4.56676878716886, "grad_norm": 0.2663235366344452, "learning_rate": 2.2527367132566925e-07, "loss": 0.3532, "step": 8447 }, { "epoch": 4.5673094251216435, "grad_norm": 0.2848953604698181, "learning_rate": 2.2471369437763234e-07, "loss": 0.3713, "step": 8448 }, { "epoch": 4.567850063074427, "grad_norm": 0.2695309519767761, "learning_rate": 2.241543982812594e-07, "loss": 0.3672, "step": 8449 }, { "epoch": 4.568390701027212, "grad_norm": 0.26727294921875, "learning_rate": 2.2359578311629272e-07, "loss": 0.3472, "step": 8450 }, { "epoch": 4.568931338979996, "grad_norm": 0.26325517892837524, "learning_rate": 2.2303784896238022e-07, "loss": 0.3649, "step": 8451 }, { "epoch": 4.569471976932781, "grad_norm": 0.26693716645240784, "learning_rate": 2.2248059589906944e-07, "loss": 0.4039, "step": 8452 }, { "epoch": 4.570012614885565, "grad_norm": 0.2606445848941803, "learning_rate": 2.2192402400581237e-07, "loss": 0.3561, "step": 8453 }, { "epoch": 4.57055325283835, "grad_norm": 0.24945513904094696, "learning_rate": 2.2136813336196606e-07, "loss": 0.3808, "step": 8454 }, { "epoch": 4.571093890791134, "grad_norm": 0.25371333956718445, "learning_rate": 2.2081292404678655e-07, "loss": 0.3405, "step": 8455 }, { "epoch": 4.5716345287439175, "grad_norm": 0.2556355893611908, "learning_rate": 2.2025839613943445e-07, "loss": 0.3594, "step": 8456 }, { "epoch": 4.572175166696702, "grad_norm": 0.27583563327789307, "learning_rate": 2.1970454971897483e-07, "loss": 0.396, "step": 8457 }, { "epoch": 4.572715804649486, "grad_norm": 0.24128134548664093, "learning_rate": 2.1915138486437281e-07, "loss": 0.3408, "step": 8458 }, { "epoch": 4.573256442602271, "grad_norm": 0.278267502784729, "learning_rate": 2.1859890165449926e-07, "loss": 0.3846, "step": 8459 }, { "epoch": 4.573797080555055, "grad_norm": 0.2626984715461731, "learning_rate": 2.1804710016812337e-07, "loss": 0.3731, "step": 8460 }, { "epoch": 4.574337718507839, "grad_norm": 0.25800734758377075, "learning_rate": 2.1749598048392272e-07, "loss": 0.336, "step": 8461 }, { "epoch": 4.574878356460624, "grad_norm": 0.2768734097480774, "learning_rate": 2.1694554268047342e-07, "loss": 0.3683, "step": 8462 }, { "epoch": 4.575418994413408, "grad_norm": 0.26480376720428467, "learning_rate": 2.1639578683625707e-07, "loss": 0.3771, "step": 8463 }, { "epoch": 4.575959632366192, "grad_norm": 0.3137277066707611, "learning_rate": 2.1584671302965598e-07, "loss": 0.339, "step": 8464 }, { "epoch": 4.576500270318976, "grad_norm": 0.2833629548549652, "learning_rate": 2.152983213389559e-07, "loss": 0.3893, "step": 8465 }, { "epoch": 4.57704090827176, "grad_norm": 0.27549314498901367, "learning_rate": 2.1475061184234648e-07, "loss": 0.3544, "step": 8466 }, { "epoch": 4.577581546224545, "grad_norm": 0.27217674255371094, "learning_rate": 2.1420358461791745e-07, "loss": 0.3616, "step": 8467 }, { "epoch": 4.578122184177329, "grad_norm": 0.28430917859077454, "learning_rate": 2.1365723974366537e-07, "loss": 0.3506, "step": 8468 }, { "epoch": 4.578662822130114, "grad_norm": 0.2661433219909668, "learning_rate": 2.1311157729748566e-07, "loss": 0.3662, "step": 8469 }, { "epoch": 4.579203460082898, "grad_norm": 0.260281503200531, "learning_rate": 2.1256659735717777e-07, "loss": 0.347, "step": 8470 }, { "epoch": 4.5797440980356825, "grad_norm": 0.2790489196777344, "learning_rate": 2.1202230000044455e-07, "loss": 0.3983, "step": 8471 }, { "epoch": 4.580284735988466, "grad_norm": 0.24340921640396118, "learning_rate": 2.1147868530489113e-07, "loss": 0.3427, "step": 8472 }, { "epoch": 4.58082537394125, "grad_norm": 0.2590758502483368, "learning_rate": 2.1093575334802385e-07, "loss": 0.3462, "step": 8473 }, { "epoch": 4.581366011894035, "grad_norm": 0.2715151309967041, "learning_rate": 2.1039350420725358e-07, "loss": 0.3955, "step": 8474 }, { "epoch": 4.581906649846819, "grad_norm": 0.2760257422924042, "learning_rate": 2.0985193795989345e-07, "loss": 0.3629, "step": 8475 }, { "epoch": 4.582447287799604, "grad_norm": 0.26709234714508057, "learning_rate": 2.0931105468316005e-07, "loss": 0.3857, "step": 8476 }, { "epoch": 4.582987925752388, "grad_norm": 0.24396254122257233, "learning_rate": 2.0877085445416889e-07, "loss": 0.3469, "step": 8477 }, { "epoch": 4.583528563705173, "grad_norm": 0.262498140335083, "learning_rate": 2.082313373499434e-07, "loss": 0.3802, "step": 8478 }, { "epoch": 4.5840692016579565, "grad_norm": 0.28324538469314575, "learning_rate": 2.0769250344740476e-07, "loss": 0.3766, "step": 8479 }, { "epoch": 4.58460983961074, "grad_norm": 0.29584234952926636, "learning_rate": 2.071543528233805e-07, "loss": 0.3435, "step": 8480 }, { "epoch": 4.585150477563525, "grad_norm": 0.2602497935295105, "learning_rate": 2.0661688555459915e-07, "loss": 0.3223, "step": 8481 }, { "epoch": 4.585691115516309, "grad_norm": 0.28299152851104736, "learning_rate": 2.0608010171768998e-07, "loss": 0.3996, "step": 8482 }, { "epoch": 4.586231753469093, "grad_norm": 0.2951735854148865, "learning_rate": 2.0554400138918907e-07, "loss": 0.3568, "step": 8483 }, { "epoch": 4.586772391421878, "grad_norm": 0.26150617003440857, "learning_rate": 2.0500858464553186e-07, "loss": 0.3524, "step": 8484 }, { "epoch": 4.587313029374662, "grad_norm": 2.3454174995422363, "learning_rate": 2.0447385156305565e-07, "loss": 0.3718, "step": 8485 }, { "epoch": 4.587853667327447, "grad_norm": 0.27141591906547546, "learning_rate": 2.0393980221800337e-07, "loss": 0.3694, "step": 8486 }, { "epoch": 4.5883943052802305, "grad_norm": 0.2717527449131012, "learning_rate": 2.0340643668651794e-07, "loss": 0.3807, "step": 8487 }, { "epoch": 4.588934943233015, "grad_norm": 0.2657257616519928, "learning_rate": 2.0287375504464746e-07, "loss": 0.3714, "step": 8488 }, { "epoch": 4.589475581185799, "grad_norm": 0.2644079923629761, "learning_rate": 2.0234175736833727e-07, "loss": 0.3854, "step": 8489 }, { "epoch": 4.590016219138583, "grad_norm": 0.2563624978065491, "learning_rate": 2.0181044373344172e-07, "loss": 0.3454, "step": 8490 }, { "epoch": 4.590556857091368, "grad_norm": 0.26360106468200684, "learning_rate": 2.0127981421571295e-07, "loss": 0.3908, "step": 8491 }, { "epoch": 4.591097495044152, "grad_norm": 0.243517205119133, "learning_rate": 2.0074986889080826e-07, "loss": 0.3355, "step": 8492 }, { "epoch": 4.591638132996937, "grad_norm": 0.2634882628917694, "learning_rate": 2.0022060783428553e-07, "loss": 0.3639, "step": 8493 }, { "epoch": 4.592178770949721, "grad_norm": 0.2720161974430084, "learning_rate": 1.9969203112160497e-07, "loss": 0.3923, "step": 8494 }, { "epoch": 4.592719408902505, "grad_norm": 0.2561497688293457, "learning_rate": 1.9916413882813235e-07, "loss": 0.3674, "step": 8495 }, { "epoch": 4.593260046855289, "grad_norm": 0.2585059106349945, "learning_rate": 1.9863693102913195e-07, "loss": 0.3531, "step": 8496 }, { "epoch": 4.593800684808073, "grad_norm": 0.2782512903213501, "learning_rate": 1.9811040779977196e-07, "loss": 0.3882, "step": 8497 }, { "epoch": 4.594341322760858, "grad_norm": 0.25891584157943726, "learning_rate": 1.9758456921512403e-07, "loss": 0.3651, "step": 8498 }, { "epoch": 4.594881960713642, "grad_norm": 0.2681910991668701, "learning_rate": 1.9705941535016092e-07, "loss": 0.3663, "step": 8499 }, { "epoch": 4.595422598666426, "grad_norm": 0.25091880559921265, "learning_rate": 1.9653494627975888e-07, "loss": 0.3547, "step": 8500 }, { "epoch": 4.595963236619211, "grad_norm": 0.24863027036190033, "learning_rate": 1.9601116207869365e-07, "loss": 0.3442, "step": 8501 }, { "epoch": 4.596503874571995, "grad_norm": 0.2696053385734558, "learning_rate": 1.9548806282164768e-07, "loss": 0.3943, "step": 8502 }, { "epoch": 4.597044512524779, "grad_norm": 0.25909149646759033, "learning_rate": 1.9496564858320187e-07, "loss": 0.3766, "step": 8503 }, { "epoch": 4.597585150477563, "grad_norm": 0.25561127066612244, "learning_rate": 1.9444391943784225e-07, "loss": 0.3593, "step": 8504 }, { "epoch": 4.598125788430348, "grad_norm": 0.2775280177593231, "learning_rate": 1.9392287545995536e-07, "loss": 0.3556, "step": 8505 }, { "epoch": 4.598666426383132, "grad_norm": 0.27647945284843445, "learning_rate": 1.934025167238307e-07, "loss": 0.3747, "step": 8506 }, { "epoch": 4.599207064335916, "grad_norm": 0.26714831590652466, "learning_rate": 1.9288284330366113e-07, "loss": 0.3921, "step": 8507 }, { "epoch": 4.599747702288701, "grad_norm": 0.2643846273422241, "learning_rate": 1.9236385527353906e-07, "loss": 0.356, "step": 8508 }, { "epoch": 4.600288340241485, "grad_norm": 0.25512391328811646, "learning_rate": 1.9184555270746198e-07, "loss": 0.3306, "step": 8509 }, { "epoch": 4.6008289781942695, "grad_norm": 0.26640191674232483, "learning_rate": 1.913279356793285e-07, "loss": 0.3658, "step": 8510 }, { "epoch": 4.601369616147053, "grad_norm": 0.2815695106983185, "learning_rate": 1.9081100426293852e-07, "loss": 0.3834, "step": 8511 }, { "epoch": 4.601910254099838, "grad_norm": 0.27226167917251587, "learning_rate": 1.9029475853199754e-07, "loss": 0.3463, "step": 8512 }, { "epoch": 4.602450892052622, "grad_norm": 0.2682137191295624, "learning_rate": 1.8977919856010773e-07, "loss": 0.3779, "step": 8513 }, { "epoch": 4.602991530005406, "grad_norm": 0.26758265495300293, "learning_rate": 1.8926432442077868e-07, "loss": 0.3668, "step": 8514 }, { "epoch": 4.603532167958191, "grad_norm": 0.2831733226776123, "learning_rate": 1.8875013618742e-07, "loss": 0.4153, "step": 8515 }, { "epoch": 4.604072805910975, "grad_norm": 0.26426440477371216, "learning_rate": 1.8823663393334358e-07, "loss": 0.3115, "step": 8516 }, { "epoch": 4.60461344386376, "grad_norm": 0.27677780389785767, "learning_rate": 1.8772381773176417e-07, "loss": 0.3663, "step": 8517 }, { "epoch": 4.6051540818165435, "grad_norm": 0.2731216549873352, "learning_rate": 1.8721168765579668e-07, "loss": 0.3564, "step": 8518 }, { "epoch": 4.605694719769327, "grad_norm": 0.26677405834198, "learning_rate": 1.8670024377846098e-07, "loss": 0.3406, "step": 8519 }, { "epoch": 4.606235357722112, "grad_norm": 0.2840011417865753, "learning_rate": 1.8618948617267764e-07, "loss": 0.3752, "step": 8520 }, { "epoch": 4.606775995674896, "grad_norm": 0.2730851471424103, "learning_rate": 1.8567941491126896e-07, "loss": 0.3844, "step": 8521 }, { "epoch": 4.607316633627681, "grad_norm": 0.25464585423469543, "learning_rate": 1.8517003006696067e-07, "loss": 0.3598, "step": 8522 }, { "epoch": 4.607857271580465, "grad_norm": 0.2869928181171417, "learning_rate": 1.846613317123791e-07, "loss": 0.4345, "step": 8523 }, { "epoch": 4.608397909533249, "grad_norm": 0.26284706592559814, "learning_rate": 1.8415331992005514e-07, "loss": 0.3499, "step": 8524 }, { "epoch": 4.608938547486034, "grad_norm": 0.27476251125335693, "learning_rate": 1.8364599476241862e-07, "loss": 0.3402, "step": 8525 }, { "epoch": 4.6094791854388175, "grad_norm": 0.27824050188064575, "learning_rate": 1.8313935631180334e-07, "loss": 0.3668, "step": 8526 }, { "epoch": 4.610019823391602, "grad_norm": 0.2557879090309143, "learning_rate": 1.8263340464044542e-07, "loss": 0.3522, "step": 8527 }, { "epoch": 4.610560461344386, "grad_norm": 0.28685206174850464, "learning_rate": 1.8212813982048217e-07, "loss": 0.3824, "step": 8528 }, { "epoch": 4.611101099297171, "grad_norm": 0.2578665018081665, "learning_rate": 1.8162356192395368e-07, "loss": 0.3158, "step": 8529 }, { "epoch": 4.611641737249955, "grad_norm": 0.25863519310951233, "learning_rate": 1.8111967102280082e-07, "loss": 0.3714, "step": 8530 }, { "epoch": 4.612182375202739, "grad_norm": 0.2879449725151062, "learning_rate": 1.8061646718886882e-07, "loss": 0.3867, "step": 8531 }, { "epoch": 4.612723013155524, "grad_norm": 0.26990994811058044, "learning_rate": 1.8011395049390202e-07, "loss": 0.3338, "step": 8532 }, { "epoch": 4.613263651108308, "grad_norm": 0.26068446040153503, "learning_rate": 1.796121210095497e-07, "loss": 0.3387, "step": 8533 }, { "epoch": 4.613804289061092, "grad_norm": 0.2538585066795349, "learning_rate": 1.7911097880736083e-07, "loss": 0.3633, "step": 8534 }, { "epoch": 4.614344927013876, "grad_norm": 0.26799604296684265, "learning_rate": 1.7861052395878764e-07, "loss": 0.3882, "step": 8535 }, { "epoch": 4.614885564966661, "grad_norm": 0.24625979363918304, "learning_rate": 1.781107565351853e-07, "loss": 0.3487, "step": 8536 }, { "epoch": 4.615426202919445, "grad_norm": 0.25957322120666504, "learning_rate": 1.7761167660780787e-07, "loss": 0.3542, "step": 8537 }, { "epoch": 4.615966840872229, "grad_norm": 0.26084718108177185, "learning_rate": 1.771132842478135e-07, "loss": 0.3694, "step": 8538 }, { "epoch": 4.616507478825014, "grad_norm": 0.25296398997306824, "learning_rate": 1.7661557952626361e-07, "loss": 0.3508, "step": 8539 }, { "epoch": 4.617048116777798, "grad_norm": 0.26038071513175964, "learning_rate": 1.7611856251411818e-07, "loss": 0.3655, "step": 8540 }, { "epoch": 4.617588754730582, "grad_norm": 0.281929075717926, "learning_rate": 1.7562223328224327e-07, "loss": 0.3744, "step": 8541 }, { "epoch": 4.618129392683366, "grad_norm": 0.2631310224533081, "learning_rate": 1.751265919014017e-07, "loss": 0.3511, "step": 8542 }, { "epoch": 4.61867003063615, "grad_norm": 0.2645679712295532, "learning_rate": 1.7463163844226304e-07, "loss": 0.3575, "step": 8543 }, { "epoch": 4.619210668588935, "grad_norm": 0.2860918343067169, "learning_rate": 1.7413737297539647e-07, "loss": 0.3759, "step": 8544 }, { "epoch": 4.619751306541719, "grad_norm": 0.2724156975746155, "learning_rate": 1.7364379557127387e-07, "loss": 0.3489, "step": 8545 }, { "epoch": 4.620291944494504, "grad_norm": 0.28222528100013733, "learning_rate": 1.7315090630026788e-07, "loss": 0.3622, "step": 8546 }, { "epoch": 4.620832582447288, "grad_norm": 0.25786492228507996, "learning_rate": 1.7265870523265393e-07, "loss": 0.3657, "step": 8547 }, { "epoch": 4.621373220400072, "grad_norm": 0.24484671652317047, "learning_rate": 1.7216719243860924e-07, "loss": 0.3474, "step": 8548 }, { "epoch": 4.6219138583528565, "grad_norm": 0.255718469619751, "learning_rate": 1.716763679882133e-07, "loss": 0.3242, "step": 8549 }, { "epoch": 4.62245449630564, "grad_norm": 0.2624998390674591, "learning_rate": 1.711862319514457e-07, "loss": 0.3797, "step": 8550 }, { "epoch": 4.622995134258425, "grad_norm": 0.2688087821006775, "learning_rate": 1.7069678439819047e-07, "loss": 0.3833, "step": 8551 }, { "epoch": 4.623535772211209, "grad_norm": 0.25261446833610535, "learning_rate": 1.702080253982308e-07, "loss": 0.3768, "step": 8552 }, { "epoch": 4.624076410163994, "grad_norm": 0.25700151920318604, "learning_rate": 1.697199550212547e-07, "loss": 0.3668, "step": 8553 }, { "epoch": 4.624617048116778, "grad_norm": 0.2585085332393646, "learning_rate": 1.6923257333684995e-07, "loss": 0.3557, "step": 8554 }, { "epoch": 4.625157686069562, "grad_norm": 0.2769460380077362, "learning_rate": 1.6874588041450535e-07, "loss": 0.3694, "step": 8555 }, { "epoch": 4.625698324022347, "grad_norm": 0.27887338399887085, "learning_rate": 1.6825987632361373e-07, "loss": 0.3733, "step": 8556 }, { "epoch": 4.6262389619751305, "grad_norm": 0.2611374855041504, "learning_rate": 1.6777456113346857e-07, "loss": 0.3301, "step": 8557 }, { "epoch": 4.626779599927915, "grad_norm": 0.266277939081192, "learning_rate": 1.672899349132656e-07, "loss": 0.3639, "step": 8558 }, { "epoch": 4.627320237880699, "grad_norm": 0.257068932056427, "learning_rate": 1.6680599773210017e-07, "loss": 0.3363, "step": 8559 }, { "epoch": 4.627860875833483, "grad_norm": 0.2819574177265167, "learning_rate": 1.6632274965897365e-07, "loss": 0.4051, "step": 8560 }, { "epoch": 4.628401513786268, "grad_norm": 0.25879088044166565, "learning_rate": 1.6584019076278492e-07, "loss": 0.3298, "step": 8561 }, { "epoch": 4.628942151739052, "grad_norm": 0.2662845849990845, "learning_rate": 1.6535832111233662e-07, "loss": 0.3971, "step": 8562 }, { "epoch": 4.629482789691837, "grad_norm": 0.26270678639411926, "learning_rate": 1.6487714077633387e-07, "loss": 0.3357, "step": 8563 }, { "epoch": 4.630023427644621, "grad_norm": 0.2785986661911011, "learning_rate": 1.643966498233812e-07, "loss": 0.3668, "step": 8564 }, { "epoch": 4.6305640655974045, "grad_norm": 0.2605617940425873, "learning_rate": 1.639168483219872e-07, "loss": 0.3591, "step": 8565 }, { "epoch": 4.631104703550189, "grad_norm": 0.2554558515548706, "learning_rate": 1.6343773634056038e-07, "loss": 0.3616, "step": 8566 }, { "epoch": 4.631645341502973, "grad_norm": 0.2634442150592804, "learning_rate": 1.6295931394741116e-07, "loss": 0.3675, "step": 8567 }, { "epoch": 4.632185979455758, "grad_norm": 0.2761819362640381, "learning_rate": 1.6248158121075387e-07, "loss": 0.3621, "step": 8568 }, { "epoch": 4.632726617408542, "grad_norm": 0.2668101191520691, "learning_rate": 1.6200453819870122e-07, "loss": 0.346, "step": 8569 }, { "epoch": 4.633267255361327, "grad_norm": 0.28411874175071716, "learning_rate": 1.6152818497926993e-07, "loss": 0.3801, "step": 8570 }, { "epoch": 4.633807893314111, "grad_norm": 0.27118346095085144, "learning_rate": 1.6105252162037677e-07, "loss": 0.3863, "step": 8571 }, { "epoch": 4.634348531266895, "grad_norm": 0.26166531443595886, "learning_rate": 1.6057754818984195e-07, "loss": 0.3405, "step": 8572 }, { "epoch": 4.634889169219679, "grad_norm": 0.283855676651001, "learning_rate": 1.6010326475538628e-07, "loss": 0.342, "step": 8573 }, { "epoch": 4.635429807172463, "grad_norm": 0.27583634853363037, "learning_rate": 1.5962967138463126e-07, "loss": 0.3663, "step": 8574 }, { "epoch": 4.635970445125248, "grad_norm": 0.2751389145851135, "learning_rate": 1.5915676814510173e-07, "loss": 0.3658, "step": 8575 }, { "epoch": 4.636511083078032, "grad_norm": 0.26085153222084045, "learning_rate": 1.5868455510422266e-07, "loss": 0.3592, "step": 8576 }, { "epoch": 4.637051721030816, "grad_norm": 0.25824809074401855, "learning_rate": 1.5821303232932239e-07, "loss": 0.3468, "step": 8577 }, { "epoch": 4.637592358983601, "grad_norm": 0.2741011083126068, "learning_rate": 1.577421998876294e-07, "loss": 0.3728, "step": 8578 }, { "epoch": 4.638132996936385, "grad_norm": 0.2605046033859253, "learning_rate": 1.5727205784627388e-07, "loss": 0.3647, "step": 8579 }, { "epoch": 4.6386736348891695, "grad_norm": 0.2632634937763214, "learning_rate": 1.5680260627228772e-07, "loss": 0.3515, "step": 8580 }, { "epoch": 4.639214272841953, "grad_norm": 0.2835623621940613, "learning_rate": 1.563338452326052e-07, "loss": 0.3799, "step": 8581 }, { "epoch": 4.639754910794737, "grad_norm": 0.2817102074623108, "learning_rate": 1.5586577479406006e-07, "loss": 0.342, "step": 8582 }, { "epoch": 4.640295548747522, "grad_norm": 0.2846224904060364, "learning_rate": 1.5539839502339005e-07, "loss": 0.3808, "step": 8583 }, { "epoch": 4.640836186700306, "grad_norm": 0.2742244601249695, "learning_rate": 1.5493170598723296e-07, "loss": 0.3507, "step": 8584 }, { "epoch": 4.641376824653091, "grad_norm": 0.2718614935874939, "learning_rate": 1.5446570775212944e-07, "loss": 0.3235, "step": 8585 }, { "epoch": 4.641917462605875, "grad_norm": 0.24936747550964355, "learning_rate": 1.5400040038451913e-07, "loss": 0.3444, "step": 8586 }, { "epoch": 4.64245810055866, "grad_norm": 0.2608190178871155, "learning_rate": 1.5353578395074563e-07, "loss": 0.3764, "step": 8587 }, { "epoch": 4.6429987385114435, "grad_norm": 0.2773157060146332, "learning_rate": 1.530718585170521e-07, "loss": 0.3802, "step": 8588 }, { "epoch": 4.643539376464227, "grad_norm": 0.254817932844162, "learning_rate": 1.5260862414958554e-07, "loss": 0.365, "step": 8589 }, { "epoch": 4.644080014417012, "grad_norm": 0.28640419244766235, "learning_rate": 1.5214608091439265e-07, "loss": 0.3834, "step": 8590 }, { "epoch": 4.644620652369796, "grad_norm": 0.2514747083187103, "learning_rate": 1.5168422887742174e-07, "loss": 0.3186, "step": 8591 }, { "epoch": 4.645161290322581, "grad_norm": 0.2734508812427521, "learning_rate": 1.5122306810452292e-07, "loss": 0.397, "step": 8592 }, { "epoch": 4.645701928275365, "grad_norm": 0.2521887719631195, "learning_rate": 1.5076259866144748e-07, "loss": 0.3206, "step": 8593 }, { "epoch": 4.64624256622815, "grad_norm": 0.26810285449028015, "learning_rate": 1.5030282061384848e-07, "loss": 0.3685, "step": 8594 }, { "epoch": 4.646783204180934, "grad_norm": 0.27116814255714417, "learning_rate": 1.4984373402728014e-07, "loss": 0.3657, "step": 8595 }, { "epoch": 4.6473238421337175, "grad_norm": 0.2740531265735626, "learning_rate": 1.4938533896719843e-07, "loss": 0.3719, "step": 8596 }, { "epoch": 4.647864480086502, "grad_norm": 0.27691882848739624, "learning_rate": 1.489276354989605e-07, "loss": 0.3637, "step": 8597 }, { "epoch": 4.648405118039286, "grad_norm": 0.2679653763771057, "learning_rate": 1.4847062368782473e-07, "loss": 0.3186, "step": 8598 }, { "epoch": 4.64894575599207, "grad_norm": 0.27665218710899353, "learning_rate": 1.480143035989512e-07, "loss": 0.3707, "step": 8599 }, { "epoch": 4.649486393944855, "grad_norm": 0.27336403727531433, "learning_rate": 1.4755867529740064e-07, "loss": 0.3846, "step": 8600 }, { "epoch": 4.650027031897639, "grad_norm": 0.2560247480869293, "learning_rate": 1.4710373884813612e-07, "loss": 0.3475, "step": 8601 }, { "epoch": 4.650567669850424, "grad_norm": 0.25172582268714905, "learning_rate": 1.4664949431602238e-07, "loss": 0.3465, "step": 8602 }, { "epoch": 4.651108307803208, "grad_norm": 0.25150173902511597, "learning_rate": 1.4619594176582318e-07, "loss": 0.3624, "step": 8603 }, { "epoch": 4.651648945755992, "grad_norm": 0.2822224497795105, "learning_rate": 1.4574308126220682e-07, "loss": 0.3827, "step": 8604 }, { "epoch": 4.652189583708776, "grad_norm": 0.26971569657325745, "learning_rate": 1.4529091286973994e-07, "loss": 0.3686, "step": 8605 }, { "epoch": 4.65273022166156, "grad_norm": 0.29332101345062256, "learning_rate": 1.448394366528927e-07, "loss": 0.3802, "step": 8606 }, { "epoch": 4.653270859614345, "grad_norm": 0.2725779116153717, "learning_rate": 1.443886526760363e-07, "loss": 0.3729, "step": 8607 }, { "epoch": 4.653811497567129, "grad_norm": 0.25920751690864563, "learning_rate": 1.4393856100344107e-07, "loss": 0.3562, "step": 8608 }, { "epoch": 4.654352135519914, "grad_norm": 0.24775615334510803, "learning_rate": 1.4348916169928173e-07, "loss": 0.3508, "step": 8609 }, { "epoch": 4.654892773472698, "grad_norm": 0.25473129749298096, "learning_rate": 1.4304045482763263e-07, "loss": 0.372, "step": 8610 }, { "epoch": 4.6554334114254825, "grad_norm": 0.247678741812706, "learning_rate": 1.425924404524681e-07, "loss": 0.3693, "step": 8611 }, { "epoch": 4.655974049378266, "grad_norm": 0.24631451070308685, "learning_rate": 1.4214511863766767e-07, "loss": 0.3654, "step": 8612 }, { "epoch": 4.65651468733105, "grad_norm": 0.2660142481327057, "learning_rate": 1.4169848944700748e-07, "loss": 0.3671, "step": 8613 }, { "epoch": 4.657055325283835, "grad_norm": 0.2603265643119812, "learning_rate": 1.4125255294416885e-07, "loss": 0.3441, "step": 8614 }, { "epoch": 4.657595963236619, "grad_norm": 0.2734012305736542, "learning_rate": 1.408073091927309e-07, "loss": 0.4051, "step": 8615 }, { "epoch": 4.658136601189404, "grad_norm": 0.2639838457107544, "learning_rate": 1.403627582561773e-07, "loss": 0.3557, "step": 8616 }, { "epoch": 4.658677239142188, "grad_norm": 0.2553168535232544, "learning_rate": 1.3991890019788956e-07, "loss": 0.3512, "step": 8617 }, { "epoch": 4.659217877094972, "grad_norm": 0.25102850794792175, "learning_rate": 1.3947573508115374e-07, "loss": 0.3293, "step": 8618 }, { "epoch": 4.6597585150477565, "grad_norm": 0.3015282154083252, "learning_rate": 1.3903326296915543e-07, "loss": 0.3693, "step": 8619 }, { "epoch": 4.66029915300054, "grad_norm": 0.27171650528907776, "learning_rate": 1.3859148392498023e-07, "loss": 0.4019, "step": 8620 }, { "epoch": 4.660839790953325, "grad_norm": 0.2531915009021759, "learning_rate": 1.3815039801161723e-07, "loss": 0.3722, "step": 8621 }, { "epoch": 4.661380428906109, "grad_norm": 0.24812015891075134, "learning_rate": 1.3771000529195555e-07, "loss": 0.3424, "step": 8622 }, { "epoch": 4.661921066858893, "grad_norm": 0.2861292362213135, "learning_rate": 1.3727030582878498e-07, "loss": 0.3951, "step": 8623 }, { "epoch": 4.662461704811678, "grad_norm": 0.2552623450756073, "learning_rate": 1.368312996847976e-07, "loss": 0.3466, "step": 8624 }, { "epoch": 4.663002342764462, "grad_norm": 0.2605513036251068, "learning_rate": 1.3639298692258606e-07, "loss": 0.3674, "step": 8625 }, { "epoch": 4.663542980717247, "grad_norm": 0.27852219343185425, "learning_rate": 1.359553676046449e-07, "loss": 0.3538, "step": 8626 }, { "epoch": 4.6640836186700305, "grad_norm": 0.2792563736438751, "learning_rate": 1.3551844179336748e-07, "loss": 0.3595, "step": 8627 }, { "epoch": 4.664624256622815, "grad_norm": 0.26457229256629944, "learning_rate": 1.3508220955105122e-07, "loss": 0.3704, "step": 8628 }, { "epoch": 4.665164894575599, "grad_norm": 0.2538105845451355, "learning_rate": 1.3464667093989248e-07, "loss": 0.3153, "step": 8629 }, { "epoch": 4.665705532528383, "grad_norm": 0.27984851598739624, "learning_rate": 1.342118260219899e-07, "loss": 0.3732, "step": 8630 }, { "epoch": 4.666246170481168, "grad_norm": 0.2557315230369568, "learning_rate": 1.3377767485934333e-07, "loss": 0.3524, "step": 8631 }, { "epoch": 4.666786808433952, "grad_norm": 0.2679039239883423, "learning_rate": 1.3334421751385275e-07, "loss": 0.3676, "step": 8632 }, { "epoch": 4.667327446386737, "grad_norm": 0.2723951041698456, "learning_rate": 1.3291145404731976e-07, "loss": 0.3827, "step": 8633 }, { "epoch": 4.667868084339521, "grad_norm": 0.24252431094646454, "learning_rate": 1.3247938452144727e-07, "loss": 0.3614, "step": 8634 }, { "epoch": 4.668408722292305, "grad_norm": 0.25721076130867004, "learning_rate": 1.320480089978382e-07, "loss": 0.3989, "step": 8635 }, { "epoch": 4.668949360245089, "grad_norm": 0.2676815986633301, "learning_rate": 1.3161732753799838e-07, "loss": 0.3976, "step": 8636 }, { "epoch": 4.669489998197873, "grad_norm": 0.24728180468082428, "learning_rate": 1.3118734020333257e-07, "loss": 0.3154, "step": 8637 }, { "epoch": 4.670030636150658, "grad_norm": 0.27126219868659973, "learning_rate": 1.3075804705514894e-07, "loss": 0.3669, "step": 8638 }, { "epoch": 4.670571274103442, "grad_norm": 0.281053751707077, "learning_rate": 1.303294481546541e-07, "loss": 0.3981, "step": 8639 }, { "epoch": 4.671111912056226, "grad_norm": 0.27033770084381104, "learning_rate": 1.2990154356295636e-07, "loss": 0.3418, "step": 8640 }, { "epoch": 4.671652550009011, "grad_norm": 0.2688961923122406, "learning_rate": 1.294743333410675e-07, "loss": 0.3703, "step": 8641 }, { "epoch": 4.672193187961795, "grad_norm": 0.25255006551742554, "learning_rate": 1.2904781754989715e-07, "loss": 0.3725, "step": 8642 }, { "epoch": 4.672733825914579, "grad_norm": 0.2689112722873688, "learning_rate": 1.2862199625025772e-07, "loss": 0.3504, "step": 8643 }, { "epoch": 4.673274463867363, "grad_norm": 0.2672416567802429, "learning_rate": 1.2819686950286125e-07, "loss": 0.4042, "step": 8644 }, { "epoch": 4.673815101820148, "grad_norm": 0.26514679193496704, "learning_rate": 1.2777243736832202e-07, "loss": 0.3839, "step": 8645 }, { "epoch": 4.674355739772932, "grad_norm": 0.2719597816467285, "learning_rate": 1.2734869990715493e-07, "loss": 0.3735, "step": 8646 }, { "epoch": 4.674896377725716, "grad_norm": 0.27342677116394043, "learning_rate": 1.26925657179775e-07, "loss": 0.3675, "step": 8647 }, { "epoch": 4.675437015678501, "grad_norm": 0.24384188652038574, "learning_rate": 1.2650330924650013e-07, "loss": 0.3447, "step": 8648 }, { "epoch": 4.675977653631285, "grad_norm": 0.26947176456451416, "learning_rate": 1.2608165616754653e-07, "loss": 0.3502, "step": 8649 }, { "epoch": 4.6765182915840695, "grad_norm": 0.2672954797744751, "learning_rate": 1.2566069800303393e-07, "loss": 0.3774, "step": 8650 }, { "epoch": 4.677058929536853, "grad_norm": 0.2530985176563263, "learning_rate": 1.25240434812981e-07, "loss": 0.3526, "step": 8651 }, { "epoch": 4.677599567489638, "grad_norm": 0.26235222816467285, "learning_rate": 1.2482086665730862e-07, "loss": 0.3638, "step": 8652 }, { "epoch": 4.678140205442422, "grad_norm": 0.2701789140701294, "learning_rate": 1.2440199359583792e-07, "loss": 0.3511, "step": 8653 }, { "epoch": 4.678680843395206, "grad_norm": 0.27301889657974243, "learning_rate": 1.2398381568829055e-07, "loss": 0.3391, "step": 8654 }, { "epoch": 4.679221481347991, "grad_norm": 0.2889951169490814, "learning_rate": 1.2356633299429044e-07, "loss": 0.3925, "step": 8655 }, { "epoch": 4.679762119300775, "grad_norm": 0.25548380613327026, "learning_rate": 1.2314954557336055e-07, "loss": 0.3488, "step": 8656 }, { "epoch": 4.680302757253559, "grad_norm": 0.252651184797287, "learning_rate": 1.2273345348492614e-07, "loss": 0.3554, "step": 8657 }, { "epoch": 4.6808433952063435, "grad_norm": 0.24984490871429443, "learning_rate": 1.2231805678831365e-07, "loss": 0.3489, "step": 8658 }, { "epoch": 4.681384033159127, "grad_norm": 0.26597264409065247, "learning_rate": 1.219033555427479e-07, "loss": 0.3746, "step": 8659 }, { "epoch": 4.681924671111912, "grad_norm": 0.28862953186035156, "learning_rate": 1.2148934980735772e-07, "loss": 0.3719, "step": 8660 }, { "epoch": 4.682465309064696, "grad_norm": 0.27415987849235535, "learning_rate": 1.2107603964117033e-07, "loss": 0.3694, "step": 8661 }, { "epoch": 4.683005947017481, "grad_norm": 0.24074959754943848, "learning_rate": 1.2066342510311523e-07, "loss": 0.3506, "step": 8662 }, { "epoch": 4.683546584970265, "grad_norm": 0.2678668797016144, "learning_rate": 1.2025150625202265e-07, "loss": 0.4074, "step": 8663 }, { "epoch": 4.684087222923049, "grad_norm": 0.2623341381549835, "learning_rate": 1.198402831466222e-07, "loss": 0.3623, "step": 8664 }, { "epoch": 4.684627860875834, "grad_norm": 0.2642357349395752, "learning_rate": 1.1942975584554594e-07, "loss": 0.3547, "step": 8665 }, { "epoch": 4.6851684988286175, "grad_norm": 0.25888511538505554, "learning_rate": 1.1901992440732591e-07, "loss": 0.3565, "step": 8666 }, { "epoch": 4.685709136781402, "grad_norm": 0.2689421474933624, "learning_rate": 1.1861078889039646e-07, "loss": 0.3529, "step": 8667 }, { "epoch": 4.686249774734186, "grad_norm": 0.27439960837364197, "learning_rate": 1.1820234935308927e-07, "loss": 0.3886, "step": 8668 }, { "epoch": 4.686790412686971, "grad_norm": 0.3315673768520355, "learning_rate": 1.1779460585363945e-07, "loss": 0.3214, "step": 8669 }, { "epoch": 4.687331050639755, "grad_norm": 0.2640538513660431, "learning_rate": 1.1738755845018323e-07, "loss": 0.3665, "step": 8670 }, { "epoch": 4.687871688592539, "grad_norm": 0.26083624362945557, "learning_rate": 1.1698120720075645e-07, "loss": 0.3813, "step": 8671 }, { "epoch": 4.688412326545324, "grad_norm": 0.2745174765586853, "learning_rate": 1.1657555216329553e-07, "loss": 0.405, "step": 8672 }, { "epoch": 4.688952964498108, "grad_norm": 0.2702055871486664, "learning_rate": 1.1617059339563807e-07, "loss": 0.3339, "step": 8673 }, { "epoch": 4.689493602450892, "grad_norm": 0.27788063883781433, "learning_rate": 1.1576633095552237e-07, "loss": 0.3624, "step": 8674 }, { "epoch": 4.690034240403676, "grad_norm": 0.2618255019187927, "learning_rate": 1.1536276490058784e-07, "loss": 0.3667, "step": 8675 }, { "epoch": 4.69057487835646, "grad_norm": 0.2847457826137543, "learning_rate": 1.1495989528837347e-07, "loss": 0.3795, "step": 8676 }, { "epoch": 4.691115516309245, "grad_norm": 0.2758665680885315, "learning_rate": 1.1455772217632e-07, "loss": 0.3722, "step": 8677 }, { "epoch": 4.691656154262029, "grad_norm": 0.2383020520210266, "learning_rate": 1.1415624562176875e-07, "loss": 0.3336, "step": 8678 }, { "epoch": 4.692196792214814, "grad_norm": 0.26600027084350586, "learning_rate": 1.1375546568196172e-07, "loss": 0.3632, "step": 8679 }, { "epoch": 4.692737430167598, "grad_norm": 0.2663227915763855, "learning_rate": 1.1335538241404099e-07, "loss": 0.3406, "step": 8680 }, { "epoch": 4.693278068120382, "grad_norm": 0.2766534090042114, "learning_rate": 1.1295599587504924e-07, "loss": 0.3687, "step": 8681 }, { "epoch": 4.693818706073166, "grad_norm": 0.2662915587425232, "learning_rate": 1.125573061219315e-07, "loss": 0.3535, "step": 8682 }, { "epoch": 4.69435934402595, "grad_norm": 0.2799853980541229, "learning_rate": 1.1215931321153172e-07, "loss": 0.3573, "step": 8683 }, { "epoch": 4.694899981978735, "grad_norm": 0.28400909900665283, "learning_rate": 1.1176201720059454e-07, "loss": 0.3882, "step": 8684 }, { "epoch": 4.695440619931519, "grad_norm": 0.26994532346725464, "learning_rate": 1.1136541814576574e-07, "loss": 0.338, "step": 8685 }, { "epoch": 4.695981257884304, "grad_norm": 0.2557099759578705, "learning_rate": 1.1096951610359174e-07, "loss": 0.3383, "step": 8686 }, { "epoch": 4.696521895837088, "grad_norm": 0.26642677187919617, "learning_rate": 1.1057431113052075e-07, "loss": 0.399, "step": 8687 }, { "epoch": 4.697062533789872, "grad_norm": 0.272213339805603, "learning_rate": 1.1017980328289823e-07, "loss": 0.3846, "step": 8688 }, { "epoch": 4.6976031717426565, "grad_norm": 0.26038268208503723, "learning_rate": 1.0978599261697476e-07, "loss": 0.362, "step": 8689 }, { "epoch": 4.69814380969544, "grad_norm": 0.2504449486732483, "learning_rate": 1.0939287918889652e-07, "loss": 0.3786, "step": 8690 }, { "epoch": 4.698684447648225, "grad_norm": 0.26650890707969666, "learning_rate": 1.0900046305471535e-07, "loss": 0.3734, "step": 8691 }, { "epoch": 4.699225085601009, "grad_norm": 0.2502971589565277, "learning_rate": 1.0860874427038037e-07, "loss": 0.3511, "step": 8692 }, { "epoch": 4.699765723553794, "grad_norm": 0.26762133836746216, "learning_rate": 1.0821772289174138e-07, "loss": 0.3579, "step": 8693 }, { "epoch": 4.700306361506578, "grad_norm": 0.29343181848526, "learning_rate": 1.0782739897455041e-07, "loss": 0.4183, "step": 8694 }, { "epoch": 4.700846999459362, "grad_norm": 0.24956832826137543, "learning_rate": 1.0743777257445853e-07, "loss": 0.33, "step": 8695 }, { "epoch": 4.701387637412147, "grad_norm": 0.2719976007938385, "learning_rate": 1.0704884374701908e-07, "loss": 0.3587, "step": 8696 }, { "epoch": 4.7019282753649305, "grad_norm": 0.2573912739753723, "learning_rate": 1.0666061254768268e-07, "loss": 0.3655, "step": 8697 }, { "epoch": 4.702468913317714, "grad_norm": 0.27466002106666565, "learning_rate": 1.0627307903180451e-07, "loss": 0.3967, "step": 8698 }, { "epoch": 4.703009551270499, "grad_norm": 0.2574291527271271, "learning_rate": 1.058862432546387e-07, "loss": 0.3396, "step": 8699 }, { "epoch": 4.703550189223283, "grad_norm": 0.2641294300556183, "learning_rate": 1.055001052713378e-07, "loss": 0.3767, "step": 8700 }, { "epoch": 4.704090827176068, "grad_norm": 0.2860811948776245, "learning_rate": 1.0511466513695778e-07, "loss": 0.3512, "step": 8701 }, { "epoch": 4.704631465128852, "grad_norm": 0.27938511967658997, "learning_rate": 1.0472992290645356e-07, "loss": 0.3663, "step": 8702 }, { "epoch": 4.705172103081637, "grad_norm": 0.27721625566482544, "learning_rate": 1.0434587863468182e-07, "loss": 0.3847, "step": 8703 }, { "epoch": 4.705712741034421, "grad_norm": 0.28188326954841614, "learning_rate": 1.039625323763982e-07, "loss": 0.3615, "step": 8704 }, { "epoch": 4.7062533789872045, "grad_norm": 0.262968510389328, "learning_rate": 1.0357988418625897e-07, "loss": 0.3405, "step": 8705 }, { "epoch": 4.706794016939989, "grad_norm": 0.2621868848800659, "learning_rate": 1.0319793411882273e-07, "loss": 0.3761, "step": 8706 }, { "epoch": 4.707334654892773, "grad_norm": 0.2894149124622345, "learning_rate": 1.0281668222854645e-07, "loss": 0.4006, "step": 8707 }, { "epoch": 4.707875292845558, "grad_norm": 0.25812411308288574, "learning_rate": 1.0243612856978835e-07, "loss": 0.3484, "step": 8708 }, { "epoch": 4.708415930798342, "grad_norm": 0.25666138529777527, "learning_rate": 1.0205627319680723e-07, "loss": 0.3476, "step": 8709 }, { "epoch": 4.708956568751127, "grad_norm": 0.27466803789138794, "learning_rate": 1.0167711616376196e-07, "loss": 0.3759, "step": 8710 }, { "epoch": 4.709497206703911, "grad_norm": 0.2503379285335541, "learning_rate": 1.0129865752471325e-07, "loss": 0.3087, "step": 8711 }, { "epoch": 4.710037844656695, "grad_norm": 0.254130482673645, "learning_rate": 1.0092089733361898e-07, "loss": 0.3427, "step": 8712 }, { "epoch": 4.710578482609479, "grad_norm": 0.24710287153720856, "learning_rate": 1.0054383564434056e-07, "loss": 0.3637, "step": 8713 }, { "epoch": 4.711119120562263, "grad_norm": 0.2547876834869385, "learning_rate": 1.0016747251063885e-07, "loss": 0.399, "step": 8714 }, { "epoch": 4.711659758515047, "grad_norm": 0.2763873338699341, "learning_rate": 9.979180798617538e-08, "loss": 0.3925, "step": 8715 }, { "epoch": 4.712200396467832, "grad_norm": 0.26490187644958496, "learning_rate": 9.941684212451119e-08, "loss": 0.3227, "step": 8716 }, { "epoch": 4.712741034420616, "grad_norm": 0.2720109522342682, "learning_rate": 9.904257497910796e-08, "loss": 0.3736, "step": 8717 }, { "epoch": 4.713281672373401, "grad_norm": 0.29464131593704224, "learning_rate": 9.866900660332912e-08, "loss": 0.3878, "step": 8718 }, { "epoch": 4.713822310326185, "grad_norm": 0.2727126181125641, "learning_rate": 9.829613705043594e-08, "loss": 0.3353, "step": 8719 }, { "epoch": 4.7143629482789695, "grad_norm": 0.27135735750198364, "learning_rate": 9.792396637359203e-08, "loss": 0.3429, "step": 8720 }, { "epoch": 4.714903586231753, "grad_norm": 0.27487772703170776, "learning_rate": 9.755249462586158e-08, "loss": 0.3821, "step": 8721 }, { "epoch": 4.715444224184537, "grad_norm": 0.2727549076080322, "learning_rate": 9.718172186020724e-08, "loss": 0.3586, "step": 8722 }, { "epoch": 4.715984862137322, "grad_norm": 0.2874700129032135, "learning_rate": 9.68116481294945e-08, "loss": 0.3546, "step": 8723 }, { "epoch": 4.716525500090106, "grad_norm": 0.26061776280403137, "learning_rate": 9.644227348648616e-08, "loss": 0.3738, "step": 8724 }, { "epoch": 4.717066138042891, "grad_norm": 0.256819486618042, "learning_rate": 9.607359798384785e-08, "loss": 0.3529, "step": 8725 }, { "epoch": 4.717606775995675, "grad_norm": 0.2749132513999939, "learning_rate": 9.570562167414477e-08, "loss": 0.3625, "step": 8726 }, { "epoch": 4.71814741394846, "grad_norm": 0.24957223236560822, "learning_rate": 9.533834460984159e-08, "loss": 0.3048, "step": 8727 }, { "epoch": 4.7186880519012435, "grad_norm": 0.2842405438423157, "learning_rate": 9.497176684330534e-08, "loss": 0.3962, "step": 8728 }, { "epoch": 4.719228689854027, "grad_norm": 0.26971182227134705, "learning_rate": 9.46058884268003e-08, "loss": 0.3884, "step": 8729 }, { "epoch": 4.719769327806812, "grad_norm": 0.2548538148403168, "learning_rate": 9.424070941249419e-08, "loss": 0.3491, "step": 8730 }, { "epoch": 4.720309965759596, "grad_norm": 0.2831175923347473, "learning_rate": 9.387622985245259e-08, "loss": 0.3825, "step": 8731 }, { "epoch": 4.720850603712381, "grad_norm": 0.263994961977005, "learning_rate": 9.351244979864338e-08, "loss": 0.3717, "step": 8732 }, { "epoch": 4.721391241665165, "grad_norm": 0.26066383719444275, "learning_rate": 9.314936930293283e-08, "loss": 0.3565, "step": 8733 }, { "epoch": 4.721931879617949, "grad_norm": 0.2533550262451172, "learning_rate": 9.278698841708844e-08, "loss": 0.3714, "step": 8734 }, { "epoch": 4.722472517570734, "grad_norm": 0.24598509073257446, "learning_rate": 9.242530719277776e-08, "loss": 0.3527, "step": 8735 }, { "epoch": 4.7230131555235175, "grad_norm": 0.2712627649307251, "learning_rate": 9.206432568156953e-08, "loss": 0.409, "step": 8736 }, { "epoch": 4.723553793476302, "grad_norm": 0.26807889342308044, "learning_rate": 9.170404393492982e-08, "loss": 0.3909, "step": 8737 }, { "epoch": 4.724094431429086, "grad_norm": 0.2640073001384735, "learning_rate": 9.134446200422919e-08, "loss": 0.3569, "step": 8738 }, { "epoch": 4.72463506938187, "grad_norm": 0.26649776101112366, "learning_rate": 9.098557994073443e-08, "loss": 0.3611, "step": 8739 }, { "epoch": 4.725175707334655, "grad_norm": 0.293518990278244, "learning_rate": 9.062739779561624e-08, "loss": 0.3994, "step": 8740 }, { "epoch": 4.725716345287439, "grad_norm": 0.261219322681427, "learning_rate": 9.026991561994158e-08, "loss": 0.3336, "step": 8741 }, { "epoch": 4.726256983240224, "grad_norm": 0.27696412801742554, "learning_rate": 8.991313346468078e-08, "loss": 0.3654, "step": 8742 }, { "epoch": 4.726797621193008, "grad_norm": 0.275884211063385, "learning_rate": 8.955705138070258e-08, "loss": 0.3417, "step": 8743 }, { "epoch": 4.727338259145792, "grad_norm": 0.2763936221599579, "learning_rate": 8.920166941877695e-08, "loss": 0.3618, "step": 8744 }, { "epoch": 4.727878897098576, "grad_norm": 0.2690500319004059, "learning_rate": 8.884698762957334e-08, "loss": 0.3577, "step": 8745 }, { "epoch": 4.72841953505136, "grad_norm": 0.2605028450489044, "learning_rate": 8.849300606366185e-08, "loss": 0.3246, "step": 8746 }, { "epoch": 4.728960173004145, "grad_norm": 0.27978426218032837, "learning_rate": 8.813972477151211e-08, "loss": 0.3892, "step": 8747 }, { "epoch": 4.729500810956929, "grad_norm": 0.24701228737831116, "learning_rate": 8.778714380349551e-08, "loss": 0.3488, "step": 8748 }, { "epoch": 4.730041448909714, "grad_norm": 0.26005181670188904, "learning_rate": 8.743526320988016e-08, "loss": 0.3773, "step": 8749 }, { "epoch": 4.730582086862498, "grad_norm": 0.2552785277366638, "learning_rate": 8.708408304083927e-08, "loss": 0.3593, "step": 8750 }, { "epoch": 4.7311227248152825, "grad_norm": 0.25979378819465637, "learning_rate": 8.67336033464411e-08, "loss": 0.3496, "step": 8751 }, { "epoch": 4.731663362768066, "grad_norm": 0.24903003871440887, "learning_rate": 8.638382417665847e-08, "loss": 0.3521, "step": 8752 }, { "epoch": 4.73220400072085, "grad_norm": 0.26014840602874756, "learning_rate": 8.603474558136038e-08, "loss": 0.3489, "step": 8753 }, { "epoch": 4.732744638673635, "grad_norm": 0.2891535460948944, "learning_rate": 8.568636761031868e-08, "loss": 0.4002, "step": 8754 }, { "epoch": 4.733285276626419, "grad_norm": 0.2797335386276245, "learning_rate": 8.53386903132053e-08, "loss": 0.3369, "step": 8755 }, { "epoch": 4.733825914579203, "grad_norm": 0.2770991921424866, "learning_rate": 8.499171373959004e-08, "loss": 0.3516, "step": 8756 }, { "epoch": 4.734366552531988, "grad_norm": 0.27074334025382996, "learning_rate": 8.464543793894498e-08, "loss": 0.3832, "step": 8757 }, { "epoch": 4.734907190484772, "grad_norm": 0.2525061368942261, "learning_rate": 8.429986296064118e-08, "loss": 0.357, "step": 8758 }, { "epoch": 4.7354478284375565, "grad_norm": 0.2681528925895691, "learning_rate": 8.395498885394981e-08, "loss": 0.3473, "step": 8759 }, { "epoch": 4.73598846639034, "grad_norm": 0.25846293568611145, "learning_rate": 8.361081566804318e-08, "loss": 0.3748, "step": 8760 }, { "epoch": 4.736529104343125, "grad_norm": 0.25526463985443115, "learning_rate": 8.326734345199261e-08, "loss": 0.3901, "step": 8761 }, { "epoch": 4.737069742295909, "grad_norm": 0.2344433218240738, "learning_rate": 8.292457225476946e-08, "loss": 0.3507, "step": 8762 }, { "epoch": 4.737610380248693, "grad_norm": 0.2653151750564575, "learning_rate": 8.258250212524522e-08, "loss": 0.3851, "step": 8763 }, { "epoch": 4.738151018201478, "grad_norm": 0.26171717047691345, "learning_rate": 8.224113311219251e-08, "loss": 0.3576, "step": 8764 }, { "epoch": 4.738691656154262, "grad_norm": 0.2592165470123291, "learning_rate": 8.190046526428241e-08, "loss": 0.3615, "step": 8765 }, { "epoch": 4.739232294107047, "grad_norm": 0.2712307572364807, "learning_rate": 8.156049863008664e-08, "loss": 0.352, "step": 8766 }, { "epoch": 4.7397729320598305, "grad_norm": 0.2761649489402771, "learning_rate": 8.122123325807751e-08, "loss": 0.3976, "step": 8767 }, { "epoch": 4.740313570012615, "grad_norm": 0.2663705050945282, "learning_rate": 8.088266919662635e-08, "loss": 0.3314, "step": 8768 }, { "epoch": 4.740854207965399, "grad_norm": 0.2638547420501709, "learning_rate": 8.054480649400565e-08, "loss": 0.3517, "step": 8769 }, { "epoch": 4.741394845918183, "grad_norm": 0.262474000453949, "learning_rate": 8.020764519838686e-08, "loss": 0.3455, "step": 8770 }, { "epoch": 4.741935483870968, "grad_norm": 0.2786615788936615, "learning_rate": 7.987118535784155e-08, "loss": 0.3489, "step": 8771 }, { "epoch": 4.742476121823752, "grad_norm": 0.2552831172943115, "learning_rate": 7.953542702034245e-08, "loss": 0.3454, "step": 8772 }, { "epoch": 4.743016759776537, "grad_norm": 0.25150927901268005, "learning_rate": 7.920037023376014e-08, "loss": 0.3582, "step": 8773 }, { "epoch": 4.743557397729321, "grad_norm": 0.28234729170799255, "learning_rate": 7.886601504586755e-08, "loss": 0.3829, "step": 8774 }, { "epoch": 4.7440980356821045, "grad_norm": 0.26238328218460083, "learning_rate": 7.853236150433541e-08, "loss": 0.372, "step": 8775 }, { "epoch": 4.744638673634889, "grad_norm": 0.2578582763671875, "learning_rate": 7.819940965673678e-08, "loss": 0.3385, "step": 8776 }, { "epoch": 4.745179311587673, "grad_norm": 0.27182072401046753, "learning_rate": 7.786715955054202e-08, "loss": 0.3575, "step": 8777 }, { "epoch": 4.745719949540458, "grad_norm": 0.2770176827907562, "learning_rate": 7.753561123312326e-08, "loss": 0.3527, "step": 8778 }, { "epoch": 4.746260587493242, "grad_norm": 0.2684987783432007, "learning_rate": 7.720476475175209e-08, "loss": 0.3528, "step": 8779 }, { "epoch": 4.746801225446026, "grad_norm": 0.26776623725891113, "learning_rate": 7.687462015360026e-08, "loss": 0.3877, "step": 8780 }, { "epoch": 4.747341863398811, "grad_norm": 0.24656769633293152, "learning_rate": 7.654517748573842e-08, "loss": 0.3399, "step": 8781 }, { "epoch": 4.747882501351595, "grad_norm": 0.2421443909406662, "learning_rate": 7.621643679513846e-08, "loss": 0.3287, "step": 8782 }, { "epoch": 4.748423139304379, "grad_norm": 0.26763293147087097, "learning_rate": 7.588839812867177e-08, "loss": 0.3768, "step": 8783 }, { "epoch": 4.748963777257163, "grad_norm": 0.2524373233318329, "learning_rate": 7.556106153310927e-08, "loss": 0.375, "step": 8784 }, { "epoch": 4.749504415209948, "grad_norm": 0.2663939297199249, "learning_rate": 7.523442705512196e-08, "loss": 0.3695, "step": 8785 }, { "epoch": 4.750045053162732, "grad_norm": 0.2635733187198639, "learning_rate": 7.490849474128093e-08, "loss": 0.37, "step": 8786 }, { "epoch": 4.750585691115516, "grad_norm": 0.257835328578949, "learning_rate": 7.458326463805677e-08, "loss": 0.3749, "step": 8787 }, { "epoch": 4.751126329068301, "grad_norm": 0.262051522731781, "learning_rate": 7.425873679182072e-08, "loss": 0.3664, "step": 8788 }, { "epoch": 4.751666967021085, "grad_norm": 0.26971325278282166, "learning_rate": 7.3934911248843e-08, "loss": 0.3289, "step": 8789 }, { "epoch": 4.7522076049738695, "grad_norm": 0.27077168226242065, "learning_rate": 7.36117880552939e-08, "loss": 0.3871, "step": 8790 }, { "epoch": 4.752748242926653, "grad_norm": 0.27058687806129456, "learning_rate": 7.328936725724378e-08, "loss": 0.3692, "step": 8791 }, { "epoch": 4.753288880879437, "grad_norm": 0.278969407081604, "learning_rate": 7.29676489006631e-08, "loss": 0.3556, "step": 8792 }, { "epoch": 4.753829518832222, "grad_norm": 0.2769360840320587, "learning_rate": 7.264663303142239e-08, "loss": 0.3483, "step": 8793 }, { "epoch": 4.754370156785006, "grad_norm": 0.2593977749347687, "learning_rate": 7.232631969529058e-08, "loss": 0.3288, "step": 8794 }, { "epoch": 4.754910794737791, "grad_norm": 0.2765919864177704, "learning_rate": 7.200670893793727e-08, "loss": 0.3402, "step": 8795 }, { "epoch": 4.755451432690575, "grad_norm": 0.25877347588539124, "learning_rate": 7.168780080493265e-08, "loss": 0.3466, "step": 8796 }, { "epoch": 4.755992070643359, "grad_norm": 0.27611830830574036, "learning_rate": 7.136959534174592e-08, "loss": 0.3944, "step": 8797 }, { "epoch": 4.7565327085961435, "grad_norm": 0.2555706202983856, "learning_rate": 7.105209259374579e-08, "loss": 0.3752, "step": 8798 }, { "epoch": 4.757073346548927, "grad_norm": 0.25722599029541016, "learning_rate": 7.07352926062016e-08, "loss": 0.3599, "step": 8799 }, { "epoch": 4.757613984501712, "grad_norm": 0.263687402009964, "learning_rate": 7.041919542428221e-08, "loss": 0.3653, "step": 8800 }, { "epoch": 4.758154622454496, "grad_norm": 0.28717342019081116, "learning_rate": 7.010380109305603e-08, "loss": 0.4053, "step": 8801 }, { "epoch": 4.758695260407281, "grad_norm": 0.2686096727848053, "learning_rate": 6.978910965749097e-08, "loss": 0.3319, "step": 8802 }, { "epoch": 4.759235898360065, "grad_norm": 0.2717582583427429, "learning_rate": 6.947512116245669e-08, "loss": 0.3801, "step": 8803 }, { "epoch": 4.759776536312849, "grad_norm": 0.27994662523269653, "learning_rate": 6.916183565271905e-08, "loss": 0.3736, "step": 8804 }, { "epoch": 4.760317174265634, "grad_norm": 0.26790112257003784, "learning_rate": 6.884925317294678e-08, "loss": 0.3872, "step": 8805 }, { "epoch": 4.7608578122184175, "grad_norm": 0.2642842233181, "learning_rate": 6.853737376770752e-08, "loss": 0.3568, "step": 8806 }, { "epoch": 4.761398450171202, "grad_norm": 0.278881698846817, "learning_rate": 6.822619748146797e-08, "loss": 0.3994, "step": 8807 }, { "epoch": 4.761939088123986, "grad_norm": 0.28038862347602844, "learning_rate": 6.791572435859595e-08, "loss": 0.3639, "step": 8808 }, { "epoch": 4.762479726076771, "grad_norm": 0.25316354632377625, "learning_rate": 6.760595444335716e-08, "loss": 0.3563, "step": 8809 }, { "epoch": 4.763020364029555, "grad_norm": 0.25088751316070557, "learning_rate": 6.72968877799185e-08, "loss": 0.3758, "step": 8810 }, { "epoch": 4.763561001982339, "grad_norm": 0.25079676508903503, "learning_rate": 6.698852441234527e-08, "loss": 0.3484, "step": 8811 }, { "epoch": 4.764101639935124, "grad_norm": 0.25764012336730957, "learning_rate": 6.668086438460453e-08, "loss": 0.3488, "step": 8812 }, { "epoch": 4.764642277887908, "grad_norm": 0.2521570920944214, "learning_rate": 6.63739077405623e-08, "loss": 0.3526, "step": 8813 }, { "epoch": 4.7651829158406915, "grad_norm": 0.2827494442462921, "learning_rate": 6.60676545239819e-08, "loss": 0.3994, "step": 8814 }, { "epoch": 4.765723553793476, "grad_norm": 0.26093700528144836, "learning_rate": 6.576210477853007e-08, "loss": 0.3585, "step": 8815 }, { "epoch": 4.76626419174626, "grad_norm": 0.25771933794021606, "learning_rate": 6.545725854777086e-08, "loss": 0.3815, "step": 8816 }, { "epoch": 4.766804829699045, "grad_norm": 0.2567058801651001, "learning_rate": 6.515311587516893e-08, "loss": 0.3344, "step": 8817 }, { "epoch": 4.767345467651829, "grad_norm": 0.25780367851257324, "learning_rate": 6.484967680408849e-08, "loss": 0.3375, "step": 8818 }, { "epoch": 4.767886105604614, "grad_norm": 0.2630016505718231, "learning_rate": 6.454694137779272e-08, "loss": 0.3289, "step": 8819 }, { "epoch": 4.768426743557398, "grad_norm": 0.2782217264175415, "learning_rate": 6.424490963944597e-08, "loss": 0.3748, "step": 8820 }, { "epoch": 4.768967381510182, "grad_norm": 0.28443044424057007, "learning_rate": 6.394358163211046e-08, "loss": 0.3899, "step": 8821 }, { "epoch": 4.769508019462966, "grad_norm": 0.2556963264942169, "learning_rate": 6.36429573987496e-08, "loss": 0.3531, "step": 8822 }, { "epoch": 4.77004865741575, "grad_norm": 0.2648627460002899, "learning_rate": 6.334303698222577e-08, "loss": 0.3637, "step": 8823 }, { "epoch": 4.770589295368535, "grad_norm": 0.248936265707016, "learning_rate": 6.304382042530088e-08, "loss": 0.3878, "step": 8824 }, { "epoch": 4.771129933321319, "grad_norm": 0.25302061438560486, "learning_rate": 6.274530777063747e-08, "loss": 0.3514, "step": 8825 }, { "epoch": 4.771670571274104, "grad_norm": 0.2634265124797821, "learning_rate": 6.244749906079539e-08, "loss": 0.3356, "step": 8826 }, { "epoch": 4.772211209226888, "grad_norm": 0.26157331466674805, "learning_rate": 6.215039433823677e-08, "loss": 0.3807, "step": 8827 }, { "epoch": 4.772751847179672, "grad_norm": 0.25523486733436584, "learning_rate": 6.185399364532163e-08, "loss": 0.373, "step": 8828 }, { "epoch": 4.7732924851324565, "grad_norm": 0.2643509805202484, "learning_rate": 6.15582970243117e-08, "loss": 0.3446, "step": 8829 }, { "epoch": 4.77383312308524, "grad_norm": 0.2760935127735138, "learning_rate": 6.126330451736495e-08, "loss": 0.3738, "step": 8830 }, { "epoch": 4.774373761038025, "grad_norm": 0.26611626148223877, "learning_rate": 6.096901616654216e-08, "loss": 0.3294, "step": 8831 }, { "epoch": 4.774914398990809, "grad_norm": 0.25371184945106506, "learning_rate": 6.067543201380199e-08, "loss": 0.3523, "step": 8832 }, { "epoch": 4.775455036943593, "grad_norm": 0.2639772891998291, "learning_rate": 6.03825521010032e-08, "loss": 0.3566, "step": 8833 }, { "epoch": 4.775995674896378, "grad_norm": 0.2542242407798767, "learning_rate": 6.009037646990346e-08, "loss": 0.379, "step": 8834 }, { "epoch": 4.776536312849162, "grad_norm": 0.2683775722980499, "learning_rate": 5.97989051621617e-08, "loss": 0.3855, "step": 8835 }, { "epoch": 4.777076950801947, "grad_norm": 0.2643851041793823, "learning_rate": 5.950813821933465e-08, "loss": 0.3858, "step": 8836 }, { "epoch": 4.7776175887547305, "grad_norm": 0.2688920497894287, "learning_rate": 5.9218075682880293e-08, "loss": 0.3392, "step": 8837 }, { "epoch": 4.7781582267075144, "grad_norm": 0.2585931122303009, "learning_rate": 5.892871759415386e-08, "loss": 0.3595, "step": 8838 }, { "epoch": 4.778698864660299, "grad_norm": 0.2401088923215866, "learning_rate": 5.864006399441236e-08, "loss": 0.3158, "step": 8839 }, { "epoch": 4.779239502613083, "grad_norm": 0.25406181812286377, "learning_rate": 5.835211492481063e-08, "loss": 0.3688, "step": 8840 }, { "epoch": 4.779780140565868, "grad_norm": 0.26199793815612793, "learning_rate": 5.8064870426405295e-08, "loss": 0.3774, "step": 8841 }, { "epoch": 4.780320778518652, "grad_norm": 0.28002819418907166, "learning_rate": 5.777833054015025e-08, "loss": 0.3555, "step": 8842 }, { "epoch": 4.780861416471437, "grad_norm": 0.25967445969581604, "learning_rate": 5.749249530690004e-08, "loss": 0.3708, "step": 8843 }, { "epoch": 4.781402054424221, "grad_norm": 0.24986760318279266, "learning_rate": 5.7207364767408734e-08, "loss": 0.3409, "step": 8844 }, { "epoch": 4.7819426923770045, "grad_norm": 0.26675549149513245, "learning_rate": 5.6922938962329364e-08, "loss": 0.3778, "step": 8845 }, { "epoch": 4.782483330329789, "grad_norm": 0.26861754059791565, "learning_rate": 5.66392179322145e-08, "loss": 0.3701, "step": 8846 }, { "epoch": 4.783023968282573, "grad_norm": 0.26682743430137634, "learning_rate": 5.635620171751732e-08, "loss": 0.3815, "step": 8847 }, { "epoch": 4.783564606235358, "grad_norm": 0.268576443195343, "learning_rate": 5.6073890358589454e-08, "loss": 0.3668, "step": 8848 }, { "epoch": 4.784105244188142, "grad_norm": 0.2416997253894806, "learning_rate": 5.579228389568314e-08, "loss": 0.3382, "step": 8849 }, { "epoch": 4.784645882140927, "grad_norm": 0.26931247115135193, "learning_rate": 5.551138236894793e-08, "loss": 0.378, "step": 8850 }, { "epoch": 4.785186520093711, "grad_norm": 0.2577950954437256, "learning_rate": 5.5231185818434563e-08, "loss": 0.3394, "step": 8851 }, { "epoch": 4.785727158046495, "grad_norm": 0.27060166001319885, "learning_rate": 5.495169428409386e-08, "loss": 0.342, "step": 8852 }, { "epoch": 4.786267795999279, "grad_norm": 0.2755787968635559, "learning_rate": 5.46729078057745e-08, "loss": 0.3485, "step": 8853 }, { "epoch": 4.786808433952063, "grad_norm": 0.27702271938323975, "learning_rate": 5.43948264232258e-08, "loss": 0.3765, "step": 8854 }, { "epoch": 4.787349071904847, "grad_norm": 0.2815020978450775, "learning_rate": 5.411745017609493e-08, "loss": 0.3504, "step": 8855 }, { "epoch": 4.787889709857632, "grad_norm": 0.2912232577800751, "learning_rate": 5.384077910393137e-08, "loss": 0.3901, "step": 8856 }, { "epoch": 4.788430347810416, "grad_norm": 0.26429423689842224, "learning_rate": 5.3564813246181345e-08, "loss": 0.3414, "step": 8857 }, { "epoch": 4.788970985763201, "grad_norm": 0.27630752325057983, "learning_rate": 5.328955264219171e-08, "loss": 0.3731, "step": 8858 }, { "epoch": 4.789511623715985, "grad_norm": 0.24857951700687408, "learning_rate": 5.301499733120885e-08, "loss": 0.3579, "step": 8859 }, { "epoch": 4.7900522616687695, "grad_norm": 0.24628034234046936, "learning_rate": 5.274114735237812e-08, "loss": 0.3303, "step": 8860 }, { "epoch": 4.790592899621553, "grad_norm": 0.256889671087265, "learning_rate": 5.246800274474439e-08, "loss": 0.3466, "step": 8861 }, { "epoch": 4.791133537574337, "grad_norm": 0.25723087787628174, "learning_rate": 5.219556354725264e-08, "loss": 0.3496, "step": 8862 }, { "epoch": 4.791674175527122, "grad_norm": 0.2660854160785675, "learning_rate": 5.192382979874677e-08, "loss": 0.3271, "step": 8863 }, { "epoch": 4.792214813479906, "grad_norm": 0.27716538310050964, "learning_rate": 5.16528015379697e-08, "loss": 0.3943, "step": 8864 }, { "epoch": 4.792755451432691, "grad_norm": 0.26225516200065613, "learning_rate": 5.138247880356384e-08, "loss": 0.3606, "step": 8865 }, { "epoch": 4.793296089385475, "grad_norm": 0.2535618543624878, "learning_rate": 5.1112861634072256e-08, "loss": 0.3415, "step": 8866 }, { "epoch": 4.79383672733826, "grad_norm": 0.2489616423845291, "learning_rate": 5.08439500679353e-08, "loss": 0.3303, "step": 8867 }, { "epoch": 4.7943773652910435, "grad_norm": 0.26061302423477173, "learning_rate": 5.0575744143495084e-08, "loss": 0.3604, "step": 8868 }, { "epoch": 4.794918003243827, "grad_norm": 0.25412821769714355, "learning_rate": 5.0308243898991025e-08, "loss": 0.3668, "step": 8869 }, { "epoch": 4.795458641196612, "grad_norm": 0.31018903851509094, "learning_rate": 5.004144937256372e-08, "loss": 0.3978, "step": 8870 }, { "epoch": 4.795999279149396, "grad_norm": 0.26542267203330994, "learning_rate": 4.977536060225163e-08, "loss": 0.3442, "step": 8871 }, { "epoch": 4.79653991710218, "grad_norm": 0.2577354907989502, "learning_rate": 4.9509977625992745e-08, "loss": 0.3666, "step": 8872 }, { "epoch": 4.797080555054965, "grad_norm": 0.2683751881122589, "learning_rate": 4.9245300481626234e-08, "loss": 0.3355, "step": 8873 }, { "epoch": 4.797621193007749, "grad_norm": 0.24945707619190216, "learning_rate": 4.898132920688803e-08, "loss": 0.366, "step": 8874 }, { "epoch": 4.798161830960534, "grad_norm": 0.26416832208633423, "learning_rate": 4.8718063839414683e-08, "loss": 0.3606, "step": 8875 }, { "epoch": 4.7987024689133175, "grad_norm": 0.28955134749412537, "learning_rate": 4.84555044167434e-08, "loss": 0.3629, "step": 8876 }, { "epoch": 4.799243106866102, "grad_norm": 0.25828447937965393, "learning_rate": 4.8193650976308124e-08, "loss": 0.3518, "step": 8877 }, { "epoch": 4.799783744818886, "grad_norm": 0.27702564001083374, "learning_rate": 4.7932503555443986e-08, "loss": 0.3753, "step": 8878 }, { "epoch": 4.80032438277167, "grad_norm": 0.26961031556129456, "learning_rate": 4.7672062191385094e-08, "loss": 0.3611, "step": 8879 }, { "epoch": 4.800865020724455, "grad_norm": 0.2614027261734009, "learning_rate": 4.741232692126396e-08, "loss": 0.3303, "step": 8880 }, { "epoch": 4.801405658677239, "grad_norm": 0.24823996424674988, "learning_rate": 4.715329778211375e-08, "loss": 0.3457, "step": 8881 }, { "epoch": 4.801946296630024, "grad_norm": 0.2648354470729828, "learning_rate": 4.6894974810866575e-08, "loss": 0.3897, "step": 8882 }, { "epoch": 4.802486934582808, "grad_norm": 0.24569006264209747, "learning_rate": 4.6637358044352985e-08, "loss": 0.3837, "step": 8883 }, { "epoch": 4.803027572535592, "grad_norm": 0.26638856530189514, "learning_rate": 4.638044751930415e-08, "loss": 0.3491, "step": 8884 }, { "epoch": 4.803568210488376, "grad_norm": 0.27749237418174744, "learning_rate": 4.612424327234966e-08, "loss": 0.3965, "step": 8885 }, { "epoch": 4.80410884844116, "grad_norm": 0.27472639083862305, "learning_rate": 4.5868745340018064e-08, "loss": 0.3524, "step": 8886 }, { "epoch": 4.804649486393945, "grad_norm": 0.26227518916130066, "learning_rate": 4.561395375873856e-08, "loss": 0.3653, "step": 8887 }, { "epoch": 4.805190124346729, "grad_norm": 0.26707348227500916, "learning_rate": 4.5359868564839317e-08, "loss": 0.3761, "step": 8888 }, { "epoch": 4.805730762299514, "grad_norm": 0.274200975894928, "learning_rate": 4.510648979454579e-08, "loss": 0.3796, "step": 8889 }, { "epoch": 4.806271400252298, "grad_norm": 0.26825666427612305, "learning_rate": 4.485381748398576e-08, "loss": 0.3631, "step": 8890 }, { "epoch": 4.806812038205082, "grad_norm": 0.25708481669425964, "learning_rate": 4.4601851669183736e-08, "loss": 0.3635, "step": 8891 }, { "epoch": 4.807352676157866, "grad_norm": 0.26601260900497437, "learning_rate": 4.435059238606543e-08, "loss": 0.38, "step": 8892 }, { "epoch": 4.80789331411065, "grad_norm": 0.25704678893089294, "learning_rate": 4.41000396704544e-08, "loss": 0.3608, "step": 8893 }, { "epoch": 4.808433952063435, "grad_norm": 0.25019845366477966, "learning_rate": 4.3850193558073736e-08, "loss": 0.3548, "step": 8894 }, { "epoch": 4.808974590016219, "grad_norm": 0.2615974545478821, "learning_rate": 4.360105408454718e-08, "loss": 0.3566, "step": 8895 }, { "epoch": 4.809515227969003, "grad_norm": 0.2789876461029053, "learning_rate": 4.335262128539519e-08, "loss": 0.3867, "step": 8896 }, { "epoch": 4.810055865921788, "grad_norm": 0.27569377422332764, "learning_rate": 4.310489519603944e-08, "loss": 0.358, "step": 8897 }, { "epoch": 4.810596503874572, "grad_norm": 0.26313361525535583, "learning_rate": 4.285787585180057e-08, "loss": 0.3527, "step": 8898 }, { "epoch": 4.8111371418273565, "grad_norm": 0.24816150963306427, "learning_rate": 4.261156328789762e-08, "loss": 0.3361, "step": 8899 }, { "epoch": 4.81167777978014, "grad_norm": 0.26686063408851624, "learning_rate": 4.236595753944972e-08, "loss": 0.3979, "step": 8900 }, { "epoch": 4.812218417732925, "grad_norm": 0.2610894739627838, "learning_rate": 4.2121058641474974e-08, "loss": 0.366, "step": 8901 }, { "epoch": 4.812759055685709, "grad_norm": 0.2641092836856842, "learning_rate": 4.187686662889045e-08, "loss": 0.3702, "step": 8902 }, { "epoch": 4.813299693638493, "grad_norm": 0.2756359279155731, "learning_rate": 4.163338153651275e-08, "loss": 0.3579, "step": 8903 }, { "epoch": 4.813840331591278, "grad_norm": 0.2738838493824005, "learning_rate": 4.139060339905743e-08, "loss": 0.347, "step": 8904 }, { "epoch": 4.814380969544062, "grad_norm": 0.2661038041114807, "learning_rate": 4.114853225113902e-08, "loss": 0.3798, "step": 8905 }, { "epoch": 4.814921607496847, "grad_norm": 0.24749571084976196, "learning_rate": 4.090716812727214e-08, "loss": 0.3497, "step": 8906 }, { "epoch": 4.8154622454496305, "grad_norm": 0.2771914005279541, "learning_rate": 4.0666511061869804e-08, "loss": 0.3636, "step": 8907 }, { "epoch": 4.816002883402415, "grad_norm": 0.2644779086112976, "learning_rate": 4.042656108924459e-08, "loss": 0.3731, "step": 8908 }, { "epoch": 4.816543521355199, "grad_norm": 0.24924729764461517, "learning_rate": 4.018731824360744e-08, "loss": 0.3561, "step": 8909 }, { "epoch": 4.817084159307983, "grad_norm": 0.26898232102394104, "learning_rate": 3.994878255907053e-08, "loss": 0.3302, "step": 8910 }, { "epoch": 4.817624797260768, "grad_norm": 0.28812891244888306, "learning_rate": 3.971095406964276e-08, "loss": 0.4111, "step": 8911 }, { "epoch": 4.818165435213552, "grad_norm": 0.26695603132247925, "learning_rate": 3.947383280923367e-08, "loss": 0.3736, "step": 8912 }, { "epoch": 4.818706073166336, "grad_norm": 0.24491271376609802, "learning_rate": 3.923741881165122e-08, "loss": 0.3297, "step": 8913 }, { "epoch": 4.819246711119121, "grad_norm": 0.26488637924194336, "learning_rate": 3.900171211060344e-08, "loss": 0.3876, "step": 8914 }, { "epoch": 4.8197873490719045, "grad_norm": 0.26854172348976135, "learning_rate": 3.8766712739696786e-08, "loss": 0.3586, "step": 8915 }, { "epoch": 4.820327987024689, "grad_norm": 0.26286134123802185, "learning_rate": 3.853242073243668e-08, "loss": 0.3338, "step": 8916 }, { "epoch": 4.820868624977473, "grad_norm": 0.26648515462875366, "learning_rate": 3.8298836122228064e-08, "loss": 0.4151, "step": 8917 }, { "epoch": 4.821409262930258, "grad_norm": 0.2651606500148773, "learning_rate": 3.8065958942375966e-08, "loss": 0.3663, "step": 8918 }, { "epoch": 4.821949900883042, "grad_norm": 0.2581683099269867, "learning_rate": 3.783378922608216e-08, "loss": 0.3803, "step": 8919 }, { "epoch": 4.822490538835826, "grad_norm": 0.2398492991924286, "learning_rate": 3.7602327006450166e-08, "loss": 0.3348, "step": 8920 }, { "epoch": 4.823031176788611, "grad_norm": 0.25325441360473633, "learning_rate": 3.7371572316480806e-08, "loss": 0.3443, "step": 8921 }, { "epoch": 4.823571814741395, "grad_norm": 0.2561415433883667, "learning_rate": 3.714152518907499e-08, "loss": 0.3763, "step": 8922 }, { "epoch": 4.824112452694179, "grad_norm": 0.24386979639530182, "learning_rate": 3.691218565703203e-08, "loss": 0.334, "step": 8923 }, { "epoch": 4.824653090646963, "grad_norm": 0.25620338320732117, "learning_rate": 3.6683553753051326e-08, "loss": 0.3649, "step": 8924 }, { "epoch": 4.825193728599748, "grad_norm": 0.27384117245674133, "learning_rate": 3.645562950973014e-08, "loss": 0.408, "step": 8925 }, { "epoch": 4.825734366552532, "grad_norm": 0.2522866427898407, "learning_rate": 3.6228412959565805e-08, "loss": 0.3711, "step": 8926 }, { "epoch": 4.826275004505316, "grad_norm": 0.2863280475139618, "learning_rate": 3.600190413495463e-08, "loss": 0.3813, "step": 8927 }, { "epoch": 4.826815642458101, "grad_norm": 0.26387637853622437, "learning_rate": 3.57761030681919e-08, "loss": 0.3987, "step": 8928 }, { "epoch": 4.827356280410885, "grad_norm": 0.2458793967962265, "learning_rate": 3.55510097914713e-08, "loss": 0.3436, "step": 8929 }, { "epoch": 4.827896918363669, "grad_norm": 0.27182090282440186, "learning_rate": 3.5326624336886604e-08, "loss": 0.3549, "step": 8930 }, { "epoch": 4.828437556316453, "grad_norm": 0.2573881447315216, "learning_rate": 3.510294673643056e-08, "loss": 0.3556, "step": 8931 }, { "epoch": 4.828978194269237, "grad_norm": 0.26516303420066833, "learning_rate": 3.4879977021994304e-08, "loss": 0.3717, "step": 8932 }, { "epoch": 4.829518832222022, "grad_norm": 0.23965980112552643, "learning_rate": 3.465771522536854e-08, "loss": 0.3464, "step": 8933 }, { "epoch": 4.830059470174806, "grad_norm": 0.272983580827713, "learning_rate": 3.4436161378242907e-08, "loss": 0.3539, "step": 8934 }, { "epoch": 4.830600108127591, "grad_norm": 0.28406354784965515, "learning_rate": 3.4215315512206584e-08, "loss": 0.371, "step": 8935 }, { "epoch": 4.831140746080375, "grad_norm": 0.271476149559021, "learning_rate": 3.399517765874716e-08, "loss": 0.3745, "step": 8936 }, { "epoch": 4.831681384033159, "grad_norm": 0.24661186337471008, "learning_rate": 3.377574784925064e-08, "loss": 0.3329, "step": 8937 }, { "epoch": 4.8322220219859435, "grad_norm": 0.2657792270183563, "learning_rate": 3.355702611500422e-08, "loss": 0.348, "step": 8938 }, { "epoch": 4.8327626599387274, "grad_norm": 0.2854871153831482, "learning_rate": 3.33390124871924e-08, "loss": 0.3917, "step": 8939 }, { "epoch": 4.833303297891512, "grad_norm": 0.2525235116481781, "learning_rate": 3.312170699689865e-08, "loss": 0.3605, "step": 8940 }, { "epoch": 4.833843935844296, "grad_norm": 0.27078115940093994, "learning_rate": 3.2905109675106515e-08, "loss": 0.3555, "step": 8941 }, { "epoch": 4.834384573797081, "grad_norm": 0.26227429509162903, "learning_rate": 3.268922055269741e-08, "loss": 0.3676, "step": 8942 }, { "epoch": 4.834925211749865, "grad_norm": 0.2645623981952667, "learning_rate": 3.247403966045393e-08, "loss": 0.3534, "step": 8943 }, { "epoch": 4.835465849702649, "grad_norm": 0.25363367795944214, "learning_rate": 3.225956702905486e-08, "loss": 0.372, "step": 8944 }, { "epoch": 4.836006487655434, "grad_norm": 0.26763975620269775, "learning_rate": 3.204580268907909e-08, "loss": 0.375, "step": 8945 }, { "epoch": 4.8365471256082175, "grad_norm": 0.27136990427970886, "learning_rate": 3.183274667100611e-08, "loss": 0.376, "step": 8946 }, { "epoch": 4.837087763561002, "grad_norm": 0.2796955406665802, "learning_rate": 3.1620399005211634e-08, "loss": 0.3741, "step": 8947 }, { "epoch": 4.837628401513786, "grad_norm": 0.24199721217155457, "learning_rate": 3.140875972197255e-08, "loss": 0.3395, "step": 8948 }, { "epoch": 4.83816903946657, "grad_norm": 0.25753477215766907, "learning_rate": 3.1197828851464164e-08, "loss": 0.3678, "step": 8949 }, { "epoch": 4.838709677419355, "grad_norm": 0.270973265171051, "learning_rate": 3.0987606423759644e-08, "loss": 0.3769, "step": 8950 }, { "epoch": 4.839250315372139, "grad_norm": 0.2731953561306, "learning_rate": 3.0778092468833897e-08, "loss": 0.3535, "step": 8951 }, { "epoch": 4.839790953324924, "grad_norm": 0.3085453510284424, "learning_rate": 3.056928701655692e-08, "loss": 0.3554, "step": 8952 }, { "epoch": 4.840331591277708, "grad_norm": 0.2651394307613373, "learning_rate": 3.0361190096701573e-08, "loss": 0.3614, "step": 8953 }, { "epoch": 4.8408722292304915, "grad_norm": 0.2528552711009979, "learning_rate": 3.015380173893689e-08, "loss": 0.3334, "step": 8954 }, { "epoch": 4.841412867183276, "grad_norm": 0.2840980589389801, "learning_rate": 2.9947121972832e-08, "loss": 0.3762, "step": 8955 }, { "epoch": 4.84195350513606, "grad_norm": 0.23905372619628906, "learning_rate": 2.974115082785556e-08, "loss": 0.3235, "step": 8956 }, { "epoch": 4.842494143088845, "grad_norm": 0.25684115290641785, "learning_rate": 2.9535888333374064e-08, "loss": 0.3945, "step": 8957 }, { "epoch": 4.843034781041629, "grad_norm": 0.27147263288497925, "learning_rate": 2.9331334518653554e-08, "loss": 0.3767, "step": 8958 }, { "epoch": 4.843575418994414, "grad_norm": 0.26529526710510254, "learning_rate": 2.9127489412859033e-08, "loss": 0.4167, "step": 8959 }, { "epoch": 4.844116056947198, "grad_norm": 0.2645881175994873, "learning_rate": 2.8924353045054475e-08, "loss": 0.3754, "step": 8960 }, { "epoch": 4.844656694899982, "grad_norm": 0.27821385860443115, "learning_rate": 2.872192544420227e-08, "loss": 0.3982, "step": 8961 }, { "epoch": 4.845197332852766, "grad_norm": 0.26622623205184937, "learning_rate": 2.8520206639164328e-08, "loss": 0.3406, "step": 8962 }, { "epoch": 4.84573797080555, "grad_norm": 0.25798436999320984, "learning_rate": 2.8319196658702087e-08, "loss": 0.3635, "step": 8963 }, { "epoch": 4.846278608758335, "grad_norm": 0.26354706287384033, "learning_rate": 2.8118895531473733e-08, "loss": 0.3447, "step": 8964 }, { "epoch": 4.846819246711119, "grad_norm": 0.2673446238040924, "learning_rate": 2.7919303286039202e-08, "loss": 0.3957, "step": 8965 }, { "epoch": 4.847359884663904, "grad_norm": 0.2549054026603699, "learning_rate": 2.772041995085517e-08, "loss": 0.3468, "step": 8966 }, { "epoch": 4.847900522616688, "grad_norm": 0.2655335068702698, "learning_rate": 2.7522245554278404e-08, "loss": 0.3316, "step": 8967 }, { "epoch": 4.848441160569472, "grad_norm": 0.2460017055273056, "learning_rate": 2.7324780124564633e-08, "loss": 0.3642, "step": 8968 }, { "epoch": 4.8489817985222565, "grad_norm": 0.28284773230552673, "learning_rate": 2.7128023689866888e-08, "loss": 0.4121, "step": 8969 }, { "epoch": 4.84952243647504, "grad_norm": 0.2812318205833435, "learning_rate": 2.693197627823996e-08, "loss": 0.3492, "step": 8970 }, { "epoch": 4.850063074427824, "grad_norm": 0.2731960713863373, "learning_rate": 2.673663791763481e-08, "loss": 0.356, "step": 8971 }, { "epoch": 4.850603712380609, "grad_norm": 0.25460806488990784, "learning_rate": 2.6542008635902504e-08, "loss": 0.344, "step": 8972 }, { "epoch": 4.851144350333393, "grad_norm": 0.27592790126800537, "learning_rate": 2.6348088460793064e-08, "loss": 0.3392, "step": 8973 }, { "epoch": 4.851684988286178, "grad_norm": 0.2760111689567566, "learning_rate": 2.6154877419955483e-08, "loss": 0.3602, "step": 8974 }, { "epoch": 4.852225626238962, "grad_norm": 0.26083844900131226, "learning_rate": 2.5962375540937724e-08, "loss": 0.3553, "step": 8975 }, { "epoch": 4.852766264191747, "grad_norm": 0.26439404487609863, "learning_rate": 2.577058285118561e-08, "loss": 0.3644, "step": 8976 }, { "epoch": 4.8533069021445305, "grad_norm": 0.2698628604412079, "learning_rate": 2.557949937804505e-08, "loss": 0.3721, "step": 8977 }, { "epoch": 4.8538475400973145, "grad_norm": 0.26598867774009705, "learning_rate": 2.5389125148760353e-08, "loss": 0.3501, "step": 8978 }, { "epoch": 4.854388178050099, "grad_norm": 0.26747986674308777, "learning_rate": 2.5199460190474255e-08, "loss": 0.4075, "step": 8979 }, { "epoch": 4.854928816002883, "grad_norm": 0.2578566372394562, "learning_rate": 2.5010504530229574e-08, "loss": 0.3406, "step": 8980 }, { "epoch": 4.855469453955668, "grad_norm": 0.2632170617580414, "learning_rate": 2.4822258194966975e-08, "loss": 0.3354, "step": 8981 }, { "epoch": 4.856010091908452, "grad_norm": 0.2677387595176697, "learning_rate": 2.4634721211526102e-08, "loss": 0.3469, "step": 8982 }, { "epoch": 4.856550729861237, "grad_norm": 0.27075430750846863, "learning_rate": 2.4447893606645567e-08, "loss": 0.3443, "step": 8983 }, { "epoch": 4.857091367814021, "grad_norm": 0.26497140526771545, "learning_rate": 2.4261775406963505e-08, "loss": 0.3584, "step": 8984 }, { "epoch": 4.8576320057668045, "grad_norm": 0.2607765197753906, "learning_rate": 2.4076366639015914e-08, "loss": 0.369, "step": 8985 }, { "epoch": 4.858172643719589, "grad_norm": 0.28138893842697144, "learning_rate": 2.3891667329237756e-08, "loss": 0.3623, "step": 8986 }, { "epoch": 4.858713281672373, "grad_norm": 0.2865180969238281, "learning_rate": 2.3707677503963523e-08, "loss": 0.3966, "step": 8987 }, { "epoch": 4.859253919625158, "grad_norm": 0.2480352520942688, "learning_rate": 2.3524397189426117e-08, "loss": 0.3419, "step": 8988 }, { "epoch": 4.859794557577942, "grad_norm": 0.2529453635215759, "learning_rate": 2.3341826411756863e-08, "loss": 0.3579, "step": 8989 }, { "epoch": 4.860335195530726, "grad_norm": 0.27298611402511597, "learning_rate": 2.3159965196987156e-08, "loss": 0.3687, "step": 8990 }, { "epoch": 4.860875833483511, "grad_norm": 0.24878400564193726, "learning_rate": 2.29788135710457e-08, "loss": 0.3376, "step": 8991 }, { "epoch": 4.861416471436295, "grad_norm": 0.28801393508911133, "learning_rate": 2.2798371559761835e-08, "loss": 0.3756, "step": 8992 }, { "epoch": 4.861957109389079, "grad_norm": 0.2819991409778595, "learning_rate": 2.261863918886109e-08, "loss": 0.3451, "step": 8993 }, { "epoch": 4.862497747341863, "grad_norm": 0.2526842951774597, "learning_rate": 2.2439616483970748e-08, "loss": 0.3584, "step": 8994 }, { "epoch": 4.863038385294647, "grad_norm": 0.2582084536552429, "learning_rate": 2.2261303470614282e-08, "loss": 0.3716, "step": 8995 }, { "epoch": 4.863579023247432, "grad_norm": 0.2674556076526642, "learning_rate": 2.2083700174216348e-08, "loss": 0.38, "step": 8996 }, { "epoch": 4.864119661200216, "grad_norm": 0.2626592218875885, "learning_rate": 2.1906806620099473e-08, "loss": 0.3382, "step": 8997 }, { "epoch": 4.864660299153001, "grad_norm": 0.27217191457748413, "learning_rate": 2.1730622833483484e-08, "loss": 0.3627, "step": 8998 }, { "epoch": 4.865200937105785, "grad_norm": 0.2672560214996338, "learning_rate": 2.1555148839489392e-08, "loss": 0.3331, "step": 8999 }, { "epoch": 4.8657415750585695, "grad_norm": 0.2712641656398773, "learning_rate": 2.1380384663135523e-08, "loss": 0.401, "step": 9000 }, { "epoch": 4.866282213011353, "grad_norm": 0.2781943380832672, "learning_rate": 2.1206330329339718e-08, "loss": 0.3508, "step": 9001 }, { "epoch": 4.866822850964137, "grad_norm": 0.2640174329280853, "learning_rate": 2.1032985862918242e-08, "loss": 0.355, "step": 9002 }, { "epoch": 4.867363488916922, "grad_norm": 0.2671608626842499, "learning_rate": 2.086035128858632e-08, "loss": 0.3841, "step": 9003 }, { "epoch": 4.867904126869706, "grad_norm": 0.25497519969940186, "learning_rate": 2.0688426630958158e-08, "loss": 0.3467, "step": 9004 }, { "epoch": 4.868444764822491, "grad_norm": 0.2529909312725067, "learning_rate": 2.0517211914545254e-08, "loss": 0.3519, "step": 9005 }, { "epoch": 4.868985402775275, "grad_norm": 0.2696881592273712, "learning_rate": 2.0346707163760304e-08, "loss": 0.3768, "step": 9006 }, { "epoch": 4.869526040728059, "grad_norm": 0.2658160626888275, "learning_rate": 2.0176912402912752e-08, "loss": 0.3659, "step": 9007 }, { "epoch": 4.8700666786808435, "grad_norm": 0.24771720170974731, "learning_rate": 2.0007827656212674e-08, "loss": 0.3515, "step": 9008 }, { "epoch": 4.8706073166336274, "grad_norm": 0.27035635709762573, "learning_rate": 1.98394529477669e-08, "loss": 0.3962, "step": 9009 }, { "epoch": 4.871147954586412, "grad_norm": 0.2588082551956177, "learning_rate": 1.967178830158234e-08, "loss": 0.3257, "step": 9010 }, { "epoch": 4.871688592539196, "grad_norm": 0.2636779248714447, "learning_rate": 1.950483374156431e-08, "loss": 0.3616, "step": 9011 }, { "epoch": 4.87222923049198, "grad_norm": 0.2517573833465576, "learning_rate": 1.9338589291516553e-08, "loss": 0.3473, "step": 9012 }, { "epoch": 4.872769868444765, "grad_norm": 0.2549690306186676, "learning_rate": 1.9173054975142326e-08, "loss": 0.3874, "step": 9013 }, { "epoch": 4.873310506397549, "grad_norm": 0.2793142795562744, "learning_rate": 1.900823081604386e-08, "loss": 0.3795, "step": 9014 }, { "epoch": 4.873851144350334, "grad_norm": 0.2743738889694214, "learning_rate": 1.8844116837719582e-08, "loss": 0.3434, "step": 9015 }, { "epoch": 4.8743917823031175, "grad_norm": 0.2686752676963806, "learning_rate": 1.8680713063570777e-08, "loss": 0.3588, "step": 9016 }, { "epoch": 4.874932420255902, "grad_norm": 0.2675032317638397, "learning_rate": 1.8518019516893803e-08, "loss": 0.3351, "step": 9017 }, { "epoch": 4.875473058208686, "grad_norm": 0.28255701065063477, "learning_rate": 1.835603622088511e-08, "loss": 0.3802, "step": 9018 }, { "epoch": 4.87601369616147, "grad_norm": 0.27471938729286194, "learning_rate": 1.819476319864122e-08, "loss": 0.4098, "step": 9019 }, { "epoch": 4.876554334114255, "grad_norm": 0.24794790148735046, "learning_rate": 1.803420047315485e-08, "loss": 0.3012, "step": 9020 }, { "epoch": 4.877094972067039, "grad_norm": 0.26921573281288147, "learning_rate": 1.7874348067319912e-08, "loss": 0.374, "step": 9021 }, { "epoch": 4.877635610019824, "grad_norm": 0.2633867859840393, "learning_rate": 1.771520600392651e-08, "loss": 0.3559, "step": 9022 }, { "epoch": 4.878176247972608, "grad_norm": 0.26123112440109253, "learning_rate": 1.7556774305665935e-08, "loss": 0.3666, "step": 9023 }, { "epoch": 4.878716885925392, "grad_norm": 0.25380924344062805, "learning_rate": 1.7399052995126787e-08, "loss": 0.3476, "step": 9024 }, { "epoch": 4.879257523878176, "grad_norm": 0.26871591806411743, "learning_rate": 1.724204209479663e-08, "loss": 0.3613, "step": 9025 }, { "epoch": 4.87979816183096, "grad_norm": 0.2530689537525177, "learning_rate": 1.7085741627062003e-08, "loss": 0.3532, "step": 9026 }, { "epoch": 4.880338799783745, "grad_norm": 0.2639843225479126, "learning_rate": 1.6930151614207302e-08, "loss": 0.3783, "step": 9027 }, { "epoch": 4.880879437736529, "grad_norm": 0.2442837506532669, "learning_rate": 1.6775272078417004e-08, "loss": 0.3196, "step": 9028 }, { "epoch": 4.881420075689313, "grad_norm": 0.2900562882423401, "learning_rate": 1.662110304177289e-08, "loss": 0.3927, "step": 9029 }, { "epoch": 4.881960713642098, "grad_norm": 0.2681589126586914, "learning_rate": 1.646764452625682e-08, "loss": 0.3594, "step": 9030 }, { "epoch": 4.882501351594882, "grad_norm": 0.27078258991241455, "learning_rate": 1.6314896553748515e-08, "loss": 0.3651, "step": 9031 }, { "epoch": 4.883041989547666, "grad_norm": 0.2650523781776428, "learning_rate": 1.6162859146025557e-08, "loss": 0.3488, "step": 9032 }, { "epoch": 4.88358262750045, "grad_norm": 0.26287534832954407, "learning_rate": 1.601153232476671e-08, "loss": 0.3725, "step": 9033 }, { "epoch": 4.884123265453235, "grad_norm": 0.23999665677547455, "learning_rate": 1.5860916111546386e-08, "loss": 0.3072, "step": 9034 }, { "epoch": 4.884663903406019, "grad_norm": 0.2521497905254364, "learning_rate": 1.5711010527839633e-08, "loss": 0.3726, "step": 9035 }, { "epoch": 4.885204541358803, "grad_norm": 0.2601372003555298, "learning_rate": 1.5561815595020457e-08, "loss": 0.3824, "step": 9036 }, { "epoch": 4.885745179311588, "grad_norm": 0.24594874680042267, "learning_rate": 1.541333133436018e-08, "loss": 0.3534, "step": 9037 }, { "epoch": 4.886285817264372, "grad_norm": 0.2619156241416931, "learning_rate": 1.526555776702965e-08, "loss": 0.3611, "step": 9038 }, { "epoch": 4.8868264552171565, "grad_norm": 0.25986140966415405, "learning_rate": 1.511849491409756e-08, "loss": 0.3362, "step": 9039 }, { "epoch": 4.8873670931699404, "grad_norm": 0.2690037488937378, "learning_rate": 1.4972142796532696e-08, "loss": 0.3891, "step": 9040 }, { "epoch": 4.887907731122725, "grad_norm": 0.2490912824869156, "learning_rate": 1.482650143520059e-08, "loss": 0.3477, "step": 9041 }, { "epoch": 4.888448369075509, "grad_norm": 0.270921528339386, "learning_rate": 1.4681570850867966e-08, "loss": 0.3392, "step": 9042 }, { "epoch": 4.888989007028293, "grad_norm": 0.2785066068172455, "learning_rate": 1.4537351064197736e-08, "loss": 0.3663, "step": 9043 }, { "epoch": 4.889529644981078, "grad_norm": 0.24923615157604218, "learning_rate": 1.4393842095752896e-08, "loss": 0.3329, "step": 9044 }, { "epoch": 4.890070282933862, "grad_norm": 0.2749128043651581, "learning_rate": 1.4251043965994304e-08, "loss": 0.409, "step": 9045 }, { "epoch": 4.890610920886647, "grad_norm": 0.25903937220573425, "learning_rate": 1.410895669528234e-08, "loss": 0.3411, "step": 9046 }, { "epoch": 4.8911515588394305, "grad_norm": 0.2720274031162262, "learning_rate": 1.3967580303875239e-08, "loss": 0.3729, "step": 9047 }, { "epoch": 4.8916921967922145, "grad_norm": 0.2366250902414322, "learning_rate": 1.3826914811930214e-08, "loss": 0.3264, "step": 9048 }, { "epoch": 4.892232834744999, "grad_norm": 0.25673407316207886, "learning_rate": 1.3686960239503444e-08, "loss": 0.3811, "step": 9049 }, { "epoch": 4.892773472697783, "grad_norm": 0.25436335802078247, "learning_rate": 1.3547716606548967e-08, "loss": 0.3911, "step": 9050 }, { "epoch": 4.893314110650568, "grad_norm": 0.25915470719337463, "learning_rate": 1.3409183932919788e-08, "loss": 0.395, "step": 9051 }, { "epoch": 4.893854748603352, "grad_norm": 0.26253634691238403, "learning_rate": 1.3271362238368447e-08, "loss": 0.366, "step": 9052 }, { "epoch": 4.894395386556136, "grad_norm": 0.26160550117492676, "learning_rate": 1.3134251542544774e-08, "loss": 0.3661, "step": 9053 }, { "epoch": 4.894936024508921, "grad_norm": 0.2520962655544281, "learning_rate": 1.2997851864997024e-08, "loss": 0.3599, "step": 9054 }, { "epoch": 4.8954766624617045, "grad_norm": 0.2571839392185211, "learning_rate": 1.2862163225174084e-08, "loss": 0.376, "step": 9055 }, { "epoch": 4.896017300414489, "grad_norm": 0.24826057255268097, "learning_rate": 1.272718564242159e-08, "loss": 0.3455, "step": 9056 }, { "epoch": 4.896557938367273, "grad_norm": 0.25594672560691833, "learning_rate": 1.259291913598415e-08, "loss": 0.3614, "step": 9057 }, { "epoch": 4.897098576320058, "grad_norm": 0.2774297297000885, "learning_rate": 1.2459363725005891e-08, "loss": 0.4316, "step": 9058 }, { "epoch": 4.897639214272842, "grad_norm": 0.25605112314224243, "learning_rate": 1.2326519428528805e-08, "loss": 0.3426, "step": 9059 }, { "epoch": 4.898179852225626, "grad_norm": 0.27040329575538635, "learning_rate": 1.2194386265492742e-08, "loss": 0.3753, "step": 9060 }, { "epoch": 4.898720490178411, "grad_norm": 0.26323947310447693, "learning_rate": 1.2062964254738186e-08, "loss": 0.3588, "step": 9061 }, { "epoch": 4.899261128131195, "grad_norm": 0.27045533061027527, "learning_rate": 1.193225341500237e-08, "loss": 0.3274, "step": 9062 }, { "epoch": 4.899801766083979, "grad_norm": 0.29912617802619934, "learning_rate": 1.18022537649215e-08, "loss": 0.3502, "step": 9063 }, { "epoch": 4.900342404036763, "grad_norm": 0.27602618932724, "learning_rate": 1.1672965323031304e-08, "loss": 0.3213, "step": 9064 }, { "epoch": 4.900883041989548, "grad_norm": 0.2750244438648224, "learning_rate": 1.1544388107765924e-08, "loss": 0.4147, "step": 9065 }, { "epoch": 4.901423679942332, "grad_norm": 0.2645430266857147, "learning_rate": 1.1416522137456254e-08, "loss": 0.3692, "step": 9066 }, { "epoch": 4.901964317895116, "grad_norm": 0.2459685057401657, "learning_rate": 1.1289367430334375e-08, "loss": 0.3079, "step": 9067 }, { "epoch": 4.902504955847901, "grad_norm": 0.27539220452308655, "learning_rate": 1.116292400452912e-08, "loss": 0.4131, "step": 9068 }, { "epoch": 4.903045593800685, "grad_norm": 0.24851562082767487, "learning_rate": 1.1037191878068843e-08, "loss": 0.3484, "step": 9069 }, { "epoch": 4.903586231753469, "grad_norm": 0.24963010847568512, "learning_rate": 1.0912171068880318e-08, "loss": 0.3385, "step": 9070 }, { "epoch": 4.9041268697062534, "grad_norm": 0.26785072684288025, "learning_rate": 1.0787861594788728e-08, "loss": 0.3773, "step": 9071 }, { "epoch": 4.904667507659037, "grad_norm": 0.26428255438804626, "learning_rate": 1.0664263473517677e-08, "loss": 0.3968, "step": 9072 }, { "epoch": 4.905208145611822, "grad_norm": 0.2475302517414093, "learning_rate": 1.0541376722689734e-08, "loss": 0.3291, "step": 9073 }, { "epoch": 4.905748783564606, "grad_norm": 0.2793020009994507, "learning_rate": 1.0419201359825881e-08, "loss": 0.3545, "step": 9074 }, { "epoch": 4.906289421517391, "grad_norm": 0.2719111144542694, "learning_rate": 1.029773740234552e-08, "loss": 0.4021, "step": 9075 }, { "epoch": 4.906830059470175, "grad_norm": 0.2759902775287628, "learning_rate": 1.0176984867567018e-08, "loss": 0.3442, "step": 9076 }, { "epoch": 4.907370697422959, "grad_norm": 0.2784024775028229, "learning_rate": 1.0056943772706607e-08, "loss": 0.3617, "step": 9077 }, { "epoch": 4.9079113353757435, "grad_norm": 0.2713329792022705, "learning_rate": 9.937614134880036e-09, "loss": 0.374, "step": 9078 }, { "epoch": 4.9084519733285275, "grad_norm": 0.2651738226413727, "learning_rate": 9.81899597110092e-09, "loss": 0.3532, "step": 9079 }, { "epoch": 4.908992611281312, "grad_norm": 0.2688627541065216, "learning_rate": 9.701089298281285e-09, "loss": 0.3752, "step": 9080 }, { "epoch": 4.909533249234096, "grad_norm": 0.27973148226737976, "learning_rate": 9.583894133232685e-09, "loss": 0.3755, "step": 9081 }, { "epoch": 4.910073887186881, "grad_norm": 0.26119181513786316, "learning_rate": 9.46741049266453e-09, "loss": 0.3515, "step": 9082 }, { "epoch": 4.910614525139665, "grad_norm": 0.2695755958557129, "learning_rate": 9.351638393184092e-09, "loss": 0.3567, "step": 9083 }, { "epoch": 4.911155163092449, "grad_norm": 0.2547578513622284, "learning_rate": 9.236577851298168e-09, "loss": 0.3326, "step": 9084 }, { "epoch": 4.911695801045234, "grad_norm": 0.27387887239456177, "learning_rate": 9.12222888341252e-09, "loss": 0.3887, "step": 9085 }, { "epoch": 4.9122364389980175, "grad_norm": 0.2561882734298706, "learning_rate": 9.008591505830777e-09, "loss": 0.3682, "step": 9086 }, { "epoch": 4.9127770769508015, "grad_norm": 0.25791123509407043, "learning_rate": 8.895665734754422e-09, "loss": 0.3444, "step": 9087 }, { "epoch": 4.913317714903586, "grad_norm": 0.27500414848327637, "learning_rate": 8.783451586284464e-09, "loss": 0.3766, "step": 9088 }, { "epoch": 4.91385835285637, "grad_norm": 0.2534409761428833, "learning_rate": 8.671949076420883e-09, "loss": 0.3507, "step": 9089 }, { "epoch": 4.914398990809155, "grad_norm": 0.2635025084018707, "learning_rate": 8.561158221060406e-09, "loss": 0.3895, "step": 9090 }, { "epoch": 4.914939628761939, "grad_norm": 0.25874948501586914, "learning_rate": 8.451079035999843e-09, "loss": 0.3622, "step": 9091 }, { "epoch": 4.915480266714724, "grad_norm": 0.26105883717536926, "learning_rate": 8.341711536934415e-09, "loss": 0.3452, "step": 9092 }, { "epoch": 4.916020904667508, "grad_norm": 0.252156138420105, "learning_rate": 8.233055739457762e-09, "loss": 0.3682, "step": 9093 }, { "epoch": 4.9165615426202915, "grad_norm": 0.266645222902298, "learning_rate": 8.125111659060826e-09, "loss": 0.3792, "step": 9094 }, { "epoch": 4.917102180573076, "grad_norm": 0.2581704258918762, "learning_rate": 8.017879311134624e-09, "loss": 0.3233, "step": 9095 }, { "epoch": 4.91764281852586, "grad_norm": 0.27391037344932556, "learning_rate": 7.911358710968042e-09, "loss": 0.3656, "step": 9096 }, { "epoch": 4.918183456478645, "grad_norm": 0.2647905647754669, "learning_rate": 7.805549873749485e-09, "loss": 0.3522, "step": 9097 }, { "epoch": 4.918724094431429, "grad_norm": 0.2863467335700989, "learning_rate": 7.700452814563552e-09, "loss": 0.387, "step": 9098 }, { "epoch": 4.919264732384214, "grad_norm": 0.2514527440071106, "learning_rate": 7.596067548395481e-09, "loss": 0.363, "step": 9099 }, { "epoch": 4.919805370336998, "grad_norm": 0.2609706223011017, "learning_rate": 7.492394090128364e-09, "loss": 0.3352, "step": 9100 }, { "epoch": 4.920346008289782, "grad_norm": 0.2651589810848236, "learning_rate": 7.38943245454371e-09, "loss": 0.373, "step": 9101 }, { "epoch": 4.920886646242566, "grad_norm": 0.2527099847793579, "learning_rate": 7.2871826563214454e-09, "loss": 0.3637, "step": 9102 }, { "epoch": 4.92142728419535, "grad_norm": 0.23803900182247162, "learning_rate": 7.185644710040463e-09, "loss": 0.331, "step": 9103 }, { "epoch": 4.921967922148135, "grad_norm": 0.26620054244995117, "learning_rate": 7.0848186301775145e-09, "loss": 0.3664, "step": 9104 }, { "epoch": 4.922508560100919, "grad_norm": 0.2763153612613678, "learning_rate": 6.98470443110888e-09, "loss": 0.3781, "step": 9105 }, { "epoch": 4.923049198053703, "grad_norm": 0.2591570019721985, "learning_rate": 6.88530212710814e-09, "loss": 0.3638, "step": 9106 }, { "epoch": 4.923589836006488, "grad_norm": 0.2564665973186493, "learning_rate": 6.7866117323472925e-09, "loss": 0.3666, "step": 9107 }, { "epoch": 4.924130473959272, "grad_norm": 0.2689834535121918, "learning_rate": 6.688633260898414e-09, "loss": 0.3554, "step": 9108 }, { "epoch": 4.9246711119120565, "grad_norm": 0.24325302243232727, "learning_rate": 6.591366726730885e-09, "loss": 0.3345, "step": 9109 }, { "epoch": 4.9252117498648404, "grad_norm": 0.2511265277862549, "learning_rate": 6.4948121437125035e-09, "loss": 0.3754, "step": 9110 }, { "epoch": 4.925752387817624, "grad_norm": 0.25291627645492554, "learning_rate": 6.398969525610032e-09, "loss": 0.3385, "step": 9111 }, { "epoch": 4.926293025770409, "grad_norm": 0.2580259442329407, "learning_rate": 6.303838886088653e-09, "loss": 0.3644, "step": 9112 }, { "epoch": 4.926833663723193, "grad_norm": 0.23568320274353027, "learning_rate": 6.20942023871196e-09, "loss": 0.3328, "step": 9113 }, { "epoch": 4.927374301675978, "grad_norm": 0.2598705589771271, "learning_rate": 6.115713596941408e-09, "loss": 0.3851, "step": 9114 }, { "epoch": 4.927914939628762, "grad_norm": 0.2612532675266266, "learning_rate": 6.022718974137976e-09, "loss": 0.3697, "step": 9115 }, { "epoch": 4.928455577581547, "grad_norm": 0.26220640540122986, "learning_rate": 5.930436383561056e-09, "loss": 0.3278, "step": 9116 }, { "epoch": 4.9289962155343305, "grad_norm": 0.2938258945941925, "learning_rate": 5.838865838366792e-09, "loss": 0.4168, "step": 9117 }, { "epoch": 4.9295368534871145, "grad_norm": 0.25957944989204407, "learning_rate": 5.748007351613072e-09, "loss": 0.3328, "step": 9118 }, { "epoch": 4.930077491439899, "grad_norm": 0.2673860192298889, "learning_rate": 5.657860936252868e-09, "loss": 0.3636, "step": 9119 }, { "epoch": 4.930618129392683, "grad_norm": 0.26958364248275757, "learning_rate": 5.568426605139232e-09, "loss": 0.4007, "step": 9120 }, { "epoch": 4.931158767345468, "grad_norm": 0.24389295279979706, "learning_rate": 5.479704371024186e-09, "loss": 0.3277, "step": 9121 }, { "epoch": 4.931699405298252, "grad_norm": 0.2678050100803375, "learning_rate": 5.391694246557056e-09, "loss": 0.365, "step": 9122 }, { "epoch": 4.932240043251037, "grad_norm": 0.27633118629455566, "learning_rate": 5.304396244286691e-09, "loss": 0.3767, "step": 9123 }, { "epoch": 4.932780681203821, "grad_norm": 0.25347909331321716, "learning_rate": 5.217810376659249e-09, "loss": 0.3511, "step": 9124 }, { "epoch": 4.9333213191566045, "grad_norm": 0.24641470611095428, "learning_rate": 5.131936656020409e-09, "loss": 0.3514, "step": 9125 }, { "epoch": 4.933861957109389, "grad_norm": 0.27080366015434265, "learning_rate": 5.046775094613709e-09, "loss": 0.3732, "step": 9126 }, { "epoch": 4.934402595062173, "grad_norm": 0.2650505304336548, "learning_rate": 4.962325704581661e-09, "loss": 0.3553, "step": 9127 }, { "epoch": 4.934943233014957, "grad_norm": 0.2590729594230652, "learning_rate": 4.878588497964077e-09, "loss": 0.3604, "step": 9128 }, { "epoch": 4.935483870967742, "grad_norm": 0.2556917667388916, "learning_rate": 4.795563486700849e-09, "loss": 0.3628, "step": 9129 }, { "epoch": 4.936024508920526, "grad_norm": 0.2660444676876068, "learning_rate": 4.713250682629733e-09, "loss": 0.3569, "step": 9130 }, { "epoch": 4.936565146873311, "grad_norm": 0.27511194348335266, "learning_rate": 4.631650097485784e-09, "loss": 0.359, "step": 9131 }, { "epoch": 4.937105784826095, "grad_norm": 0.2793119251728058, "learning_rate": 4.550761742904142e-09, "loss": 0.3719, "step": 9132 }, { "epoch": 4.937646422778879, "grad_norm": 0.25426700711250305, "learning_rate": 4.470585630417801e-09, "loss": 0.3715, "step": 9133 }, { "epoch": 4.938187060731663, "grad_norm": 0.2769455015659332, "learning_rate": 4.391121771457618e-09, "loss": 0.3939, "step": 9134 }, { "epoch": 4.938727698684447, "grad_norm": 0.23823679983615875, "learning_rate": 4.312370177353975e-09, "loss": 0.3269, "step": 9135 }, { "epoch": 4.939268336637232, "grad_norm": 0.25965994596481323, "learning_rate": 4.234330859334557e-09, "loss": 0.398, "step": 9136 }, { "epoch": 4.939808974590016, "grad_norm": 0.2441171556711197, "learning_rate": 4.157003828526573e-09, "loss": 0.344, "step": 9137 }, { "epoch": 4.940349612542801, "grad_norm": 0.26520293951034546, "learning_rate": 4.08038909595454e-09, "loss": 0.3712, "step": 9138 }, { "epoch": 4.940890250495585, "grad_norm": 0.2556518316268921, "learning_rate": 4.004486672542496e-09, "loss": 0.3578, "step": 9139 }, { "epoch": 4.9414308884483695, "grad_norm": 0.268156498670578, "learning_rate": 3.929296569112895e-09, "loss": 0.3801, "step": 9140 }, { "epoch": 4.9419715264011534, "grad_norm": 0.24451026320457458, "learning_rate": 3.854818796385495e-09, "loss": 0.3474, "step": 9141 }, { "epoch": 4.942512164353937, "grad_norm": 0.27529263496398926, "learning_rate": 3.781053364979026e-09, "loss": 0.3721, "step": 9142 }, { "epoch": 4.943052802306722, "grad_norm": 0.27681949734687805, "learning_rate": 3.708000285411739e-09, "loss": 0.3746, "step": 9143 }, { "epoch": 4.943593440259506, "grad_norm": 0.259630024433136, "learning_rate": 3.6356595680986375e-09, "loss": 0.3512, "step": 9144 }, { "epoch": 4.94413407821229, "grad_norm": 0.2611832916736603, "learning_rate": 3.5640312233548024e-09, "loss": 0.3503, "step": 9145 }, { "epoch": 4.944674716165075, "grad_norm": 0.26331421732902527, "learning_rate": 3.493115261391511e-09, "loss": 0.3478, "step": 9146 }, { "epoch": 4.945215354117859, "grad_norm": 0.25908151268959045, "learning_rate": 3.4229116923212293e-09, "loss": 0.3655, "step": 9147 }, { "epoch": 4.9457559920706435, "grad_norm": 0.23877955973148346, "learning_rate": 3.3534205261526174e-09, "loss": 0.3454, "step": 9148 }, { "epoch": 4.9462966300234275, "grad_norm": 0.269363671541214, "learning_rate": 3.284641772793862e-09, "loss": 0.4178, "step": 9149 }, { "epoch": 4.946837267976212, "grad_norm": 0.27045199275016785, "learning_rate": 3.2165754420510063e-09, "loss": 0.3526, "step": 9150 }, { "epoch": 4.947377905928996, "grad_norm": 0.2779121696949005, "learning_rate": 3.149221543629066e-09, "loss": 0.3688, "step": 9151 }, { "epoch": 4.94791854388178, "grad_norm": 0.26373735070228577, "learning_rate": 3.0825800871314705e-09, "loss": 0.341, "step": 9152 }, { "epoch": 4.948459181834565, "grad_norm": 0.2782399356365204, "learning_rate": 3.0166510820595074e-09, "loss": 0.3608, "step": 9153 }, { "epoch": 4.948999819787349, "grad_norm": 0.27947893738746643, "learning_rate": 2.9514345378134357e-09, "loss": 0.3699, "step": 9154 }, { "epoch": 4.949540457740134, "grad_norm": 0.25066402554512024, "learning_rate": 2.886930463691928e-09, "loss": 0.3203, "step": 9155 }, { "epoch": 4.9500810956929175, "grad_norm": 0.2683260440826416, "learning_rate": 2.823138868890962e-09, "loss": 0.3778, "step": 9156 }, { "epoch": 4.950621733645702, "grad_norm": 0.2609219253063202, "learning_rate": 2.760059762506595e-09, "loss": 0.3574, "step": 9157 }, { "epoch": 4.951162371598486, "grad_norm": 0.26026174426078796, "learning_rate": 2.6976931535321884e-09, "loss": 0.3393, "step": 9158 }, { "epoch": 4.95170300955127, "grad_norm": 0.2527477741241455, "learning_rate": 2.636039050860073e-09, "loss": 0.3538, "step": 9159 }, { "epoch": 4.952243647504055, "grad_norm": 0.2678411900997162, "learning_rate": 2.5750974632809955e-09, "loss": 0.3948, "step": 9160 }, { "epoch": 4.952784285456839, "grad_norm": 0.2649003267288208, "learning_rate": 2.514868399483561e-09, "loss": 0.33, "step": 9161 }, { "epoch": 4.953324923409624, "grad_norm": 0.2374754697084427, "learning_rate": 2.4553518680547893e-09, "loss": 0.3412, "step": 9162 }, { "epoch": 4.953865561362408, "grad_norm": 0.2582480013370514, "learning_rate": 2.3965478774812256e-09, "loss": 0.3679, "step": 9163 }, { "epoch": 4.9544061993151916, "grad_norm": 0.26913562417030334, "learning_rate": 2.3384564361461635e-09, "loss": 0.356, "step": 9164 }, { "epoch": 4.954946837267976, "grad_norm": 0.24180622398853302, "learning_rate": 2.2810775523329775e-09, "loss": 0.3745, "step": 9165 }, { "epoch": 4.95548747522076, "grad_norm": 0.2550312280654907, "learning_rate": 2.2244112342223456e-09, "loss": 0.3821, "step": 9166 }, { "epoch": 4.956028113173545, "grad_norm": 0.2468712031841278, "learning_rate": 2.168457489893916e-09, "loss": 0.3558, "step": 9167 }, { "epoch": 4.956568751126329, "grad_norm": 0.26661866903305054, "learning_rate": 2.113216327324641e-09, "loss": 0.3618, "step": 9168 }, { "epoch": 4.957109389079113, "grad_norm": 0.2754621207714081, "learning_rate": 2.058687754391553e-09, "loss": 0.3653, "step": 9169 }, { "epoch": 4.957650027031898, "grad_norm": 0.2769436836242676, "learning_rate": 2.0048717788684335e-09, "loss": 0.3803, "step": 9170 }, { "epoch": 4.958190664984682, "grad_norm": 0.26223111152648926, "learning_rate": 1.9517684084291442e-09, "loss": 0.315, "step": 9171 }, { "epoch": 4.9587313029374664, "grad_norm": 0.2742185890674591, "learning_rate": 1.899377650644851e-09, "loss": 0.404, "step": 9172 }, { "epoch": 4.95927194089025, "grad_norm": 0.24863441288471222, "learning_rate": 1.847699512985135e-09, "loss": 0.3541, "step": 9173 }, { "epoch": 4.959812578843035, "grad_norm": 0.2914765775203705, "learning_rate": 1.7967340028179902e-09, "loss": 0.3721, "step": 9174 }, { "epoch": 4.960353216795819, "grad_norm": 0.2604866027832031, "learning_rate": 1.746481127409827e-09, "loss": 0.4057, "step": 9175 }, { "epoch": 4.960893854748603, "grad_norm": 0.26574966311454773, "learning_rate": 1.6969408939265796e-09, "loss": 0.3536, "step": 9176 }, { "epoch": 4.961434492701388, "grad_norm": 0.283233106136322, "learning_rate": 1.648113309430932e-09, "loss": 0.3746, "step": 9177 }, { "epoch": 4.961975130654172, "grad_norm": 0.24985407292842865, "learning_rate": 1.5999983808845376e-09, "loss": 0.3275, "step": 9178 }, { "epoch": 4.9625157686069565, "grad_norm": 0.26592645049095154, "learning_rate": 1.5525961151474645e-09, "loss": 0.3592, "step": 9179 }, { "epoch": 4.9630564065597405, "grad_norm": 0.2678586542606354, "learning_rate": 1.5059065189787502e-09, "loss": 0.3529, "step": 9180 }, { "epoch": 4.963597044512525, "grad_norm": 0.2732866108417511, "learning_rate": 1.4599295990352924e-09, "loss": 0.3571, "step": 9181 }, { "epoch": 4.964137682465309, "grad_norm": 0.2503153681755066, "learning_rate": 1.4146653618718475e-09, "loss": 0.3396, "step": 9182 }, { "epoch": 4.964678320418093, "grad_norm": 0.2600671052932739, "learning_rate": 1.3701138139421422e-09, "loss": 0.368, "step": 9183 }, { "epoch": 4.965218958370878, "grad_norm": 0.2618968188762665, "learning_rate": 1.3262749615988723e-09, "loss": 0.3564, "step": 9184 }, { "epoch": 4.965759596323662, "grad_norm": 0.26783448457717896, "learning_rate": 1.2831488110920386e-09, "loss": 0.3705, "step": 9185 }, { "epoch": 4.966300234276446, "grad_norm": 0.27529674768447876, "learning_rate": 1.2407353685706115e-09, "loss": 0.3619, "step": 9186 }, { "epoch": 4.9668408722292305, "grad_norm": 0.2847377359867096, "learning_rate": 1.1990346400819752e-09, "loss": 0.3629, "step": 9187 }, { "epoch": 4.9673815101820145, "grad_norm": 0.2857806980609894, "learning_rate": 1.1580466315713746e-09, "loss": 0.3815, "step": 9188 }, { "epoch": 4.967922148134799, "grad_norm": 0.2735769748687744, "learning_rate": 1.1177713488830233e-09, "loss": 0.3889, "step": 9189 }, { "epoch": 4.968462786087583, "grad_norm": 0.2641699016094208, "learning_rate": 1.07820879775955e-09, "loss": 0.3761, "step": 9190 }, { "epoch": 4.969003424040368, "grad_norm": 0.2488427609205246, "learning_rate": 1.0393589838414431e-09, "loss": 0.359, "step": 9191 }, { "epoch": 4.969544061993152, "grad_norm": 0.26051753759384155, "learning_rate": 1.0012219126676048e-09, "loss": 0.3674, "step": 9192 }, { "epoch": 4.970084699945936, "grad_norm": 0.2759767472743988, "learning_rate": 9.637975896759077e-10, "loss": 0.3853, "step": 9193 }, { "epoch": 4.970625337898721, "grad_norm": 0.2598288655281067, "learning_rate": 9.270860202020837e-10, "loss": 0.3597, "step": 9194 }, { "epoch": 4.9711659758515045, "grad_norm": 0.26341670751571655, "learning_rate": 8.910872094802792e-10, "loss": 0.3741, "step": 9195 }, { "epoch": 4.971706613804289, "grad_norm": 0.2670850455760956, "learning_rate": 8.558011626430551e-10, "loss": 0.3818, "step": 9196 }, { "epoch": 4.972247251757073, "grad_norm": 0.2679150700569153, "learning_rate": 8.212278847224975e-10, "loss": 0.39, "step": 9197 }, { "epoch": 4.972787889709858, "grad_norm": 0.2692587375640869, "learning_rate": 7.873673806463311e-10, "loss": 0.3403, "step": 9198 }, { "epoch": 4.973328527662642, "grad_norm": 0.2623067796230316, "learning_rate": 7.542196552440262e-10, "loss": 0.3545, "step": 9199 }, { "epoch": 4.973869165615426, "grad_norm": 0.26583847403526306, "learning_rate": 7.217847132401367e-10, "loss": 0.363, "step": 9200 }, { "epoch": 4.974409803568211, "grad_norm": 0.2767746150493622, "learning_rate": 6.900625592604071e-10, "loss": 0.3512, "step": 9201 }, { "epoch": 4.974950441520995, "grad_norm": 0.28464779257774353, "learning_rate": 6.590531978267756e-10, "loss": 0.3609, "step": 9202 }, { "epoch": 4.975491079473779, "grad_norm": 0.2627117931842804, "learning_rate": 6.287566333612605e-10, "loss": 0.3351, "step": 9203 }, { "epoch": 4.976031717426563, "grad_norm": 0.26655125617980957, "learning_rate": 5.991728701831845e-10, "loss": 0.3772, "step": 9204 }, { "epoch": 4.976572355379347, "grad_norm": 0.25739941000938416, "learning_rate": 5.703019125102849e-10, "loss": 0.3814, "step": 9205 }, { "epoch": 4.977112993332132, "grad_norm": 0.2623976767063141, "learning_rate": 5.421437644598237e-10, "loss": 0.3908, "step": 9206 }, { "epoch": 4.977653631284916, "grad_norm": 0.242452472448349, "learning_rate": 5.146984300452574e-10, "loss": 0.3146, "step": 9207 }, { "epoch": 4.978194269237701, "grad_norm": 0.25939616560935974, "learning_rate": 4.879659131806769e-10, "loss": 0.3615, "step": 9208 }, { "epoch": 4.978734907190485, "grad_norm": 0.24832409620285034, "learning_rate": 4.619462176769229e-10, "loss": 0.3905, "step": 9209 }, { "epoch": 4.979275545143269, "grad_norm": 0.25697964429855347, "learning_rate": 4.3663934724436086e-10, "loss": 0.4133, "step": 9210 }, { "epoch": 4.9798161830960535, "grad_norm": 0.25979509949684143, "learning_rate": 4.120453054912155e-10, "loss": 0.3705, "step": 9211 }, { "epoch": 4.980356821048837, "grad_norm": 0.25299936532974243, "learning_rate": 3.8816409592357106e-10, "loss": 0.34, "step": 9212 }, { "epoch": 4.980897459001622, "grad_norm": 0.2749847173690796, "learning_rate": 3.649957219464817e-10, "loss": 0.3785, "step": 9213 }, { "epoch": 4.981438096954406, "grad_norm": 0.2649393379688263, "learning_rate": 3.4254018686341596e-10, "loss": 0.3681, "step": 9214 }, { "epoch": 4.981978734907191, "grad_norm": 0.26384660601615906, "learning_rate": 3.20797493876257e-10, "loss": 0.3484, "step": 9215 }, { "epoch": 4.982519372859975, "grad_norm": 0.26114729046821594, "learning_rate": 2.9976764608474764e-10, "loss": 0.3658, "step": 9216 }, { "epoch": 4.983060010812759, "grad_norm": 0.27179184556007385, "learning_rate": 2.79450646487045e-10, "loss": 0.3604, "step": 9217 }, { "epoch": 4.9836006487655435, "grad_norm": 0.26943889260292053, "learning_rate": 2.598464979808313e-10, "loss": 0.3804, "step": 9218 }, { "epoch": 4.9841412867183275, "grad_norm": 0.2641843259334564, "learning_rate": 2.4095520335998266e-10, "loss": 0.3371, "step": 9219 }, { "epoch": 4.984681924671112, "grad_norm": 0.28133559226989746, "learning_rate": 2.227767653190105e-10, "loss": 0.3596, "step": 9220 }, { "epoch": 4.985222562623896, "grad_norm": 0.2666134536266327, "learning_rate": 2.0531118644917524e-10, "loss": 0.3377, "step": 9221 }, { "epoch": 4.98576320057668, "grad_norm": 0.2675313353538513, "learning_rate": 1.885584692407072e-10, "loss": 0.3531, "step": 9222 }, { "epoch": 4.986303838529465, "grad_norm": 0.24799907207489014, "learning_rate": 1.725186160822512e-10, "loss": 0.3539, "step": 9223 }, { "epoch": 4.986844476482249, "grad_norm": 0.24728845059871674, "learning_rate": 1.571916292608666e-10, "loss": 0.3509, "step": 9224 }, { "epoch": 4.987385114435034, "grad_norm": 0.25327473878860474, "learning_rate": 1.4257751096202755e-10, "loss": 0.3473, "step": 9225 }, { "epoch": 4.9879257523878175, "grad_norm": 0.26934900879859924, "learning_rate": 1.2867626326962258e-10, "loss": 0.3581, "step": 9226 }, { "epoch": 4.9884663903406015, "grad_norm": 0.27040383219718933, "learning_rate": 1.1548788816428957e-10, "loss": 0.403, "step": 9227 }, { "epoch": 4.989007028293386, "grad_norm": 0.2764724791049957, "learning_rate": 1.0301238752785659e-10, "loss": 0.3967, "step": 9228 }, { "epoch": 4.98954766624617, "grad_norm": 0.2611216902732849, "learning_rate": 9.124976313834577e-11, "loss": 0.3546, "step": 9229 }, { "epoch": 4.990088304198955, "grad_norm": 0.26182636618614197, "learning_rate": 8.020001667330412e-11, "loss": 0.3885, "step": 9230 }, { "epoch": 4.990628942151739, "grad_norm": 0.2629258632659912, "learning_rate": 6.986314970758301e-11, "loss": 0.3823, "step": 9231 }, { "epoch": 4.991169580104524, "grad_norm": 0.26330283284187317, "learning_rate": 6.02391637155586e-11, "loss": 0.3729, "step": 9232 }, { "epoch": 4.991710218057308, "grad_norm": 0.2656315267086029, "learning_rate": 5.1328060069466554e-11, "loss": 0.3384, "step": 9233 }, { "epoch": 4.9922508560100916, "grad_norm": 0.2702144384384155, "learning_rate": 4.3129840038846904e-11, "loss": 0.337, "step": 9234 }, { "epoch": 4.992791493962876, "grad_norm": 0.30379825830459595, "learning_rate": 3.564450479387471e-11, "loss": 0.3407, "step": 9235 }, { "epoch": 4.99333213191566, "grad_norm": 0.25860539078712463, "learning_rate": 2.8872055400919198e-11, "loss": 0.3428, "step": 9236 }, { "epoch": 4.993872769868445, "grad_norm": 0.2679060101509094, "learning_rate": 2.2812492825874388e-11, "loss": 0.3864, "step": 9237 }, { "epoch": 4.994413407821229, "grad_norm": 0.27441951632499695, "learning_rate": 1.74658179324938e-11, "loss": 0.3757, "step": 9238 }, { "epoch": 4.994954045774014, "grad_norm": 0.27093935012817383, "learning_rate": 1.283203148350065e-11, "loss": 0.3606, "step": 9239 }, { "epoch": 4.995494683726798, "grad_norm": 0.2916185259819031, "learning_rate": 8.911134139477639e-12, "loss": 0.3834, "step": 9240 }, { "epoch": 4.996035321679582, "grad_norm": 0.26088154315948486, "learning_rate": 5.703126458866948e-12, "loss": 0.3402, "step": 9241 }, { "epoch": 4.9965759596323664, "grad_norm": 0.25968217849731445, "learning_rate": 3.2080089001906845e-12, "loss": 0.3528, "step": 9242 }, { "epoch": 4.99711659758515, "grad_norm": 0.27741682529449463, "learning_rate": 1.4257818181651062e-12, "loss": 0.4118, "step": 9243 }, { "epoch": 4.997657235537934, "grad_norm": 0.24423567950725555, "learning_rate": 3.5644546703128557e-13, "loss": 0.3267, "step": 9244 }, { "epoch": 4.998197873490719, "grad_norm": 0.2597697377204895, "learning_rate": 0.0, "loss": 0.3684, "step": 9245 }, { "epoch": 4.998197873490719, "step": 9245, "total_flos": 1.4140474867580928e+16, "train_loss": 0.4185003241859558, "train_runtime": 192830.8421, "train_samples_per_second": 4.604, "train_steps_per_second": 0.048 } ], "logging_steps": 1.0, "max_steps": 9245, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4140474867580928e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }