{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 8088, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002472799208704253, "grad_norm": 2.5362265715360204, "learning_rate": 5.0000000000000004e-08, "loss": 1.2213, "step": 1 }, { "epoch": 0.0004945598417408506, "grad_norm": 2.6997408857983025, "learning_rate": 1.0000000000000001e-07, "loss": 1.231, "step": 2 }, { "epoch": 0.000741839762611276, "grad_norm": 2.5867181090965676, "learning_rate": 1.5000000000000002e-07, "loss": 1.249, "step": 3 }, { "epoch": 0.0009891196834817012, "grad_norm": 2.357638460408377, "learning_rate": 2.0000000000000002e-07, "loss": 1.2146, "step": 4 }, { "epoch": 0.0012363996043521265, "grad_norm": 2.557414917391326, "learning_rate": 2.5000000000000004e-07, "loss": 1.2872, "step": 5 }, { "epoch": 0.001483679525222552, "grad_norm": 2.3734045786673423, "learning_rate": 3.0000000000000004e-07, "loss": 1.2081, "step": 6 }, { "epoch": 0.0017309594460929772, "grad_norm": 2.621127934186416, "learning_rate": 3.5000000000000004e-07, "loss": 1.2374, "step": 7 }, { "epoch": 0.0019782393669634025, "grad_norm": 2.7716435700320714, "learning_rate": 4.0000000000000003e-07, "loss": 1.2364, "step": 8 }, { "epoch": 0.002225519287833828, "grad_norm": 2.301737328244618, "learning_rate": 4.5000000000000003e-07, "loss": 1.2219, "step": 9 }, { "epoch": 0.002472799208704253, "grad_norm": 2.5519952552831398, "learning_rate": 5.000000000000001e-07, "loss": 1.2372, "step": 10 }, { "epoch": 0.0027200791295746785, "grad_norm": 2.3667701483261667, "learning_rate": 5.5e-07, "loss": 1.1995, "step": 11 }, { "epoch": 0.002967359050445104, "grad_norm": 2.3437329583372604, "learning_rate": 6.000000000000001e-07, "loss": 1.2414, "step": 12 }, { "epoch": 0.003214638971315529, "grad_norm": 2.060580346749902, "learning_rate": 6.5e-07, "loss": 1.2034, "step": 13 }, { "epoch": 0.0034619188921859545, "grad_norm": 2.0573625343235564, "learning_rate": 7.000000000000001e-07, "loss": 1.2025, "step": 14 }, { "epoch": 0.00370919881305638, "grad_norm": 2.374658307654524, "learning_rate": 7.5e-07, "loss": 1.2111, "step": 15 }, { "epoch": 0.003956478733926805, "grad_norm": 2.134642801469451, "learning_rate": 8.000000000000001e-07, "loss": 1.227, "step": 16 }, { "epoch": 0.0042037586547972305, "grad_norm": 2.2681061588439424, "learning_rate": 8.500000000000001e-07, "loss": 1.1964, "step": 17 }, { "epoch": 0.004451038575667656, "grad_norm": 1.8506691640366166, "learning_rate": 9.000000000000001e-07, "loss": 1.1977, "step": 18 }, { "epoch": 0.004698318496538081, "grad_norm": 1.948439188694478, "learning_rate": 9.500000000000001e-07, "loss": 1.2006, "step": 19 }, { "epoch": 0.004945598417408506, "grad_norm": 1.6605613980729736, "learning_rate": 1.0000000000000002e-06, "loss": 1.1666, "step": 20 }, { "epoch": 0.0051928783382789315, "grad_norm": 1.6922633947680867, "learning_rate": 1.0500000000000001e-06, "loss": 1.1989, "step": 21 }, { "epoch": 0.005440158259149357, "grad_norm": 1.5971190787663854, "learning_rate": 1.1e-06, "loss": 1.1874, "step": 22 }, { "epoch": 0.0056874381800197825, "grad_norm": 1.5631402047790957, "learning_rate": 1.1500000000000002e-06, "loss": 1.187, "step": 23 }, { "epoch": 0.005934718100890208, "grad_norm": 1.4965437093134575, "learning_rate": 1.2000000000000002e-06, "loss": 1.1685, "step": 24 }, { "epoch": 0.006181998021760633, "grad_norm": 1.4346303928048703, "learning_rate": 1.25e-06, "loss": 1.1712, "step": 25 }, { "epoch": 0.006429277942631058, "grad_norm": 1.4575013024426748, "learning_rate": 1.3e-06, "loss": 1.1598, "step": 26 }, { "epoch": 0.0066765578635014835, "grad_norm": 1.328401240687131, "learning_rate": 1.3500000000000002e-06, "loss": 1.1159, "step": 27 }, { "epoch": 0.006923837784371909, "grad_norm": 1.4155504497318074, "learning_rate": 1.4000000000000001e-06, "loss": 1.1471, "step": 28 }, { "epoch": 0.0071711177052423344, "grad_norm": 1.2794817067434705, "learning_rate": 1.45e-06, "loss": 1.1289, "step": 29 }, { "epoch": 0.00741839762611276, "grad_norm": 1.2986984654256946, "learning_rate": 1.5e-06, "loss": 1.1103, "step": 30 }, { "epoch": 0.007665677546983185, "grad_norm": 1.1904601251518023, "learning_rate": 1.5500000000000002e-06, "loss": 1.1071, "step": 31 }, { "epoch": 0.00791295746785361, "grad_norm": 1.211314439028396, "learning_rate": 1.6000000000000001e-06, "loss": 1.1066, "step": 32 }, { "epoch": 0.008160237388724036, "grad_norm": 1.1889594075249947, "learning_rate": 1.6500000000000003e-06, "loss": 1.1016, "step": 33 }, { "epoch": 0.008407517309594461, "grad_norm": 1.144650796083838, "learning_rate": 1.7000000000000002e-06, "loss": 1.0846, "step": 34 }, { "epoch": 0.008654797230464886, "grad_norm": 1.1047081836619403, "learning_rate": 1.75e-06, "loss": 1.052, "step": 35 }, { "epoch": 0.008902077151335312, "grad_norm": 1.1224289563936236, "learning_rate": 1.8000000000000001e-06, "loss": 1.083, "step": 36 }, { "epoch": 0.009149357072205737, "grad_norm": 1.165835379811125, "learning_rate": 1.85e-06, "loss": 1.0743, "step": 37 }, { "epoch": 0.009396636993076163, "grad_norm": 1.2275687643208097, "learning_rate": 1.9000000000000002e-06, "loss": 1.0349, "step": 38 }, { "epoch": 0.009643916913946587, "grad_norm": 1.1972161192753212, "learning_rate": 1.9500000000000004e-06, "loss": 1.016, "step": 39 }, { "epoch": 0.009891196834817012, "grad_norm": 1.082058322089044, "learning_rate": 2.0000000000000003e-06, "loss": 1.0162, "step": 40 }, { "epoch": 0.010138476755687438, "grad_norm": 1.0697734561517596, "learning_rate": 2.05e-06, "loss": 1.002, "step": 41 }, { "epoch": 0.010385756676557863, "grad_norm": 1.0899500742647537, "learning_rate": 2.1000000000000002e-06, "loss": 0.9707, "step": 42 }, { "epoch": 0.01063303659742829, "grad_norm": 1.0591744139232069, "learning_rate": 2.15e-06, "loss": 0.9578, "step": 43 }, { "epoch": 0.010880316518298714, "grad_norm": 1.0360370041311253, "learning_rate": 2.2e-06, "loss": 0.9401, "step": 44 }, { "epoch": 0.01112759643916914, "grad_norm": 0.9882030779618665, "learning_rate": 2.25e-06, "loss": 0.9465, "step": 45 }, { "epoch": 0.011374876360039565, "grad_norm": 1.101171577063728, "learning_rate": 2.3000000000000004e-06, "loss": 0.9889, "step": 46 }, { "epoch": 0.01162215628090999, "grad_norm": 0.967072842509527, "learning_rate": 2.35e-06, "loss": 0.9713, "step": 47 }, { "epoch": 0.011869436201780416, "grad_norm": 0.978495110160977, "learning_rate": 2.4000000000000003e-06, "loss": 0.9296, "step": 48 }, { "epoch": 0.01211671612265084, "grad_norm": 0.9606674955474925, "learning_rate": 2.4500000000000003e-06, "loss": 0.9297, "step": 49 }, { "epoch": 0.012363996043521267, "grad_norm": 1.0561055229672853, "learning_rate": 2.5e-06, "loss": 0.8987, "step": 50 }, { "epoch": 0.012611275964391691, "grad_norm": 0.9661450584899874, "learning_rate": 2.55e-06, "loss": 0.9205, "step": 51 }, { "epoch": 0.012858555885262116, "grad_norm": 0.9327207093078266, "learning_rate": 2.6e-06, "loss": 0.9184, "step": 52 }, { "epoch": 0.013105835806132542, "grad_norm": 0.9416813080118679, "learning_rate": 2.6500000000000005e-06, "loss": 0.9, "step": 53 }, { "epoch": 0.013353115727002967, "grad_norm": 0.8793376312707565, "learning_rate": 2.7000000000000004e-06, "loss": 0.8655, "step": 54 }, { "epoch": 0.013600395647873393, "grad_norm": 0.8190962235195288, "learning_rate": 2.7500000000000004e-06, "loss": 0.8803, "step": 55 }, { "epoch": 0.013847675568743818, "grad_norm": 0.8728021412747534, "learning_rate": 2.8000000000000003e-06, "loss": 0.8669, "step": 56 }, { "epoch": 0.014094955489614243, "grad_norm": 0.945032511017986, "learning_rate": 2.85e-06, "loss": 0.8457, "step": 57 }, { "epoch": 0.014342235410484669, "grad_norm": 0.8824615112963289, "learning_rate": 2.9e-06, "loss": 0.8369, "step": 58 }, { "epoch": 0.014589515331355093, "grad_norm": 0.8431299610759388, "learning_rate": 2.95e-06, "loss": 0.848, "step": 59 }, { "epoch": 0.01483679525222552, "grad_norm": 0.8541187789355988, "learning_rate": 3e-06, "loss": 0.8253, "step": 60 }, { "epoch": 0.015084075173095944, "grad_norm": 0.774863931803644, "learning_rate": 3.05e-06, "loss": 0.8426, "step": 61 }, { "epoch": 0.01533135509396637, "grad_norm": 0.7924045073787536, "learning_rate": 3.1000000000000004e-06, "loss": 0.8175, "step": 62 }, { "epoch": 0.015578635014836795, "grad_norm": 0.7945600224177056, "learning_rate": 3.1500000000000003e-06, "loss": 0.8014, "step": 63 }, { "epoch": 0.01582591493570722, "grad_norm": 0.8081849485275809, "learning_rate": 3.2000000000000003e-06, "loss": 0.7814, "step": 64 }, { "epoch": 0.016073194856577645, "grad_norm": 0.7702852209704323, "learning_rate": 3.2500000000000002e-06, "loss": 0.7893, "step": 65 }, { "epoch": 0.016320474777448073, "grad_norm": 0.7383664978090273, "learning_rate": 3.3000000000000006e-06, "loss": 0.7989, "step": 66 }, { "epoch": 0.016567754698318497, "grad_norm": 0.7436837377486586, "learning_rate": 3.3500000000000005e-06, "loss": 0.8058, "step": 67 }, { "epoch": 0.016815034619188922, "grad_norm": 0.7428905105848088, "learning_rate": 3.4000000000000005e-06, "loss": 0.7963, "step": 68 }, { "epoch": 0.017062314540059347, "grad_norm": 0.75729956851682, "learning_rate": 3.45e-06, "loss": 0.7828, "step": 69 }, { "epoch": 0.01730959446092977, "grad_norm": 0.6918807554019548, "learning_rate": 3.5e-06, "loss": 0.79, "step": 70 }, { "epoch": 0.0175568743818002, "grad_norm": 0.7250929492991405, "learning_rate": 3.5500000000000003e-06, "loss": 0.7984, "step": 71 }, { "epoch": 0.017804154302670624, "grad_norm": 0.7523258703265291, "learning_rate": 3.6000000000000003e-06, "loss": 0.7794, "step": 72 }, { "epoch": 0.01805143422354105, "grad_norm": 0.7596189403247041, "learning_rate": 3.65e-06, "loss": 0.7933, "step": 73 }, { "epoch": 0.018298714144411473, "grad_norm": 0.7098250319729716, "learning_rate": 3.7e-06, "loss": 0.7639, "step": 74 }, { "epoch": 0.018545994065281898, "grad_norm": 0.7325182854684962, "learning_rate": 3.7500000000000005e-06, "loss": 0.7613, "step": 75 }, { "epoch": 0.018793273986152326, "grad_norm": 0.7656692952105425, "learning_rate": 3.8000000000000005e-06, "loss": 0.7481, "step": 76 }, { "epoch": 0.01904055390702275, "grad_norm": 0.7299978668203949, "learning_rate": 3.85e-06, "loss": 0.7839, "step": 77 }, { "epoch": 0.019287833827893175, "grad_norm": 0.7157606053500201, "learning_rate": 3.900000000000001e-06, "loss": 0.7474, "step": 78 }, { "epoch": 0.0195351137487636, "grad_norm": 0.7324944560450859, "learning_rate": 3.95e-06, "loss": 0.763, "step": 79 }, { "epoch": 0.019782393669634024, "grad_norm": 0.7816018694216577, "learning_rate": 4.000000000000001e-06, "loss": 0.7219, "step": 80 }, { "epoch": 0.020029673590504452, "grad_norm": 0.7375563499331299, "learning_rate": 4.05e-06, "loss": 0.7654, "step": 81 }, { "epoch": 0.020276953511374877, "grad_norm": 0.7026975821766478, "learning_rate": 4.1e-06, "loss": 0.7733, "step": 82 }, { "epoch": 0.0205242334322453, "grad_norm": 0.6991770359109263, "learning_rate": 4.15e-06, "loss": 0.7457, "step": 83 }, { "epoch": 0.020771513353115726, "grad_norm": 0.7405828710814198, "learning_rate": 4.2000000000000004e-06, "loss": 0.7471, "step": 84 }, { "epoch": 0.02101879327398615, "grad_norm": 0.7172297475686586, "learning_rate": 4.25e-06, "loss": 0.7201, "step": 85 }, { "epoch": 0.02126607319485658, "grad_norm": 0.7029168304540451, "learning_rate": 4.3e-06, "loss": 0.7559, "step": 86 }, { "epoch": 0.021513353115727003, "grad_norm": 0.723359349050235, "learning_rate": 4.350000000000001e-06, "loss": 0.7287, "step": 87 }, { "epoch": 0.021760633036597428, "grad_norm": 0.7357471244599104, "learning_rate": 4.4e-06, "loss": 0.7218, "step": 88 }, { "epoch": 0.022007912957467853, "grad_norm": 0.7747133612650493, "learning_rate": 4.450000000000001e-06, "loss": 0.7442, "step": 89 }, { "epoch": 0.02225519287833828, "grad_norm": 0.6819171919004213, "learning_rate": 4.5e-06, "loss": 0.7168, "step": 90 }, { "epoch": 0.022502472799208705, "grad_norm": 0.6919857424868104, "learning_rate": 4.5500000000000005e-06, "loss": 0.7271, "step": 91 }, { "epoch": 0.02274975272007913, "grad_norm": 0.7250166917172235, "learning_rate": 4.600000000000001e-06, "loss": 0.7335, "step": 92 }, { "epoch": 0.022997032640949554, "grad_norm": 0.760740711812877, "learning_rate": 4.65e-06, "loss": 0.705, "step": 93 }, { "epoch": 0.02324431256181998, "grad_norm": 0.7159571116471827, "learning_rate": 4.7e-06, "loss": 0.7249, "step": 94 }, { "epoch": 0.023491592482690407, "grad_norm": 0.7112056318387862, "learning_rate": 4.75e-06, "loss": 0.7221, "step": 95 }, { "epoch": 0.02373887240356083, "grad_norm": 0.7407900000388679, "learning_rate": 4.800000000000001e-06, "loss": 0.7094, "step": 96 }, { "epoch": 0.023986152324431256, "grad_norm": 0.72179972775552, "learning_rate": 4.85e-06, "loss": 0.6995, "step": 97 }, { "epoch": 0.02423343224530168, "grad_norm": 0.6561750079352092, "learning_rate": 4.9000000000000005e-06, "loss": 0.6889, "step": 98 }, { "epoch": 0.024480712166172106, "grad_norm": 0.7107165656217939, "learning_rate": 4.95e-06, "loss": 0.7065, "step": 99 }, { "epoch": 0.024727992087042534, "grad_norm": 0.7536703835680636, "learning_rate": 5e-06, "loss": 0.738, "step": 100 }, { "epoch": 0.024975272007912958, "grad_norm": 0.7212863280521654, "learning_rate": 4.999999978871334e-06, "loss": 0.7177, "step": 101 }, { "epoch": 0.025222551928783383, "grad_norm": 0.7418268751267123, "learning_rate": 4.9999999154853315e-06, "loss": 0.7188, "step": 102 }, { "epoch": 0.025469831849653807, "grad_norm": 0.7559916354416649, "learning_rate": 4.999999809841997e-06, "loss": 0.7025, "step": 103 }, { "epoch": 0.025717111770524232, "grad_norm": 0.7811551138440429, "learning_rate": 4.999999661941331e-06, "loss": 0.7073, "step": 104 }, { "epoch": 0.02596439169139466, "grad_norm": 0.7497301361586501, "learning_rate": 4.999999471783337e-06, "loss": 0.6692, "step": 105 }, { "epoch": 0.026211671612265085, "grad_norm": 0.822184378582824, "learning_rate": 4.999999239368016e-06, "loss": 0.7119, "step": 106 }, { "epoch": 0.02645895153313551, "grad_norm": 0.8275732064267906, "learning_rate": 4.999998964695375e-06, "loss": 0.724, "step": 107 }, { "epoch": 0.026706231454005934, "grad_norm": 0.7022572200240244, "learning_rate": 4.9999986477654165e-06, "loss": 0.6907, "step": 108 }, { "epoch": 0.02695351137487636, "grad_norm": 0.733280029857595, "learning_rate": 4.999998288578146e-06, "loss": 0.6759, "step": 109 }, { "epoch": 0.027200791295746787, "grad_norm": 0.7526378507475411, "learning_rate": 4.9999978871335695e-06, "loss": 0.6685, "step": 110 }, { "epoch": 0.02744807121661721, "grad_norm": 0.7597255866628462, "learning_rate": 4.999997443431694e-06, "loss": 0.7168, "step": 111 }, { "epoch": 0.027695351137487636, "grad_norm": 0.7397163297259713, "learning_rate": 4.999996957472529e-06, "loss": 0.6834, "step": 112 }, { "epoch": 0.02794263105835806, "grad_norm": 0.7434766561795023, "learning_rate": 4.999996429256079e-06, "loss": 0.6713, "step": 113 }, { "epoch": 0.028189910979228485, "grad_norm": 0.688054583388141, "learning_rate": 4.9999958587823565e-06, "loss": 0.6771, "step": 114 }, { "epoch": 0.028437190900098913, "grad_norm": 0.7807222470327964, "learning_rate": 4.999995246051368e-06, "loss": 0.6803, "step": 115 }, { "epoch": 0.028684470820969338, "grad_norm": 0.7626145030682552, "learning_rate": 4.999994591063126e-06, "loss": 0.6709, "step": 116 }, { "epoch": 0.028931750741839762, "grad_norm": 0.712857303236686, "learning_rate": 4.999993893817641e-06, "loss": 0.6987, "step": 117 }, { "epoch": 0.029179030662710187, "grad_norm": 0.7175670694950973, "learning_rate": 4.999993154314924e-06, "loss": 0.6656, "step": 118 }, { "epoch": 0.02942631058358061, "grad_norm": 0.8126016444934842, "learning_rate": 4.999992372554989e-06, "loss": 0.7236, "step": 119 }, { "epoch": 0.02967359050445104, "grad_norm": 0.737989445104509, "learning_rate": 4.999991548537848e-06, "loss": 0.6728, "step": 120 }, { "epoch": 0.029920870425321464, "grad_norm": 0.8327590394230312, "learning_rate": 4.999990682263516e-06, "loss": 0.6558, "step": 121 }, { "epoch": 0.03016815034619189, "grad_norm": 0.731868642801384, "learning_rate": 4.999989773732007e-06, "loss": 0.6853, "step": 122 }, { "epoch": 0.030415430267062313, "grad_norm": 0.7396773552222567, "learning_rate": 4.999988822943335e-06, "loss": 0.6737, "step": 123 }, { "epoch": 0.03066271018793274, "grad_norm": 0.8637921752011817, "learning_rate": 4.999987829897519e-06, "loss": 0.6965, "step": 124 }, { "epoch": 0.030909990108803166, "grad_norm": 0.7451831927050838, "learning_rate": 4.999986794594574e-06, "loss": 0.706, "step": 125 }, { "epoch": 0.03115727002967359, "grad_norm": 0.7207943261718842, "learning_rate": 4.999985717034517e-06, "loss": 0.7053, "step": 126 }, { "epoch": 0.031404549950544015, "grad_norm": 0.7839501309721785, "learning_rate": 4.999984597217367e-06, "loss": 0.6709, "step": 127 }, { "epoch": 0.03165182987141444, "grad_norm": 0.7595007430844243, "learning_rate": 4.999983435143142e-06, "loss": 0.6786, "step": 128 }, { "epoch": 0.031899109792284865, "grad_norm": 0.7454986705571136, "learning_rate": 4.999982230811864e-06, "loss": 0.6662, "step": 129 }, { "epoch": 0.03214638971315529, "grad_norm": 0.7112711511564064, "learning_rate": 4.9999809842235515e-06, "loss": 0.6699, "step": 130 }, { "epoch": 0.032393669634025714, "grad_norm": 0.7659959221982169, "learning_rate": 4.999979695378226e-06, "loss": 0.6898, "step": 131 }, { "epoch": 0.032640949554896145, "grad_norm": 0.7641892451832198, "learning_rate": 4.999978364275908e-06, "loss": 0.6594, "step": 132 }, { "epoch": 0.03288822947576657, "grad_norm": 0.7591516753531807, "learning_rate": 4.999976990916622e-06, "loss": 0.6683, "step": 133 }, { "epoch": 0.033135509396636995, "grad_norm": 0.746675595620454, "learning_rate": 4.9999755753003905e-06, "loss": 0.641, "step": 134 }, { "epoch": 0.03338278931750742, "grad_norm": 0.7654447092133061, "learning_rate": 4.999974117427238e-06, "loss": 0.6755, "step": 135 }, { "epoch": 0.033630069238377844, "grad_norm": 0.7675507124577308, "learning_rate": 4.999972617297187e-06, "loss": 0.6462, "step": 136 }, { "epoch": 0.03387734915924827, "grad_norm": 0.7331037612104742, "learning_rate": 4.999971074910266e-06, "loss": 0.683, "step": 137 }, { "epoch": 0.03412462908011869, "grad_norm": 0.7870574637199415, "learning_rate": 4.999969490266498e-06, "loss": 0.651, "step": 138 }, { "epoch": 0.03437190900098912, "grad_norm": 0.7088258626006124, "learning_rate": 4.999967863365912e-06, "loss": 0.6635, "step": 139 }, { "epoch": 0.03461918892185954, "grad_norm": 0.727573343943909, "learning_rate": 4.999966194208534e-06, "loss": 0.6466, "step": 140 }, { "epoch": 0.034866468842729974, "grad_norm": 0.7272152984644323, "learning_rate": 4.999964482794394e-06, "loss": 0.6573, "step": 141 }, { "epoch": 0.0351137487636004, "grad_norm": 0.7802927520295474, "learning_rate": 4.999962729123519e-06, "loss": 0.6306, "step": 142 }, { "epoch": 0.03536102868447082, "grad_norm": 0.7516061535051545, "learning_rate": 4.99996093319594e-06, "loss": 0.6544, "step": 143 }, { "epoch": 0.03560830860534125, "grad_norm": 0.7714071852861921, "learning_rate": 4.9999590950116865e-06, "loss": 0.6428, "step": 144 }, { "epoch": 0.03585558852621167, "grad_norm": 0.7888111176086061, "learning_rate": 4.99995721457079e-06, "loss": 0.6436, "step": 145 }, { "epoch": 0.0361028684470821, "grad_norm": 0.8086998275264007, "learning_rate": 4.999955291873282e-06, "loss": 0.6447, "step": 146 }, { "epoch": 0.03635014836795252, "grad_norm": 0.7680250966004125, "learning_rate": 4.999953326919195e-06, "loss": 0.6522, "step": 147 }, { "epoch": 0.036597428288822946, "grad_norm": 0.7903118774805085, "learning_rate": 4.999951319708562e-06, "loss": 0.6537, "step": 148 }, { "epoch": 0.03684470820969337, "grad_norm": 0.7554875512244171, "learning_rate": 4.999949270241418e-06, "loss": 0.6793, "step": 149 }, { "epoch": 0.037091988130563795, "grad_norm": 0.801330159672659, "learning_rate": 4.999947178517798e-06, "loss": 0.6412, "step": 150 }, { "epoch": 0.03733926805143423, "grad_norm": 0.7520769967798256, "learning_rate": 4.999945044537735e-06, "loss": 0.6612, "step": 151 }, { "epoch": 0.03758654797230465, "grad_norm": 0.7142492113389125, "learning_rate": 4.999942868301266e-06, "loss": 0.6648, "step": 152 }, { "epoch": 0.037833827893175076, "grad_norm": 0.7702143241683127, "learning_rate": 4.999940649808429e-06, "loss": 0.6117, "step": 153 }, { "epoch": 0.0380811078140455, "grad_norm": 0.7774985935001439, "learning_rate": 4.999938389059261e-06, "loss": 0.6226, "step": 154 }, { "epoch": 0.038328387734915925, "grad_norm": 0.7960037248541736, "learning_rate": 4.999936086053799e-06, "loss": 0.6324, "step": 155 }, { "epoch": 0.03857566765578635, "grad_norm": 0.7699076528180318, "learning_rate": 4.9999337407920836e-06, "loss": 0.6512, "step": 156 }, { "epoch": 0.038822947576656774, "grad_norm": 0.8048775835752526, "learning_rate": 4.999931353274153e-06, "loss": 0.6475, "step": 157 }, { "epoch": 0.0390702274975272, "grad_norm": 0.8160115405346294, "learning_rate": 4.9999289235000495e-06, "loss": 0.6422, "step": 158 }, { "epoch": 0.039317507418397624, "grad_norm": 0.7275555749791285, "learning_rate": 4.9999264514698124e-06, "loss": 0.6369, "step": 159 }, { "epoch": 0.03956478733926805, "grad_norm": 0.7689207556493872, "learning_rate": 4.999923937183483e-06, "loss": 0.6508, "step": 160 }, { "epoch": 0.03981206726013848, "grad_norm": 0.8348108853775594, "learning_rate": 4.999921380641105e-06, "loss": 0.6671, "step": 161 }, { "epoch": 0.040059347181008904, "grad_norm": 0.7248271704801716, "learning_rate": 4.999918781842722e-06, "loss": 0.6491, "step": 162 }, { "epoch": 0.04030662710187933, "grad_norm": 0.7315284811526714, "learning_rate": 4.999916140788377e-06, "loss": 0.659, "step": 163 }, { "epoch": 0.040553907022749754, "grad_norm": 0.8578196886547811, "learning_rate": 4.999913457478115e-06, "loss": 0.6436, "step": 164 }, { "epoch": 0.04080118694362018, "grad_norm": 0.8963614849939332, "learning_rate": 4.999910731911981e-06, "loss": 0.6514, "step": 165 }, { "epoch": 0.0410484668644906, "grad_norm": 0.7739062484186896, "learning_rate": 4.999907964090022e-06, "loss": 0.6104, "step": 166 }, { "epoch": 0.04129574678536103, "grad_norm": 0.8074813391715496, "learning_rate": 4.999905154012284e-06, "loss": 0.617, "step": 167 }, { "epoch": 0.04154302670623145, "grad_norm": 0.7762599913589127, "learning_rate": 4.999902301678815e-06, "loss": 0.6375, "step": 168 }, { "epoch": 0.04179030662710188, "grad_norm": 0.7748970536822192, "learning_rate": 4.999899407089662e-06, "loss": 0.6184, "step": 169 }, { "epoch": 0.0420375865479723, "grad_norm": 0.7558565035852893, "learning_rate": 4.999896470244875e-06, "loss": 0.6369, "step": 170 }, { "epoch": 0.04228486646884273, "grad_norm": 0.8341248646777939, "learning_rate": 4.999893491144504e-06, "loss": 0.6418, "step": 171 }, { "epoch": 0.04253214638971316, "grad_norm": 0.7710490534111422, "learning_rate": 4.999890469788598e-06, "loss": 0.6382, "step": 172 }, { "epoch": 0.04277942631058358, "grad_norm": 0.7753712584186033, "learning_rate": 4.99988740617721e-06, "loss": 0.6553, "step": 173 }, { "epoch": 0.04302670623145401, "grad_norm": 0.8067411687298269, "learning_rate": 4.999884300310389e-06, "loss": 0.6542, "step": 174 }, { "epoch": 0.04327398615232443, "grad_norm": 0.7965266139793583, "learning_rate": 4.999881152188191e-06, "loss": 0.6297, "step": 175 }, { "epoch": 0.043521266073194856, "grad_norm": 0.7800110890457195, "learning_rate": 4.999877961810667e-06, "loss": 0.6328, "step": 176 }, { "epoch": 0.04376854599406528, "grad_norm": 0.7555029707902032, "learning_rate": 4.99987472917787e-06, "loss": 0.6567, "step": 177 }, { "epoch": 0.044015825914935705, "grad_norm": 0.8247765083336221, "learning_rate": 4.9998714542898566e-06, "loss": 0.6552, "step": 178 }, { "epoch": 0.04426310583580613, "grad_norm": 0.7853035050103945, "learning_rate": 4.999868137146682e-06, "loss": 0.6487, "step": 179 }, { "epoch": 0.04451038575667656, "grad_norm": 0.792588023124722, "learning_rate": 4.999864777748401e-06, "loss": 0.6343, "step": 180 }, { "epoch": 0.044757665677546986, "grad_norm": 0.7562657466952013, "learning_rate": 4.999861376095072e-06, "loss": 0.6372, "step": 181 }, { "epoch": 0.04500494559841741, "grad_norm": 0.7725421864498705, "learning_rate": 4.999857932186751e-06, "loss": 0.5965, "step": 182 }, { "epoch": 0.045252225519287835, "grad_norm": 0.801959316358281, "learning_rate": 4.999854446023496e-06, "loss": 0.6316, "step": 183 }, { "epoch": 0.04549950544015826, "grad_norm": 0.7854145905631205, "learning_rate": 4.999850917605369e-06, "loss": 0.6432, "step": 184 }, { "epoch": 0.045746785361028684, "grad_norm": 0.7625652300415934, "learning_rate": 4.999847346932426e-06, "loss": 0.6409, "step": 185 }, { "epoch": 0.04599406528189911, "grad_norm": 0.7070242573198564, "learning_rate": 4.999843734004729e-06, "loss": 0.6081, "step": 186 }, { "epoch": 0.046241345202769533, "grad_norm": 0.843090988301474, "learning_rate": 4.999840078822339e-06, "loss": 0.6494, "step": 187 }, { "epoch": 0.04648862512363996, "grad_norm": 0.7812267791142424, "learning_rate": 4.9998363813853175e-06, "loss": 0.6243, "step": 188 }, { "epoch": 0.04673590504451038, "grad_norm": 0.7932584261056079, "learning_rate": 4.999832641693727e-06, "loss": 0.6167, "step": 189 }, { "epoch": 0.046983184965380814, "grad_norm": 0.7886667271093307, "learning_rate": 4.999828859747631e-06, "loss": 0.6095, "step": 190 }, { "epoch": 0.04723046488625124, "grad_norm": 0.8023765397410424, "learning_rate": 4.999825035547093e-06, "loss": 0.6256, "step": 191 }, { "epoch": 0.04747774480712166, "grad_norm": 0.7820939097202719, "learning_rate": 4.999821169092178e-06, "loss": 0.6266, "step": 192 }, { "epoch": 0.04772502472799209, "grad_norm": 0.7864299627678912, "learning_rate": 4.9998172603829515e-06, "loss": 0.6168, "step": 193 }, { "epoch": 0.04797230464886251, "grad_norm": 0.7551224745595048, "learning_rate": 4.999813309419479e-06, "loss": 0.6034, "step": 194 }, { "epoch": 0.04821958456973294, "grad_norm": 0.7538626457900578, "learning_rate": 4.999809316201828e-06, "loss": 0.6154, "step": 195 }, { "epoch": 0.04846686449060336, "grad_norm": 0.7449552121210857, "learning_rate": 4.999805280730066e-06, "loss": 0.6332, "step": 196 }, { "epoch": 0.048714144411473786, "grad_norm": 0.8177138196697615, "learning_rate": 4.99980120300426e-06, "loss": 0.6385, "step": 197 }, { "epoch": 0.04896142433234421, "grad_norm": 0.7760055206345433, "learning_rate": 4.99979708302448e-06, "loss": 0.6227, "step": 198 }, { "epoch": 0.049208704253214636, "grad_norm": 0.7599944781350494, "learning_rate": 4.999792920790795e-06, "loss": 0.5753, "step": 199 }, { "epoch": 0.04945598417408507, "grad_norm": 0.7513478755596612, "learning_rate": 4.999788716303276e-06, "loss": 0.5782, "step": 200 }, { "epoch": 0.04970326409495549, "grad_norm": 0.7969737018661929, "learning_rate": 4.999784469561994e-06, "loss": 0.6112, "step": 201 }, { "epoch": 0.049950544015825916, "grad_norm": 0.7685838090874655, "learning_rate": 4.9997801805670204e-06, "loss": 0.6047, "step": 202 }, { "epoch": 0.05019782393669634, "grad_norm": 0.7733765759727036, "learning_rate": 4.9997758493184276e-06, "loss": 0.6378, "step": 203 }, { "epoch": 0.050445103857566766, "grad_norm": 0.7884015810991322, "learning_rate": 4.99977147581629e-06, "loss": 0.6162, "step": 204 }, { "epoch": 0.05069238377843719, "grad_norm": 0.7876715243321647, "learning_rate": 4.999767060060679e-06, "loss": 0.6017, "step": 205 }, { "epoch": 0.050939663699307615, "grad_norm": 0.76121613723978, "learning_rate": 4.999762602051673e-06, "loss": 0.6229, "step": 206 }, { "epoch": 0.05118694362017804, "grad_norm": 0.7837425671543329, "learning_rate": 4.9997581017893436e-06, "loss": 0.6376, "step": 207 }, { "epoch": 0.051434223541048464, "grad_norm": 0.7176819978947346, "learning_rate": 4.999753559273769e-06, "loss": 0.6102, "step": 208 }, { "epoch": 0.05168150346191889, "grad_norm": 0.7929682985852649, "learning_rate": 4.999748974505026e-06, "loss": 0.619, "step": 209 }, { "epoch": 0.05192878338278932, "grad_norm": 0.8166155545756911, "learning_rate": 4.999744347483191e-06, "loss": 0.5881, "step": 210 }, { "epoch": 0.052176063303659745, "grad_norm": 0.7925632446286965, "learning_rate": 4.999739678208343e-06, "loss": 0.6097, "step": 211 }, { "epoch": 0.05242334322453017, "grad_norm": 0.7724585651765387, "learning_rate": 4.99973496668056e-06, "loss": 0.6133, "step": 212 }, { "epoch": 0.052670623145400594, "grad_norm": 0.9395217619393431, "learning_rate": 4.999730212899923e-06, "loss": 0.5962, "step": 213 }, { "epoch": 0.05291790306627102, "grad_norm": 0.7846009290846587, "learning_rate": 4.999725416866512e-06, "loss": 0.5853, "step": 214 }, { "epoch": 0.05316518298714144, "grad_norm": 0.7538698383114204, "learning_rate": 4.999720578580407e-06, "loss": 0.6018, "step": 215 }, { "epoch": 0.05341246290801187, "grad_norm": 0.811056416448552, "learning_rate": 4.999715698041691e-06, "loss": 0.6114, "step": 216 }, { "epoch": 0.05365974282888229, "grad_norm": 0.8205077516037962, "learning_rate": 4.999710775250446e-06, "loss": 0.6306, "step": 217 }, { "epoch": 0.05390702274975272, "grad_norm": 0.8557907074033965, "learning_rate": 4.999705810206755e-06, "loss": 0.607, "step": 218 }, { "epoch": 0.05415430267062315, "grad_norm": 0.8333258626700865, "learning_rate": 4.999700802910702e-06, "loss": 0.6412, "step": 219 }, { "epoch": 0.05440158259149357, "grad_norm": 0.7928603799058208, "learning_rate": 4.999695753362372e-06, "loss": 0.6132, "step": 220 }, { "epoch": 0.054648862512364, "grad_norm": 0.809471093310714, "learning_rate": 4.99969066156185e-06, "loss": 0.5894, "step": 221 }, { "epoch": 0.05489614243323442, "grad_norm": 0.748135133878315, "learning_rate": 4.999685527509223e-06, "loss": 0.5785, "step": 222 }, { "epoch": 0.05514342235410485, "grad_norm": 0.7994273959936483, "learning_rate": 4.9996803512045756e-06, "loss": 0.611, "step": 223 }, { "epoch": 0.05539070227497527, "grad_norm": 0.8196600789722335, "learning_rate": 4.999675132647998e-06, "loss": 0.6116, "step": 224 }, { "epoch": 0.055637982195845696, "grad_norm": 0.7909171176460179, "learning_rate": 4.999669871839577e-06, "loss": 0.6217, "step": 225 }, { "epoch": 0.05588526211671612, "grad_norm": 0.8567648736166711, "learning_rate": 4.999664568779401e-06, "loss": 0.6112, "step": 226 }, { "epoch": 0.056132542037586546, "grad_norm": 0.7690407907423776, "learning_rate": 4.99965922346756e-06, "loss": 0.5871, "step": 227 }, { "epoch": 0.05637982195845697, "grad_norm": 0.7444628727728402, "learning_rate": 4.999653835904145e-06, "loss": 0.5681, "step": 228 }, { "epoch": 0.0566271018793274, "grad_norm": 0.7607893092647576, "learning_rate": 4.999648406089247e-06, "loss": 0.6139, "step": 229 }, { "epoch": 0.056874381800197826, "grad_norm": 0.7777597816934657, "learning_rate": 4.999642934022957e-06, "loss": 0.6332, "step": 230 }, { "epoch": 0.05712166172106825, "grad_norm": 0.82148546775061, "learning_rate": 4.999637419705369e-06, "loss": 0.5841, "step": 231 }, { "epoch": 0.057368941641938676, "grad_norm": 0.8010419071826838, "learning_rate": 4.9996318631365735e-06, "loss": 0.5925, "step": 232 }, { "epoch": 0.0576162215628091, "grad_norm": 0.7530690429731, "learning_rate": 4.9996262643166674e-06, "loss": 0.5951, "step": 233 }, { "epoch": 0.057863501483679525, "grad_norm": 0.7675728737861399, "learning_rate": 4.999620623245743e-06, "loss": 0.5849, "step": 234 }, { "epoch": 0.05811078140454995, "grad_norm": 0.7724654094858604, "learning_rate": 4.999614939923897e-06, "loss": 0.6169, "step": 235 }, { "epoch": 0.058358061325420374, "grad_norm": 0.7978696650914271, "learning_rate": 4.999609214351226e-06, "loss": 0.5964, "step": 236 }, { "epoch": 0.0586053412462908, "grad_norm": 0.7721298767744407, "learning_rate": 4.999603446527826e-06, "loss": 0.6255, "step": 237 }, { "epoch": 0.05885262116716122, "grad_norm": 0.7488208157668992, "learning_rate": 4.999597636453793e-06, "loss": 0.5971, "step": 238 }, { "epoch": 0.059099901088031655, "grad_norm": 0.8049217267637012, "learning_rate": 4.999591784129228e-06, "loss": 0.6221, "step": 239 }, { "epoch": 0.05934718100890208, "grad_norm": 0.7979752646183047, "learning_rate": 4.999585889554227e-06, "loss": 0.6165, "step": 240 }, { "epoch": 0.059594460929772504, "grad_norm": 0.7448820329141358, "learning_rate": 4.999579952728892e-06, "loss": 0.6154, "step": 241 }, { "epoch": 0.05984174085064293, "grad_norm": 0.8083942465217746, "learning_rate": 4.999573973653322e-06, "loss": 0.6086, "step": 242 }, { "epoch": 0.06008902077151335, "grad_norm": 0.7606973644680061, "learning_rate": 4.99956795232762e-06, "loss": 0.5945, "step": 243 }, { "epoch": 0.06033630069238378, "grad_norm": 0.812611247006916, "learning_rate": 4.999561888751885e-06, "loss": 0.5904, "step": 244 }, { "epoch": 0.0605835806132542, "grad_norm": 0.7727468846105328, "learning_rate": 4.9995557829262215e-06, "loss": 0.61, "step": 245 }, { "epoch": 0.06083086053412463, "grad_norm": 0.8133475657364307, "learning_rate": 4.999549634850732e-06, "loss": 0.5946, "step": 246 }, { "epoch": 0.06107814045499505, "grad_norm": 0.815731831489138, "learning_rate": 4.99954344452552e-06, "loss": 0.584, "step": 247 }, { "epoch": 0.06132542037586548, "grad_norm": 0.828023713294489, "learning_rate": 4.999537211950692e-06, "loss": 0.5963, "step": 248 }, { "epoch": 0.06157270029673591, "grad_norm": 0.7659802540309084, "learning_rate": 4.99953093712635e-06, "loss": 0.601, "step": 249 }, { "epoch": 0.06181998021760633, "grad_norm": 0.8129348699504775, "learning_rate": 4.999524620052603e-06, "loss": 0.5957, "step": 250 }, { "epoch": 0.06206726013847676, "grad_norm": 0.8344335464060704, "learning_rate": 4.999518260729557e-06, "loss": 0.5834, "step": 251 }, { "epoch": 0.06231454005934718, "grad_norm": 0.8014316640205381, "learning_rate": 4.999511859157319e-06, "loss": 0.5922, "step": 252 }, { "epoch": 0.0625618199802176, "grad_norm": 0.7753876743359669, "learning_rate": 4.999505415335998e-06, "loss": 0.5933, "step": 253 }, { "epoch": 0.06280909990108803, "grad_norm": 0.7817463507788848, "learning_rate": 4.9994989292657015e-06, "loss": 0.6109, "step": 254 }, { "epoch": 0.06305637982195846, "grad_norm": 0.828120989023736, "learning_rate": 4.99949240094654e-06, "loss": 0.6008, "step": 255 }, { "epoch": 0.06330365974282888, "grad_norm": 0.8407040487143337, "learning_rate": 4.999485830378625e-06, "loss": 0.5813, "step": 256 }, { "epoch": 0.0635509396636993, "grad_norm": 0.8114133897332183, "learning_rate": 4.999479217562066e-06, "loss": 0.5617, "step": 257 }, { "epoch": 0.06379821958456973, "grad_norm": 0.8420383888196065, "learning_rate": 4.999472562496975e-06, "loss": 0.5782, "step": 258 }, { "epoch": 0.06404549950544015, "grad_norm": 0.8146897788412562, "learning_rate": 4.999465865183465e-06, "loss": 0.6189, "step": 259 }, { "epoch": 0.06429277942631058, "grad_norm": 0.8609511362836474, "learning_rate": 4.999459125621649e-06, "loss": 0.6164, "step": 260 }, { "epoch": 0.064540059347181, "grad_norm": 0.8348638690296436, "learning_rate": 4.99945234381164e-06, "loss": 0.5344, "step": 261 }, { "epoch": 0.06478733926805143, "grad_norm": 0.772480507007813, "learning_rate": 4.999445519753555e-06, "loss": 0.5732, "step": 262 }, { "epoch": 0.06503461918892187, "grad_norm": 0.7745975874520878, "learning_rate": 4.999438653447507e-06, "loss": 0.5781, "step": 263 }, { "epoch": 0.06528189910979229, "grad_norm": 0.8688380494260995, "learning_rate": 4.999431744893613e-06, "loss": 0.5795, "step": 264 }, { "epoch": 0.06552917903066272, "grad_norm": 0.8584578845191142, "learning_rate": 4.999424794091989e-06, "loss": 0.6207, "step": 265 }, { "epoch": 0.06577645895153314, "grad_norm": 0.8130950950717318, "learning_rate": 4.9994178010427544e-06, "loss": 0.5666, "step": 266 }, { "epoch": 0.06602373887240356, "grad_norm": 0.8060107564461553, "learning_rate": 4.999410765746026e-06, "loss": 0.5419, "step": 267 }, { "epoch": 0.06627101879327399, "grad_norm": 0.7783985362196268, "learning_rate": 4.999403688201921e-06, "loss": 0.5613, "step": 268 }, { "epoch": 0.06651829871414441, "grad_norm": 0.8888599834984435, "learning_rate": 4.999396568410563e-06, "loss": 0.5607, "step": 269 }, { "epoch": 0.06676557863501484, "grad_norm": 0.8652022418369328, "learning_rate": 4.999389406372069e-06, "loss": 0.6023, "step": 270 }, { "epoch": 0.06701285855588526, "grad_norm": 0.8232756652188512, "learning_rate": 4.999382202086562e-06, "loss": 0.6064, "step": 271 }, { "epoch": 0.06726013847675569, "grad_norm": 0.783102206467771, "learning_rate": 4.9993749555541635e-06, "loss": 0.5697, "step": 272 }, { "epoch": 0.06750741839762611, "grad_norm": 0.8069785228514532, "learning_rate": 4.999367666774995e-06, "loss": 0.6105, "step": 273 }, { "epoch": 0.06775469831849654, "grad_norm": 0.7893414940765818, "learning_rate": 4.99936033574918e-06, "loss": 0.6075, "step": 274 }, { "epoch": 0.06800197823936696, "grad_norm": 0.8117544067424143, "learning_rate": 4.999352962476843e-06, "loss": 0.6306, "step": 275 }, { "epoch": 0.06824925816023739, "grad_norm": 0.7939328491906092, "learning_rate": 4.999345546958109e-06, "loss": 0.5861, "step": 276 }, { "epoch": 0.06849653808110781, "grad_norm": 0.8271668917371283, "learning_rate": 4.999338089193102e-06, "loss": 0.5754, "step": 277 }, { "epoch": 0.06874381800197824, "grad_norm": 0.8290077242370982, "learning_rate": 4.999330589181948e-06, "loss": 0.581, "step": 278 }, { "epoch": 0.06899109792284866, "grad_norm": 0.8239908337223516, "learning_rate": 4.999323046924776e-06, "loss": 0.5883, "step": 279 }, { "epoch": 0.06923837784371908, "grad_norm": 0.7867379951699298, "learning_rate": 4.999315462421711e-06, "loss": 0.5676, "step": 280 }, { "epoch": 0.06948565776458951, "grad_norm": 0.794330642065807, "learning_rate": 4.9993078356728816e-06, "loss": 0.5744, "step": 281 }, { "epoch": 0.06973293768545995, "grad_norm": 0.7920567183819462, "learning_rate": 4.999300166678419e-06, "loss": 0.5884, "step": 282 }, { "epoch": 0.06998021760633037, "grad_norm": 0.7934949697459773, "learning_rate": 4.99929245543845e-06, "loss": 0.6065, "step": 283 }, { "epoch": 0.0702274975272008, "grad_norm": 0.8421805349854998, "learning_rate": 4.999284701953106e-06, "loss": 0.5577, "step": 284 }, { "epoch": 0.07047477744807122, "grad_norm": 0.8124908525725804, "learning_rate": 4.9992769062225185e-06, "loss": 0.6129, "step": 285 }, { "epoch": 0.07072205736894165, "grad_norm": 0.7973073951210279, "learning_rate": 4.999269068246818e-06, "loss": 0.5694, "step": 286 }, { "epoch": 0.07096933728981207, "grad_norm": 0.8076738575720998, "learning_rate": 4.999261188026139e-06, "loss": 0.5669, "step": 287 }, { "epoch": 0.0712166172106825, "grad_norm": 0.7949443064726974, "learning_rate": 4.999253265560614e-06, "loss": 0.5859, "step": 288 }, { "epoch": 0.07146389713155292, "grad_norm": 0.785428609248276, "learning_rate": 4.999245300850375e-06, "loss": 0.5573, "step": 289 }, { "epoch": 0.07171117705242334, "grad_norm": 0.787601105135159, "learning_rate": 4.9992372938955595e-06, "loss": 0.5658, "step": 290 }, { "epoch": 0.07195845697329377, "grad_norm": 0.8181542303646591, "learning_rate": 4.999229244696301e-06, "loss": 0.5991, "step": 291 }, { "epoch": 0.0722057368941642, "grad_norm": 0.8058133731985243, "learning_rate": 4.9992211532527355e-06, "loss": 0.5862, "step": 292 }, { "epoch": 0.07245301681503462, "grad_norm": 0.7995192339831612, "learning_rate": 4.999213019565001e-06, "loss": 0.5738, "step": 293 }, { "epoch": 0.07270029673590504, "grad_norm": 0.83397514939666, "learning_rate": 4.999204843633234e-06, "loss": 0.6086, "step": 294 }, { "epoch": 0.07294757665677547, "grad_norm": 0.8022353428397413, "learning_rate": 4.9991966254575726e-06, "loss": 0.5817, "step": 295 }, { "epoch": 0.07319485657764589, "grad_norm": 0.7857047281743428, "learning_rate": 4.999188365038156e-06, "loss": 0.5678, "step": 296 }, { "epoch": 0.07344213649851632, "grad_norm": 0.8338345529151808, "learning_rate": 4.999180062375124e-06, "loss": 0.5902, "step": 297 }, { "epoch": 0.07368941641938674, "grad_norm": 0.788795960009146, "learning_rate": 4.999171717468617e-06, "loss": 0.5621, "step": 298 }, { "epoch": 0.07393669634025717, "grad_norm": 0.8007543016695616, "learning_rate": 4.999163330318777e-06, "loss": 0.5909, "step": 299 }, { "epoch": 0.07418397626112759, "grad_norm": 0.793092974341733, "learning_rate": 4.999154900925743e-06, "loss": 0.6027, "step": 300 }, { "epoch": 0.07443125618199802, "grad_norm": 0.7973449991777471, "learning_rate": 4.99914642928966e-06, "loss": 0.5768, "step": 301 }, { "epoch": 0.07467853610286845, "grad_norm": 0.8095789680023529, "learning_rate": 4.99913791541067e-06, "loss": 0.5654, "step": 302 }, { "epoch": 0.07492581602373888, "grad_norm": 0.8960253123671273, "learning_rate": 4.9991293592889174e-06, "loss": 0.5943, "step": 303 }, { "epoch": 0.0751730959446093, "grad_norm": 0.8324065932447318, "learning_rate": 4.999120760924547e-06, "loss": 0.5958, "step": 304 }, { "epoch": 0.07542037586547973, "grad_norm": 0.7774414096188346, "learning_rate": 4.999112120317703e-06, "loss": 0.5753, "step": 305 }, { "epoch": 0.07566765578635015, "grad_norm": 0.7876740890991246, "learning_rate": 4.9991034374685335e-06, "loss": 0.5706, "step": 306 }, { "epoch": 0.07591493570722058, "grad_norm": 0.8560375531506265, "learning_rate": 4.9990947123771825e-06, "loss": 0.5766, "step": 307 }, { "epoch": 0.076162215628091, "grad_norm": 0.8459626380953854, "learning_rate": 4.9990859450438e-06, "loss": 0.567, "step": 308 }, { "epoch": 0.07640949554896143, "grad_norm": 0.7880315987652132, "learning_rate": 4.999077135468533e-06, "loss": 0.5995, "step": 309 }, { "epoch": 0.07665677546983185, "grad_norm": 0.8955800578597224, "learning_rate": 4.9990682836515305e-06, "loss": 0.5843, "step": 310 }, { "epoch": 0.07690405539070228, "grad_norm": 0.8617447390840817, "learning_rate": 4.999059389592943e-06, "loss": 0.5698, "step": 311 }, { "epoch": 0.0771513353115727, "grad_norm": 0.8222678465858798, "learning_rate": 4.999050453292918e-06, "loss": 0.5792, "step": 312 }, { "epoch": 0.07739861523244312, "grad_norm": 0.8425038084425801, "learning_rate": 4.999041474751611e-06, "loss": 0.5654, "step": 313 }, { "epoch": 0.07764589515331355, "grad_norm": 0.7939986484417906, "learning_rate": 4.999032453969171e-06, "loss": 0.5844, "step": 314 }, { "epoch": 0.07789317507418397, "grad_norm": 0.8068900357493951, "learning_rate": 4.999023390945749e-06, "loss": 0.5842, "step": 315 }, { "epoch": 0.0781404549950544, "grad_norm": 0.8467336955386116, "learning_rate": 4.9990142856815015e-06, "loss": 0.5829, "step": 316 }, { "epoch": 0.07838773491592482, "grad_norm": 0.833281068277134, "learning_rate": 4.999005138176581e-06, "loss": 0.5519, "step": 317 }, { "epoch": 0.07863501483679525, "grad_norm": 0.8225381669617556, "learning_rate": 4.9989959484311415e-06, "loss": 0.5421, "step": 318 }, { "epoch": 0.07888229475766567, "grad_norm": 0.7395261056652702, "learning_rate": 4.998986716445339e-06, "loss": 0.5629, "step": 319 }, { "epoch": 0.0791295746785361, "grad_norm": 0.7829197261891223, "learning_rate": 4.99897744221933e-06, "loss": 0.5359, "step": 320 }, { "epoch": 0.07937685459940653, "grad_norm": 0.7979559607833192, "learning_rate": 4.998968125753271e-06, "loss": 0.5674, "step": 321 }, { "epoch": 0.07962413452027696, "grad_norm": 0.8490795677253689, "learning_rate": 4.998958767047319e-06, "loss": 0.5803, "step": 322 }, { "epoch": 0.07987141444114738, "grad_norm": 0.8033454860164779, "learning_rate": 4.998949366101631e-06, "loss": 0.577, "step": 323 }, { "epoch": 0.08011869436201781, "grad_norm": 0.8513966976989501, "learning_rate": 4.998939922916368e-06, "loss": 0.6031, "step": 324 }, { "epoch": 0.08036597428288823, "grad_norm": 0.86730395021255, "learning_rate": 4.998930437491689e-06, "loss": 0.5957, "step": 325 }, { "epoch": 0.08061325420375866, "grad_norm": 0.742412468911964, "learning_rate": 4.9989209098277545e-06, "loss": 0.5954, "step": 326 }, { "epoch": 0.08086053412462908, "grad_norm": 0.7672495419030062, "learning_rate": 4.998911339924726e-06, "loss": 0.5563, "step": 327 }, { "epoch": 0.08110781404549951, "grad_norm": 0.7723454315350652, "learning_rate": 4.998901727782763e-06, "loss": 0.5604, "step": 328 }, { "epoch": 0.08135509396636993, "grad_norm": 0.7687143526719443, "learning_rate": 4.99889207340203e-06, "loss": 0.575, "step": 329 }, { "epoch": 0.08160237388724036, "grad_norm": 0.8232392369424539, "learning_rate": 4.99888237678269e-06, "loss": 0.5772, "step": 330 }, { "epoch": 0.08184965380811078, "grad_norm": 0.7766370860070179, "learning_rate": 4.998872637924906e-06, "loss": 0.6117, "step": 331 }, { "epoch": 0.0820969337289812, "grad_norm": 0.7777679878898526, "learning_rate": 4.998862856828844e-06, "loss": 0.5678, "step": 332 }, { "epoch": 0.08234421364985163, "grad_norm": 0.8402362535184865, "learning_rate": 4.998853033494668e-06, "loss": 0.5627, "step": 333 }, { "epoch": 0.08259149357072205, "grad_norm": 0.8106043298842506, "learning_rate": 4.998843167922546e-06, "loss": 0.6011, "step": 334 }, { "epoch": 0.08283877349159248, "grad_norm": 0.8036741854165277, "learning_rate": 4.998833260112642e-06, "loss": 0.5678, "step": 335 }, { "epoch": 0.0830860534124629, "grad_norm": 0.8067745020147583, "learning_rate": 4.998823310065125e-06, "loss": 0.5808, "step": 336 }, { "epoch": 0.08333333333333333, "grad_norm": 0.7877781611533857, "learning_rate": 4.9988133177801625e-06, "loss": 0.5735, "step": 337 }, { "epoch": 0.08358061325420375, "grad_norm": 0.8373696159220352, "learning_rate": 4.9988032832579245e-06, "loss": 0.5613, "step": 338 }, { "epoch": 0.08382789317507418, "grad_norm": 0.8908331464192217, "learning_rate": 4.99879320649858e-06, "loss": 0.5427, "step": 339 }, { "epoch": 0.0840751730959446, "grad_norm": 0.7798491353155664, "learning_rate": 4.9987830875022995e-06, "loss": 0.5979, "step": 340 }, { "epoch": 0.08432245301681504, "grad_norm": 0.7836443525132174, "learning_rate": 4.998772926269254e-06, "loss": 0.5554, "step": 341 }, { "epoch": 0.08456973293768547, "grad_norm": 0.8875330119592878, "learning_rate": 4.998762722799615e-06, "loss": 0.5773, "step": 342 }, { "epoch": 0.08481701285855589, "grad_norm": 0.8458805310586042, "learning_rate": 4.9987524770935546e-06, "loss": 0.5576, "step": 343 }, { "epoch": 0.08506429277942631, "grad_norm": 0.8266480197072542, "learning_rate": 4.998742189151247e-06, "loss": 0.5821, "step": 344 }, { "epoch": 0.08531157270029674, "grad_norm": 0.7601244118882214, "learning_rate": 4.998731858972865e-06, "loss": 0.5631, "step": 345 }, { "epoch": 0.08555885262116716, "grad_norm": 0.8861885854250049, "learning_rate": 4.998721486558584e-06, "loss": 0.556, "step": 346 }, { "epoch": 0.08580613254203759, "grad_norm": 0.8790472177377368, "learning_rate": 4.998711071908579e-06, "loss": 0.5602, "step": 347 }, { "epoch": 0.08605341246290801, "grad_norm": 0.7871616565771263, "learning_rate": 4.998700615023027e-06, "loss": 0.5881, "step": 348 }, { "epoch": 0.08630069238377844, "grad_norm": 0.8602679302620617, "learning_rate": 4.9986901159021036e-06, "loss": 0.5692, "step": 349 }, { "epoch": 0.08654797230464886, "grad_norm": 0.869429029408227, "learning_rate": 4.998679574545986e-06, "loss": 0.555, "step": 350 }, { "epoch": 0.08679525222551929, "grad_norm": 0.8357333919438975, "learning_rate": 4.998668990954854e-06, "loss": 0.5494, "step": 351 }, { "epoch": 0.08704253214638971, "grad_norm": 0.8133863569483283, "learning_rate": 4.998658365128884e-06, "loss": 0.5666, "step": 352 }, { "epoch": 0.08728981206726014, "grad_norm": 0.8325395334428511, "learning_rate": 4.998647697068258e-06, "loss": 0.5591, "step": 353 }, { "epoch": 0.08753709198813056, "grad_norm": 0.8366416586559107, "learning_rate": 4.998636986773156e-06, "loss": 0.5428, "step": 354 }, { "epoch": 0.08778437190900099, "grad_norm": 0.8180821666277713, "learning_rate": 4.9986262342437566e-06, "loss": 0.5565, "step": 355 }, { "epoch": 0.08803165182987141, "grad_norm": 0.8302958602395372, "learning_rate": 4.9986154394802445e-06, "loss": 0.5754, "step": 356 }, { "epoch": 0.08827893175074183, "grad_norm": 0.908023409390746, "learning_rate": 4.998604602482801e-06, "loss": 0.5517, "step": 357 }, { "epoch": 0.08852621167161226, "grad_norm": 0.8461839551694633, "learning_rate": 4.998593723251609e-06, "loss": 0.5716, "step": 358 }, { "epoch": 0.08877349159248268, "grad_norm": 0.863219314588194, "learning_rate": 4.9985828017868534e-06, "loss": 0.5403, "step": 359 }, { "epoch": 0.08902077151335312, "grad_norm": 0.8146799690434612, "learning_rate": 4.998571838088717e-06, "loss": 0.5643, "step": 360 }, { "epoch": 0.08926805143422355, "grad_norm": 0.7935582497855002, "learning_rate": 4.9985608321573864e-06, "loss": 0.5698, "step": 361 }, { "epoch": 0.08951533135509397, "grad_norm": 0.8476507277896043, "learning_rate": 4.998549783993048e-06, "loss": 0.5528, "step": 362 }, { "epoch": 0.0897626112759644, "grad_norm": 0.8208621602814976, "learning_rate": 4.998538693595888e-06, "loss": 0.5786, "step": 363 }, { "epoch": 0.09000989119683482, "grad_norm": 0.882433464188905, "learning_rate": 4.998527560966094e-06, "loss": 0.5727, "step": 364 }, { "epoch": 0.09025717111770525, "grad_norm": 0.9015805731333748, "learning_rate": 4.9985163861038535e-06, "loss": 0.5669, "step": 365 }, { "epoch": 0.09050445103857567, "grad_norm": 0.8174154358479824, "learning_rate": 4.998505169009356e-06, "loss": 0.5541, "step": 366 }, { "epoch": 0.0907517309594461, "grad_norm": 0.7879889004369333, "learning_rate": 4.998493909682791e-06, "loss": 0.5377, "step": 367 }, { "epoch": 0.09099901088031652, "grad_norm": 0.8134430346957511, "learning_rate": 4.99848260812435e-06, "loss": 0.5673, "step": 368 }, { "epoch": 0.09124629080118694, "grad_norm": 0.7822338294968185, "learning_rate": 4.998471264334222e-06, "loss": 0.5747, "step": 369 }, { "epoch": 0.09149357072205737, "grad_norm": 0.846007678741242, "learning_rate": 4.998459878312598e-06, "loss": 0.5382, "step": 370 }, { "epoch": 0.0917408506429278, "grad_norm": 0.7834419615329641, "learning_rate": 4.998448450059674e-06, "loss": 0.5802, "step": 371 }, { "epoch": 0.09198813056379822, "grad_norm": 0.8462903242903708, "learning_rate": 4.998436979575641e-06, "loss": 0.5637, "step": 372 }, { "epoch": 0.09223541048466864, "grad_norm": 0.8512867722701203, "learning_rate": 4.998425466860692e-06, "loss": 0.5289, "step": 373 }, { "epoch": 0.09248269040553907, "grad_norm": 0.8021787954595253, "learning_rate": 4.998413911915025e-06, "loss": 0.5851, "step": 374 }, { "epoch": 0.09272997032640949, "grad_norm": 0.7994136334382167, "learning_rate": 4.998402314738831e-06, "loss": 0.5461, "step": 375 }, { "epoch": 0.09297725024727992, "grad_norm": 0.8676794377546366, "learning_rate": 4.998390675332308e-06, "loss": 0.5374, "step": 376 }, { "epoch": 0.09322453016815034, "grad_norm": 0.8618752560499016, "learning_rate": 4.9983789936956535e-06, "loss": 0.5235, "step": 377 }, { "epoch": 0.09347181008902077, "grad_norm": 0.8116452824280147, "learning_rate": 4.998367269829065e-06, "loss": 0.563, "step": 378 }, { "epoch": 0.09371909000989119, "grad_norm": 0.8601437613985123, "learning_rate": 4.998355503732739e-06, "loss": 0.5612, "step": 379 }, { "epoch": 0.09396636993076163, "grad_norm": 0.8155800361584301, "learning_rate": 4.9983436954068755e-06, "loss": 0.5712, "step": 380 }, { "epoch": 0.09421364985163205, "grad_norm": 0.8124067408872788, "learning_rate": 4.998331844851674e-06, "loss": 0.5323, "step": 381 }, { "epoch": 0.09446092977250248, "grad_norm": 0.8103826549978994, "learning_rate": 4.9983199520673345e-06, "loss": 0.5433, "step": 382 }, { "epoch": 0.0947082096933729, "grad_norm": 0.8323514299027938, "learning_rate": 4.998308017054059e-06, "loss": 0.5691, "step": 383 }, { "epoch": 0.09495548961424333, "grad_norm": 0.8000615986590797, "learning_rate": 4.998296039812047e-06, "loss": 0.5553, "step": 384 }, { "epoch": 0.09520276953511375, "grad_norm": 0.8339284242974745, "learning_rate": 4.9982840203415035e-06, "loss": 0.5579, "step": 385 }, { "epoch": 0.09545004945598418, "grad_norm": 0.8174909593172076, "learning_rate": 4.99827195864263e-06, "loss": 0.5556, "step": 386 }, { "epoch": 0.0956973293768546, "grad_norm": 0.7975261451643757, "learning_rate": 4.998259854715631e-06, "loss": 0.5661, "step": 387 }, { "epoch": 0.09594460929772503, "grad_norm": 0.7916331855598239, "learning_rate": 4.998247708560712e-06, "loss": 0.5409, "step": 388 }, { "epoch": 0.09619188921859545, "grad_norm": 0.8426876014737761, "learning_rate": 4.998235520178076e-06, "loss": 0.5245, "step": 389 }, { "epoch": 0.09643916913946587, "grad_norm": 0.7464229384648007, "learning_rate": 4.998223289567931e-06, "loss": 0.5494, "step": 390 }, { "epoch": 0.0966864490603363, "grad_norm": 0.8575385758360788, "learning_rate": 4.998211016730483e-06, "loss": 0.5247, "step": 391 }, { "epoch": 0.09693372898120672, "grad_norm": 0.8239224282844174, "learning_rate": 4.99819870166594e-06, "loss": 0.5592, "step": 392 }, { "epoch": 0.09718100890207715, "grad_norm": 0.949667436501745, "learning_rate": 4.998186344374509e-06, "loss": 0.5425, "step": 393 }, { "epoch": 0.09742828882294757, "grad_norm": 0.8371983680182382, "learning_rate": 4.9981739448564005e-06, "loss": 0.5675, "step": 394 }, { "epoch": 0.097675568743818, "grad_norm": 0.8190906217429191, "learning_rate": 4.998161503111822e-06, "loss": 0.6126, "step": 395 }, { "epoch": 0.09792284866468842, "grad_norm": 0.814594253339599, "learning_rate": 4.998149019140987e-06, "loss": 0.5475, "step": 396 }, { "epoch": 0.09817012858555885, "grad_norm": 0.881386637857726, "learning_rate": 4.998136492944102e-06, "loss": 0.5577, "step": 397 }, { "epoch": 0.09841740850642927, "grad_norm": 0.8200193748866078, "learning_rate": 4.998123924521383e-06, "loss": 0.5585, "step": 398 }, { "epoch": 0.09866468842729971, "grad_norm": 0.8111722627824661, "learning_rate": 4.99811131387304e-06, "loss": 0.5463, "step": 399 }, { "epoch": 0.09891196834817013, "grad_norm": 0.8941732458650538, "learning_rate": 4.9980986609992865e-06, "loss": 0.5478, "step": 400 }, { "epoch": 0.09915924826904056, "grad_norm": 0.9159041142286112, "learning_rate": 4.998085965900337e-06, "loss": 0.5607, "step": 401 }, { "epoch": 0.09940652818991098, "grad_norm": 0.8149622380453867, "learning_rate": 4.998073228576406e-06, "loss": 0.5563, "step": 402 }, { "epoch": 0.09965380811078141, "grad_norm": 0.8415024715760436, "learning_rate": 4.998060449027709e-06, "loss": 0.5735, "step": 403 }, { "epoch": 0.09990108803165183, "grad_norm": 0.8549093887109618, "learning_rate": 4.998047627254461e-06, "loss": 0.5285, "step": 404 }, { "epoch": 0.10014836795252226, "grad_norm": 0.8654615781606256, "learning_rate": 4.998034763256879e-06, "loss": 0.5543, "step": 405 }, { "epoch": 0.10039564787339268, "grad_norm": 0.8349697955100309, "learning_rate": 4.998021857035181e-06, "loss": 0.5738, "step": 406 }, { "epoch": 0.1006429277942631, "grad_norm": 0.8519615357866014, "learning_rate": 4.998008908589586e-06, "loss": 0.5524, "step": 407 }, { "epoch": 0.10089020771513353, "grad_norm": 0.9177041756133535, "learning_rate": 4.9979959179203095e-06, "loss": 0.5927, "step": 408 }, { "epoch": 0.10113748763600396, "grad_norm": 0.8578558789111481, "learning_rate": 4.997982885027575e-06, "loss": 0.5373, "step": 409 }, { "epoch": 0.10138476755687438, "grad_norm": 0.8249578416532023, "learning_rate": 4.997969809911601e-06, "loss": 0.5582, "step": 410 }, { "epoch": 0.1016320474777448, "grad_norm": 0.8322799534441788, "learning_rate": 4.997956692572609e-06, "loss": 0.5436, "step": 411 }, { "epoch": 0.10187932739861523, "grad_norm": 0.7946202487427521, "learning_rate": 4.9979435330108195e-06, "loss": 0.5561, "step": 412 }, { "epoch": 0.10212660731948565, "grad_norm": 0.8311385424691226, "learning_rate": 4.997930331226456e-06, "loss": 0.518, "step": 413 }, { "epoch": 0.10237388724035608, "grad_norm": 0.8304279162491168, "learning_rate": 4.997917087219741e-06, "loss": 0.5412, "step": 414 }, { "epoch": 0.1026211671612265, "grad_norm": 0.8877748788219127, "learning_rate": 4.9979038009909e-06, "loss": 0.517, "step": 415 }, { "epoch": 0.10286844708209693, "grad_norm": 0.8062638798591899, "learning_rate": 4.997890472540156e-06, "loss": 0.5317, "step": 416 }, { "epoch": 0.10311572700296735, "grad_norm": 0.8638613858007713, "learning_rate": 4.997877101867734e-06, "loss": 0.565, "step": 417 }, { "epoch": 0.10336300692383778, "grad_norm": 0.8549274979700249, "learning_rate": 4.997863688973862e-06, "loss": 0.5333, "step": 418 }, { "epoch": 0.10361028684470822, "grad_norm": 0.8238596502819312, "learning_rate": 4.997850233858765e-06, "loss": 0.5664, "step": 419 }, { "epoch": 0.10385756676557864, "grad_norm": 0.8179769972244695, "learning_rate": 4.99783673652267e-06, "loss": 0.5494, "step": 420 }, { "epoch": 0.10410484668644907, "grad_norm": 0.872760822491279, "learning_rate": 4.997823196965806e-06, "loss": 0.5686, "step": 421 }, { "epoch": 0.10435212660731949, "grad_norm": 0.8247079449845919, "learning_rate": 4.997809615188403e-06, "loss": 0.5761, "step": 422 }, { "epoch": 0.10459940652818991, "grad_norm": 0.8274894849461595, "learning_rate": 4.9977959911906885e-06, "loss": 0.546, "step": 423 }, { "epoch": 0.10484668644906034, "grad_norm": 0.8200195602952358, "learning_rate": 4.997782324972894e-06, "loss": 0.5487, "step": 424 }, { "epoch": 0.10509396636993076, "grad_norm": 0.8424425368017951, "learning_rate": 4.99776861653525e-06, "loss": 0.5284, "step": 425 }, { "epoch": 0.10534124629080119, "grad_norm": 0.8228965203175329, "learning_rate": 4.9977548658779885e-06, "loss": 0.5416, "step": 426 }, { "epoch": 0.10558852621167161, "grad_norm": 0.8329995359888007, "learning_rate": 4.997741073001342e-06, "loss": 0.5271, "step": 427 }, { "epoch": 0.10583580613254204, "grad_norm": 0.7637313695891461, "learning_rate": 4.997727237905543e-06, "loss": 0.5543, "step": 428 }, { "epoch": 0.10608308605341246, "grad_norm": 0.8469415248863096, "learning_rate": 4.9977133605908264e-06, "loss": 0.5194, "step": 429 }, { "epoch": 0.10633036597428289, "grad_norm": 0.8032002919054423, "learning_rate": 4.997699441057427e-06, "loss": 0.5175, "step": 430 }, { "epoch": 0.10657764589515331, "grad_norm": 0.789050525965101, "learning_rate": 4.997685479305577e-06, "loss": 0.5706, "step": 431 }, { "epoch": 0.10682492581602374, "grad_norm": 0.8025536621297401, "learning_rate": 4.997671475335517e-06, "loss": 0.5336, "step": 432 }, { "epoch": 0.10707220573689416, "grad_norm": 0.8376249206772568, "learning_rate": 4.99765742914748e-06, "loss": 0.5502, "step": 433 }, { "epoch": 0.10731948565776459, "grad_norm": 0.8226860382728634, "learning_rate": 4.9976433407417056e-06, "loss": 0.5352, "step": 434 }, { "epoch": 0.10756676557863501, "grad_norm": 0.8196965592986617, "learning_rate": 4.9976292101184305e-06, "loss": 0.5642, "step": 435 }, { "epoch": 0.10781404549950543, "grad_norm": 0.8027935956184445, "learning_rate": 4.997615037277894e-06, "loss": 0.5367, "step": 436 }, { "epoch": 0.10806132542037586, "grad_norm": 0.8412182776715479, "learning_rate": 4.997600822220336e-06, "loss": 0.5545, "step": 437 }, { "epoch": 0.1083086053412463, "grad_norm": 0.7967652156249115, "learning_rate": 4.997586564945998e-06, "loss": 0.538, "step": 438 }, { "epoch": 0.10855588526211672, "grad_norm": 0.8273469063469596, "learning_rate": 4.997572265455118e-06, "loss": 0.5349, "step": 439 }, { "epoch": 0.10880316518298715, "grad_norm": 0.816757505466961, "learning_rate": 4.9975579237479396e-06, "loss": 0.5598, "step": 440 }, { "epoch": 0.10905044510385757, "grad_norm": 0.8179328405367462, "learning_rate": 4.997543539824706e-06, "loss": 0.5461, "step": 441 }, { "epoch": 0.109297725024728, "grad_norm": 0.8095408289755744, "learning_rate": 4.997529113685659e-06, "loss": 0.5515, "step": 442 }, { "epoch": 0.10954500494559842, "grad_norm": 0.8916427729760883, "learning_rate": 4.997514645331042e-06, "loss": 0.5217, "step": 443 }, { "epoch": 0.10979228486646884, "grad_norm": 0.7917353212761363, "learning_rate": 4.9975001347611005e-06, "loss": 0.5371, "step": 444 }, { "epoch": 0.11003956478733927, "grad_norm": 0.8146465036810688, "learning_rate": 4.997485581976079e-06, "loss": 0.5987, "step": 445 }, { "epoch": 0.1102868447082097, "grad_norm": 0.8128773978395029, "learning_rate": 4.997470986976225e-06, "loss": 0.5481, "step": 446 }, { "epoch": 0.11053412462908012, "grad_norm": 0.7961091137875655, "learning_rate": 4.997456349761783e-06, "loss": 0.5582, "step": 447 }, { "epoch": 0.11078140454995054, "grad_norm": 0.813481787539708, "learning_rate": 4.997441670333003e-06, "loss": 0.5443, "step": 448 }, { "epoch": 0.11102868447082097, "grad_norm": 0.8158459985879026, "learning_rate": 4.997426948690131e-06, "loss": 0.5536, "step": 449 }, { "epoch": 0.11127596439169139, "grad_norm": 0.8327146727995968, "learning_rate": 4.997412184833417e-06, "loss": 0.5326, "step": 450 }, { "epoch": 0.11152324431256182, "grad_norm": 0.7905392611324622, "learning_rate": 4.99739737876311e-06, "loss": 0.5415, "step": 451 }, { "epoch": 0.11177052423343224, "grad_norm": 0.832901783841983, "learning_rate": 4.99738253047946e-06, "loss": 0.5263, "step": 452 }, { "epoch": 0.11201780415430267, "grad_norm": 0.8309586500909867, "learning_rate": 4.997367639982719e-06, "loss": 0.564, "step": 453 }, { "epoch": 0.11226508407517309, "grad_norm": 0.7896942371421249, "learning_rate": 4.997352707273138e-06, "loss": 0.5688, "step": 454 }, { "epoch": 0.11251236399604352, "grad_norm": 0.8159292622067303, "learning_rate": 4.9973377323509694e-06, "loss": 0.5397, "step": 455 }, { "epoch": 0.11275964391691394, "grad_norm": 0.8743313979782678, "learning_rate": 4.997322715216467e-06, "loss": 0.5425, "step": 456 }, { "epoch": 0.11300692383778438, "grad_norm": 0.8145131073267003, "learning_rate": 4.997307655869883e-06, "loss": 0.5279, "step": 457 }, { "epoch": 0.1132542037586548, "grad_norm": 0.8498643017964868, "learning_rate": 4.997292554311474e-06, "loss": 0.5439, "step": 458 }, { "epoch": 0.11350148367952523, "grad_norm": 0.876725589506382, "learning_rate": 4.997277410541493e-06, "loss": 0.5584, "step": 459 }, { "epoch": 0.11374876360039565, "grad_norm": 0.8392765984431413, "learning_rate": 4.9972622245601986e-06, "loss": 0.5696, "step": 460 }, { "epoch": 0.11399604352126608, "grad_norm": 0.8074407171452053, "learning_rate": 4.997246996367845e-06, "loss": 0.5587, "step": 461 }, { "epoch": 0.1142433234421365, "grad_norm": 0.8471548223920936, "learning_rate": 4.997231725964692e-06, "loss": 0.5244, "step": 462 }, { "epoch": 0.11449060336300693, "grad_norm": 0.8650548673944289, "learning_rate": 4.9972164133509955e-06, "loss": 0.5689, "step": 463 }, { "epoch": 0.11473788328387735, "grad_norm": 0.8510165250718368, "learning_rate": 4.997201058527016e-06, "loss": 0.5348, "step": 464 }, { "epoch": 0.11498516320474778, "grad_norm": 0.8119157561874207, "learning_rate": 4.997185661493011e-06, "loss": 0.5405, "step": 465 }, { "epoch": 0.1152324431256182, "grad_norm": 0.8063983800119408, "learning_rate": 4.997170222249244e-06, "loss": 0.5366, "step": 466 }, { "epoch": 0.11547972304648862, "grad_norm": 0.8326109923777355, "learning_rate": 4.997154740795972e-06, "loss": 0.5725, "step": 467 }, { "epoch": 0.11572700296735905, "grad_norm": 0.8157543563831605, "learning_rate": 4.99713921713346e-06, "loss": 0.5288, "step": 468 }, { "epoch": 0.11597428288822947, "grad_norm": 0.8035319772872705, "learning_rate": 4.997123651261969e-06, "loss": 0.5257, "step": 469 }, { "epoch": 0.1162215628090999, "grad_norm": 0.7921032125004203, "learning_rate": 4.997108043181762e-06, "loss": 0.5396, "step": 470 }, { "epoch": 0.11646884272997032, "grad_norm": 0.8305520689275583, "learning_rate": 4.9970923928931026e-06, "loss": 0.5511, "step": 471 }, { "epoch": 0.11671612265084075, "grad_norm": 0.8548918005506956, "learning_rate": 4.997076700396256e-06, "loss": 0.5318, "step": 472 }, { "epoch": 0.11696340257171117, "grad_norm": 0.8154176124090147, "learning_rate": 4.997060965691488e-06, "loss": 0.576, "step": 473 }, { "epoch": 0.1172106824925816, "grad_norm": 0.8412409936805049, "learning_rate": 4.9970451887790626e-06, "loss": 0.53, "step": 474 }, { "epoch": 0.11745796241345202, "grad_norm": 0.8226202940805938, "learning_rate": 4.997029369659249e-06, "loss": 0.521, "step": 475 }, { "epoch": 0.11770524233432245, "grad_norm": 0.8110352535755092, "learning_rate": 4.997013508332312e-06, "loss": 0.5311, "step": 476 }, { "epoch": 0.11795252225519288, "grad_norm": 0.86153650176347, "learning_rate": 4.996997604798522e-06, "loss": 0.5388, "step": 477 }, { "epoch": 0.11819980217606331, "grad_norm": 0.8483377928162458, "learning_rate": 4.996981659058146e-06, "loss": 0.5471, "step": 478 }, { "epoch": 0.11844708209693373, "grad_norm": 0.8624161681686172, "learning_rate": 4.9969656711114546e-06, "loss": 0.528, "step": 479 }, { "epoch": 0.11869436201780416, "grad_norm": 0.8400291073199923, "learning_rate": 4.996949640958718e-06, "loss": 0.5488, "step": 480 }, { "epoch": 0.11894164193867458, "grad_norm": 0.8311370315597272, "learning_rate": 4.996933568600206e-06, "loss": 0.5563, "step": 481 }, { "epoch": 0.11918892185954501, "grad_norm": 0.8870073351460207, "learning_rate": 4.996917454036192e-06, "loss": 0.5087, "step": 482 }, { "epoch": 0.11943620178041543, "grad_norm": 0.8713695523993678, "learning_rate": 4.996901297266947e-06, "loss": 0.5275, "step": 483 }, { "epoch": 0.11968348170128586, "grad_norm": 0.9038628652413889, "learning_rate": 4.996885098292745e-06, "loss": 0.5439, "step": 484 }, { "epoch": 0.11993076162215628, "grad_norm": 0.9167026612271514, "learning_rate": 4.99686885711386e-06, "loss": 0.5677, "step": 485 }, { "epoch": 0.1201780415430267, "grad_norm": 0.8720438532820466, "learning_rate": 4.996852573730565e-06, "loss": 0.5236, "step": 486 }, { "epoch": 0.12042532146389713, "grad_norm": 0.7557209197955111, "learning_rate": 4.996836248143138e-06, "loss": 0.5104, "step": 487 }, { "epoch": 0.12067260138476756, "grad_norm": 0.7989706682687766, "learning_rate": 4.996819880351851e-06, "loss": 0.5853, "step": 488 }, { "epoch": 0.12091988130563798, "grad_norm": 0.8984411166999768, "learning_rate": 4.996803470356984e-06, "loss": 0.5287, "step": 489 }, { "epoch": 0.1211671612265084, "grad_norm": 0.8648551900935894, "learning_rate": 4.996787018158813e-06, "loss": 0.5419, "step": 490 }, { "epoch": 0.12141444114737883, "grad_norm": 0.804324060258821, "learning_rate": 4.996770523757616e-06, "loss": 0.5205, "step": 491 }, { "epoch": 0.12166172106824925, "grad_norm": 0.9032754593092449, "learning_rate": 4.996753987153673e-06, "loss": 0.5159, "step": 492 }, { "epoch": 0.12190900098911968, "grad_norm": 0.9533551364220325, "learning_rate": 4.996737408347262e-06, "loss": 0.5226, "step": 493 }, { "epoch": 0.1221562809099901, "grad_norm": 0.8713670416215546, "learning_rate": 4.996720787338663e-06, "loss": 0.5618, "step": 494 }, { "epoch": 0.12240356083086053, "grad_norm": 0.8684842954756598, "learning_rate": 4.996704124128159e-06, "loss": 0.5639, "step": 495 }, { "epoch": 0.12265084075173097, "grad_norm": 0.8952885323193862, "learning_rate": 4.996687418716031e-06, "loss": 0.5079, "step": 496 }, { "epoch": 0.12289812067260139, "grad_norm": 0.8075323957819724, "learning_rate": 4.9966706711025596e-06, "loss": 0.5321, "step": 497 }, { "epoch": 0.12314540059347182, "grad_norm": 0.8494422449585496, "learning_rate": 4.996653881288029e-06, "loss": 0.5001, "step": 498 }, { "epoch": 0.12339268051434224, "grad_norm": 0.8875548481591807, "learning_rate": 4.996637049272724e-06, "loss": 0.5364, "step": 499 }, { "epoch": 0.12363996043521266, "grad_norm": 0.9020942067293436, "learning_rate": 4.996620175056928e-06, "loss": 0.5257, "step": 500 }, { "epoch": 0.12388724035608309, "grad_norm": 0.8675810693187886, "learning_rate": 4.9966032586409264e-06, "loss": 0.5365, "step": 501 }, { "epoch": 0.12413452027695351, "grad_norm": 0.8714654026651982, "learning_rate": 4.996586300025005e-06, "loss": 0.5008, "step": 502 }, { "epoch": 0.12438180019782394, "grad_norm": 0.8399592723027427, "learning_rate": 4.99656929920945e-06, "loss": 0.5151, "step": 503 }, { "epoch": 0.12462908011869436, "grad_norm": 0.7778730144888366, "learning_rate": 4.996552256194551e-06, "loss": 0.5302, "step": 504 }, { "epoch": 0.12487636003956479, "grad_norm": 0.8260924301670444, "learning_rate": 4.996535170980593e-06, "loss": 0.5182, "step": 505 }, { "epoch": 0.1251236399604352, "grad_norm": 0.8999624536805119, "learning_rate": 4.996518043567868e-06, "loss": 0.5232, "step": 506 }, { "epoch": 0.12537091988130564, "grad_norm": 0.8691087323090587, "learning_rate": 4.9965008739566615e-06, "loss": 0.5532, "step": 507 }, { "epoch": 0.12561819980217606, "grad_norm": 0.8672773310284954, "learning_rate": 4.9964836621472674e-06, "loss": 0.5627, "step": 508 }, { "epoch": 0.1258654797230465, "grad_norm": 0.790002975589831, "learning_rate": 4.996466408139975e-06, "loss": 0.5469, "step": 509 }, { "epoch": 0.1261127596439169, "grad_norm": 0.8660874103550955, "learning_rate": 4.996449111935075e-06, "loss": 0.5392, "step": 510 }, { "epoch": 0.12636003956478734, "grad_norm": 0.8196685228026358, "learning_rate": 4.996431773532863e-06, "loss": 0.5347, "step": 511 }, { "epoch": 0.12660731948565776, "grad_norm": 0.8679109946021679, "learning_rate": 4.996414392933629e-06, "loss": 0.5019, "step": 512 }, { "epoch": 0.12685459940652818, "grad_norm": 0.8309043787360044, "learning_rate": 4.996396970137668e-06, "loss": 0.5288, "step": 513 }, { "epoch": 0.1271018793273986, "grad_norm": 0.8410088998221761, "learning_rate": 4.9963795051452736e-06, "loss": 0.5466, "step": 514 }, { "epoch": 0.12734915924826903, "grad_norm": 0.8358751326108991, "learning_rate": 4.996361997956743e-06, "loss": 0.5428, "step": 515 }, { "epoch": 0.12759643916913946, "grad_norm": 0.8819431687194557, "learning_rate": 4.996344448572369e-06, "loss": 0.5364, "step": 516 }, { "epoch": 0.12784371909000988, "grad_norm": 0.8490128009282251, "learning_rate": 4.9963268569924515e-06, "loss": 0.5394, "step": 517 }, { "epoch": 0.1280909990108803, "grad_norm": 0.8244195316603701, "learning_rate": 4.996309223217285e-06, "loss": 0.5176, "step": 518 }, { "epoch": 0.12833827893175073, "grad_norm": 0.8235400710586557, "learning_rate": 4.99629154724717e-06, "loss": 0.5364, "step": 519 }, { "epoch": 0.12858555885262116, "grad_norm": 0.8301561516400873, "learning_rate": 4.996273829082404e-06, "loss": 0.5339, "step": 520 }, { "epoch": 0.12883283877349158, "grad_norm": 0.8383832053104039, "learning_rate": 4.996256068723287e-06, "loss": 0.5079, "step": 521 }, { "epoch": 0.129080118694362, "grad_norm": 0.8312860024324458, "learning_rate": 4.996238266170118e-06, "loss": 0.5199, "step": 522 }, { "epoch": 0.12932739861523243, "grad_norm": 0.8799017293310935, "learning_rate": 4.9962204214232005e-06, "loss": 0.499, "step": 523 }, { "epoch": 0.12957467853610286, "grad_norm": 0.8835474145873032, "learning_rate": 4.996202534482832e-06, "loss": 0.4991, "step": 524 }, { "epoch": 0.1298219584569733, "grad_norm": 0.8039735973515206, "learning_rate": 4.9961846053493194e-06, "loss": 0.5355, "step": 525 }, { "epoch": 0.13006923837784373, "grad_norm": 0.8361160383616958, "learning_rate": 4.9961666340229635e-06, "loss": 0.5466, "step": 526 }, { "epoch": 0.13031651829871416, "grad_norm": 0.8569201635197241, "learning_rate": 4.996148620504067e-06, "loss": 0.5349, "step": 527 }, { "epoch": 0.13056379821958458, "grad_norm": 0.8802630630022769, "learning_rate": 4.996130564792936e-06, "loss": 0.5228, "step": 528 }, { "epoch": 0.130811078140455, "grad_norm": 0.8030195373159202, "learning_rate": 4.996112466889876e-06, "loss": 0.5689, "step": 529 }, { "epoch": 0.13105835806132543, "grad_norm": 0.7747458285259381, "learning_rate": 4.996094326795192e-06, "loss": 0.5297, "step": 530 }, { "epoch": 0.13130563798219586, "grad_norm": 0.808534915335967, "learning_rate": 4.996076144509191e-06, "loss": 0.5333, "step": 531 }, { "epoch": 0.13155291790306628, "grad_norm": 0.8248147055112679, "learning_rate": 4.996057920032179e-06, "loss": 0.5338, "step": 532 }, { "epoch": 0.1318001978239367, "grad_norm": 0.8369441009968227, "learning_rate": 4.996039653364466e-06, "loss": 0.5249, "step": 533 }, { "epoch": 0.13204747774480713, "grad_norm": 0.8122213605964168, "learning_rate": 4.99602134450636e-06, "loss": 0.535, "step": 534 }, { "epoch": 0.13229475766567755, "grad_norm": 0.8156674463115173, "learning_rate": 4.9960029934581706e-06, "loss": 0.5331, "step": 535 }, { "epoch": 0.13254203758654798, "grad_norm": 0.7744707960410743, "learning_rate": 4.9959846002202075e-06, "loss": 0.5362, "step": 536 }, { "epoch": 0.1327893175074184, "grad_norm": 0.8384560596557845, "learning_rate": 4.995966164792782e-06, "loss": 0.5453, "step": 537 }, { "epoch": 0.13303659742828883, "grad_norm": 0.8058804582138253, "learning_rate": 4.9959476871762055e-06, "loss": 0.5157, "step": 538 }, { "epoch": 0.13328387734915925, "grad_norm": 0.8294745593661792, "learning_rate": 4.995929167370791e-06, "loss": 0.5766, "step": 539 }, { "epoch": 0.13353115727002968, "grad_norm": 0.8396457626210513, "learning_rate": 4.99591060537685e-06, "loss": 0.53, "step": 540 }, { "epoch": 0.1337784371909001, "grad_norm": 0.8422569925721224, "learning_rate": 4.995892001194699e-06, "loss": 0.5293, "step": 541 }, { "epoch": 0.13402571711177053, "grad_norm": 0.8097797303451486, "learning_rate": 4.995873354824649e-06, "loss": 0.5193, "step": 542 }, { "epoch": 0.13427299703264095, "grad_norm": 0.8282914521596959, "learning_rate": 4.995854666267017e-06, "loss": 0.517, "step": 543 }, { "epoch": 0.13452027695351138, "grad_norm": 0.8430683316926538, "learning_rate": 4.99583593552212e-06, "loss": 0.5572, "step": 544 }, { "epoch": 0.1347675568743818, "grad_norm": 0.8118636383334119, "learning_rate": 4.995817162590273e-06, "loss": 0.5268, "step": 545 }, { "epoch": 0.13501483679525222, "grad_norm": 0.7958627214231268, "learning_rate": 4.995798347471793e-06, "loss": 0.5271, "step": 546 }, { "epoch": 0.13526211671612265, "grad_norm": 0.8098286264528141, "learning_rate": 4.995779490166999e-06, "loss": 0.5318, "step": 547 }, { "epoch": 0.13550939663699307, "grad_norm": 0.8301867014748663, "learning_rate": 4.995760590676209e-06, "loss": 0.5593, "step": 548 }, { "epoch": 0.1357566765578635, "grad_norm": 0.7834537582380657, "learning_rate": 4.995741648999744e-06, "loss": 0.5145, "step": 549 }, { "epoch": 0.13600395647873392, "grad_norm": 0.8100081692650491, "learning_rate": 4.995722665137923e-06, "loss": 0.5353, "step": 550 }, { "epoch": 0.13625123639960435, "grad_norm": 0.80189918046591, "learning_rate": 4.995703639091067e-06, "loss": 0.5322, "step": 551 }, { "epoch": 0.13649851632047477, "grad_norm": 0.8350298859457816, "learning_rate": 4.995684570859497e-06, "loss": 0.5705, "step": 552 }, { "epoch": 0.1367457962413452, "grad_norm": 0.8182785573997086, "learning_rate": 4.995665460443536e-06, "loss": 0.5083, "step": 553 }, { "epoch": 0.13699307616221562, "grad_norm": 0.9006658583356458, "learning_rate": 4.995646307843508e-06, "loss": 0.54, "step": 554 }, { "epoch": 0.13724035608308605, "grad_norm": 0.7948803191646967, "learning_rate": 4.995627113059734e-06, "loss": 0.5562, "step": 555 }, { "epoch": 0.13748763600395647, "grad_norm": 0.8282246520634873, "learning_rate": 4.995607876092541e-06, "loss": 0.5289, "step": 556 }, { "epoch": 0.1377349159248269, "grad_norm": 0.8931269666220943, "learning_rate": 4.995588596942254e-06, "loss": 0.5478, "step": 557 }, { "epoch": 0.13798219584569732, "grad_norm": 0.8291498125431265, "learning_rate": 4.995569275609197e-06, "loss": 0.5269, "step": 558 }, { "epoch": 0.13822947576656774, "grad_norm": 0.824472874788768, "learning_rate": 4.995549912093698e-06, "loss": 0.5203, "step": 559 }, { "epoch": 0.13847675568743817, "grad_norm": 0.8380697493482778, "learning_rate": 4.995530506396084e-06, "loss": 0.5343, "step": 560 }, { "epoch": 0.1387240356083086, "grad_norm": 0.8197126786744611, "learning_rate": 4.995511058516683e-06, "loss": 0.5252, "step": 561 }, { "epoch": 0.13897131552917902, "grad_norm": 0.8478247444958412, "learning_rate": 4.995491568455824e-06, "loss": 0.5136, "step": 562 }, { "epoch": 0.13921859545004944, "grad_norm": 0.8005226390245985, "learning_rate": 4.9954720362138365e-06, "loss": 0.5269, "step": 563 }, { "epoch": 0.1394658753709199, "grad_norm": 0.8227428725452879, "learning_rate": 4.995452461791049e-06, "loss": 0.5127, "step": 564 }, { "epoch": 0.13971315529179032, "grad_norm": 0.8065785300544993, "learning_rate": 4.995432845187796e-06, "loss": 0.5156, "step": 565 }, { "epoch": 0.13996043521266074, "grad_norm": 0.8094394004280067, "learning_rate": 4.9954131864044055e-06, "loss": 0.5167, "step": 566 }, { "epoch": 0.14020771513353117, "grad_norm": 0.8659413683091203, "learning_rate": 4.995393485441211e-06, "loss": 0.5371, "step": 567 }, { "epoch": 0.1404549950544016, "grad_norm": 0.8210244711493505, "learning_rate": 4.995373742298545e-06, "loss": 0.517, "step": 568 }, { "epoch": 0.14070227497527202, "grad_norm": 0.7525169650037641, "learning_rate": 4.995353956976743e-06, "loss": 0.5185, "step": 569 }, { "epoch": 0.14094955489614244, "grad_norm": 0.844339650153545, "learning_rate": 4.995334129476137e-06, "loss": 0.4857, "step": 570 }, { "epoch": 0.14119683481701287, "grad_norm": 0.8248404365036152, "learning_rate": 4.995314259797065e-06, "loss": 0.5301, "step": 571 }, { "epoch": 0.1414441147378833, "grad_norm": 0.8040746643584687, "learning_rate": 4.99529434793986e-06, "loss": 0.5283, "step": 572 }, { "epoch": 0.14169139465875372, "grad_norm": 0.8189779701343372, "learning_rate": 4.995274393904861e-06, "loss": 0.5132, "step": 573 }, { "epoch": 0.14193867457962414, "grad_norm": 0.8739231936574126, "learning_rate": 4.995254397692403e-06, "loss": 0.497, "step": 574 }, { "epoch": 0.14218595450049457, "grad_norm": 0.8348211644398329, "learning_rate": 4.995234359302825e-06, "loss": 0.5216, "step": 575 }, { "epoch": 0.142433234421365, "grad_norm": 0.8033507653933986, "learning_rate": 4.995214278736467e-06, "loss": 0.5134, "step": 576 }, { "epoch": 0.14268051434223541, "grad_norm": 0.8666478902841073, "learning_rate": 4.9951941559936655e-06, "loss": 0.4913, "step": 577 }, { "epoch": 0.14292779426310584, "grad_norm": 0.8121736161408633, "learning_rate": 4.995173991074764e-06, "loss": 0.5204, "step": 578 }, { "epoch": 0.14317507418397626, "grad_norm": 0.8130603144768949, "learning_rate": 4.995153783980101e-06, "loss": 0.51, "step": 579 }, { "epoch": 0.1434223541048467, "grad_norm": 0.8376602599719988, "learning_rate": 4.995133534710018e-06, "loss": 0.5286, "step": 580 }, { "epoch": 0.1436696340257171, "grad_norm": 0.8028003299616209, "learning_rate": 4.995113243264859e-06, "loss": 0.5154, "step": 581 }, { "epoch": 0.14391691394658754, "grad_norm": 0.7863768226347286, "learning_rate": 4.995092909644966e-06, "loss": 0.5024, "step": 582 }, { "epoch": 0.14416419386745796, "grad_norm": 0.8021001800040607, "learning_rate": 4.995072533850682e-06, "loss": 0.5354, "step": 583 }, { "epoch": 0.1444114737883284, "grad_norm": 0.8402628321453506, "learning_rate": 4.995052115882353e-06, "loss": 0.5249, "step": 584 }, { "epoch": 0.1446587537091988, "grad_norm": 0.8331277575923502, "learning_rate": 4.9950316557403235e-06, "loss": 0.4983, "step": 585 }, { "epoch": 0.14490603363006924, "grad_norm": 0.8188619596157297, "learning_rate": 4.9950111534249375e-06, "loss": 0.5357, "step": 586 }, { "epoch": 0.14515331355093966, "grad_norm": 0.7985060052509381, "learning_rate": 4.994990608936544e-06, "loss": 0.5091, "step": 587 }, { "epoch": 0.14540059347181009, "grad_norm": 0.7655405143056095, "learning_rate": 4.99497002227549e-06, "loss": 0.5356, "step": 588 }, { "epoch": 0.1456478733926805, "grad_norm": 0.8256783148504685, "learning_rate": 4.9949493934421226e-06, "loss": 0.5148, "step": 589 }, { "epoch": 0.14589515331355093, "grad_norm": 0.8228995595458576, "learning_rate": 4.99492872243679e-06, "loss": 0.5198, "step": 590 }, { "epoch": 0.14614243323442136, "grad_norm": 0.7983002453848045, "learning_rate": 4.994908009259843e-06, "loss": 0.5196, "step": 591 }, { "epoch": 0.14638971315529178, "grad_norm": 0.8146201436352071, "learning_rate": 4.994887253911631e-06, "loss": 0.5521, "step": 592 }, { "epoch": 0.1466369930761622, "grad_norm": 0.8252581518039378, "learning_rate": 4.9948664563925054e-06, "loss": 0.5505, "step": 593 }, { "epoch": 0.14688427299703263, "grad_norm": 0.8172097763249438, "learning_rate": 4.994845616702817e-06, "loss": 0.5245, "step": 594 }, { "epoch": 0.14713155291790306, "grad_norm": 0.8308363666090501, "learning_rate": 4.994824734842918e-06, "loss": 0.5291, "step": 595 }, { "epoch": 0.14737883283877348, "grad_norm": 0.8236367133690201, "learning_rate": 4.994803810813161e-06, "loss": 0.5274, "step": 596 }, { "epoch": 0.1476261127596439, "grad_norm": 0.8921360057125517, "learning_rate": 4.9947828446139016e-06, "loss": 0.5239, "step": 597 }, { "epoch": 0.14787339268051433, "grad_norm": 0.8519369558875952, "learning_rate": 4.994761836245492e-06, "loss": 0.5568, "step": 598 }, { "epoch": 0.14812067260138476, "grad_norm": 0.8593306021580139, "learning_rate": 4.994740785708289e-06, "loss": 0.5183, "step": 599 }, { "epoch": 0.14836795252225518, "grad_norm": 0.921637121737583, "learning_rate": 4.994719693002646e-06, "loss": 0.5235, "step": 600 }, { "epoch": 0.1486152324431256, "grad_norm": 0.8399998695591865, "learning_rate": 4.994698558128923e-06, "loss": 0.566, "step": 601 }, { "epoch": 0.14886251236399603, "grad_norm": 0.7904438558914851, "learning_rate": 4.994677381087475e-06, "loss": 0.5184, "step": 602 }, { "epoch": 0.14910979228486648, "grad_norm": 0.8107257376120282, "learning_rate": 4.99465616187866e-06, "loss": 0.5232, "step": 603 }, { "epoch": 0.1493570722057369, "grad_norm": 0.8466653719532753, "learning_rate": 4.994634900502837e-06, "loss": 0.5461, "step": 604 }, { "epoch": 0.14960435212660733, "grad_norm": 0.8332150966857227, "learning_rate": 4.994613596960366e-06, "loss": 0.543, "step": 605 }, { "epoch": 0.14985163204747776, "grad_norm": 0.8494372220903758, "learning_rate": 4.994592251251606e-06, "loss": 0.5414, "step": 606 }, { "epoch": 0.15009891196834818, "grad_norm": 0.836739182677242, "learning_rate": 4.994570863376918e-06, "loss": 0.4868, "step": 607 }, { "epoch": 0.1503461918892186, "grad_norm": 0.8560168407660868, "learning_rate": 4.994549433336664e-06, "loss": 0.5357, "step": 608 }, { "epoch": 0.15059347181008903, "grad_norm": 0.8248533428465916, "learning_rate": 4.9945279611312066e-06, "loss": 0.5371, "step": 609 }, { "epoch": 0.15084075173095945, "grad_norm": 0.8279650051807312, "learning_rate": 4.9945064467609076e-06, "loss": 0.5133, "step": 610 }, { "epoch": 0.15108803165182988, "grad_norm": 0.8207095863487788, "learning_rate": 4.994484890226132e-06, "loss": 0.555, "step": 611 }, { "epoch": 0.1513353115727003, "grad_norm": 0.8473352308749146, "learning_rate": 4.9944632915272426e-06, "loss": 0.5249, "step": 612 }, { "epoch": 0.15158259149357073, "grad_norm": 0.8269313598527205, "learning_rate": 4.994441650664605e-06, "loss": 0.4928, "step": 613 }, { "epoch": 0.15182987141444115, "grad_norm": 0.8051367309430479, "learning_rate": 4.994419967638587e-06, "loss": 0.5126, "step": 614 }, { "epoch": 0.15207715133531158, "grad_norm": 0.9477532042198118, "learning_rate": 4.994398242449552e-06, "loss": 0.5297, "step": 615 }, { "epoch": 0.152324431256182, "grad_norm": 0.8121271507476093, "learning_rate": 4.994376475097869e-06, "loss": 0.5315, "step": 616 }, { "epoch": 0.15257171117705243, "grad_norm": 0.7962233363129194, "learning_rate": 4.994354665583906e-06, "loss": 0.5335, "step": 617 }, { "epoch": 0.15281899109792285, "grad_norm": 0.8342385325804931, "learning_rate": 4.9943328139080304e-06, "loss": 0.5026, "step": 618 }, { "epoch": 0.15306627101879328, "grad_norm": 0.7960492212543056, "learning_rate": 4.994310920070613e-06, "loss": 0.5243, "step": 619 }, { "epoch": 0.1533135509396637, "grad_norm": 0.8219481066829437, "learning_rate": 4.994288984072023e-06, "loss": 0.5422, "step": 620 }, { "epoch": 0.15356083086053413, "grad_norm": 0.8965110210914611, "learning_rate": 4.994267005912631e-06, "loss": 0.4988, "step": 621 }, { "epoch": 0.15380811078140455, "grad_norm": 0.8674803487785987, "learning_rate": 4.994244985592809e-06, "loss": 0.5235, "step": 622 }, { "epoch": 0.15405539070227497, "grad_norm": 0.8294901688893189, "learning_rate": 4.99422292311293e-06, "loss": 0.523, "step": 623 }, { "epoch": 0.1543026706231454, "grad_norm": 0.8573169917869833, "learning_rate": 4.994200818473365e-06, "loss": 0.5161, "step": 624 }, { "epoch": 0.15454995054401582, "grad_norm": 0.8239599780470138, "learning_rate": 4.994178671674489e-06, "loss": 0.5249, "step": 625 }, { "epoch": 0.15479723046488625, "grad_norm": 0.8573459117967988, "learning_rate": 4.994156482716677e-06, "loss": 0.4955, "step": 626 }, { "epoch": 0.15504451038575667, "grad_norm": 0.8243090403433094, "learning_rate": 4.994134251600302e-06, "loss": 0.5008, "step": 627 }, { "epoch": 0.1552917903066271, "grad_norm": 0.8606707487222631, "learning_rate": 4.994111978325741e-06, "loss": 0.5306, "step": 628 }, { "epoch": 0.15553907022749752, "grad_norm": 0.8525860766404425, "learning_rate": 4.99408966289337e-06, "loss": 0.5508, "step": 629 }, { "epoch": 0.15578635014836795, "grad_norm": 0.9058751830432761, "learning_rate": 4.994067305303567e-06, "loss": 0.532, "step": 630 }, { "epoch": 0.15603363006923837, "grad_norm": 0.9144797398646675, "learning_rate": 4.9940449055567096e-06, "loss": 0.5025, "step": 631 }, { "epoch": 0.1562809099901088, "grad_norm": 0.7934911770321739, "learning_rate": 4.994022463653176e-06, "loss": 0.4991, "step": 632 }, { "epoch": 0.15652818991097922, "grad_norm": 0.8508702206108024, "learning_rate": 4.993999979593346e-06, "loss": 0.5186, "step": 633 }, { "epoch": 0.15677546983184965, "grad_norm": 0.9125868217774985, "learning_rate": 4.993977453377599e-06, "loss": 0.5141, "step": 634 }, { "epoch": 0.15702274975272007, "grad_norm": 0.8314643342376442, "learning_rate": 4.993954885006316e-06, "loss": 0.5388, "step": 635 }, { "epoch": 0.1572700296735905, "grad_norm": 0.8567507360554277, "learning_rate": 4.9939322744798795e-06, "loss": 0.5099, "step": 636 }, { "epoch": 0.15751730959446092, "grad_norm": 0.9458345629100596, "learning_rate": 4.9939096217986706e-06, "loss": 0.5329, "step": 637 }, { "epoch": 0.15776458951533134, "grad_norm": 0.8717178654756879, "learning_rate": 4.993886926963072e-06, "loss": 0.5101, "step": 638 }, { "epoch": 0.15801186943620177, "grad_norm": 0.9099670727771642, "learning_rate": 4.993864189973468e-06, "loss": 0.5197, "step": 639 }, { "epoch": 0.1582591493570722, "grad_norm": 0.8690584059000377, "learning_rate": 4.993841410830243e-06, "loss": 0.5129, "step": 640 }, { "epoch": 0.15850642927794262, "grad_norm": 0.8395676636532586, "learning_rate": 4.993818589533781e-06, "loss": 0.5435, "step": 641 }, { "epoch": 0.15875370919881307, "grad_norm": 0.8390162044014295, "learning_rate": 4.993795726084469e-06, "loss": 0.4987, "step": 642 }, { "epoch": 0.1590009891196835, "grad_norm": 0.8628449906727201, "learning_rate": 4.993772820482693e-06, "loss": 0.5581, "step": 643 }, { "epoch": 0.15924826904055392, "grad_norm": 0.8769274046379205, "learning_rate": 4.99374987272884e-06, "loss": 0.5253, "step": 644 }, { "epoch": 0.15949554896142434, "grad_norm": 0.8423197065733172, "learning_rate": 4.9937268828232974e-06, "loss": 0.5182, "step": 645 }, { "epoch": 0.15974282888229477, "grad_norm": 0.8495522941564868, "learning_rate": 4.993703850766455e-06, "loss": 0.5176, "step": 646 }, { "epoch": 0.1599901088031652, "grad_norm": 0.8566253722599317, "learning_rate": 4.993680776558701e-06, "loss": 0.4967, "step": 647 }, { "epoch": 0.16023738872403562, "grad_norm": 0.8578979605277798, "learning_rate": 4.993657660200427e-06, "loss": 0.5321, "step": 648 }, { "epoch": 0.16048466864490604, "grad_norm": 0.8437931759195362, "learning_rate": 4.993634501692022e-06, "loss": 0.527, "step": 649 }, { "epoch": 0.16073194856577647, "grad_norm": 0.8312148356989333, "learning_rate": 4.993611301033878e-06, "loss": 0.5346, "step": 650 }, { "epoch": 0.1609792284866469, "grad_norm": 0.8583661441969466, "learning_rate": 4.993588058226388e-06, "loss": 0.4911, "step": 651 }, { "epoch": 0.16122650840751732, "grad_norm": 0.835364664881372, "learning_rate": 4.9935647732699426e-06, "loss": 0.508, "step": 652 }, { "epoch": 0.16147378832838774, "grad_norm": 0.9088368040786615, "learning_rate": 4.993541446164938e-06, "loss": 0.5452, "step": 653 }, { "epoch": 0.16172106824925817, "grad_norm": 0.901014609282978, "learning_rate": 4.993518076911766e-06, "loss": 0.5111, "step": 654 }, { "epoch": 0.1619683481701286, "grad_norm": 0.8774481271049077, "learning_rate": 4.993494665510825e-06, "loss": 0.5152, "step": 655 }, { "epoch": 0.16221562809099901, "grad_norm": 0.8413271828266707, "learning_rate": 4.993471211962508e-06, "loss": 0.5041, "step": 656 }, { "epoch": 0.16246290801186944, "grad_norm": 0.8387696777792335, "learning_rate": 4.993447716267211e-06, "loss": 0.4912, "step": 657 }, { "epoch": 0.16271018793273986, "grad_norm": 0.8858353120807996, "learning_rate": 4.993424178425334e-06, "loss": 0.5147, "step": 658 }, { "epoch": 0.1629574678536103, "grad_norm": 0.8753424782843612, "learning_rate": 4.9934005984372725e-06, "loss": 0.5108, "step": 659 }, { "epoch": 0.1632047477744807, "grad_norm": 0.8503862309995437, "learning_rate": 4.993376976303426e-06, "loss": 0.5374, "step": 660 }, { "epoch": 0.16345202769535114, "grad_norm": 0.9263303515730487, "learning_rate": 4.9933533120241925e-06, "loss": 0.5227, "step": 661 }, { "epoch": 0.16369930761622156, "grad_norm": 0.8091103017427309, "learning_rate": 4.993329605599974e-06, "loss": 0.4703, "step": 662 }, { "epoch": 0.163946587537092, "grad_norm": 0.8369441099222458, "learning_rate": 4.99330585703117e-06, "loss": 0.5025, "step": 663 }, { "epoch": 0.1641938674579624, "grad_norm": 0.8541506168890921, "learning_rate": 4.993282066318182e-06, "loss": 0.5111, "step": 664 }, { "epoch": 0.16444114737883284, "grad_norm": 0.8193787625042165, "learning_rate": 4.9932582334614124e-06, "loss": 0.5036, "step": 665 }, { "epoch": 0.16468842729970326, "grad_norm": 0.8507283745756844, "learning_rate": 4.993234358461264e-06, "loss": 0.5103, "step": 666 }, { "epoch": 0.16493570722057369, "grad_norm": 0.8357763055370562, "learning_rate": 4.9932104413181405e-06, "loss": 0.5099, "step": 667 }, { "epoch": 0.1651829871414441, "grad_norm": 0.7974423447277945, "learning_rate": 4.9931864820324445e-06, "loss": 0.526, "step": 668 }, { "epoch": 0.16543026706231453, "grad_norm": 0.856566840869885, "learning_rate": 4.993162480604584e-06, "loss": 0.5296, "step": 669 }, { "epoch": 0.16567754698318496, "grad_norm": 0.8202309677412768, "learning_rate": 4.993138437034963e-06, "loss": 0.5371, "step": 670 }, { "epoch": 0.16592482690405538, "grad_norm": 0.8537092958810982, "learning_rate": 4.993114351323987e-06, "loss": 0.5363, "step": 671 }, { "epoch": 0.1661721068249258, "grad_norm": 0.8941502744703057, "learning_rate": 4.993090223472065e-06, "loss": 0.5275, "step": 672 }, { "epoch": 0.16641938674579623, "grad_norm": 0.8602589105442886, "learning_rate": 4.9930660534796046e-06, "loss": 0.5173, "step": 673 }, { "epoch": 0.16666666666666666, "grad_norm": 0.8547239627917911, "learning_rate": 4.993041841347012e-06, "loss": 0.5322, "step": 674 }, { "epoch": 0.16691394658753708, "grad_norm": 0.8148644041571959, "learning_rate": 4.9930175870747e-06, "loss": 0.544, "step": 675 }, { "epoch": 0.1671612265084075, "grad_norm": 0.8546772113475981, "learning_rate": 4.992993290663076e-06, "loss": 0.4969, "step": 676 }, { "epoch": 0.16740850642927793, "grad_norm": 0.9273768329874028, "learning_rate": 4.9929689521125515e-06, "loss": 0.5229, "step": 677 }, { "epoch": 0.16765578635014836, "grad_norm": 0.8370005835873986, "learning_rate": 4.992944571423538e-06, "loss": 0.4871, "step": 678 }, { "epoch": 0.16790306627101878, "grad_norm": 0.8525307890631186, "learning_rate": 4.992920148596447e-06, "loss": 0.5083, "step": 679 }, { "epoch": 0.1681503461918892, "grad_norm": 0.8290265597255343, "learning_rate": 4.9928956836316915e-06, "loss": 0.5309, "step": 680 }, { "epoch": 0.16839762611275966, "grad_norm": 0.8254586728696632, "learning_rate": 4.992871176529686e-06, "loss": 0.5231, "step": 681 }, { "epoch": 0.16864490603363008, "grad_norm": 0.827176590783688, "learning_rate": 4.992846627290844e-06, "loss": 0.5417, "step": 682 }, { "epoch": 0.1688921859545005, "grad_norm": 0.8415975897359966, "learning_rate": 4.99282203591558e-06, "loss": 0.507, "step": 683 }, { "epoch": 0.16913946587537093, "grad_norm": 0.8563520086330875, "learning_rate": 4.99279740240431e-06, "loss": 0.4986, "step": 684 }, { "epoch": 0.16938674579624136, "grad_norm": 0.8614893988702672, "learning_rate": 4.992772726757451e-06, "loss": 0.5088, "step": 685 }, { "epoch": 0.16963402571711178, "grad_norm": 0.7756849470892396, "learning_rate": 4.992748008975419e-06, "loss": 0.5599, "step": 686 }, { "epoch": 0.1698813056379822, "grad_norm": 0.8256108345562647, "learning_rate": 4.992723249058633e-06, "loss": 0.4938, "step": 687 }, { "epoch": 0.17012858555885263, "grad_norm": 0.8319058935689565, "learning_rate": 4.992698447007511e-06, "loss": 0.5157, "step": 688 }, { "epoch": 0.17037586547972305, "grad_norm": 0.8374831493414546, "learning_rate": 4.992673602822472e-06, "loss": 0.5417, "step": 689 }, { "epoch": 0.17062314540059348, "grad_norm": 0.8135538110680443, "learning_rate": 4.992648716503936e-06, "loss": 0.5134, "step": 690 }, { "epoch": 0.1708704253214639, "grad_norm": 0.9041030573320586, "learning_rate": 4.9926237880523235e-06, "loss": 0.5517, "step": 691 }, { "epoch": 0.17111770524233433, "grad_norm": 0.895349923324367, "learning_rate": 4.9925988174680565e-06, "loss": 0.5376, "step": 692 }, { "epoch": 0.17136498516320475, "grad_norm": 0.94778749124042, "learning_rate": 4.992573804751557e-06, "loss": 0.5212, "step": 693 }, { "epoch": 0.17161226508407518, "grad_norm": 0.8730544070580147, "learning_rate": 4.992548749903247e-06, "loss": 0.5119, "step": 694 }, { "epoch": 0.1718595450049456, "grad_norm": 0.8316096946499686, "learning_rate": 4.9925236529235495e-06, "loss": 0.5234, "step": 695 }, { "epoch": 0.17210682492581603, "grad_norm": 0.8724157769080901, "learning_rate": 4.992498513812891e-06, "loss": 0.5287, "step": 696 }, { "epoch": 0.17235410484668645, "grad_norm": 0.919022031040849, "learning_rate": 4.992473332571696e-06, "loss": 0.487, "step": 697 }, { "epoch": 0.17260138476755688, "grad_norm": 0.8603603196815058, "learning_rate": 4.9924481092003874e-06, "loss": 0.5009, "step": 698 }, { "epoch": 0.1728486646884273, "grad_norm": 0.8609657848997537, "learning_rate": 4.992422843699394e-06, "loss": 0.537, "step": 699 }, { "epoch": 0.17309594460929772, "grad_norm": 0.9497038354277341, "learning_rate": 4.992397536069143e-06, "loss": 0.5157, "step": 700 }, { "epoch": 0.17334322453016815, "grad_norm": 0.925901592259855, "learning_rate": 4.99237218631006e-06, "loss": 0.5256, "step": 701 }, { "epoch": 0.17359050445103857, "grad_norm": 0.8059602370847663, "learning_rate": 4.992346794422576e-06, "loss": 0.509, "step": 702 }, { "epoch": 0.173837784371909, "grad_norm": 0.8093787052289616, "learning_rate": 4.992321360407119e-06, "loss": 0.5125, "step": 703 }, { "epoch": 0.17408506429277942, "grad_norm": 0.8354283265994193, "learning_rate": 4.992295884264119e-06, "loss": 0.5071, "step": 704 }, { "epoch": 0.17433234421364985, "grad_norm": 0.8442582808343858, "learning_rate": 4.992270365994006e-06, "loss": 0.53, "step": 705 }, { "epoch": 0.17457962413452027, "grad_norm": 0.7836454779510944, "learning_rate": 4.9922448055972125e-06, "loss": 0.5214, "step": 706 }, { "epoch": 0.1748269040553907, "grad_norm": 0.840735881666944, "learning_rate": 4.99221920307417e-06, "loss": 0.5171, "step": 707 }, { "epoch": 0.17507418397626112, "grad_norm": 0.8829052650768754, "learning_rate": 4.992193558425311e-06, "loss": 0.5168, "step": 708 }, { "epoch": 0.17532146389713155, "grad_norm": 0.8729350937304439, "learning_rate": 4.9921678716510705e-06, "loss": 0.4853, "step": 709 }, { "epoch": 0.17556874381800197, "grad_norm": 0.9103933430094363, "learning_rate": 4.9921421427518804e-06, "loss": 0.4959, "step": 710 }, { "epoch": 0.1758160237388724, "grad_norm": 0.8035835757258305, "learning_rate": 4.992116371728176e-06, "loss": 0.4941, "step": 711 }, { "epoch": 0.17606330365974282, "grad_norm": 0.8305929115660557, "learning_rate": 4.9920905585803945e-06, "loss": 0.4876, "step": 712 }, { "epoch": 0.17631058358061324, "grad_norm": 0.7978374484195752, "learning_rate": 4.992064703308971e-06, "loss": 0.5257, "step": 713 }, { "epoch": 0.17655786350148367, "grad_norm": 0.8418807640743572, "learning_rate": 4.992038805914343e-06, "loss": 0.513, "step": 714 }, { "epoch": 0.1768051434223541, "grad_norm": 0.8330452494475705, "learning_rate": 4.992012866396948e-06, "loss": 0.4956, "step": 715 }, { "epoch": 0.17705242334322452, "grad_norm": 0.8661744791086904, "learning_rate": 4.991986884757224e-06, "loss": 0.4939, "step": 716 }, { "epoch": 0.17729970326409494, "grad_norm": 0.8486526830433232, "learning_rate": 4.991960860995611e-06, "loss": 0.4879, "step": 717 }, { "epoch": 0.17754698318496537, "grad_norm": 0.8352424841247117, "learning_rate": 4.991934795112548e-06, "loss": 0.4961, "step": 718 }, { "epoch": 0.1777942631058358, "grad_norm": 0.8789343341243142, "learning_rate": 4.991908687108477e-06, "loss": 0.4897, "step": 719 }, { "epoch": 0.17804154302670624, "grad_norm": 0.823652151046163, "learning_rate": 4.991882536983839e-06, "loss": 0.512, "step": 720 }, { "epoch": 0.17828882294757667, "grad_norm": 0.8134645433823909, "learning_rate": 4.991856344739073e-06, "loss": 0.5404, "step": 721 }, { "epoch": 0.1785361028684471, "grad_norm": 0.8501968648399841, "learning_rate": 4.991830110374626e-06, "loss": 0.5137, "step": 722 }, { "epoch": 0.17878338278931752, "grad_norm": 0.801152491159058, "learning_rate": 4.991803833890939e-06, "loss": 0.5255, "step": 723 }, { "epoch": 0.17903066271018794, "grad_norm": 0.8249255075508455, "learning_rate": 4.991777515288457e-06, "loss": 0.518, "step": 724 }, { "epoch": 0.17927794263105837, "grad_norm": 0.8299863504018892, "learning_rate": 4.991751154567625e-06, "loss": 0.4974, "step": 725 }, { "epoch": 0.1795252225519288, "grad_norm": 0.902641066855043, "learning_rate": 4.991724751728888e-06, "loss": 0.4966, "step": 726 }, { "epoch": 0.17977250247279922, "grad_norm": 0.8069318501808427, "learning_rate": 4.991698306772692e-06, "loss": 0.5069, "step": 727 }, { "epoch": 0.18001978239366964, "grad_norm": 0.8545994506172374, "learning_rate": 4.991671819699484e-06, "loss": 0.4956, "step": 728 }, { "epoch": 0.18026706231454007, "grad_norm": 0.8164400523644465, "learning_rate": 4.9916452905097135e-06, "loss": 0.5065, "step": 729 }, { "epoch": 0.1805143422354105, "grad_norm": 0.8086218873858831, "learning_rate": 4.991618719203827e-06, "loss": 0.5011, "step": 730 }, { "epoch": 0.18076162215628092, "grad_norm": 0.8552002311074453, "learning_rate": 4.991592105782274e-06, "loss": 0.5104, "step": 731 }, { "epoch": 0.18100890207715134, "grad_norm": 0.8717792545878794, "learning_rate": 4.9915654502455045e-06, "loss": 0.534, "step": 732 }, { "epoch": 0.18125618199802176, "grad_norm": 0.7849441865788286, "learning_rate": 4.9915387525939695e-06, "loss": 0.4982, "step": 733 }, { "epoch": 0.1815034619188922, "grad_norm": 0.8340027119415854, "learning_rate": 4.99151201282812e-06, "loss": 0.5142, "step": 734 }, { "epoch": 0.18175074183976261, "grad_norm": 0.8413817442374992, "learning_rate": 4.991485230948407e-06, "loss": 0.5332, "step": 735 }, { "epoch": 0.18199802176063304, "grad_norm": 0.8452871469316092, "learning_rate": 4.991458406955285e-06, "loss": 0.5022, "step": 736 }, { "epoch": 0.18224530168150346, "grad_norm": 0.872847298100631, "learning_rate": 4.991431540849206e-06, "loss": 0.4867, "step": 737 }, { "epoch": 0.1824925816023739, "grad_norm": 0.8514132061403205, "learning_rate": 4.991404632630625e-06, "loss": 0.5106, "step": 738 }, { "epoch": 0.1827398615232443, "grad_norm": 0.8949447429217282, "learning_rate": 4.991377682299996e-06, "loss": 0.4973, "step": 739 }, { "epoch": 0.18298714144411474, "grad_norm": 0.8326410302920144, "learning_rate": 4.991350689857775e-06, "loss": 0.5125, "step": 740 }, { "epoch": 0.18323442136498516, "grad_norm": 0.8675242195523758, "learning_rate": 4.9913236553044185e-06, "loss": 0.4978, "step": 741 }, { "epoch": 0.1834817012858556, "grad_norm": 0.801469598537518, "learning_rate": 4.991296578640383e-06, "loss": 0.5135, "step": 742 }, { "epoch": 0.183728981206726, "grad_norm": 0.8201428570448723, "learning_rate": 4.991269459866126e-06, "loss": 0.5144, "step": 743 }, { "epoch": 0.18397626112759644, "grad_norm": 0.8623718853955257, "learning_rate": 4.991242298982107e-06, "loss": 0.4808, "step": 744 }, { "epoch": 0.18422354104846686, "grad_norm": 0.7946767655860458, "learning_rate": 4.991215095988784e-06, "loss": 0.5226, "step": 745 }, { "epoch": 0.18447082096933728, "grad_norm": 0.8439539102467273, "learning_rate": 4.991187850886618e-06, "loss": 0.4925, "step": 746 }, { "epoch": 0.1847181008902077, "grad_norm": 0.7919859124879289, "learning_rate": 4.991160563676067e-06, "loss": 0.4916, "step": 747 }, { "epoch": 0.18496538081107813, "grad_norm": 0.8643732765638406, "learning_rate": 4.991133234357595e-06, "loss": 0.4911, "step": 748 }, { "epoch": 0.18521266073194856, "grad_norm": 0.8750805346885794, "learning_rate": 4.9911058629316615e-06, "loss": 0.4655, "step": 749 }, { "epoch": 0.18545994065281898, "grad_norm": 0.8199097338629083, "learning_rate": 4.991078449398732e-06, "loss": 0.523, "step": 750 }, { "epoch": 0.1857072205736894, "grad_norm": 0.8894643167179568, "learning_rate": 4.991050993759268e-06, "loss": 0.4973, "step": 751 }, { "epoch": 0.18595450049455983, "grad_norm": 0.8224624160630544, "learning_rate": 4.991023496013734e-06, "loss": 0.4931, "step": 752 }, { "epoch": 0.18620178041543026, "grad_norm": 0.8215579795094418, "learning_rate": 4.990995956162593e-06, "loss": 0.5023, "step": 753 }, { "epoch": 0.18644906033630068, "grad_norm": 0.8083894480373339, "learning_rate": 4.990968374206314e-06, "loss": 0.5224, "step": 754 }, { "epoch": 0.1866963402571711, "grad_norm": 0.8521252726155766, "learning_rate": 4.9909407501453625e-06, "loss": 0.511, "step": 755 }, { "epoch": 0.18694362017804153, "grad_norm": 0.8703568142440704, "learning_rate": 4.990913083980202e-06, "loss": 0.5258, "step": 756 }, { "epoch": 0.18719090009891196, "grad_norm": 0.84010044458961, "learning_rate": 4.990885375711304e-06, "loss": 0.5004, "step": 757 }, { "epoch": 0.18743818001978238, "grad_norm": 0.8318706342323815, "learning_rate": 4.990857625339135e-06, "loss": 0.5028, "step": 758 }, { "epoch": 0.18768545994065283, "grad_norm": 0.8416207130288796, "learning_rate": 4.9908298328641645e-06, "loss": 0.5053, "step": 759 }, { "epoch": 0.18793273986152326, "grad_norm": 0.8888518172116866, "learning_rate": 4.9908019982868625e-06, "loss": 0.5172, "step": 760 }, { "epoch": 0.18818001978239368, "grad_norm": 0.8502389509777805, "learning_rate": 4.990774121607699e-06, "loss": 0.5059, "step": 761 }, { "epoch": 0.1884272997032641, "grad_norm": 0.8610956280514308, "learning_rate": 4.990746202827145e-06, "loss": 0.535, "step": 762 }, { "epoch": 0.18867457962413453, "grad_norm": 0.7972219564385539, "learning_rate": 4.990718241945673e-06, "loss": 0.5218, "step": 763 }, { "epoch": 0.18892185954500496, "grad_norm": 0.7805412892800699, "learning_rate": 4.990690238963756e-06, "loss": 0.509, "step": 764 }, { "epoch": 0.18916913946587538, "grad_norm": 0.8236493383786434, "learning_rate": 4.990662193881865e-06, "loss": 0.5031, "step": 765 }, { "epoch": 0.1894164193867458, "grad_norm": 0.8999257859616762, "learning_rate": 4.9906341067004784e-06, "loss": 0.488, "step": 766 }, { "epoch": 0.18966369930761623, "grad_norm": 0.8708077868377747, "learning_rate": 4.990605977420067e-06, "loss": 0.5233, "step": 767 }, { "epoch": 0.18991097922848665, "grad_norm": 0.8420116856277404, "learning_rate": 4.990577806041108e-06, "loss": 0.5135, "step": 768 }, { "epoch": 0.19015825914935708, "grad_norm": 0.8548814632547316, "learning_rate": 4.990549592564076e-06, "loss": 0.4867, "step": 769 }, { "epoch": 0.1904055390702275, "grad_norm": 0.8268530338638478, "learning_rate": 4.99052133698945e-06, "loss": 0.4688, "step": 770 }, { "epoch": 0.19065281899109793, "grad_norm": 0.8320420070519919, "learning_rate": 4.990493039317707e-06, "loss": 0.5263, "step": 771 }, { "epoch": 0.19090009891196835, "grad_norm": 0.8365840878509793, "learning_rate": 4.990464699549325e-06, "loss": 0.4931, "step": 772 }, { "epoch": 0.19114737883283878, "grad_norm": 0.8101506431051484, "learning_rate": 4.990436317684782e-06, "loss": 0.5262, "step": 773 }, { "epoch": 0.1913946587537092, "grad_norm": 0.8647254791244104, "learning_rate": 4.990407893724561e-06, "loss": 0.5095, "step": 774 }, { "epoch": 0.19164193867457963, "grad_norm": 0.813120102249058, "learning_rate": 4.990379427669138e-06, "loss": 0.4984, "step": 775 }, { "epoch": 0.19188921859545005, "grad_norm": 0.8042977000267384, "learning_rate": 4.990350919518997e-06, "loss": 0.4978, "step": 776 }, { "epoch": 0.19213649851632048, "grad_norm": 0.8483867646954429, "learning_rate": 4.9903223692746196e-06, "loss": 0.4732, "step": 777 }, { "epoch": 0.1923837784371909, "grad_norm": 0.8647994593068955, "learning_rate": 4.990293776936488e-06, "loss": 0.545, "step": 778 }, { "epoch": 0.19263105835806132, "grad_norm": 0.8740789736428879, "learning_rate": 4.990265142505085e-06, "loss": 0.4954, "step": 779 }, { "epoch": 0.19287833827893175, "grad_norm": 0.8174292157713233, "learning_rate": 4.990236465980896e-06, "loss": 0.4935, "step": 780 }, { "epoch": 0.19312561819980217, "grad_norm": 0.8630949715708419, "learning_rate": 4.990207747364404e-06, "loss": 0.5115, "step": 781 }, { "epoch": 0.1933728981206726, "grad_norm": 0.8452101849069688, "learning_rate": 4.9901789866560955e-06, "loss": 0.5075, "step": 782 }, { "epoch": 0.19362017804154302, "grad_norm": 0.8222049299567881, "learning_rate": 4.990150183856457e-06, "loss": 0.4868, "step": 783 }, { "epoch": 0.19386745796241345, "grad_norm": 0.8045405700762853, "learning_rate": 4.990121338965975e-06, "loss": 0.4996, "step": 784 }, { "epoch": 0.19411473788328387, "grad_norm": 0.839462282900959, "learning_rate": 4.9900924519851354e-06, "loss": 0.513, "step": 785 }, { "epoch": 0.1943620178041543, "grad_norm": 0.8129960974230438, "learning_rate": 4.990063522914429e-06, "loss": 0.5067, "step": 786 }, { "epoch": 0.19460929772502472, "grad_norm": 0.8498358711521139, "learning_rate": 4.990034551754344e-06, "loss": 0.4971, "step": 787 }, { "epoch": 0.19485657764589515, "grad_norm": 0.8342170766695094, "learning_rate": 4.9900055385053696e-06, "loss": 0.5785, "step": 788 }, { "epoch": 0.19510385756676557, "grad_norm": 0.7720839055732265, "learning_rate": 4.9899764831679954e-06, "loss": 0.5091, "step": 789 }, { "epoch": 0.195351137487636, "grad_norm": 0.8448261448148362, "learning_rate": 4.989947385742715e-06, "loss": 0.5128, "step": 790 }, { "epoch": 0.19559841740850642, "grad_norm": 0.8355306276455249, "learning_rate": 4.9899182462300175e-06, "loss": 0.5334, "step": 791 }, { "epoch": 0.19584569732937684, "grad_norm": 0.8108028837754382, "learning_rate": 4.989889064630397e-06, "loss": 0.5037, "step": 792 }, { "epoch": 0.19609297725024727, "grad_norm": 0.8099327440790443, "learning_rate": 4.989859840944346e-06, "loss": 0.5074, "step": 793 }, { "epoch": 0.1963402571711177, "grad_norm": 0.8540803863374226, "learning_rate": 4.989830575172361e-06, "loss": 0.4785, "step": 794 }, { "epoch": 0.19658753709198812, "grad_norm": 0.8361945024114827, "learning_rate": 4.9898012673149325e-06, "loss": 0.4938, "step": 795 }, { "epoch": 0.19683481701285854, "grad_norm": 0.8603101439414645, "learning_rate": 4.989771917372559e-06, "loss": 0.458, "step": 796 }, { "epoch": 0.19708209693372897, "grad_norm": 0.8623322983540171, "learning_rate": 4.989742525345736e-06, "loss": 0.5032, "step": 797 }, { "epoch": 0.19732937685459942, "grad_norm": 0.8473115195100572, "learning_rate": 4.9897130912349585e-06, "loss": 0.4936, "step": 798 }, { "epoch": 0.19757665677546984, "grad_norm": 0.7940684143161678, "learning_rate": 4.9896836150407256e-06, "loss": 0.5473, "step": 799 }, { "epoch": 0.19782393669634027, "grad_norm": 0.8272717457807083, "learning_rate": 4.989654096763537e-06, "loss": 0.5171, "step": 800 }, { "epoch": 0.1980712166172107, "grad_norm": 0.8902992860806108, "learning_rate": 4.989624536403888e-06, "loss": 0.5375, "step": 801 }, { "epoch": 0.19831849653808112, "grad_norm": 0.9101693315058536, "learning_rate": 4.989594933962281e-06, "loss": 0.4882, "step": 802 }, { "epoch": 0.19856577645895154, "grad_norm": 0.823665912051704, "learning_rate": 4.989565289439216e-06, "loss": 0.5004, "step": 803 }, { "epoch": 0.19881305637982197, "grad_norm": 0.8537141855296777, "learning_rate": 4.9895356028351936e-06, "loss": 0.5057, "step": 804 }, { "epoch": 0.1990603363006924, "grad_norm": 0.8499363471459407, "learning_rate": 4.989505874150716e-06, "loss": 0.5051, "step": 805 }, { "epoch": 0.19930761622156282, "grad_norm": 0.8713007651523925, "learning_rate": 4.989476103386285e-06, "loss": 0.5093, "step": 806 }, { "epoch": 0.19955489614243324, "grad_norm": 0.8878831506094113, "learning_rate": 4.9894462905424035e-06, "loss": 0.5067, "step": 807 }, { "epoch": 0.19980217606330367, "grad_norm": 0.8677379139060017, "learning_rate": 4.989416435619577e-06, "loss": 0.5013, "step": 808 }, { "epoch": 0.2000494559841741, "grad_norm": 0.8442842813703827, "learning_rate": 4.98938653861831e-06, "loss": 0.5048, "step": 809 }, { "epoch": 0.20029673590504452, "grad_norm": 0.8111674446939234, "learning_rate": 4.989356599539106e-06, "loss": 0.5167, "step": 810 }, { "epoch": 0.20054401582591494, "grad_norm": 0.8954173542551098, "learning_rate": 4.989326618382471e-06, "loss": 0.5147, "step": 811 }, { "epoch": 0.20079129574678536, "grad_norm": 0.9233165216979647, "learning_rate": 4.9892965951489154e-06, "loss": 0.5064, "step": 812 }, { "epoch": 0.2010385756676558, "grad_norm": 0.9134666739113686, "learning_rate": 4.989266529838943e-06, "loss": 0.5009, "step": 813 }, { "epoch": 0.2012858555885262, "grad_norm": 0.8492218856969446, "learning_rate": 4.989236422453064e-06, "loss": 0.5124, "step": 814 }, { "epoch": 0.20153313550939664, "grad_norm": 0.8652036940944711, "learning_rate": 4.989206272991785e-06, "loss": 0.5366, "step": 815 }, { "epoch": 0.20178041543026706, "grad_norm": 0.8418660565061913, "learning_rate": 4.9891760814556186e-06, "loss": 0.5105, "step": 816 }, { "epoch": 0.2020276953511375, "grad_norm": 0.8878975245185788, "learning_rate": 4.989145847845074e-06, "loss": 0.5132, "step": 817 }, { "epoch": 0.2022749752720079, "grad_norm": 0.8315141988567767, "learning_rate": 4.989115572160661e-06, "loss": 0.5008, "step": 818 }, { "epoch": 0.20252225519287834, "grad_norm": 0.8376037839192603, "learning_rate": 4.989085254402892e-06, "loss": 0.5057, "step": 819 }, { "epoch": 0.20276953511374876, "grad_norm": 0.8037174338863466, "learning_rate": 4.98905489457228e-06, "loss": 0.4975, "step": 820 }, { "epoch": 0.20301681503461919, "grad_norm": 0.8766782392881515, "learning_rate": 4.9890244926693385e-06, "loss": 0.4667, "step": 821 }, { "epoch": 0.2032640949554896, "grad_norm": 0.9181666145995278, "learning_rate": 4.98899404869458e-06, "loss": 0.5197, "step": 822 }, { "epoch": 0.20351137487636003, "grad_norm": 0.8421985589488679, "learning_rate": 4.98896356264852e-06, "loss": 0.4693, "step": 823 }, { "epoch": 0.20375865479723046, "grad_norm": 0.8414578412087721, "learning_rate": 4.988933034531674e-06, "loss": 0.4959, "step": 824 }, { "epoch": 0.20400593471810088, "grad_norm": 0.8207895326319039, "learning_rate": 4.988902464344557e-06, "loss": 0.4968, "step": 825 }, { "epoch": 0.2042532146389713, "grad_norm": 0.8258058754282276, "learning_rate": 4.988871852087687e-06, "loss": 0.4806, "step": 826 }, { "epoch": 0.20450049455984173, "grad_norm": 0.7911811305702641, "learning_rate": 4.988841197761581e-06, "loss": 0.5105, "step": 827 }, { "epoch": 0.20474777448071216, "grad_norm": 0.8067032711527126, "learning_rate": 4.988810501366756e-06, "loss": 0.4988, "step": 828 }, { "epoch": 0.20499505440158258, "grad_norm": 0.8248411254954989, "learning_rate": 4.988779762903733e-06, "loss": 0.4679, "step": 829 }, { "epoch": 0.205242334322453, "grad_norm": 0.8075962457898619, "learning_rate": 4.98874898237303e-06, "loss": 0.4802, "step": 830 }, { "epoch": 0.20548961424332343, "grad_norm": 0.8694350300009082, "learning_rate": 4.988718159775168e-06, "loss": 0.512, "step": 831 }, { "epoch": 0.20573689416419386, "grad_norm": 0.838526356063612, "learning_rate": 4.988687295110667e-06, "loss": 0.4772, "step": 832 }, { "epoch": 0.20598417408506428, "grad_norm": 0.8581376012516917, "learning_rate": 4.98865638838005e-06, "loss": 0.4997, "step": 833 }, { "epoch": 0.2062314540059347, "grad_norm": 0.8508868762945916, "learning_rate": 4.988625439583838e-06, "loss": 0.5016, "step": 834 }, { "epoch": 0.20647873392680513, "grad_norm": 0.8649500124082152, "learning_rate": 4.988594448722556e-06, "loss": 0.4915, "step": 835 }, { "epoch": 0.20672601384767555, "grad_norm": 0.8094411363162982, "learning_rate": 4.988563415796726e-06, "loss": 0.5196, "step": 836 }, { "epoch": 0.206973293768546, "grad_norm": 0.8414623331186055, "learning_rate": 4.988532340806873e-06, "loss": 0.5158, "step": 837 }, { "epoch": 0.20722057368941643, "grad_norm": 0.8243945840705411, "learning_rate": 4.9885012237535235e-06, "loss": 0.4897, "step": 838 }, { "epoch": 0.20746785361028686, "grad_norm": 0.9140469955319724, "learning_rate": 4.988470064637202e-06, "loss": 0.4759, "step": 839 }, { "epoch": 0.20771513353115728, "grad_norm": 0.8558925215879722, "learning_rate": 4.988438863458436e-06, "loss": 0.5119, "step": 840 }, { "epoch": 0.2079624134520277, "grad_norm": 0.815440252440534, "learning_rate": 4.988407620217752e-06, "loss": 0.4945, "step": 841 }, { "epoch": 0.20820969337289813, "grad_norm": 0.8232200098822217, "learning_rate": 4.988376334915679e-06, "loss": 0.4996, "step": 842 }, { "epoch": 0.20845697329376855, "grad_norm": 0.8691564428161908, "learning_rate": 4.988345007552746e-06, "loss": 0.5097, "step": 843 }, { "epoch": 0.20870425321463898, "grad_norm": 0.8264653567410514, "learning_rate": 4.9883136381294816e-06, "loss": 0.5119, "step": 844 }, { "epoch": 0.2089515331355094, "grad_norm": 0.8785002227076522, "learning_rate": 4.988282226646417e-06, "loss": 0.514, "step": 845 }, { "epoch": 0.20919881305637983, "grad_norm": 0.8023528924268092, "learning_rate": 4.988250773104083e-06, "loss": 0.5428, "step": 846 }, { "epoch": 0.20944609297725025, "grad_norm": 0.8211524437917096, "learning_rate": 4.98821927750301e-06, "loss": 0.5256, "step": 847 }, { "epoch": 0.20969337289812068, "grad_norm": 0.8010942784871227, "learning_rate": 4.988187739843731e-06, "loss": 0.5346, "step": 848 }, { "epoch": 0.2099406528189911, "grad_norm": 0.9044984773545841, "learning_rate": 4.988156160126781e-06, "loss": 0.5038, "step": 849 }, { "epoch": 0.21018793273986153, "grad_norm": 0.8706964927904601, "learning_rate": 4.98812453835269e-06, "loss": 0.507, "step": 850 }, { "epoch": 0.21043521266073195, "grad_norm": 0.8736523995044558, "learning_rate": 4.988092874521996e-06, "loss": 0.4939, "step": 851 }, { "epoch": 0.21068249258160238, "grad_norm": 0.8851801188270471, "learning_rate": 4.988061168635232e-06, "loss": 0.5165, "step": 852 }, { "epoch": 0.2109297725024728, "grad_norm": 0.853684837000266, "learning_rate": 4.9880294206929356e-06, "loss": 0.5153, "step": 853 }, { "epoch": 0.21117705242334323, "grad_norm": 0.911771572862549, "learning_rate": 4.9879976306956415e-06, "loss": 0.4859, "step": 854 }, { "epoch": 0.21142433234421365, "grad_norm": 0.8626964859349916, "learning_rate": 4.987965798643889e-06, "loss": 0.4644, "step": 855 }, { "epoch": 0.21167161226508407, "grad_norm": 0.8572993279837756, "learning_rate": 4.987933924538215e-06, "loss": 0.5022, "step": 856 }, { "epoch": 0.2119188921859545, "grad_norm": 0.8963927749480562, "learning_rate": 4.987902008379159e-06, "loss": 0.4736, "step": 857 }, { "epoch": 0.21216617210682492, "grad_norm": 0.8505265096635852, "learning_rate": 4.987870050167259e-06, "loss": 0.509, "step": 858 }, { "epoch": 0.21241345202769535, "grad_norm": 0.8393583930696208, "learning_rate": 4.987838049903058e-06, "loss": 0.5031, "step": 859 }, { "epoch": 0.21266073194856577, "grad_norm": 0.8669348205976165, "learning_rate": 4.987806007587094e-06, "loss": 0.4743, "step": 860 }, { "epoch": 0.2129080118694362, "grad_norm": 0.8272724159339694, "learning_rate": 4.9877739232199095e-06, "loss": 0.5207, "step": 861 }, { "epoch": 0.21315529179030662, "grad_norm": 0.8732070794723419, "learning_rate": 4.987741796802047e-06, "loss": 0.5009, "step": 862 }, { "epoch": 0.21340257171117705, "grad_norm": 0.8813154447433397, "learning_rate": 4.987709628334051e-06, "loss": 0.5135, "step": 863 }, { "epoch": 0.21364985163204747, "grad_norm": 0.8325507743445312, "learning_rate": 4.987677417816462e-06, "loss": 0.5058, "step": 864 }, { "epoch": 0.2138971315529179, "grad_norm": 0.8747157253538062, "learning_rate": 4.987645165249827e-06, "loss": 0.5056, "step": 865 }, { "epoch": 0.21414441147378832, "grad_norm": 0.90232302210331, "learning_rate": 4.987612870634691e-06, "loss": 0.5056, "step": 866 }, { "epoch": 0.21439169139465875, "grad_norm": 0.84923644734889, "learning_rate": 4.987580533971599e-06, "loss": 0.4947, "step": 867 }, { "epoch": 0.21463897131552917, "grad_norm": 0.8698770676663805, "learning_rate": 4.9875481552610975e-06, "loss": 0.5125, "step": 868 }, { "epoch": 0.2148862512363996, "grad_norm": 0.817003058821012, "learning_rate": 4.9875157345037345e-06, "loss": 0.5422, "step": 869 }, { "epoch": 0.21513353115727002, "grad_norm": 0.8417204204341288, "learning_rate": 4.9874832717000576e-06, "loss": 0.4953, "step": 870 }, { "epoch": 0.21538081107814044, "grad_norm": 0.9572380936686788, "learning_rate": 4.9874507668506155e-06, "loss": 0.4844, "step": 871 }, { "epoch": 0.21562809099901087, "grad_norm": 0.834643198655235, "learning_rate": 4.987418219955958e-06, "loss": 0.5242, "step": 872 }, { "epoch": 0.2158753709198813, "grad_norm": 0.8559434479885895, "learning_rate": 4.987385631016635e-06, "loss": 0.5118, "step": 873 }, { "epoch": 0.21612265084075172, "grad_norm": 0.8746611306575874, "learning_rate": 4.987353000033197e-06, "loss": 0.4964, "step": 874 }, { "epoch": 0.21636993076162217, "grad_norm": 0.8810491800616369, "learning_rate": 4.987320327006196e-06, "loss": 0.4804, "step": 875 }, { "epoch": 0.2166172106824926, "grad_norm": 0.857857539044612, "learning_rate": 4.987287611936185e-06, "loss": 0.5073, "step": 876 }, { "epoch": 0.21686449060336302, "grad_norm": 0.8268996691952446, "learning_rate": 4.987254854823715e-06, "loss": 0.5007, "step": 877 }, { "epoch": 0.21711177052423344, "grad_norm": 0.828244801524196, "learning_rate": 4.987222055669342e-06, "loss": 0.5075, "step": 878 }, { "epoch": 0.21735905044510387, "grad_norm": 0.8662331481181559, "learning_rate": 4.987189214473618e-06, "loss": 0.5054, "step": 879 }, { "epoch": 0.2176063303659743, "grad_norm": 0.840848336618183, "learning_rate": 4.987156331237099e-06, "loss": 0.4954, "step": 880 }, { "epoch": 0.21785361028684472, "grad_norm": 0.8368772772807719, "learning_rate": 4.987123405960343e-06, "loss": 0.5116, "step": 881 }, { "epoch": 0.21810089020771514, "grad_norm": 0.8593385012576953, "learning_rate": 4.987090438643904e-06, "loss": 0.5273, "step": 882 }, { "epoch": 0.21834817012858557, "grad_norm": 0.8364107176881544, "learning_rate": 4.98705742928834e-06, "loss": 0.5014, "step": 883 }, { "epoch": 0.218595450049456, "grad_norm": 0.8227427355602717, "learning_rate": 4.987024377894208e-06, "loss": 0.4951, "step": 884 }, { "epoch": 0.21884272997032642, "grad_norm": 0.8150154334336953, "learning_rate": 4.986991284462068e-06, "loss": 0.49, "step": 885 }, { "epoch": 0.21909000989119684, "grad_norm": 0.8449279651032495, "learning_rate": 4.98695814899248e-06, "loss": 0.5147, "step": 886 }, { "epoch": 0.21933728981206727, "grad_norm": 0.8199419337132845, "learning_rate": 4.986924971486001e-06, "loss": 0.5203, "step": 887 }, { "epoch": 0.2195845697329377, "grad_norm": 0.8211194462863136, "learning_rate": 4.986891751943196e-06, "loss": 0.5027, "step": 888 }, { "epoch": 0.21983184965380811, "grad_norm": 0.8510718071011489, "learning_rate": 4.986858490364624e-06, "loss": 0.4842, "step": 889 }, { "epoch": 0.22007912957467854, "grad_norm": 0.8122596001789849, "learning_rate": 4.986825186750846e-06, "loss": 0.4882, "step": 890 }, { "epoch": 0.22032640949554896, "grad_norm": 0.8227992726845009, "learning_rate": 4.986791841102427e-06, "loss": 0.4894, "step": 891 }, { "epoch": 0.2205736894164194, "grad_norm": 0.8330978830853362, "learning_rate": 4.986758453419931e-06, "loss": 0.5047, "step": 892 }, { "epoch": 0.2208209693372898, "grad_norm": 0.8289885814322678, "learning_rate": 4.986725023703921e-06, "loss": 0.5211, "step": 893 }, { "epoch": 0.22106824925816024, "grad_norm": 0.7967487040612072, "learning_rate": 4.986691551954962e-06, "loss": 0.4961, "step": 894 }, { "epoch": 0.22131552917903066, "grad_norm": 0.8663375538553278, "learning_rate": 4.986658038173621e-06, "loss": 0.51, "step": 895 }, { "epoch": 0.2215628090999011, "grad_norm": 0.7906515926897258, "learning_rate": 4.986624482360464e-06, "loss": 0.5029, "step": 896 }, { "epoch": 0.2218100890207715, "grad_norm": 0.7744820254840943, "learning_rate": 4.986590884516057e-06, "loss": 0.5023, "step": 897 }, { "epoch": 0.22205736894164194, "grad_norm": 0.883399410152621, "learning_rate": 4.98655724464097e-06, "loss": 0.4854, "step": 898 }, { "epoch": 0.22230464886251236, "grad_norm": 0.8630333018932411, "learning_rate": 4.98652356273577e-06, "loss": 0.4881, "step": 899 }, { "epoch": 0.22255192878338279, "grad_norm": 0.8277706025700802, "learning_rate": 4.986489838801027e-06, "loss": 0.5004, "step": 900 }, { "epoch": 0.2227992087042532, "grad_norm": 0.8204017170552954, "learning_rate": 4.98645607283731e-06, "loss": 0.4963, "step": 901 }, { "epoch": 0.22304648862512363, "grad_norm": 0.8265861428479575, "learning_rate": 4.986422264845191e-06, "loss": 0.4839, "step": 902 }, { "epoch": 0.22329376854599406, "grad_norm": 0.8276540389714453, "learning_rate": 4.986388414825242e-06, "loss": 0.4946, "step": 903 }, { "epoch": 0.22354104846686448, "grad_norm": 0.8699787108810072, "learning_rate": 4.986354522778033e-06, "loss": 0.5113, "step": 904 }, { "epoch": 0.2237883283877349, "grad_norm": 0.7617126884375469, "learning_rate": 4.986320588704139e-06, "loss": 0.5106, "step": 905 }, { "epoch": 0.22403560830860533, "grad_norm": 0.8589768854675831, "learning_rate": 4.986286612604132e-06, "loss": 0.4754, "step": 906 }, { "epoch": 0.22428288822947576, "grad_norm": 0.8161695889367072, "learning_rate": 4.986252594478588e-06, "loss": 0.4865, "step": 907 }, { "epoch": 0.22453016815034618, "grad_norm": 0.8425510497631642, "learning_rate": 4.98621853432808e-06, "loss": 0.4961, "step": 908 }, { "epoch": 0.2247774480712166, "grad_norm": 0.8942256571041988, "learning_rate": 4.986184432153185e-06, "loss": 0.5114, "step": 909 }, { "epoch": 0.22502472799208703, "grad_norm": 0.8569701182433922, "learning_rate": 4.986150287954479e-06, "loss": 0.4747, "step": 910 }, { "epoch": 0.22527200791295746, "grad_norm": 0.8342740450002151, "learning_rate": 4.986116101732539e-06, "loss": 0.4965, "step": 911 }, { "epoch": 0.22551928783382788, "grad_norm": 0.906710700659202, "learning_rate": 4.986081873487944e-06, "loss": 0.5079, "step": 912 }, { "epoch": 0.2257665677546983, "grad_norm": 0.8145221155521087, "learning_rate": 4.98604760322127e-06, "loss": 0.4913, "step": 913 }, { "epoch": 0.22601384767556876, "grad_norm": 0.8166511496949852, "learning_rate": 4.986013290933099e-06, "loss": 0.472, "step": 914 }, { "epoch": 0.22626112759643918, "grad_norm": 0.914692426611638, "learning_rate": 4.98597893662401e-06, "loss": 0.4695, "step": 915 }, { "epoch": 0.2265084075173096, "grad_norm": 0.7809607473935676, "learning_rate": 4.985944540294584e-06, "loss": 0.5388, "step": 916 }, { "epoch": 0.22675568743818003, "grad_norm": 0.8231717409594672, "learning_rate": 4.9859101019454015e-06, "loss": 0.5249, "step": 917 }, { "epoch": 0.22700296735905046, "grad_norm": 0.8361911011029732, "learning_rate": 4.985875621577045e-06, "loss": 0.5121, "step": 918 }, { "epoch": 0.22725024727992088, "grad_norm": 0.8048366668863958, "learning_rate": 4.985841099190098e-06, "loss": 0.4892, "step": 919 }, { "epoch": 0.2274975272007913, "grad_norm": 0.8660579797696568, "learning_rate": 4.985806534785143e-06, "loss": 0.4799, "step": 920 }, { "epoch": 0.22774480712166173, "grad_norm": 0.8318167298034147, "learning_rate": 4.9857719283627635e-06, "loss": 0.498, "step": 921 }, { "epoch": 0.22799208704253215, "grad_norm": 0.8605410511892199, "learning_rate": 4.985737279923547e-06, "loss": 0.5138, "step": 922 }, { "epoch": 0.22823936696340258, "grad_norm": 0.8703326428337433, "learning_rate": 4.9857025894680775e-06, "loss": 0.4784, "step": 923 }, { "epoch": 0.228486646884273, "grad_norm": 0.8370959017594702, "learning_rate": 4.9856678569969415e-06, "loss": 0.488, "step": 924 }, { "epoch": 0.22873392680514343, "grad_norm": 0.8693400164771248, "learning_rate": 4.985633082510727e-06, "loss": 0.5124, "step": 925 }, { "epoch": 0.22898120672601385, "grad_norm": 0.8636007951346818, "learning_rate": 4.985598266010021e-06, "loss": 0.5016, "step": 926 }, { "epoch": 0.22922848664688428, "grad_norm": 0.8832259920894722, "learning_rate": 4.985563407495411e-06, "loss": 0.4954, "step": 927 }, { "epoch": 0.2294757665677547, "grad_norm": 0.8191758445943106, "learning_rate": 4.985528506967488e-06, "loss": 0.507, "step": 928 }, { "epoch": 0.22972304648862513, "grad_norm": 0.8789118908623763, "learning_rate": 4.985493564426841e-06, "loss": 0.4885, "step": 929 }, { "epoch": 0.22997032640949555, "grad_norm": 0.8924491857440008, "learning_rate": 4.985458579874061e-06, "loss": 0.5033, "step": 930 }, { "epoch": 0.23021760633036598, "grad_norm": 0.8383332268067973, "learning_rate": 4.9854235533097396e-06, "loss": 0.5156, "step": 931 }, { "epoch": 0.2304648862512364, "grad_norm": 0.8767336119514518, "learning_rate": 4.985388484734467e-06, "loss": 0.4849, "step": 932 }, { "epoch": 0.23071216617210683, "grad_norm": 0.8405975137037067, "learning_rate": 4.985353374148838e-06, "loss": 0.506, "step": 933 }, { "epoch": 0.23095944609297725, "grad_norm": 0.8170821427085228, "learning_rate": 4.9853182215534465e-06, "loss": 0.4962, "step": 934 }, { "epoch": 0.23120672601384767, "grad_norm": 0.8314334184214403, "learning_rate": 4.985283026948885e-06, "loss": 0.483, "step": 935 }, { "epoch": 0.2314540059347181, "grad_norm": 0.9084995950646598, "learning_rate": 4.985247790335748e-06, "loss": 0.4794, "step": 936 }, { "epoch": 0.23170128585558852, "grad_norm": 0.8483507056354166, "learning_rate": 4.9852125117146335e-06, "loss": 0.5065, "step": 937 }, { "epoch": 0.23194856577645895, "grad_norm": 0.8402483495826693, "learning_rate": 4.985177191086136e-06, "loss": 0.4915, "step": 938 }, { "epoch": 0.23219584569732937, "grad_norm": 0.823169932427246, "learning_rate": 4.985141828450852e-06, "loss": 0.4918, "step": 939 }, { "epoch": 0.2324431256181998, "grad_norm": 0.8243831987168234, "learning_rate": 4.985106423809381e-06, "loss": 0.4755, "step": 940 }, { "epoch": 0.23269040553907022, "grad_norm": 0.8252991162421606, "learning_rate": 4.98507097716232e-06, "loss": 0.5114, "step": 941 }, { "epoch": 0.23293768545994065, "grad_norm": 0.836106206941783, "learning_rate": 4.98503548851027e-06, "loss": 0.483, "step": 942 }, { "epoch": 0.23318496538081107, "grad_norm": 0.8506488758080649, "learning_rate": 4.984999957853829e-06, "loss": 0.4987, "step": 943 }, { "epoch": 0.2334322453016815, "grad_norm": 0.8289096749494381, "learning_rate": 4.984964385193598e-06, "loss": 0.5003, "step": 944 }, { "epoch": 0.23367952522255192, "grad_norm": 0.8768460912190904, "learning_rate": 4.9849287705301786e-06, "loss": 0.475, "step": 945 }, { "epoch": 0.23392680514342234, "grad_norm": 0.8140839084687289, "learning_rate": 4.984893113864173e-06, "loss": 0.5001, "step": 946 }, { "epoch": 0.23417408506429277, "grad_norm": 0.8527563886559387, "learning_rate": 4.9848574151961835e-06, "loss": 0.5011, "step": 947 }, { "epoch": 0.2344213649851632, "grad_norm": 0.8497649216961695, "learning_rate": 4.984821674526813e-06, "loss": 0.5048, "step": 948 }, { "epoch": 0.23466864490603362, "grad_norm": 0.8118793995529595, "learning_rate": 4.984785891856667e-06, "loss": 0.4742, "step": 949 }, { "epoch": 0.23491592482690404, "grad_norm": 0.8791594821173937, "learning_rate": 4.984750067186349e-06, "loss": 0.4938, "step": 950 }, { "epoch": 0.23516320474777447, "grad_norm": 0.8888145994376402, "learning_rate": 4.984714200516465e-06, "loss": 0.5096, "step": 951 }, { "epoch": 0.2354104846686449, "grad_norm": 0.8348077340788664, "learning_rate": 4.9846782918476225e-06, "loss": 0.4902, "step": 952 }, { "epoch": 0.23565776458951534, "grad_norm": 0.8778483850793724, "learning_rate": 4.9846423411804255e-06, "loss": 0.4926, "step": 953 }, { "epoch": 0.23590504451038577, "grad_norm": 0.8187933596056294, "learning_rate": 4.984606348515485e-06, "loss": 0.4858, "step": 954 }, { "epoch": 0.2361523244312562, "grad_norm": 0.7797555946495061, "learning_rate": 4.984570313853408e-06, "loss": 0.4931, "step": 955 }, { "epoch": 0.23639960435212662, "grad_norm": 0.8552422210148904, "learning_rate": 4.984534237194802e-06, "loss": 0.5172, "step": 956 }, { "epoch": 0.23664688427299704, "grad_norm": 0.8013218564963912, "learning_rate": 4.984498118540279e-06, "loss": 0.4941, "step": 957 }, { "epoch": 0.23689416419386747, "grad_norm": 0.8354238632072138, "learning_rate": 4.984461957890449e-06, "loss": 0.4857, "step": 958 }, { "epoch": 0.2371414441147379, "grad_norm": 0.8201898617265962, "learning_rate": 4.984425755245923e-06, "loss": 0.4968, "step": 959 }, { "epoch": 0.23738872403560832, "grad_norm": 0.8470092115227233, "learning_rate": 4.984389510607313e-06, "loss": 0.4862, "step": 960 }, { "epoch": 0.23763600395647874, "grad_norm": 0.845953728104242, "learning_rate": 4.984353223975231e-06, "loss": 0.486, "step": 961 }, { "epoch": 0.23788328387734917, "grad_norm": 0.8270735966336179, "learning_rate": 4.98431689535029e-06, "loss": 0.5043, "step": 962 }, { "epoch": 0.2381305637982196, "grad_norm": 0.8152648813245965, "learning_rate": 4.984280524733107e-06, "loss": 0.4591, "step": 963 }, { "epoch": 0.23837784371909002, "grad_norm": 0.8230530257009434, "learning_rate": 4.984244112124293e-06, "loss": 0.4709, "step": 964 }, { "epoch": 0.23862512363996044, "grad_norm": 0.8769324474154776, "learning_rate": 4.9842076575244665e-06, "loss": 0.4615, "step": 965 }, { "epoch": 0.23887240356083086, "grad_norm": 0.8170880234645825, "learning_rate": 4.984171160934243e-06, "loss": 0.4801, "step": 966 }, { "epoch": 0.2391196834817013, "grad_norm": 0.8473619382925522, "learning_rate": 4.9841346223542375e-06, "loss": 0.4687, "step": 967 }, { "epoch": 0.23936696340257171, "grad_norm": 0.8226901640257309, "learning_rate": 4.984098041785069e-06, "loss": 0.4927, "step": 968 }, { "epoch": 0.23961424332344214, "grad_norm": 0.9073747433995021, "learning_rate": 4.9840614192273565e-06, "loss": 0.4763, "step": 969 }, { "epoch": 0.23986152324431256, "grad_norm": 0.8676535240669757, "learning_rate": 4.984024754681717e-06, "loss": 0.4699, "step": 970 }, { "epoch": 0.240108803165183, "grad_norm": 0.8592342568796588, "learning_rate": 4.983988048148773e-06, "loss": 0.4589, "step": 971 }, { "epoch": 0.2403560830860534, "grad_norm": 0.8106560924699778, "learning_rate": 4.983951299629142e-06, "loss": 0.4864, "step": 972 }, { "epoch": 0.24060336300692384, "grad_norm": 0.8273722868885549, "learning_rate": 4.983914509123447e-06, "loss": 0.4741, "step": 973 }, { "epoch": 0.24085064292779426, "grad_norm": 0.7817588752032723, "learning_rate": 4.983877676632311e-06, "loss": 0.4825, "step": 974 }, { "epoch": 0.2410979228486647, "grad_norm": 0.7946571630913781, "learning_rate": 4.983840802156353e-06, "loss": 0.5078, "step": 975 }, { "epoch": 0.2413452027695351, "grad_norm": 0.8172722258882839, "learning_rate": 4.983803885696199e-06, "loss": 0.4906, "step": 976 }, { "epoch": 0.24159248269040554, "grad_norm": 0.858810786727647, "learning_rate": 4.983766927252472e-06, "loss": 0.473, "step": 977 }, { "epoch": 0.24183976261127596, "grad_norm": 0.8672721626002375, "learning_rate": 4.983729926825798e-06, "loss": 0.4743, "step": 978 }, { "epoch": 0.24208704253214638, "grad_norm": 0.9329297411602999, "learning_rate": 4.983692884416801e-06, "loss": 0.472, "step": 979 }, { "epoch": 0.2423343224530168, "grad_norm": 0.9015184792086365, "learning_rate": 4.983655800026108e-06, "loss": 0.5101, "step": 980 }, { "epoch": 0.24258160237388723, "grad_norm": 0.8477549099000955, "learning_rate": 4.983618673654344e-06, "loss": 0.5164, "step": 981 }, { "epoch": 0.24282888229475766, "grad_norm": 0.8737374865512695, "learning_rate": 4.983581505302139e-06, "loss": 0.5048, "step": 982 }, { "epoch": 0.24307616221562808, "grad_norm": 0.8492727947637201, "learning_rate": 4.983544294970121e-06, "loss": 0.4872, "step": 983 }, { "epoch": 0.2433234421364985, "grad_norm": 0.8312941393834273, "learning_rate": 4.983507042658917e-06, "loss": 0.4921, "step": 984 }, { "epoch": 0.24357072205736893, "grad_norm": 0.8457934782705832, "learning_rate": 4.983469748369159e-06, "loss": 0.5009, "step": 985 }, { "epoch": 0.24381800197823936, "grad_norm": 0.889062562190951, "learning_rate": 4.983432412101475e-06, "loss": 0.5046, "step": 986 }, { "epoch": 0.24406528189910978, "grad_norm": 0.9200936675121365, "learning_rate": 4.983395033856498e-06, "loss": 0.5063, "step": 987 }, { "epoch": 0.2443125618199802, "grad_norm": 0.865579100161537, "learning_rate": 4.9833576136348595e-06, "loss": 0.4931, "step": 988 }, { "epoch": 0.24455984174085063, "grad_norm": 0.8758672885998708, "learning_rate": 4.983320151437191e-06, "loss": 0.481, "step": 989 }, { "epoch": 0.24480712166172106, "grad_norm": 0.8658318444737918, "learning_rate": 4.983282647264126e-06, "loss": 0.4712, "step": 990 }, { "epoch": 0.24505440158259148, "grad_norm": 0.8382351361641673, "learning_rate": 4.983245101116299e-06, "loss": 0.4911, "step": 991 }, { "epoch": 0.24530168150346193, "grad_norm": 0.8463983674050847, "learning_rate": 4.983207512994345e-06, "loss": 0.5312, "step": 992 }, { "epoch": 0.24554896142433236, "grad_norm": 0.8829203582411207, "learning_rate": 4.983169882898898e-06, "loss": 0.459, "step": 993 }, { "epoch": 0.24579624134520278, "grad_norm": 0.8208662802668374, "learning_rate": 4.983132210830596e-06, "loss": 0.4835, "step": 994 }, { "epoch": 0.2460435212660732, "grad_norm": 0.868902743454076, "learning_rate": 4.983094496790074e-06, "loss": 0.4895, "step": 995 }, { "epoch": 0.24629080118694363, "grad_norm": 0.8525195601900595, "learning_rate": 4.98305674077797e-06, "loss": 0.5008, "step": 996 }, { "epoch": 0.24653808110781406, "grad_norm": 0.8486858338119296, "learning_rate": 4.9830189427949225e-06, "loss": 0.4767, "step": 997 }, { "epoch": 0.24678536102868448, "grad_norm": 0.8399588457998348, "learning_rate": 4.982981102841569e-06, "loss": 0.4807, "step": 998 }, { "epoch": 0.2470326409495549, "grad_norm": 0.8400317988123928, "learning_rate": 4.982943220918552e-06, "loss": 0.4472, "step": 999 }, { "epoch": 0.24727992087042533, "grad_norm": 0.7933513082453183, "learning_rate": 4.982905297026509e-06, "loss": 0.4897, "step": 1000 }, { "epoch": 0.24752720079129575, "grad_norm": 0.8533021805361142, "learning_rate": 4.982867331166083e-06, "loss": 0.4826, "step": 1001 }, { "epoch": 0.24777448071216618, "grad_norm": 0.871571750396739, "learning_rate": 4.982829323337914e-06, "loss": 0.4822, "step": 1002 }, { "epoch": 0.2480217606330366, "grad_norm": 0.8561307423314862, "learning_rate": 4.982791273542646e-06, "loss": 0.4928, "step": 1003 }, { "epoch": 0.24826904055390703, "grad_norm": 0.8162243679119858, "learning_rate": 4.9827531817809215e-06, "loss": 0.4918, "step": 1004 }, { "epoch": 0.24851632047477745, "grad_norm": 0.8091135061963646, "learning_rate": 4.9827150480533835e-06, "loss": 0.5059, "step": 1005 }, { "epoch": 0.24876360039564788, "grad_norm": 0.839406106272644, "learning_rate": 4.982676872360677e-06, "loss": 0.5087, "step": 1006 }, { "epoch": 0.2490108803165183, "grad_norm": 0.8313205468854626, "learning_rate": 4.982638654703449e-06, "loss": 0.4686, "step": 1007 }, { "epoch": 0.24925816023738873, "grad_norm": 0.8271088700543145, "learning_rate": 4.9826003950823445e-06, "loss": 0.4938, "step": 1008 }, { "epoch": 0.24950544015825915, "grad_norm": 0.8405321735427581, "learning_rate": 4.982562093498009e-06, "loss": 0.4876, "step": 1009 }, { "epoch": 0.24975272007912958, "grad_norm": 0.8665303043556788, "learning_rate": 4.982523749951091e-06, "loss": 0.4805, "step": 1010 }, { "epoch": 0.25, "grad_norm": 0.8787115918536075, "learning_rate": 4.982485364442238e-06, "loss": 0.487, "step": 1011 }, { "epoch": 0.2502472799208704, "grad_norm": 0.8409882874323071, "learning_rate": 4.982446936972099e-06, "loss": 0.4678, "step": 1012 }, { "epoch": 0.25049455984174085, "grad_norm": 0.8561426691383394, "learning_rate": 4.982408467541325e-06, "loss": 0.4897, "step": 1013 }, { "epoch": 0.2507418397626113, "grad_norm": 0.8572923571758608, "learning_rate": 4.982369956150563e-06, "loss": 0.4852, "step": 1014 }, { "epoch": 0.2509891196834817, "grad_norm": 0.8981118858205535, "learning_rate": 4.982331402800468e-06, "loss": 0.4807, "step": 1015 }, { "epoch": 0.2512363996043521, "grad_norm": 0.9095037169315664, "learning_rate": 4.982292807491688e-06, "loss": 0.5035, "step": 1016 }, { "epoch": 0.25148367952522255, "grad_norm": 0.8421819705790509, "learning_rate": 4.982254170224878e-06, "loss": 0.4421, "step": 1017 }, { "epoch": 0.251730959446093, "grad_norm": 0.8399733420526356, "learning_rate": 4.982215491000689e-06, "loss": 0.4987, "step": 1018 }, { "epoch": 0.2519782393669634, "grad_norm": 0.8546805880965436, "learning_rate": 4.982176769819777e-06, "loss": 0.494, "step": 1019 }, { "epoch": 0.2522255192878338, "grad_norm": 0.891980552444231, "learning_rate": 4.982138006682795e-06, "loss": 0.4995, "step": 1020 }, { "epoch": 0.25247279920870425, "grad_norm": 0.832920847499595, "learning_rate": 4.982099201590399e-06, "loss": 0.4675, "step": 1021 }, { "epoch": 0.25272007912957467, "grad_norm": 0.8018275844329361, "learning_rate": 4.982060354543244e-06, "loss": 0.5018, "step": 1022 }, { "epoch": 0.2529673590504451, "grad_norm": 0.8518048762893595, "learning_rate": 4.982021465541988e-06, "loss": 0.5046, "step": 1023 }, { "epoch": 0.2532146389713155, "grad_norm": 0.830301537925145, "learning_rate": 4.9819825345872855e-06, "loss": 0.4926, "step": 1024 }, { "epoch": 0.25346191889218594, "grad_norm": 0.8314313821169597, "learning_rate": 4.981943561679799e-06, "loss": 0.4857, "step": 1025 }, { "epoch": 0.25370919881305637, "grad_norm": 0.8085067071753731, "learning_rate": 4.981904546820183e-06, "loss": 0.4997, "step": 1026 }, { "epoch": 0.2539564787339268, "grad_norm": 0.8312408342022083, "learning_rate": 4.981865490009099e-06, "loss": 0.4873, "step": 1027 }, { "epoch": 0.2542037586547972, "grad_norm": 0.8236446385494339, "learning_rate": 4.9818263912472074e-06, "loss": 0.4854, "step": 1028 }, { "epoch": 0.25445103857566764, "grad_norm": 0.8279083883523636, "learning_rate": 4.9817872505351686e-06, "loss": 0.4848, "step": 1029 }, { "epoch": 0.25469831849653807, "grad_norm": 0.8690599083185739, "learning_rate": 4.9817480678736426e-06, "loss": 0.4865, "step": 1030 }, { "epoch": 0.2549455984174085, "grad_norm": 0.8421918735243161, "learning_rate": 4.981708843263295e-06, "loss": 0.4685, "step": 1031 }, { "epoch": 0.2551928783382789, "grad_norm": 0.8407079219400131, "learning_rate": 4.981669576704787e-06, "loss": 0.4956, "step": 1032 }, { "epoch": 0.25544015825914934, "grad_norm": 0.8773337094609159, "learning_rate": 4.9816302681987825e-06, "loss": 0.4724, "step": 1033 }, { "epoch": 0.25568743818001977, "grad_norm": 0.868844911839338, "learning_rate": 4.981590917745945e-06, "loss": 0.4981, "step": 1034 }, { "epoch": 0.2559347181008902, "grad_norm": 0.8666128355880677, "learning_rate": 4.981551525346941e-06, "loss": 0.5125, "step": 1035 }, { "epoch": 0.2561819980217606, "grad_norm": 0.9213503864137835, "learning_rate": 4.9815120910024365e-06, "loss": 0.516, "step": 1036 }, { "epoch": 0.25642927794263104, "grad_norm": 0.8307356659494541, "learning_rate": 4.981472614713096e-06, "loss": 0.5132, "step": 1037 }, { "epoch": 0.25667655786350146, "grad_norm": 0.8297935826184679, "learning_rate": 4.981433096479588e-06, "loss": 0.4802, "step": 1038 }, { "epoch": 0.2569238377843719, "grad_norm": 0.8327277791219414, "learning_rate": 4.981393536302582e-06, "loss": 0.4928, "step": 1039 }, { "epoch": 0.2571711177052423, "grad_norm": 0.8707053568449094, "learning_rate": 4.981353934182745e-06, "loss": 0.4899, "step": 1040 }, { "epoch": 0.25741839762611274, "grad_norm": 0.8652524290884858, "learning_rate": 4.981314290120747e-06, "loss": 0.4886, "step": 1041 }, { "epoch": 0.25766567754698316, "grad_norm": 0.8466892303798033, "learning_rate": 4.981274604117257e-06, "loss": 0.5103, "step": 1042 }, { "epoch": 0.2579129574678536, "grad_norm": 0.9783250538294788, "learning_rate": 4.981234876172947e-06, "loss": 0.4887, "step": 1043 }, { "epoch": 0.258160237388724, "grad_norm": 0.9318808540890211, "learning_rate": 4.981195106288488e-06, "loss": 0.5011, "step": 1044 }, { "epoch": 0.25840751730959444, "grad_norm": 0.8551466560368262, "learning_rate": 4.981155294464552e-06, "loss": 0.5029, "step": 1045 }, { "epoch": 0.25865479723046486, "grad_norm": 0.8793301608280027, "learning_rate": 4.981115440701814e-06, "loss": 0.4742, "step": 1046 }, { "epoch": 0.2589020771513353, "grad_norm": 0.8969182144318821, "learning_rate": 4.981075545000944e-06, "loss": 0.5168, "step": 1047 }, { "epoch": 0.2591493570722057, "grad_norm": 0.8662813622902052, "learning_rate": 4.981035607362619e-06, "loss": 0.4981, "step": 1048 }, { "epoch": 0.25939663699307614, "grad_norm": 0.8852575900920927, "learning_rate": 4.980995627787513e-06, "loss": 0.4845, "step": 1049 }, { "epoch": 0.2596439169139466, "grad_norm": 0.8434609325170388, "learning_rate": 4.980955606276303e-06, "loss": 0.4663, "step": 1050 }, { "epoch": 0.25989119683481704, "grad_norm": 0.8403758855291492, "learning_rate": 4.980915542829664e-06, "loss": 0.4831, "step": 1051 }, { "epoch": 0.26013847675568746, "grad_norm": 0.8877325981859041, "learning_rate": 4.980875437448274e-06, "loss": 0.4785, "step": 1052 }, { "epoch": 0.2603857566765579, "grad_norm": 0.8052008309585926, "learning_rate": 4.98083529013281e-06, "loss": 0.5048, "step": 1053 }, { "epoch": 0.2606330365974283, "grad_norm": 0.8478864104168425, "learning_rate": 4.980795100883953e-06, "loss": 0.4704, "step": 1054 }, { "epoch": 0.26088031651829874, "grad_norm": 0.8794267459522233, "learning_rate": 4.9807548697023795e-06, "loss": 0.4629, "step": 1055 }, { "epoch": 0.26112759643916916, "grad_norm": 0.8058144806983949, "learning_rate": 4.9807145965887705e-06, "loss": 0.4852, "step": 1056 }, { "epoch": 0.2613748763600396, "grad_norm": 0.8843411492674896, "learning_rate": 4.980674281543807e-06, "loss": 0.4644, "step": 1057 }, { "epoch": 0.26162215628091, "grad_norm": 0.8493178218035321, "learning_rate": 4.98063392456817e-06, "loss": 0.4879, "step": 1058 }, { "epoch": 0.26186943620178044, "grad_norm": 0.8356662278593041, "learning_rate": 4.980593525662544e-06, "loss": 0.4703, "step": 1059 }, { "epoch": 0.26211671612265086, "grad_norm": 0.8475187900521053, "learning_rate": 4.980553084827607e-06, "loss": 0.4914, "step": 1060 }, { "epoch": 0.2623639960435213, "grad_norm": 0.8450529868997396, "learning_rate": 4.980512602064047e-06, "loss": 0.4844, "step": 1061 }, { "epoch": 0.2626112759643917, "grad_norm": 0.8079903048049786, "learning_rate": 4.9804720773725465e-06, "loss": 0.4752, "step": 1062 }, { "epoch": 0.26285855588526214, "grad_norm": 0.8511900505503611, "learning_rate": 4.980431510753791e-06, "loss": 0.4774, "step": 1063 }, { "epoch": 0.26310583580613256, "grad_norm": 0.8332435125227167, "learning_rate": 4.980390902208465e-06, "loss": 0.4751, "step": 1064 }, { "epoch": 0.263353115727003, "grad_norm": 0.8536824402482716, "learning_rate": 4.980350251737256e-06, "loss": 0.5205, "step": 1065 }, { "epoch": 0.2636003956478734, "grad_norm": 0.8553553797802288, "learning_rate": 4.980309559340851e-06, "loss": 0.4665, "step": 1066 }, { "epoch": 0.26384767556874383, "grad_norm": 0.8286430821029018, "learning_rate": 4.980268825019939e-06, "loss": 0.4861, "step": 1067 }, { "epoch": 0.26409495548961426, "grad_norm": 0.8349366249443254, "learning_rate": 4.980228048775205e-06, "loss": 0.4921, "step": 1068 }, { "epoch": 0.2643422354104847, "grad_norm": 0.8327046354641331, "learning_rate": 4.980187230607341e-06, "loss": 0.4672, "step": 1069 }, { "epoch": 0.2645895153313551, "grad_norm": 0.8665614695088318, "learning_rate": 4.980146370517037e-06, "loss": 0.4803, "step": 1070 }, { "epoch": 0.26483679525222553, "grad_norm": 0.8410944228997952, "learning_rate": 4.980105468504983e-06, "loss": 0.4753, "step": 1071 }, { "epoch": 0.26508407517309596, "grad_norm": 0.8731177178369249, "learning_rate": 4.9800645245718705e-06, "loss": 0.5105, "step": 1072 }, { "epoch": 0.2653313550939664, "grad_norm": 0.87074377533807, "learning_rate": 4.980023538718392e-06, "loss": 0.4868, "step": 1073 }, { "epoch": 0.2655786350148368, "grad_norm": 0.8497553336959501, "learning_rate": 4.979982510945239e-06, "loss": 0.46, "step": 1074 }, { "epoch": 0.26582591493570723, "grad_norm": 0.8482593664870046, "learning_rate": 4.9799414412531056e-06, "loss": 0.5059, "step": 1075 }, { "epoch": 0.26607319485657766, "grad_norm": 0.8734764426183708, "learning_rate": 4.9799003296426864e-06, "loss": 0.457, "step": 1076 }, { "epoch": 0.2663204747774481, "grad_norm": 0.8762054435854876, "learning_rate": 4.979859176114676e-06, "loss": 0.4828, "step": 1077 }, { "epoch": 0.2665677546983185, "grad_norm": 0.8386188836793864, "learning_rate": 4.979817980669771e-06, "loss": 0.4531, "step": 1078 }, { "epoch": 0.26681503461918893, "grad_norm": 0.8303211743323535, "learning_rate": 4.979776743308667e-06, "loss": 0.4786, "step": 1079 }, { "epoch": 0.26706231454005935, "grad_norm": 0.8763603274871483, "learning_rate": 4.979735464032059e-06, "loss": 0.4729, "step": 1080 }, { "epoch": 0.2673095944609298, "grad_norm": 0.8482768577451538, "learning_rate": 4.979694142840647e-06, "loss": 0.4685, "step": 1081 }, { "epoch": 0.2675568743818002, "grad_norm": 0.8181406667783483, "learning_rate": 4.9796527797351304e-06, "loss": 0.4883, "step": 1082 }, { "epoch": 0.2678041543026706, "grad_norm": 0.820627311447771, "learning_rate": 4.979611374716207e-06, "loss": 0.4595, "step": 1083 }, { "epoch": 0.26805143422354105, "grad_norm": 0.8494425109762445, "learning_rate": 4.979569927784576e-06, "loss": 0.5001, "step": 1084 }, { "epoch": 0.2682987141444115, "grad_norm": 0.8347143974885791, "learning_rate": 4.979528438940938e-06, "loss": 0.4854, "step": 1085 }, { "epoch": 0.2685459940652819, "grad_norm": 0.8293185823179237, "learning_rate": 4.979486908185996e-06, "loss": 0.491, "step": 1086 }, { "epoch": 0.2687932739861523, "grad_norm": 0.8624687861606527, "learning_rate": 4.97944533552045e-06, "loss": 0.4668, "step": 1087 }, { "epoch": 0.26904055390702275, "grad_norm": 0.8647425254764696, "learning_rate": 4.979403720945004e-06, "loss": 0.4785, "step": 1088 }, { "epoch": 0.2692878338278932, "grad_norm": 0.8642500279889467, "learning_rate": 4.979362064460361e-06, "loss": 0.4906, "step": 1089 }, { "epoch": 0.2695351137487636, "grad_norm": 0.8188279970742318, "learning_rate": 4.979320366067226e-06, "loss": 0.4922, "step": 1090 }, { "epoch": 0.269782393669634, "grad_norm": 0.8494061066145452, "learning_rate": 4.979278625766302e-06, "loss": 0.4373, "step": 1091 }, { "epoch": 0.27002967359050445, "grad_norm": 0.8541094055180173, "learning_rate": 4.979236843558296e-06, "loss": 0.4982, "step": 1092 }, { "epoch": 0.2702769535113749, "grad_norm": 0.8901352777542862, "learning_rate": 4.979195019443913e-06, "loss": 0.4895, "step": 1093 }, { "epoch": 0.2705242334322453, "grad_norm": 0.8723236124194512, "learning_rate": 4.9791531534238615e-06, "loss": 0.4876, "step": 1094 }, { "epoch": 0.2707715133531157, "grad_norm": 0.8460275573328927, "learning_rate": 4.9791112454988485e-06, "loss": 0.4582, "step": 1095 }, { "epoch": 0.27101879327398615, "grad_norm": 0.8032142309223071, "learning_rate": 4.979069295669582e-06, "loss": 0.4979, "step": 1096 }, { "epoch": 0.27126607319485657, "grad_norm": 0.8984869165646926, "learning_rate": 4.979027303936771e-06, "loss": 0.4883, "step": 1097 }, { "epoch": 0.271513353115727, "grad_norm": 0.823481716476794, "learning_rate": 4.9789852703011255e-06, "loss": 0.4748, "step": 1098 }, { "epoch": 0.2717606330365974, "grad_norm": 0.8770522684481887, "learning_rate": 4.978943194763356e-06, "loss": 0.4761, "step": 1099 }, { "epoch": 0.27200791295746785, "grad_norm": 0.8827483342198571, "learning_rate": 4.978901077324174e-06, "loss": 0.5047, "step": 1100 }, { "epoch": 0.27225519287833827, "grad_norm": 0.8442680181370851, "learning_rate": 4.978858917984292e-06, "loss": 0.476, "step": 1101 }, { "epoch": 0.2725024727992087, "grad_norm": 0.8093223452069177, "learning_rate": 4.9788167167444206e-06, "loss": 0.4974, "step": 1102 }, { "epoch": 0.2727497527200791, "grad_norm": 0.8520597566674003, "learning_rate": 4.978774473605274e-06, "loss": 0.4953, "step": 1103 }, { "epoch": 0.27299703264094954, "grad_norm": 0.8851794426709616, "learning_rate": 4.978732188567568e-06, "loss": 0.4748, "step": 1104 }, { "epoch": 0.27324431256181997, "grad_norm": 0.8047095063413441, "learning_rate": 4.978689861632016e-06, "loss": 0.4799, "step": 1105 }, { "epoch": 0.2734915924826904, "grad_norm": 0.8036515216440242, "learning_rate": 4.978647492799332e-06, "loss": 0.4623, "step": 1106 }, { "epoch": 0.2737388724035608, "grad_norm": 0.8287473081378811, "learning_rate": 4.978605082070234e-06, "loss": 0.4808, "step": 1107 }, { "epoch": 0.27398615232443124, "grad_norm": 0.7830078111911496, "learning_rate": 4.9785626294454385e-06, "loss": 0.4848, "step": 1108 }, { "epoch": 0.27423343224530167, "grad_norm": 0.8796446581845684, "learning_rate": 4.978520134925663e-06, "loss": 0.4649, "step": 1109 }, { "epoch": 0.2744807121661721, "grad_norm": 0.8207939048014206, "learning_rate": 4.978477598511625e-06, "loss": 0.4956, "step": 1110 }, { "epoch": 0.2747279920870425, "grad_norm": 0.8206309916991434, "learning_rate": 4.978435020204045e-06, "loss": 0.5177, "step": 1111 }, { "epoch": 0.27497527200791294, "grad_norm": 0.8349867209882694, "learning_rate": 4.978392400003642e-06, "loss": 0.4801, "step": 1112 }, { "epoch": 0.27522255192878337, "grad_norm": 0.8467923995371689, "learning_rate": 4.978349737911136e-06, "loss": 0.4868, "step": 1113 }, { "epoch": 0.2754698318496538, "grad_norm": 0.8578801603556364, "learning_rate": 4.9783070339272485e-06, "loss": 0.487, "step": 1114 }, { "epoch": 0.2757171117705242, "grad_norm": 0.7810402137613652, "learning_rate": 4.978264288052701e-06, "loss": 0.4741, "step": 1115 }, { "epoch": 0.27596439169139464, "grad_norm": 0.7896403749725643, "learning_rate": 4.978221500288217e-06, "loss": 0.5014, "step": 1116 }, { "epoch": 0.27621167161226506, "grad_norm": 0.8440418279389584, "learning_rate": 4.978178670634518e-06, "loss": 0.4677, "step": 1117 }, { "epoch": 0.2764589515331355, "grad_norm": 0.8645968361942973, "learning_rate": 4.97813579909233e-06, "loss": 0.4951, "step": 1118 }, { "epoch": 0.2767062314540059, "grad_norm": 0.8850512904631548, "learning_rate": 4.9780928856623765e-06, "loss": 0.4813, "step": 1119 }, { "epoch": 0.27695351137487634, "grad_norm": 0.8682217080189116, "learning_rate": 4.978049930345382e-06, "loss": 0.4832, "step": 1120 }, { "epoch": 0.27720079129574676, "grad_norm": 0.8551141090731345, "learning_rate": 4.978006933142075e-06, "loss": 0.4796, "step": 1121 }, { "epoch": 0.2774480712166172, "grad_norm": 0.8561104145175743, "learning_rate": 4.97796389405318e-06, "loss": 0.4945, "step": 1122 }, { "epoch": 0.2776953511374876, "grad_norm": 0.8408560414663986, "learning_rate": 4.977920813079426e-06, "loss": 0.464, "step": 1123 }, { "epoch": 0.27794263105835804, "grad_norm": 0.8654252795240646, "learning_rate": 4.97787769022154e-06, "loss": 0.4858, "step": 1124 }, { "epoch": 0.27818991097922846, "grad_norm": 0.860275164094403, "learning_rate": 4.9778345254802505e-06, "loss": 0.4902, "step": 1125 }, { "epoch": 0.2784371909000989, "grad_norm": 0.8232668434710275, "learning_rate": 4.977791318856289e-06, "loss": 0.4662, "step": 1126 }, { "epoch": 0.2786844708209693, "grad_norm": 0.8368306472164873, "learning_rate": 4.977748070350385e-06, "loss": 0.4809, "step": 1127 }, { "epoch": 0.2789317507418398, "grad_norm": 0.836130275495686, "learning_rate": 4.977704779963269e-06, "loss": 0.4929, "step": 1128 }, { "epoch": 0.2791790306627102, "grad_norm": 0.9149228882397445, "learning_rate": 4.9776614476956735e-06, "loss": 0.4691, "step": 1129 }, { "epoch": 0.27942631058358064, "grad_norm": 0.8068181975659364, "learning_rate": 4.97761807354833e-06, "loss": 0.4684, "step": 1130 }, { "epoch": 0.27967359050445106, "grad_norm": 0.8422358197300707, "learning_rate": 4.977574657521973e-06, "loss": 0.4761, "step": 1131 }, { "epoch": 0.2799208704253215, "grad_norm": 0.8502642960030118, "learning_rate": 4.977531199617335e-06, "loss": 0.4574, "step": 1132 }, { "epoch": 0.2801681503461919, "grad_norm": 0.8596392419555559, "learning_rate": 4.977487699835151e-06, "loss": 0.4956, "step": 1133 }, { "epoch": 0.28041543026706234, "grad_norm": 0.8457975155369798, "learning_rate": 4.977444158176157e-06, "loss": 0.5049, "step": 1134 }, { "epoch": 0.28066271018793276, "grad_norm": 0.8237071413210715, "learning_rate": 4.9774005746410885e-06, "loss": 0.4795, "step": 1135 }, { "epoch": 0.2809099901088032, "grad_norm": 0.8690081793064807, "learning_rate": 4.977356949230681e-06, "loss": 0.4831, "step": 1136 }, { "epoch": 0.2811572700296736, "grad_norm": 0.8522428926389992, "learning_rate": 4.977313281945674e-06, "loss": 0.4606, "step": 1137 }, { "epoch": 0.28140454995054404, "grad_norm": 0.8686896451812126, "learning_rate": 4.977269572786804e-06, "loss": 0.4681, "step": 1138 }, { "epoch": 0.28165182987141446, "grad_norm": 0.8152365489529878, "learning_rate": 4.9772258217548105e-06, "loss": 0.4911, "step": 1139 }, { "epoch": 0.2818991097922849, "grad_norm": 0.8264760785996421, "learning_rate": 4.977182028850434e-06, "loss": 0.4598, "step": 1140 }, { "epoch": 0.2821463897131553, "grad_norm": 0.8211689788246055, "learning_rate": 4.9771381940744114e-06, "loss": 0.457, "step": 1141 }, { "epoch": 0.28239366963402573, "grad_norm": 0.8623747209430598, "learning_rate": 4.977094317427488e-06, "loss": 0.4527, "step": 1142 }, { "epoch": 0.28264094955489616, "grad_norm": 0.8244132721987166, "learning_rate": 4.977050398910402e-06, "loss": 0.4821, "step": 1143 }, { "epoch": 0.2828882294757666, "grad_norm": 0.88533076955512, "learning_rate": 4.977006438523898e-06, "loss": 0.4614, "step": 1144 }, { "epoch": 0.283135509396637, "grad_norm": 0.91782791472328, "learning_rate": 4.9769624362687175e-06, "loss": 0.5028, "step": 1145 }, { "epoch": 0.28338278931750743, "grad_norm": 0.8483629265141885, "learning_rate": 4.9769183921456045e-06, "loss": 0.4735, "step": 1146 }, { "epoch": 0.28363006923837786, "grad_norm": 0.8051321989492259, "learning_rate": 4.976874306155305e-06, "loss": 0.4827, "step": 1147 }, { "epoch": 0.2838773491592483, "grad_norm": 0.9174584209766616, "learning_rate": 4.9768301782985625e-06, "loss": 0.4711, "step": 1148 }, { "epoch": 0.2841246290801187, "grad_norm": 0.8715629939118485, "learning_rate": 4.9767860085761234e-06, "loss": 0.4716, "step": 1149 }, { "epoch": 0.28437190900098913, "grad_norm": 0.8190866023796931, "learning_rate": 4.9767417969887345e-06, "loss": 0.4717, "step": 1150 }, { "epoch": 0.28461918892185956, "grad_norm": 0.8436727789264102, "learning_rate": 4.976697543537144e-06, "loss": 0.4655, "step": 1151 }, { "epoch": 0.28486646884273, "grad_norm": 0.9569028098073552, "learning_rate": 4.976653248222097e-06, "loss": 0.4459, "step": 1152 }, { "epoch": 0.2851137487636004, "grad_norm": 0.9183949145964179, "learning_rate": 4.976608911044345e-06, "loss": 0.4595, "step": 1153 }, { "epoch": 0.28536102868447083, "grad_norm": 0.8331716153606737, "learning_rate": 4.976564532004636e-06, "loss": 0.4654, "step": 1154 }, { "epoch": 0.28560830860534125, "grad_norm": 0.818296150644725, "learning_rate": 4.976520111103721e-06, "loss": 0.4777, "step": 1155 }, { "epoch": 0.2858555885262117, "grad_norm": 0.88625253346554, "learning_rate": 4.976475648342351e-06, "loss": 0.4807, "step": 1156 }, { "epoch": 0.2861028684470821, "grad_norm": 0.8305715999786408, "learning_rate": 4.976431143721277e-06, "loss": 0.4482, "step": 1157 }, { "epoch": 0.28635014836795253, "grad_norm": 0.8426926584136363, "learning_rate": 4.976386597241251e-06, "loss": 0.4872, "step": 1158 }, { "epoch": 0.28659742828882295, "grad_norm": 0.8366689422958908, "learning_rate": 4.976342008903025e-06, "loss": 0.486, "step": 1159 }, { "epoch": 0.2868447082096934, "grad_norm": 0.8956525760405114, "learning_rate": 4.976297378707355e-06, "loss": 0.4715, "step": 1160 }, { "epoch": 0.2870919881305638, "grad_norm": 0.8272900765226568, "learning_rate": 4.976252706654995e-06, "loss": 0.4691, "step": 1161 }, { "epoch": 0.2873392680514342, "grad_norm": 0.8050824860585051, "learning_rate": 4.976207992746699e-06, "loss": 0.4725, "step": 1162 }, { "epoch": 0.28758654797230465, "grad_norm": 0.8407468844517442, "learning_rate": 4.976163236983223e-06, "loss": 0.4985, "step": 1163 }, { "epoch": 0.2878338278931751, "grad_norm": 0.9432126176475985, "learning_rate": 4.976118439365324e-06, "loss": 0.4755, "step": 1164 }, { "epoch": 0.2880811078140455, "grad_norm": 0.8343734797289791, "learning_rate": 4.976073599893758e-06, "loss": 0.4469, "step": 1165 }, { "epoch": 0.2883283877349159, "grad_norm": 0.80957107924281, "learning_rate": 4.976028718569285e-06, "loss": 0.479, "step": 1166 }, { "epoch": 0.28857566765578635, "grad_norm": 0.7838786090709212, "learning_rate": 4.975983795392662e-06, "loss": 0.4671, "step": 1167 }, { "epoch": 0.2888229475766568, "grad_norm": 0.8892179393889282, "learning_rate": 4.975938830364649e-06, "loss": 0.4629, "step": 1168 }, { "epoch": 0.2890702274975272, "grad_norm": 0.8885646660585704, "learning_rate": 4.975893823486006e-06, "loss": 0.5188, "step": 1169 }, { "epoch": 0.2893175074183976, "grad_norm": 0.8950341542311611, "learning_rate": 4.975848774757493e-06, "loss": 0.4882, "step": 1170 }, { "epoch": 0.28956478733926805, "grad_norm": 0.8153300668207971, "learning_rate": 4.975803684179873e-06, "loss": 0.4822, "step": 1171 }, { "epoch": 0.2898120672601385, "grad_norm": 0.8661852061603399, "learning_rate": 4.975758551753906e-06, "loss": 0.4655, "step": 1172 }, { "epoch": 0.2900593471810089, "grad_norm": 0.8741765156999356, "learning_rate": 4.975713377480357e-06, "loss": 0.4802, "step": 1173 }, { "epoch": 0.2903066271018793, "grad_norm": 0.8232541934414569, "learning_rate": 4.975668161359988e-06, "loss": 0.4991, "step": 1174 }, { "epoch": 0.29055390702274975, "grad_norm": 0.8204906769924353, "learning_rate": 4.9756229033935646e-06, "loss": 0.4593, "step": 1175 }, { "epoch": 0.29080118694362017, "grad_norm": 0.8707746000436765, "learning_rate": 4.97557760358185e-06, "loss": 0.4607, "step": 1176 }, { "epoch": 0.2910484668644906, "grad_norm": 0.9088228180075838, "learning_rate": 4.975532261925612e-06, "loss": 0.479, "step": 1177 }, { "epoch": 0.291295746785361, "grad_norm": 0.8474947512406128, "learning_rate": 4.975486878425616e-06, "loss": 0.449, "step": 1178 }, { "epoch": 0.29154302670623145, "grad_norm": 0.8214047939389825, "learning_rate": 4.975441453082629e-06, "loss": 0.5202, "step": 1179 }, { "epoch": 0.29179030662710187, "grad_norm": 0.874242317071171, "learning_rate": 4.9753959858974195e-06, "loss": 0.5118, "step": 1180 }, { "epoch": 0.2920375865479723, "grad_norm": 0.8628389428690536, "learning_rate": 4.975350476870755e-06, "loss": 0.4553, "step": 1181 }, { "epoch": 0.2922848664688427, "grad_norm": 0.8522753264964066, "learning_rate": 4.975304926003405e-06, "loss": 0.4472, "step": 1182 }, { "epoch": 0.29253214638971314, "grad_norm": 0.8249790271701399, "learning_rate": 4.97525933329614e-06, "loss": 0.4739, "step": 1183 }, { "epoch": 0.29277942631058357, "grad_norm": 0.9003269024269399, "learning_rate": 4.97521369874973e-06, "loss": 0.4815, "step": 1184 }, { "epoch": 0.293026706231454, "grad_norm": 0.920371626612895, "learning_rate": 4.975168022364948e-06, "loss": 0.4552, "step": 1185 }, { "epoch": 0.2932739861523244, "grad_norm": 0.8453419054277556, "learning_rate": 4.975122304142564e-06, "loss": 0.4827, "step": 1186 }, { "epoch": 0.29352126607319484, "grad_norm": 0.893824981194909, "learning_rate": 4.97507654408335e-06, "loss": 0.4593, "step": 1187 }, { "epoch": 0.29376854599406527, "grad_norm": 0.8765416230068477, "learning_rate": 4.9750307421880825e-06, "loss": 0.468, "step": 1188 }, { "epoch": 0.2940158259149357, "grad_norm": 0.7894978603788854, "learning_rate": 4.974984898457534e-06, "loss": 0.486, "step": 1189 }, { "epoch": 0.2942631058358061, "grad_norm": 0.8576759645871942, "learning_rate": 4.9749390128924806e-06, "loss": 0.5149, "step": 1190 }, { "epoch": 0.29451038575667654, "grad_norm": 0.8637297369732642, "learning_rate": 4.9748930854936955e-06, "loss": 0.4778, "step": 1191 }, { "epoch": 0.29475766567754697, "grad_norm": 0.8527005901061284, "learning_rate": 4.974847116261957e-06, "loss": 0.5054, "step": 1192 }, { "epoch": 0.2950049455984174, "grad_norm": 0.815567947874674, "learning_rate": 4.974801105198042e-06, "loss": 0.4802, "step": 1193 }, { "epoch": 0.2952522255192878, "grad_norm": 0.8479219848611034, "learning_rate": 4.974755052302726e-06, "loss": 0.4702, "step": 1194 }, { "epoch": 0.29549950544015824, "grad_norm": 0.8556137872597406, "learning_rate": 4.974708957576791e-06, "loss": 0.4704, "step": 1195 }, { "epoch": 0.29574678536102866, "grad_norm": 0.8398783844649267, "learning_rate": 4.974662821021014e-06, "loss": 0.4854, "step": 1196 }, { "epoch": 0.2959940652818991, "grad_norm": 0.8769661180300431, "learning_rate": 4.974616642636174e-06, "loss": 0.502, "step": 1197 }, { "epoch": 0.2962413452027695, "grad_norm": 0.846002502686499, "learning_rate": 4.974570422423053e-06, "loss": 0.5028, "step": 1198 }, { "epoch": 0.29648862512363994, "grad_norm": 0.8732437761139039, "learning_rate": 4.974524160382433e-06, "loss": 0.4487, "step": 1199 }, { "epoch": 0.29673590504451036, "grad_norm": 0.8534970199568261, "learning_rate": 4.974477856515094e-06, "loss": 0.4772, "step": 1200 }, { "epoch": 0.2969831849653808, "grad_norm": 0.8100385165833147, "learning_rate": 4.97443151082182e-06, "loss": 0.5059, "step": 1201 }, { "epoch": 0.2972304648862512, "grad_norm": 0.8427846867481712, "learning_rate": 4.974385123303394e-06, "loss": 0.4697, "step": 1202 }, { "epoch": 0.29747774480712164, "grad_norm": 0.8733326416501844, "learning_rate": 4.974338693960599e-06, "loss": 0.4638, "step": 1203 }, { "epoch": 0.29772502472799206, "grad_norm": 0.8599299796559156, "learning_rate": 4.974292222794223e-06, "loss": 0.4563, "step": 1204 }, { "epoch": 0.2979723046488625, "grad_norm": 0.8424539008316979, "learning_rate": 4.9742457098050475e-06, "loss": 0.5017, "step": 1205 }, { "epoch": 0.29821958456973297, "grad_norm": 0.8474616781082716, "learning_rate": 4.974199154993862e-06, "loss": 0.4874, "step": 1206 }, { "epoch": 0.2984668644906034, "grad_norm": 0.8432090258049286, "learning_rate": 4.974152558361451e-06, "loss": 0.4536, "step": 1207 }, { "epoch": 0.2987141444114738, "grad_norm": 0.8365560807553833, "learning_rate": 4.9741059199086024e-06, "loss": 0.4971, "step": 1208 }, { "epoch": 0.29896142433234424, "grad_norm": 0.8247988037749963, "learning_rate": 4.974059239636106e-06, "loss": 0.4564, "step": 1209 }, { "epoch": 0.29920870425321466, "grad_norm": 0.8364405143056981, "learning_rate": 4.97401251754475e-06, "loss": 0.539, "step": 1210 }, { "epoch": 0.2994559841740851, "grad_norm": 0.8749742019563281, "learning_rate": 4.973965753635325e-06, "loss": 0.4773, "step": 1211 }, { "epoch": 0.2997032640949555, "grad_norm": 0.8643498007988748, "learning_rate": 4.97391894790862e-06, "loss": 0.4521, "step": 1212 }, { "epoch": 0.29995054401582594, "grad_norm": 0.8051146754308409, "learning_rate": 4.973872100365427e-06, "loss": 0.4659, "step": 1213 }, { "epoch": 0.30019782393669636, "grad_norm": 0.8483014608260354, "learning_rate": 4.973825211006537e-06, "loss": 0.4753, "step": 1214 }, { "epoch": 0.3004451038575668, "grad_norm": 0.8784932670634085, "learning_rate": 4.973778279832744e-06, "loss": 0.4907, "step": 1215 }, { "epoch": 0.3006923837784372, "grad_norm": 0.8310183524315704, "learning_rate": 4.97373130684484e-06, "loss": 0.4672, "step": 1216 }, { "epoch": 0.30093966369930764, "grad_norm": 0.8636283603039391, "learning_rate": 4.973684292043619e-06, "loss": 0.435, "step": 1217 }, { "epoch": 0.30118694362017806, "grad_norm": 0.8389114184976831, "learning_rate": 4.973637235429877e-06, "loss": 0.4993, "step": 1218 }, { "epoch": 0.3014342235410485, "grad_norm": 0.8429199752469544, "learning_rate": 4.973590137004408e-06, "loss": 0.5084, "step": 1219 }, { "epoch": 0.3016815034619189, "grad_norm": 0.8648994179752031, "learning_rate": 4.9735429967680094e-06, "loss": 0.4584, "step": 1220 }, { "epoch": 0.30192878338278933, "grad_norm": 0.8092649880087687, "learning_rate": 4.973495814721477e-06, "loss": 0.4748, "step": 1221 }, { "epoch": 0.30217606330365976, "grad_norm": 0.8045236519854421, "learning_rate": 4.9734485908656075e-06, "loss": 0.5183, "step": 1222 }, { "epoch": 0.3024233432245302, "grad_norm": 0.8375848102184394, "learning_rate": 4.973401325201202e-06, "loss": 0.4846, "step": 1223 }, { "epoch": 0.3026706231454006, "grad_norm": 0.8204333352305591, "learning_rate": 4.9733540177290566e-06, "loss": 0.4735, "step": 1224 }, { "epoch": 0.30291790306627103, "grad_norm": 0.9320660470455538, "learning_rate": 4.973306668449971e-06, "loss": 0.487, "step": 1225 }, { "epoch": 0.30316518298714146, "grad_norm": 0.8512691440015686, "learning_rate": 4.973259277364748e-06, "loss": 0.4564, "step": 1226 }, { "epoch": 0.3034124629080119, "grad_norm": 0.8448716455291453, "learning_rate": 4.973211844474187e-06, "loss": 0.4607, "step": 1227 }, { "epoch": 0.3036597428288823, "grad_norm": 0.9007565601848737, "learning_rate": 4.973164369779089e-06, "loss": 0.4714, "step": 1228 }, { "epoch": 0.30390702274975273, "grad_norm": 0.8980928349718376, "learning_rate": 4.9731168532802586e-06, "loss": 0.4674, "step": 1229 }, { "epoch": 0.30415430267062316, "grad_norm": 0.8184075705545406, "learning_rate": 4.973069294978497e-06, "loss": 0.4683, "step": 1230 }, { "epoch": 0.3044015825914936, "grad_norm": 0.8307595261131485, "learning_rate": 4.973021694874609e-06, "loss": 0.4803, "step": 1231 }, { "epoch": 0.304648862512364, "grad_norm": 0.8995162154565732, "learning_rate": 4.972974052969399e-06, "loss": 0.4909, "step": 1232 }, { "epoch": 0.30489614243323443, "grad_norm": 0.8256622372929362, "learning_rate": 4.972926369263672e-06, "loss": 0.5004, "step": 1233 }, { "epoch": 0.30514342235410485, "grad_norm": 0.7964903751490071, "learning_rate": 4.972878643758234e-06, "loss": 0.5006, "step": 1234 }, { "epoch": 0.3053907022749753, "grad_norm": 0.9183004423758536, "learning_rate": 4.972830876453893e-06, "loss": 0.4817, "step": 1235 }, { "epoch": 0.3056379821958457, "grad_norm": 0.8931482050228089, "learning_rate": 4.972783067351455e-06, "loss": 0.4799, "step": 1236 }, { "epoch": 0.30588526211671613, "grad_norm": 0.9003322117775642, "learning_rate": 4.972735216451728e-06, "loss": 0.4832, "step": 1237 }, { "epoch": 0.30613254203758655, "grad_norm": 0.8025764006110967, "learning_rate": 4.972687323755522e-06, "loss": 0.4695, "step": 1238 }, { "epoch": 0.306379821958457, "grad_norm": 0.8313814899782755, "learning_rate": 4.972639389263645e-06, "loss": 0.5017, "step": 1239 }, { "epoch": 0.3066271018793274, "grad_norm": 0.8085003588929675, "learning_rate": 4.97259141297691e-06, "loss": 0.484, "step": 1240 }, { "epoch": 0.3068743818001978, "grad_norm": 0.8789937098823752, "learning_rate": 4.9725433948961235e-06, "loss": 0.4629, "step": 1241 }, { "epoch": 0.30712166172106825, "grad_norm": 0.8417003920288945, "learning_rate": 4.972495335022101e-06, "loss": 0.4892, "step": 1242 }, { "epoch": 0.3073689416419387, "grad_norm": 0.8296348241115892, "learning_rate": 4.972447233355654e-06, "loss": 0.471, "step": 1243 }, { "epoch": 0.3076162215628091, "grad_norm": 0.8383843142341431, "learning_rate": 4.972399089897594e-06, "loss": 0.5055, "step": 1244 }, { "epoch": 0.3078635014836795, "grad_norm": 0.8684099957350048, "learning_rate": 4.972350904648736e-06, "loss": 0.456, "step": 1245 }, { "epoch": 0.30811078140454995, "grad_norm": 0.8774377280391846, "learning_rate": 4.972302677609895e-06, "loss": 0.4821, "step": 1246 }, { "epoch": 0.3083580613254204, "grad_norm": 0.8757062818355338, "learning_rate": 4.972254408781885e-06, "loss": 0.4662, "step": 1247 }, { "epoch": 0.3086053412462908, "grad_norm": 0.8623536682487267, "learning_rate": 4.972206098165522e-06, "loss": 0.4751, "step": 1248 }, { "epoch": 0.3088526211671612, "grad_norm": 0.8482930842606974, "learning_rate": 4.972157745761624e-06, "loss": 0.4963, "step": 1249 }, { "epoch": 0.30909990108803165, "grad_norm": 0.8841543541026415, "learning_rate": 4.972109351571006e-06, "loss": 0.4845, "step": 1250 }, { "epoch": 0.3093471810089021, "grad_norm": 0.8180021244000164, "learning_rate": 4.972060915594488e-06, "loss": 0.5033, "step": 1251 }, { "epoch": 0.3095944609297725, "grad_norm": 0.8183146518355443, "learning_rate": 4.9720124378328885e-06, "loss": 0.4673, "step": 1252 }, { "epoch": 0.3098417408506429, "grad_norm": 0.9126517291897078, "learning_rate": 4.971963918287026e-06, "loss": 0.4785, "step": 1253 }, { "epoch": 0.31008902077151335, "grad_norm": 0.8475981845701279, "learning_rate": 4.971915356957721e-06, "loss": 0.4515, "step": 1254 }, { "epoch": 0.31033630069238377, "grad_norm": 0.829251256081981, "learning_rate": 4.971866753845794e-06, "loss": 0.4841, "step": 1255 }, { "epoch": 0.3105835806132542, "grad_norm": 0.810109516370709, "learning_rate": 4.971818108952066e-06, "loss": 0.4957, "step": 1256 }, { "epoch": 0.3108308605341246, "grad_norm": 0.8424982626224964, "learning_rate": 4.9717694222773624e-06, "loss": 0.4903, "step": 1257 }, { "epoch": 0.31107814045499504, "grad_norm": 0.8273938509757517, "learning_rate": 4.971720693822503e-06, "loss": 0.4696, "step": 1258 }, { "epoch": 0.31132542037586547, "grad_norm": 0.8584209103624083, "learning_rate": 4.971671923588312e-06, "loss": 0.4694, "step": 1259 }, { "epoch": 0.3115727002967359, "grad_norm": 0.8635606173746708, "learning_rate": 4.971623111575614e-06, "loss": 0.4716, "step": 1260 }, { "epoch": 0.3118199802176063, "grad_norm": 0.8772316694757156, "learning_rate": 4.971574257785234e-06, "loss": 0.4748, "step": 1261 }, { "epoch": 0.31206726013847674, "grad_norm": 0.9346025894181195, "learning_rate": 4.971525362217998e-06, "loss": 0.4853, "step": 1262 }, { "epoch": 0.31231454005934717, "grad_norm": 0.8196238081047258, "learning_rate": 4.971476424874733e-06, "loss": 0.4775, "step": 1263 }, { "epoch": 0.3125618199802176, "grad_norm": 0.8501168249933853, "learning_rate": 4.971427445756265e-06, "loss": 0.485, "step": 1264 }, { "epoch": 0.312809099901088, "grad_norm": 0.8453782824523821, "learning_rate": 4.971378424863423e-06, "loss": 0.4672, "step": 1265 }, { "epoch": 0.31305637982195844, "grad_norm": 0.8316282468393369, "learning_rate": 4.971329362197035e-06, "loss": 0.4592, "step": 1266 }, { "epoch": 0.31330365974282887, "grad_norm": 0.8913623594613578, "learning_rate": 4.97128025775793e-06, "loss": 0.4562, "step": 1267 }, { "epoch": 0.3135509396636993, "grad_norm": 0.8805133912519674, "learning_rate": 4.971231111546939e-06, "loss": 0.4554, "step": 1268 }, { "epoch": 0.3137982195845697, "grad_norm": 0.8378082421317632, "learning_rate": 4.971181923564892e-06, "loss": 0.4898, "step": 1269 }, { "epoch": 0.31404549950544014, "grad_norm": 0.9098239301913463, "learning_rate": 4.97113269381262e-06, "loss": 0.507, "step": 1270 }, { "epoch": 0.31429277942631056, "grad_norm": 0.8805091022613049, "learning_rate": 4.971083422290956e-06, "loss": 0.459, "step": 1271 }, { "epoch": 0.314540059347181, "grad_norm": 0.8558333892198907, "learning_rate": 4.971034109000732e-06, "loss": 0.4786, "step": 1272 }, { "epoch": 0.3147873392680514, "grad_norm": 0.830002999251238, "learning_rate": 4.970984753942783e-06, "loss": 0.4449, "step": 1273 }, { "epoch": 0.31503461918892184, "grad_norm": 0.8401177086898065, "learning_rate": 4.970935357117941e-06, "loss": 0.5094, "step": 1274 }, { "epoch": 0.31528189910979226, "grad_norm": 0.8369130683042246, "learning_rate": 4.9708859185270435e-06, "loss": 0.4823, "step": 1275 }, { "epoch": 0.3155291790306627, "grad_norm": 0.8319862899317088, "learning_rate": 4.970836438170924e-06, "loss": 0.4855, "step": 1276 }, { "epoch": 0.3157764589515331, "grad_norm": 0.8244796578240013, "learning_rate": 4.97078691605042e-06, "loss": 0.4527, "step": 1277 }, { "epoch": 0.31602373887240354, "grad_norm": 0.8239294602758832, "learning_rate": 4.970737352166368e-06, "loss": 0.4307, "step": 1278 }, { "epoch": 0.31627101879327396, "grad_norm": 0.812065916663278, "learning_rate": 4.970687746519607e-06, "loss": 0.4816, "step": 1279 }, { "epoch": 0.3165182987141444, "grad_norm": 0.8330917454579297, "learning_rate": 4.970638099110974e-06, "loss": 0.5176, "step": 1280 }, { "epoch": 0.3167655786350148, "grad_norm": 0.8260932984000681, "learning_rate": 4.970588409941308e-06, "loss": 0.4897, "step": 1281 }, { "epoch": 0.31701285855588524, "grad_norm": 0.8279566577283952, "learning_rate": 4.9705386790114505e-06, "loss": 0.4774, "step": 1282 }, { "epoch": 0.31726013847675566, "grad_norm": 0.8146960068506958, "learning_rate": 4.970488906322241e-06, "loss": 0.4885, "step": 1283 }, { "epoch": 0.31750741839762614, "grad_norm": 0.855983788309996, "learning_rate": 4.970439091874521e-06, "loss": 0.4864, "step": 1284 }, { "epoch": 0.31775469831849656, "grad_norm": 0.8477308612881375, "learning_rate": 4.970389235669133e-06, "loss": 0.4884, "step": 1285 }, { "epoch": 0.318001978239367, "grad_norm": 0.8379968825304376, "learning_rate": 4.97033933770692e-06, "loss": 0.4715, "step": 1286 }, { "epoch": 0.3182492581602374, "grad_norm": 0.8551487364632888, "learning_rate": 4.970289397988724e-06, "loss": 0.4888, "step": 1287 }, { "epoch": 0.31849653808110784, "grad_norm": 0.8350034977215137, "learning_rate": 4.970239416515389e-06, "loss": 0.4842, "step": 1288 }, { "epoch": 0.31874381800197826, "grad_norm": 0.8237669521560471, "learning_rate": 4.970189393287761e-06, "loss": 0.4764, "step": 1289 }, { "epoch": 0.3189910979228487, "grad_norm": 0.8858090914826243, "learning_rate": 4.970139328306686e-06, "loss": 0.4789, "step": 1290 }, { "epoch": 0.3192383778437191, "grad_norm": 0.8106320226692121, "learning_rate": 4.970089221573008e-06, "loss": 0.4689, "step": 1291 }, { "epoch": 0.31948565776458954, "grad_norm": 0.8175678750319051, "learning_rate": 4.970039073087577e-06, "loss": 0.4512, "step": 1292 }, { "epoch": 0.31973293768545996, "grad_norm": 0.7995128955825734, "learning_rate": 4.969988882851238e-06, "loss": 0.4832, "step": 1293 }, { "epoch": 0.3199802176063304, "grad_norm": 0.799837226959758, "learning_rate": 4.969938650864841e-06, "loss": 0.4528, "step": 1294 }, { "epoch": 0.3202274975272008, "grad_norm": 0.834121167692778, "learning_rate": 4.969888377129234e-06, "loss": 0.4597, "step": 1295 }, { "epoch": 0.32047477744807124, "grad_norm": 0.8653081086264667, "learning_rate": 4.969838061645268e-06, "loss": 0.4595, "step": 1296 }, { "epoch": 0.32072205736894166, "grad_norm": 0.8673155533135949, "learning_rate": 4.969787704413792e-06, "loss": 0.4812, "step": 1297 }, { "epoch": 0.3209693372898121, "grad_norm": 0.8122719817063542, "learning_rate": 4.969737305435658e-06, "loss": 0.4781, "step": 1298 }, { "epoch": 0.3212166172106825, "grad_norm": 0.8047716005193889, "learning_rate": 4.969686864711718e-06, "loss": 0.4999, "step": 1299 }, { "epoch": 0.32146389713155293, "grad_norm": 0.8407561134099769, "learning_rate": 4.969636382242825e-06, "loss": 0.4627, "step": 1300 }, { "epoch": 0.32171117705242336, "grad_norm": 0.8419429745734757, "learning_rate": 4.969585858029831e-06, "loss": 0.4729, "step": 1301 }, { "epoch": 0.3219584569732938, "grad_norm": 0.8166830456200092, "learning_rate": 4.96953529207359e-06, "loss": 0.4871, "step": 1302 }, { "epoch": 0.3222057368941642, "grad_norm": 0.7917900543444013, "learning_rate": 4.969484684374959e-06, "loss": 0.4935, "step": 1303 }, { "epoch": 0.32245301681503463, "grad_norm": 0.9590690166755755, "learning_rate": 4.9694340349347904e-06, "loss": 0.429, "step": 1304 }, { "epoch": 0.32270029673590506, "grad_norm": 0.8500944513695028, "learning_rate": 4.969383343753943e-06, "loss": 0.456, "step": 1305 }, { "epoch": 0.3229475766567755, "grad_norm": 0.9050469051293906, "learning_rate": 4.9693326108332716e-06, "loss": 0.4792, "step": 1306 }, { "epoch": 0.3231948565776459, "grad_norm": 0.8340987878049144, "learning_rate": 4.969281836173635e-06, "loss": 0.4722, "step": 1307 }, { "epoch": 0.32344213649851633, "grad_norm": 0.8987401808532778, "learning_rate": 4.969231019775891e-06, "loss": 0.4293, "step": 1308 }, { "epoch": 0.32368941641938676, "grad_norm": 0.8581280019324882, "learning_rate": 4.969180161640898e-06, "loss": 0.4519, "step": 1309 }, { "epoch": 0.3239366963402572, "grad_norm": 0.8465852233831679, "learning_rate": 4.9691292617695165e-06, "loss": 0.4632, "step": 1310 }, { "epoch": 0.3241839762611276, "grad_norm": 0.8182499069557968, "learning_rate": 4.969078320162607e-06, "loss": 0.4673, "step": 1311 }, { "epoch": 0.32443125618199803, "grad_norm": 0.8583292489781121, "learning_rate": 4.969027336821029e-06, "loss": 0.4787, "step": 1312 }, { "epoch": 0.32467853610286845, "grad_norm": 0.9144514645243177, "learning_rate": 4.968976311745647e-06, "loss": 0.4611, "step": 1313 }, { "epoch": 0.3249258160237389, "grad_norm": 0.8542093569498418, "learning_rate": 4.96892524493732e-06, "loss": 0.4807, "step": 1314 }, { "epoch": 0.3251730959446093, "grad_norm": 0.8531682089560951, "learning_rate": 4.968874136396914e-06, "loss": 0.463, "step": 1315 }, { "epoch": 0.3254203758654797, "grad_norm": 0.8740204340015105, "learning_rate": 4.968822986125292e-06, "loss": 0.4723, "step": 1316 }, { "epoch": 0.32566765578635015, "grad_norm": 0.7890203994099924, "learning_rate": 4.968771794123318e-06, "loss": 0.4811, "step": 1317 }, { "epoch": 0.3259149357072206, "grad_norm": 0.8646734697790014, "learning_rate": 4.968720560391859e-06, "loss": 0.4412, "step": 1318 }, { "epoch": 0.326162215628091, "grad_norm": 0.8417898419858085, "learning_rate": 4.968669284931779e-06, "loss": 0.4992, "step": 1319 }, { "epoch": 0.3264094955489614, "grad_norm": 0.8403085258475721, "learning_rate": 4.968617967743945e-06, "loss": 0.4671, "step": 1320 }, { "epoch": 0.32665677546983185, "grad_norm": 0.8161124612225035, "learning_rate": 4.968566608829225e-06, "loss": 0.4612, "step": 1321 }, { "epoch": 0.3269040553907023, "grad_norm": 0.8176140395301841, "learning_rate": 4.968515208188487e-06, "loss": 0.4773, "step": 1322 }, { "epoch": 0.3271513353115727, "grad_norm": 0.8612121577584373, "learning_rate": 4.9684637658226e-06, "loss": 0.4636, "step": 1323 }, { "epoch": 0.3273986152324431, "grad_norm": 0.7636475891146703, "learning_rate": 4.968412281732433e-06, "loss": 0.4591, "step": 1324 }, { "epoch": 0.32764589515331355, "grad_norm": 0.8356407912568218, "learning_rate": 4.968360755918858e-06, "loss": 0.4654, "step": 1325 }, { "epoch": 0.327893175074184, "grad_norm": 0.8130038980473393, "learning_rate": 4.968309188382743e-06, "loss": 0.493, "step": 1326 }, { "epoch": 0.3281404549950544, "grad_norm": 0.8073046510001293, "learning_rate": 4.968257579124962e-06, "loss": 0.4621, "step": 1327 }, { "epoch": 0.3283877349159248, "grad_norm": 0.8016870786489134, "learning_rate": 4.968205928146386e-06, "loss": 0.4643, "step": 1328 }, { "epoch": 0.32863501483679525, "grad_norm": 0.8448895040799353, "learning_rate": 4.968154235447889e-06, "loss": 0.4859, "step": 1329 }, { "epoch": 0.32888229475766567, "grad_norm": 0.8191604076948825, "learning_rate": 4.9681025010303445e-06, "loss": 0.4691, "step": 1330 }, { "epoch": 0.3291295746785361, "grad_norm": 0.8128752296677418, "learning_rate": 4.968050724894626e-06, "loss": 0.4621, "step": 1331 }, { "epoch": 0.3293768545994065, "grad_norm": 0.843228610985233, "learning_rate": 4.9679989070416106e-06, "loss": 0.442, "step": 1332 }, { "epoch": 0.32962413452027695, "grad_norm": 0.7963756355605212, "learning_rate": 4.967947047472172e-06, "loss": 0.4616, "step": 1333 }, { "epoch": 0.32987141444114737, "grad_norm": 0.839654855859208, "learning_rate": 4.967895146187189e-06, "loss": 0.4643, "step": 1334 }, { "epoch": 0.3301186943620178, "grad_norm": 0.8274671903075769, "learning_rate": 4.967843203187537e-06, "loss": 0.4426, "step": 1335 }, { "epoch": 0.3303659742828882, "grad_norm": 0.8799303601806644, "learning_rate": 4.967791218474095e-06, "loss": 0.4644, "step": 1336 }, { "epoch": 0.33061325420375864, "grad_norm": 0.8179823637800756, "learning_rate": 4.967739192047741e-06, "loss": 0.4659, "step": 1337 }, { "epoch": 0.33086053412462907, "grad_norm": 0.8195725524565542, "learning_rate": 4.967687123909355e-06, "loss": 0.5096, "step": 1338 }, { "epoch": 0.3311078140454995, "grad_norm": 0.84197720805029, "learning_rate": 4.9676350140598165e-06, "loss": 0.4727, "step": 1339 }, { "epoch": 0.3313550939663699, "grad_norm": 0.8260250113198276, "learning_rate": 4.9675828625000065e-06, "loss": 0.4779, "step": 1340 }, { "epoch": 0.33160237388724034, "grad_norm": 0.8880207738324564, "learning_rate": 4.967530669230808e-06, "loss": 0.4368, "step": 1341 }, { "epoch": 0.33184965380811077, "grad_norm": 0.8222910498101001, "learning_rate": 4.967478434253101e-06, "loss": 0.4881, "step": 1342 }, { "epoch": 0.3320969337289812, "grad_norm": 0.9239814524826327, "learning_rate": 4.9674261575677696e-06, "loss": 0.4394, "step": 1343 }, { "epoch": 0.3323442136498516, "grad_norm": 0.8162367375969642, "learning_rate": 4.967373839175696e-06, "loss": 0.43, "step": 1344 }, { "epoch": 0.33259149357072204, "grad_norm": 0.8420930848992175, "learning_rate": 4.967321479077768e-06, "loss": 0.4708, "step": 1345 }, { "epoch": 0.33283877349159247, "grad_norm": 0.8299200362774546, "learning_rate": 4.967269077274867e-06, "loss": 0.502, "step": 1346 }, { "epoch": 0.3330860534124629, "grad_norm": 0.8903658765485658, "learning_rate": 4.96721663376788e-06, "loss": 0.4873, "step": 1347 }, { "epoch": 0.3333333333333333, "grad_norm": 0.842432576192086, "learning_rate": 4.967164148557694e-06, "loss": 0.4523, "step": 1348 }, { "epoch": 0.33358061325420374, "grad_norm": 0.8784188816986657, "learning_rate": 4.967111621645195e-06, "loss": 0.4648, "step": 1349 }, { "epoch": 0.33382789317507416, "grad_norm": 0.8890735207690358, "learning_rate": 4.967059053031272e-06, "loss": 0.4735, "step": 1350 }, { "epoch": 0.3340751730959446, "grad_norm": 0.9139484525999664, "learning_rate": 4.967006442716814e-06, "loss": 0.4811, "step": 1351 }, { "epoch": 0.334322453016815, "grad_norm": 0.8432725929225718, "learning_rate": 4.966953790702709e-06, "loss": 0.4814, "step": 1352 }, { "epoch": 0.33456973293768544, "grad_norm": 0.7968350339816171, "learning_rate": 4.9669010969898465e-06, "loss": 0.4945, "step": 1353 }, { "epoch": 0.33481701285855586, "grad_norm": 0.8448439028983895, "learning_rate": 4.966848361579119e-06, "loss": 0.471, "step": 1354 }, { "epoch": 0.3350642927794263, "grad_norm": 0.8700792089452078, "learning_rate": 4.966795584471417e-06, "loss": 0.4778, "step": 1355 }, { "epoch": 0.3353115727002967, "grad_norm": 0.8576573597500617, "learning_rate": 4.966742765667632e-06, "loss": 0.439, "step": 1356 }, { "epoch": 0.33555885262116714, "grad_norm": 0.8412617464584977, "learning_rate": 4.9666899051686565e-06, "loss": 0.4579, "step": 1357 }, { "epoch": 0.33580613254203756, "grad_norm": 0.8660641241003527, "learning_rate": 4.966637002975387e-06, "loss": 0.4947, "step": 1358 }, { "epoch": 0.336053412462908, "grad_norm": 0.8784136045891929, "learning_rate": 4.966584059088714e-06, "loss": 0.4509, "step": 1359 }, { "epoch": 0.3363006923837784, "grad_norm": 0.8575154859392102, "learning_rate": 4.966531073509534e-06, "loss": 0.4628, "step": 1360 }, { "epoch": 0.3365479723046489, "grad_norm": 0.8558968082320106, "learning_rate": 4.966478046238742e-06, "loss": 0.4539, "step": 1361 }, { "epoch": 0.3367952522255193, "grad_norm": 0.8463118657623281, "learning_rate": 4.966424977277236e-06, "loss": 0.4874, "step": 1362 }, { "epoch": 0.33704253214638974, "grad_norm": 0.8582147987913828, "learning_rate": 4.966371866625912e-06, "loss": 0.4827, "step": 1363 }, { "epoch": 0.33728981206726016, "grad_norm": 0.8425557042931806, "learning_rate": 4.966318714285667e-06, "loss": 0.5158, "step": 1364 }, { "epoch": 0.3375370919881306, "grad_norm": 0.8198957499784623, "learning_rate": 4.966265520257399e-06, "loss": 0.4602, "step": 1365 }, { "epoch": 0.337784371909001, "grad_norm": 0.901675824405454, "learning_rate": 4.9662122845420105e-06, "loss": 0.4638, "step": 1366 }, { "epoch": 0.33803165182987144, "grad_norm": 0.8421039706556158, "learning_rate": 4.9661590071403975e-06, "loss": 0.4773, "step": 1367 }, { "epoch": 0.33827893175074186, "grad_norm": 0.8881502365059725, "learning_rate": 4.966105688053462e-06, "loss": 0.4977, "step": 1368 }, { "epoch": 0.3385262116716123, "grad_norm": 0.8286924700026043, "learning_rate": 4.966052327282106e-06, "loss": 0.4528, "step": 1369 }, { "epoch": 0.3387734915924827, "grad_norm": 0.815530177047265, "learning_rate": 4.96599892482723e-06, "loss": 0.4678, "step": 1370 }, { "epoch": 0.33902077151335314, "grad_norm": 0.8498159029250026, "learning_rate": 4.965945480689738e-06, "loss": 0.4797, "step": 1371 }, { "epoch": 0.33926805143422356, "grad_norm": 0.837998064753534, "learning_rate": 4.965891994870533e-06, "loss": 0.4518, "step": 1372 }, { "epoch": 0.339515331355094, "grad_norm": 0.8571090602934665, "learning_rate": 4.965838467370518e-06, "loss": 0.4516, "step": 1373 }, { "epoch": 0.3397626112759644, "grad_norm": 0.9212861435307308, "learning_rate": 4.9657848981905985e-06, "loss": 0.4589, "step": 1374 }, { "epoch": 0.34000989119683483, "grad_norm": 0.8646273616764547, "learning_rate": 4.9657312873316806e-06, "loss": 0.4656, "step": 1375 }, { "epoch": 0.34025717111770526, "grad_norm": 0.8649249854703567, "learning_rate": 4.965677634794671e-06, "loss": 0.4678, "step": 1376 }, { "epoch": 0.3405044510385757, "grad_norm": 0.8280715209748639, "learning_rate": 4.965623940580474e-06, "loss": 0.4806, "step": 1377 }, { "epoch": 0.3407517309594461, "grad_norm": 0.8540637518211892, "learning_rate": 4.965570204689999e-06, "loss": 0.466, "step": 1378 }, { "epoch": 0.34099901088031653, "grad_norm": 0.8722297658167012, "learning_rate": 4.965516427124155e-06, "loss": 0.4912, "step": 1379 }, { "epoch": 0.34124629080118696, "grad_norm": 0.8324087882341675, "learning_rate": 4.965462607883849e-06, "loss": 0.4282, "step": 1380 }, { "epoch": 0.3414935707220574, "grad_norm": 0.8542616511213038, "learning_rate": 4.965408746969993e-06, "loss": 0.4565, "step": 1381 }, { "epoch": 0.3417408506429278, "grad_norm": 0.8897532286442891, "learning_rate": 4.965354844383494e-06, "loss": 0.4476, "step": 1382 }, { "epoch": 0.34198813056379823, "grad_norm": 0.900818254020432, "learning_rate": 4.965300900125267e-06, "loss": 0.4626, "step": 1383 }, { "epoch": 0.34223541048466866, "grad_norm": 0.8121891811895046, "learning_rate": 4.965246914196222e-06, "loss": 0.4723, "step": 1384 }, { "epoch": 0.3424826904055391, "grad_norm": 0.8085099498992192, "learning_rate": 4.965192886597271e-06, "loss": 0.4969, "step": 1385 }, { "epoch": 0.3427299703264095, "grad_norm": 0.8169789083197132, "learning_rate": 4.965138817329328e-06, "loss": 0.4983, "step": 1386 }, { "epoch": 0.34297725024727993, "grad_norm": 0.8413101020719048, "learning_rate": 4.965084706393307e-06, "loss": 0.4624, "step": 1387 }, { "epoch": 0.34322453016815035, "grad_norm": 0.8554114796569622, "learning_rate": 4.965030553790123e-06, "loss": 0.4847, "step": 1388 }, { "epoch": 0.3434718100890208, "grad_norm": 0.883414822387632, "learning_rate": 4.964976359520689e-06, "loss": 0.4873, "step": 1389 }, { "epoch": 0.3437190900098912, "grad_norm": 0.9225841299701298, "learning_rate": 4.964922123585924e-06, "loss": 0.4517, "step": 1390 }, { "epoch": 0.34396636993076163, "grad_norm": 0.836069523426395, "learning_rate": 4.964867845986742e-06, "loss": 0.508, "step": 1391 }, { "epoch": 0.34421364985163205, "grad_norm": 0.8230356770161646, "learning_rate": 4.964813526724064e-06, "loss": 0.4811, "step": 1392 }, { "epoch": 0.3444609297725025, "grad_norm": 0.8144205084918253, "learning_rate": 4.964759165798806e-06, "loss": 0.4746, "step": 1393 }, { "epoch": 0.3447082096933729, "grad_norm": 0.848243953448798, "learning_rate": 4.964704763211886e-06, "loss": 0.4605, "step": 1394 }, { "epoch": 0.3449554896142433, "grad_norm": 0.862441294591035, "learning_rate": 4.964650318964224e-06, "loss": 0.4261, "step": 1395 }, { "epoch": 0.34520276953511375, "grad_norm": 0.823727505626556, "learning_rate": 4.964595833056742e-06, "loss": 0.4542, "step": 1396 }, { "epoch": 0.3454500494559842, "grad_norm": 0.8461579909245877, "learning_rate": 4.964541305490359e-06, "loss": 0.4642, "step": 1397 }, { "epoch": 0.3456973293768546, "grad_norm": 0.8682318696565303, "learning_rate": 4.964486736265998e-06, "loss": 0.4619, "step": 1398 }, { "epoch": 0.345944609297725, "grad_norm": 0.8154599041496469, "learning_rate": 4.964432125384581e-06, "loss": 0.49, "step": 1399 }, { "epoch": 0.34619188921859545, "grad_norm": 0.8521804504909071, "learning_rate": 4.96437747284703e-06, "loss": 0.4339, "step": 1400 }, { "epoch": 0.3464391691394659, "grad_norm": 0.8556600517889699, "learning_rate": 4.964322778654271e-06, "loss": 0.4447, "step": 1401 }, { "epoch": 0.3466864490603363, "grad_norm": 0.8688194992174506, "learning_rate": 4.964268042807227e-06, "loss": 0.4644, "step": 1402 }, { "epoch": 0.3469337289812067, "grad_norm": 0.8017975295806874, "learning_rate": 4.9642132653068224e-06, "loss": 0.4561, "step": 1403 }, { "epoch": 0.34718100890207715, "grad_norm": 0.8536874452765987, "learning_rate": 4.964158446153985e-06, "loss": 0.4648, "step": 1404 }, { "epoch": 0.3474282888229476, "grad_norm": 0.8189127192711046, "learning_rate": 4.964103585349639e-06, "loss": 0.4388, "step": 1405 }, { "epoch": 0.347675568743818, "grad_norm": 0.8705939326174833, "learning_rate": 4.9640486828947146e-06, "loss": 0.4596, "step": 1406 }, { "epoch": 0.3479228486646884, "grad_norm": 0.885106043728194, "learning_rate": 4.963993738790138e-06, "loss": 0.4743, "step": 1407 }, { "epoch": 0.34817012858555885, "grad_norm": 0.8332350836081958, "learning_rate": 4.963938753036839e-06, "loss": 0.4795, "step": 1408 }, { "epoch": 0.34841740850642927, "grad_norm": 0.8151625206936463, "learning_rate": 4.963883725635746e-06, "loss": 0.4588, "step": 1409 }, { "epoch": 0.3486646884272997, "grad_norm": 0.8584186317187182, "learning_rate": 4.963828656587789e-06, "loss": 0.4609, "step": 1410 }, { "epoch": 0.3489119683481701, "grad_norm": 0.8374239839439461, "learning_rate": 4.9637735458939e-06, "loss": 0.4663, "step": 1411 }, { "epoch": 0.34915924826904055, "grad_norm": 0.8335071629302205, "learning_rate": 4.96371839355501e-06, "loss": 0.4976, "step": 1412 }, { "epoch": 0.34940652818991097, "grad_norm": 0.8566093656507006, "learning_rate": 4.96366319957205e-06, "loss": 0.4427, "step": 1413 }, { "epoch": 0.3496538081107814, "grad_norm": 0.8797486028317246, "learning_rate": 4.963607963945954e-06, "loss": 0.4638, "step": 1414 }, { "epoch": 0.3499010880316518, "grad_norm": 0.7973883458360015, "learning_rate": 4.963552686677656e-06, "loss": 0.4756, "step": 1415 }, { "epoch": 0.35014836795252224, "grad_norm": 0.8605355070813538, "learning_rate": 4.963497367768091e-06, "loss": 0.4937, "step": 1416 }, { "epoch": 0.35039564787339267, "grad_norm": 0.8087052810593318, "learning_rate": 4.9634420072181925e-06, "loss": 0.5043, "step": 1417 }, { "epoch": 0.3506429277942631, "grad_norm": 0.8042980991613009, "learning_rate": 4.963386605028897e-06, "loss": 0.4866, "step": 1418 }, { "epoch": 0.3508902077151335, "grad_norm": 0.8397881265192508, "learning_rate": 4.96333116120114e-06, "loss": 0.4775, "step": 1419 }, { "epoch": 0.35113748763600394, "grad_norm": 0.8229969968530699, "learning_rate": 4.963275675735859e-06, "loss": 0.4909, "step": 1420 }, { "epoch": 0.35138476755687437, "grad_norm": 0.8582458934680285, "learning_rate": 4.963220148633994e-06, "loss": 0.4483, "step": 1421 }, { "epoch": 0.3516320474777448, "grad_norm": 0.8509944642538084, "learning_rate": 4.963164579896481e-06, "loss": 0.4931, "step": 1422 }, { "epoch": 0.3518793273986152, "grad_norm": 0.7778259466986965, "learning_rate": 4.963108969524261e-06, "loss": 0.4506, "step": 1423 }, { "epoch": 0.35212660731948564, "grad_norm": 0.806063787046905, "learning_rate": 4.9630533175182714e-06, "loss": 0.4828, "step": 1424 }, { "epoch": 0.35237388724035607, "grad_norm": 0.8595506336846482, "learning_rate": 4.962997623879456e-06, "loss": 0.4606, "step": 1425 }, { "epoch": 0.3526211671612265, "grad_norm": 0.8324046367039577, "learning_rate": 4.962941888608754e-06, "loss": 0.4489, "step": 1426 }, { "epoch": 0.3528684470820969, "grad_norm": 0.8519133065926907, "learning_rate": 4.9628861117071095e-06, "loss": 0.4664, "step": 1427 }, { "epoch": 0.35311572700296734, "grad_norm": 0.7904079696015989, "learning_rate": 4.962830293175463e-06, "loss": 0.4657, "step": 1428 }, { "epoch": 0.35336300692383776, "grad_norm": 0.8232281020750102, "learning_rate": 4.96277443301476e-06, "loss": 0.444, "step": 1429 }, { "epoch": 0.3536102868447082, "grad_norm": 0.8535424230625128, "learning_rate": 4.962718531225942e-06, "loss": 0.465, "step": 1430 }, { "epoch": 0.3538575667655786, "grad_norm": 0.8615311397260789, "learning_rate": 4.962662587809957e-06, "loss": 0.4482, "step": 1431 }, { "epoch": 0.35410484668644904, "grad_norm": 0.8179287695934544, "learning_rate": 4.9626066027677496e-06, "loss": 0.4796, "step": 1432 }, { "epoch": 0.35435212660731946, "grad_norm": 0.8435396498957267, "learning_rate": 4.962550576100265e-06, "loss": 0.4504, "step": 1433 }, { "epoch": 0.3545994065281899, "grad_norm": 0.8045830988187997, "learning_rate": 4.962494507808452e-06, "loss": 0.4718, "step": 1434 }, { "epoch": 0.3548466864490603, "grad_norm": 0.7973282650780453, "learning_rate": 4.962438397893256e-06, "loss": 0.5084, "step": 1435 }, { "epoch": 0.35509396636993074, "grad_norm": 0.81788673220588, "learning_rate": 4.962382246355628e-06, "loss": 0.4762, "step": 1436 }, { "epoch": 0.35534124629080116, "grad_norm": 0.8191220728313339, "learning_rate": 4.962326053196515e-06, "loss": 0.459, "step": 1437 }, { "epoch": 0.3555885262116716, "grad_norm": 0.8418837776836511, "learning_rate": 4.9622698184168684e-06, "loss": 0.4832, "step": 1438 }, { "epoch": 0.35583580613254207, "grad_norm": 0.822644289062169, "learning_rate": 4.962213542017638e-06, "loss": 0.4891, "step": 1439 }, { "epoch": 0.3560830860534125, "grad_norm": 0.8055518425958926, "learning_rate": 4.962157223999774e-06, "loss": 0.4805, "step": 1440 }, { "epoch": 0.3563303659742829, "grad_norm": 0.8663900660198538, "learning_rate": 4.962100864364231e-06, "loss": 0.4414, "step": 1441 }, { "epoch": 0.35657764589515334, "grad_norm": 0.8065354382371167, "learning_rate": 4.962044463111959e-06, "loss": 0.472, "step": 1442 }, { "epoch": 0.35682492581602376, "grad_norm": 0.8429417885865965, "learning_rate": 4.961988020243913e-06, "loss": 0.4802, "step": 1443 }, { "epoch": 0.3570722057368942, "grad_norm": 0.8628035030884751, "learning_rate": 4.961931535761046e-06, "loss": 0.4278, "step": 1444 }, { "epoch": 0.3573194856577646, "grad_norm": 0.8255801822657475, "learning_rate": 4.961875009664313e-06, "loss": 0.4637, "step": 1445 }, { "epoch": 0.35756676557863504, "grad_norm": 0.8435723559389733, "learning_rate": 4.9618184419546705e-06, "loss": 0.4711, "step": 1446 }, { "epoch": 0.35781404549950546, "grad_norm": 0.8287755155885044, "learning_rate": 4.961761832633073e-06, "loss": 0.4893, "step": 1447 }, { "epoch": 0.3580613254203759, "grad_norm": 0.8113313485945742, "learning_rate": 4.961705181700479e-06, "loss": 0.4758, "step": 1448 }, { "epoch": 0.3583086053412463, "grad_norm": 0.8541198849431507, "learning_rate": 4.9616484891578455e-06, "loss": 0.447, "step": 1449 }, { "epoch": 0.35855588526211674, "grad_norm": 0.8277973505342479, "learning_rate": 4.96159175500613e-06, "loss": 0.4556, "step": 1450 }, { "epoch": 0.35880316518298716, "grad_norm": 0.8217363815126292, "learning_rate": 4.9615349792462916e-06, "loss": 0.4682, "step": 1451 }, { "epoch": 0.3590504451038576, "grad_norm": 0.8235924475409444, "learning_rate": 4.961478161879291e-06, "loss": 0.4577, "step": 1452 }, { "epoch": 0.359297725024728, "grad_norm": 0.8366896026328494, "learning_rate": 4.961421302906087e-06, "loss": 0.4536, "step": 1453 }, { "epoch": 0.35954500494559843, "grad_norm": 0.8480005034889219, "learning_rate": 4.961364402327643e-06, "loss": 0.467, "step": 1454 }, { "epoch": 0.35979228486646886, "grad_norm": 0.8056929237512162, "learning_rate": 4.961307460144919e-06, "loss": 0.4733, "step": 1455 }, { "epoch": 0.3600395647873393, "grad_norm": 0.8221098249663793, "learning_rate": 4.9612504763588774e-06, "loss": 0.4598, "step": 1456 }, { "epoch": 0.3602868447082097, "grad_norm": 0.8347778715831121, "learning_rate": 4.961193450970483e-06, "loss": 0.4568, "step": 1457 }, { "epoch": 0.36053412462908013, "grad_norm": 0.8772150797089943, "learning_rate": 4.961136383980697e-06, "loss": 0.4588, "step": 1458 }, { "epoch": 0.36078140454995056, "grad_norm": 0.8530561104086893, "learning_rate": 4.9610792753904866e-06, "loss": 0.4616, "step": 1459 }, { "epoch": 0.361028684470821, "grad_norm": 0.8823335046697693, "learning_rate": 4.961022125200816e-06, "loss": 0.4699, "step": 1460 }, { "epoch": 0.3612759643916914, "grad_norm": 0.8485109471759061, "learning_rate": 4.960964933412652e-06, "loss": 0.4378, "step": 1461 }, { "epoch": 0.36152324431256183, "grad_norm": 0.8898917632495861, "learning_rate": 4.96090770002696e-06, "loss": 0.4654, "step": 1462 }, { "epoch": 0.36177052423343226, "grad_norm": 0.8470917053499859, "learning_rate": 4.9608504250447075e-06, "loss": 0.4553, "step": 1463 }, { "epoch": 0.3620178041543027, "grad_norm": 0.8348048460177506, "learning_rate": 4.960793108466863e-06, "loss": 0.4423, "step": 1464 }, { "epoch": 0.3622650840751731, "grad_norm": 0.8837887760254436, "learning_rate": 4.960735750294397e-06, "loss": 0.437, "step": 1465 }, { "epoch": 0.36251236399604353, "grad_norm": 0.8521679859494443, "learning_rate": 4.960678350528277e-06, "loss": 0.4577, "step": 1466 }, { "epoch": 0.36275964391691395, "grad_norm": 0.7953899887086049, "learning_rate": 4.9606209091694734e-06, "loss": 0.4822, "step": 1467 }, { "epoch": 0.3630069238377844, "grad_norm": 0.7982731717026136, "learning_rate": 4.960563426218957e-06, "loss": 0.4584, "step": 1468 }, { "epoch": 0.3632542037586548, "grad_norm": 0.9145430537767979, "learning_rate": 4.960505901677701e-06, "loss": 0.5103, "step": 1469 }, { "epoch": 0.36350148367952523, "grad_norm": 0.8606249759635389, "learning_rate": 4.9604483355466756e-06, "loss": 0.4532, "step": 1470 }, { "epoch": 0.36374876360039565, "grad_norm": 0.8640284738109312, "learning_rate": 4.960390727826856e-06, "loss": 0.4562, "step": 1471 }, { "epoch": 0.3639960435212661, "grad_norm": 0.8927421670919629, "learning_rate": 4.960333078519214e-06, "loss": 0.4427, "step": 1472 }, { "epoch": 0.3642433234421365, "grad_norm": 0.8209190272202821, "learning_rate": 4.9602753876247244e-06, "loss": 0.4658, "step": 1473 }, { "epoch": 0.3644906033630069, "grad_norm": 0.8506090980497512, "learning_rate": 4.960217655144364e-06, "loss": 0.4903, "step": 1474 }, { "epoch": 0.36473788328387735, "grad_norm": 0.8304617275337773, "learning_rate": 4.960159881079106e-06, "loss": 0.4371, "step": 1475 }, { "epoch": 0.3649851632047478, "grad_norm": 0.8074225851216803, "learning_rate": 4.960102065429929e-06, "loss": 0.4615, "step": 1476 }, { "epoch": 0.3652324431256182, "grad_norm": 0.8003088453641801, "learning_rate": 4.96004420819781e-06, "loss": 0.4602, "step": 1477 }, { "epoch": 0.3654797230464886, "grad_norm": 0.8470939389079107, "learning_rate": 4.959986309383726e-06, "loss": 0.4713, "step": 1478 }, { "epoch": 0.36572700296735905, "grad_norm": 0.830521949089574, "learning_rate": 4.959928368988657e-06, "loss": 0.4811, "step": 1479 }, { "epoch": 0.3659742828882295, "grad_norm": 0.8745328214858118, "learning_rate": 4.959870387013581e-06, "loss": 0.4557, "step": 1480 }, { "epoch": 0.3662215628090999, "grad_norm": 0.847094148012273, "learning_rate": 4.959812363459479e-06, "loss": 0.4655, "step": 1481 }, { "epoch": 0.3664688427299703, "grad_norm": 0.8491449367553962, "learning_rate": 4.959754298327332e-06, "loss": 0.4597, "step": 1482 }, { "epoch": 0.36671612265084075, "grad_norm": 0.8229085286999269, "learning_rate": 4.959696191618119e-06, "loss": 0.4854, "step": 1483 }, { "epoch": 0.3669634025717112, "grad_norm": 0.8702802741898958, "learning_rate": 4.959638043332826e-06, "loss": 0.455, "step": 1484 }, { "epoch": 0.3672106824925816, "grad_norm": 0.8459937215244333, "learning_rate": 4.959579853472434e-06, "loss": 0.4758, "step": 1485 }, { "epoch": 0.367457962413452, "grad_norm": 0.8881342218230955, "learning_rate": 4.959521622037925e-06, "loss": 0.468, "step": 1486 }, { "epoch": 0.36770524233432245, "grad_norm": 0.7847108194328761, "learning_rate": 4.959463349030285e-06, "loss": 0.4896, "step": 1487 }, { "epoch": 0.36795252225519287, "grad_norm": 0.8417601497534091, "learning_rate": 4.959405034450501e-06, "loss": 0.4686, "step": 1488 }, { "epoch": 0.3681998021760633, "grad_norm": 0.8122499845921317, "learning_rate": 4.959346678299555e-06, "loss": 0.4692, "step": 1489 }, { "epoch": 0.3684470820969337, "grad_norm": 0.8580283394958202, "learning_rate": 4.9592882805784345e-06, "loss": 0.4742, "step": 1490 }, { "epoch": 0.36869436201780414, "grad_norm": 0.8428257373547411, "learning_rate": 4.959229841288128e-06, "loss": 0.4675, "step": 1491 }, { "epoch": 0.36894164193867457, "grad_norm": 0.8259183229805209, "learning_rate": 4.959171360429621e-06, "loss": 0.4404, "step": 1492 }, { "epoch": 0.369188921859545, "grad_norm": 0.8126446310683214, "learning_rate": 4.959112838003905e-06, "loss": 0.4687, "step": 1493 }, { "epoch": 0.3694362017804154, "grad_norm": 0.7830986243714438, "learning_rate": 4.959054274011966e-06, "loss": 0.4625, "step": 1494 }, { "epoch": 0.36968348170128584, "grad_norm": 0.8237721672583825, "learning_rate": 4.958995668454796e-06, "loss": 0.4679, "step": 1495 }, { "epoch": 0.36993076162215627, "grad_norm": 0.842776685828888, "learning_rate": 4.958937021333384e-06, "loss": 0.46, "step": 1496 }, { "epoch": 0.3701780415430267, "grad_norm": 0.8364831387221224, "learning_rate": 4.958878332648724e-06, "loss": 0.4899, "step": 1497 }, { "epoch": 0.3704253214638971, "grad_norm": 0.7940034967601184, "learning_rate": 4.958819602401806e-06, "loss": 0.4595, "step": 1498 }, { "epoch": 0.37067260138476754, "grad_norm": 0.8153152604939218, "learning_rate": 4.958760830593621e-06, "loss": 0.4808, "step": 1499 }, { "epoch": 0.37091988130563797, "grad_norm": 0.8603126838223264, "learning_rate": 4.958702017225166e-06, "loss": 0.4643, "step": 1500 }, { "epoch": 0.3711671612265084, "grad_norm": 0.832304898978481, "learning_rate": 4.958643162297434e-06, "loss": 0.4609, "step": 1501 }, { "epoch": 0.3714144411473788, "grad_norm": 0.8437671915588434, "learning_rate": 4.958584265811419e-06, "loss": 0.4607, "step": 1502 }, { "epoch": 0.37166172106824924, "grad_norm": 0.8270741301993446, "learning_rate": 4.958525327768117e-06, "loss": 0.4866, "step": 1503 }, { "epoch": 0.37190900098911966, "grad_norm": 0.8458032983321746, "learning_rate": 4.9584663481685235e-06, "loss": 0.4842, "step": 1504 }, { "epoch": 0.3721562809099901, "grad_norm": 0.8240486908432811, "learning_rate": 4.958407327013637e-06, "loss": 0.4836, "step": 1505 }, { "epoch": 0.3724035608308605, "grad_norm": 0.8103231743244376, "learning_rate": 4.9583482643044535e-06, "loss": 0.4607, "step": 1506 }, { "epoch": 0.37265084075173094, "grad_norm": 0.8309144783040975, "learning_rate": 4.9582891600419714e-06, "loss": 0.4767, "step": 1507 }, { "epoch": 0.37289812067260136, "grad_norm": 0.812854985431406, "learning_rate": 4.958230014227191e-06, "loss": 0.5015, "step": 1508 }, { "epoch": 0.3731454005934718, "grad_norm": 0.8389457059849967, "learning_rate": 4.9581708268611116e-06, "loss": 0.4895, "step": 1509 }, { "epoch": 0.3733926805143422, "grad_norm": 0.8324431688711037, "learning_rate": 4.958111597944734e-06, "loss": 0.4802, "step": 1510 }, { "epoch": 0.37363996043521264, "grad_norm": 0.8858341769964009, "learning_rate": 4.9580523274790585e-06, "loss": 0.4677, "step": 1511 }, { "epoch": 0.37388724035608306, "grad_norm": 0.8346574872673881, "learning_rate": 4.957993015465086e-06, "loss": 0.4434, "step": 1512 }, { "epoch": 0.3741345202769535, "grad_norm": 0.8159836769145766, "learning_rate": 4.957933661903822e-06, "loss": 0.49, "step": 1513 }, { "epoch": 0.3743818001978239, "grad_norm": 0.903045314748104, "learning_rate": 4.957874266796267e-06, "loss": 0.4861, "step": 1514 }, { "epoch": 0.37462908011869434, "grad_norm": 0.8234870808009122, "learning_rate": 4.9578148301434255e-06, "loss": 0.4452, "step": 1515 }, { "epoch": 0.37487636003956476, "grad_norm": 0.8153560688366689, "learning_rate": 4.957755351946303e-06, "loss": 0.4734, "step": 1516 }, { "epoch": 0.37512363996043524, "grad_norm": 0.8400329349725666, "learning_rate": 4.957695832205905e-06, "loss": 0.497, "step": 1517 }, { "epoch": 0.37537091988130566, "grad_norm": 0.8358924451402449, "learning_rate": 4.957636270923237e-06, "loss": 0.4562, "step": 1518 }, { "epoch": 0.3756181998021761, "grad_norm": 0.8432421412883345, "learning_rate": 4.9575766680993056e-06, "loss": 0.4428, "step": 1519 }, { "epoch": 0.3758654797230465, "grad_norm": 0.8160910599610325, "learning_rate": 4.957517023735119e-06, "loss": 0.4627, "step": 1520 }, { "epoch": 0.37611275964391694, "grad_norm": 0.8711532573510746, "learning_rate": 4.957457337831684e-06, "loss": 0.4717, "step": 1521 }, { "epoch": 0.37636003956478736, "grad_norm": 0.8242546795865497, "learning_rate": 4.95739761039001e-06, "loss": 0.4819, "step": 1522 }, { "epoch": 0.3766073194856578, "grad_norm": 0.8613610896652953, "learning_rate": 4.957337841411107e-06, "loss": 0.446, "step": 1523 }, { "epoch": 0.3768545994065282, "grad_norm": 0.864261076328205, "learning_rate": 4.9572780308959865e-06, "loss": 0.4698, "step": 1524 }, { "epoch": 0.37710187932739864, "grad_norm": 0.8614226944088311, "learning_rate": 4.957218178845657e-06, "loss": 0.4808, "step": 1525 }, { "epoch": 0.37734915924826906, "grad_norm": 0.8224765428766275, "learning_rate": 4.957158285261131e-06, "loss": 0.4403, "step": 1526 }, { "epoch": 0.3775964391691395, "grad_norm": 0.8181585369424585, "learning_rate": 4.957098350143422e-06, "loss": 0.4766, "step": 1527 }, { "epoch": 0.3778437190900099, "grad_norm": 0.8096774878681889, "learning_rate": 4.957038373493541e-06, "loss": 0.46, "step": 1528 }, { "epoch": 0.37809099901088034, "grad_norm": 0.8490900193792322, "learning_rate": 4.956978355312505e-06, "loss": 0.4781, "step": 1529 }, { "epoch": 0.37833827893175076, "grad_norm": 0.8322232014452549, "learning_rate": 4.956918295601325e-06, "loss": 0.5009, "step": 1530 }, { "epoch": 0.3785855588526212, "grad_norm": 0.7735197922262507, "learning_rate": 4.956858194361018e-06, "loss": 0.478, "step": 1531 }, { "epoch": 0.3788328387734916, "grad_norm": 0.8585921318091921, "learning_rate": 4.9567980515926e-06, "loss": 0.4856, "step": 1532 }, { "epoch": 0.37908011869436203, "grad_norm": 0.8446307436491305, "learning_rate": 4.956737867297086e-06, "loss": 0.4472, "step": 1533 }, { "epoch": 0.37932739861523246, "grad_norm": 0.8357409701620357, "learning_rate": 4.9566776414754955e-06, "loss": 0.4704, "step": 1534 }, { "epoch": 0.3795746785361029, "grad_norm": 0.883193450189684, "learning_rate": 4.9566173741288445e-06, "loss": 0.4445, "step": 1535 }, { "epoch": 0.3798219584569733, "grad_norm": 0.8362706611107679, "learning_rate": 4.956557065258154e-06, "loss": 0.4763, "step": 1536 }, { "epoch": 0.38006923837784373, "grad_norm": 0.8445029539445396, "learning_rate": 4.956496714864442e-06, "loss": 0.4363, "step": 1537 }, { "epoch": 0.38031651829871416, "grad_norm": 0.9005044139041423, "learning_rate": 4.956436322948728e-06, "loss": 0.4257, "step": 1538 }, { "epoch": 0.3805637982195846, "grad_norm": 0.803480549136306, "learning_rate": 4.956375889512033e-06, "loss": 0.4643, "step": 1539 }, { "epoch": 0.380811078140455, "grad_norm": 0.8944145630210211, "learning_rate": 4.95631541455538e-06, "loss": 0.4649, "step": 1540 }, { "epoch": 0.38105835806132543, "grad_norm": 0.8431926814490958, "learning_rate": 4.956254898079789e-06, "loss": 0.4634, "step": 1541 }, { "epoch": 0.38130563798219586, "grad_norm": 0.8623971408995822, "learning_rate": 4.956194340086284e-06, "loss": 0.473, "step": 1542 }, { "epoch": 0.3815529179030663, "grad_norm": 0.855497209762524, "learning_rate": 4.956133740575889e-06, "loss": 0.4384, "step": 1543 }, { "epoch": 0.3818001978239367, "grad_norm": 0.8154439659482268, "learning_rate": 4.9560730995496285e-06, "loss": 0.4714, "step": 1544 }, { "epoch": 0.38204747774480713, "grad_norm": 0.8117948295753515, "learning_rate": 4.956012417008526e-06, "loss": 0.4573, "step": 1545 }, { "epoch": 0.38229475766567755, "grad_norm": 0.8411563068995113, "learning_rate": 4.95595169295361e-06, "loss": 0.447, "step": 1546 }, { "epoch": 0.382542037586548, "grad_norm": 0.840886345860403, "learning_rate": 4.955890927385903e-06, "loss": 0.4373, "step": 1547 }, { "epoch": 0.3827893175074184, "grad_norm": 0.8749065735486805, "learning_rate": 4.955830120306436e-06, "loss": 0.466, "step": 1548 }, { "epoch": 0.3830365974282888, "grad_norm": 0.8266971574028511, "learning_rate": 4.955769271716234e-06, "loss": 0.4524, "step": 1549 }, { "epoch": 0.38328387734915925, "grad_norm": 0.7856800140279594, "learning_rate": 4.955708381616327e-06, "loss": 0.4661, "step": 1550 }, { "epoch": 0.3835311572700297, "grad_norm": 0.8227838319773383, "learning_rate": 4.955647450007743e-06, "loss": 0.4728, "step": 1551 }, { "epoch": 0.3837784371909001, "grad_norm": 0.855400428650959, "learning_rate": 4.955586476891514e-06, "loss": 0.4523, "step": 1552 }, { "epoch": 0.3840257171117705, "grad_norm": 0.854109040398388, "learning_rate": 4.955525462268669e-06, "loss": 0.4733, "step": 1553 }, { "epoch": 0.38427299703264095, "grad_norm": 0.8476868409597142, "learning_rate": 4.955464406140239e-06, "loss": 0.4342, "step": 1554 }, { "epoch": 0.3845202769535114, "grad_norm": 0.8221087986524699, "learning_rate": 4.955403308507257e-06, "loss": 0.4741, "step": 1555 }, { "epoch": 0.3847675568743818, "grad_norm": 0.8985974432339402, "learning_rate": 4.955342169370755e-06, "loss": 0.4764, "step": 1556 }, { "epoch": 0.3850148367952522, "grad_norm": 0.840966394271443, "learning_rate": 4.955280988731768e-06, "loss": 0.4677, "step": 1557 }, { "epoch": 0.38526211671612265, "grad_norm": 0.855372122677701, "learning_rate": 4.9552197665913284e-06, "loss": 0.4412, "step": 1558 }, { "epoch": 0.3855093966369931, "grad_norm": 0.8491148581437344, "learning_rate": 4.955158502950471e-06, "loss": 0.4804, "step": 1559 }, { "epoch": 0.3857566765578635, "grad_norm": 0.8354735780843734, "learning_rate": 4.955097197810233e-06, "loss": 0.461, "step": 1560 }, { "epoch": 0.3860039564787339, "grad_norm": 0.8421198371822611, "learning_rate": 4.955035851171648e-06, "loss": 0.4553, "step": 1561 }, { "epoch": 0.38625123639960435, "grad_norm": 0.8561721132938402, "learning_rate": 4.954974463035756e-06, "loss": 0.4647, "step": 1562 }, { "epoch": 0.38649851632047477, "grad_norm": 0.8326602684541324, "learning_rate": 4.9549130334035925e-06, "loss": 0.4217, "step": 1563 }, { "epoch": 0.3867457962413452, "grad_norm": 0.8318953344187765, "learning_rate": 4.954851562276196e-06, "loss": 0.476, "step": 1564 }, { "epoch": 0.3869930761622156, "grad_norm": 0.8104714755106442, "learning_rate": 4.954790049654608e-06, "loss": 0.4516, "step": 1565 }, { "epoch": 0.38724035608308605, "grad_norm": 0.8723051321964577, "learning_rate": 4.954728495539865e-06, "loss": 0.483, "step": 1566 }, { "epoch": 0.38748763600395647, "grad_norm": 0.8735820265601282, "learning_rate": 4.954666899933008e-06, "loss": 0.5133, "step": 1567 }, { "epoch": 0.3877349159248269, "grad_norm": 0.8858755298043255, "learning_rate": 4.954605262835079e-06, "loss": 0.4557, "step": 1568 }, { "epoch": 0.3879821958456973, "grad_norm": 0.875902503739144, "learning_rate": 4.954543584247121e-06, "loss": 0.4242, "step": 1569 }, { "epoch": 0.38822947576656774, "grad_norm": 0.7924017320572292, "learning_rate": 4.954481864170175e-06, "loss": 0.4822, "step": 1570 }, { "epoch": 0.38847675568743817, "grad_norm": 0.8362697886438909, "learning_rate": 4.9544201026052845e-06, "loss": 0.4602, "step": 1571 }, { "epoch": 0.3887240356083086, "grad_norm": 0.8916382978285358, "learning_rate": 4.954358299553492e-06, "loss": 0.4405, "step": 1572 }, { "epoch": 0.388971315529179, "grad_norm": 0.8634107397727967, "learning_rate": 4.954296455015846e-06, "loss": 0.456, "step": 1573 }, { "epoch": 0.38921859545004944, "grad_norm": 0.928281717353563, "learning_rate": 4.9542345689933875e-06, "loss": 0.466, "step": 1574 }, { "epoch": 0.38946587537091987, "grad_norm": 0.9131256535394252, "learning_rate": 4.954172641487165e-06, "loss": 0.4358, "step": 1575 }, { "epoch": 0.3897131552917903, "grad_norm": 0.8553374784847576, "learning_rate": 4.954110672498226e-06, "loss": 0.447, "step": 1576 }, { "epoch": 0.3899604352126607, "grad_norm": 0.8617713943665484, "learning_rate": 4.954048662027615e-06, "loss": 0.454, "step": 1577 }, { "epoch": 0.39020771513353114, "grad_norm": 0.8378470097296922, "learning_rate": 4.953986610076383e-06, "loss": 0.457, "step": 1578 }, { "epoch": 0.39045499505440157, "grad_norm": 0.890341315715719, "learning_rate": 4.953924516645578e-06, "loss": 0.4542, "step": 1579 }, { "epoch": 0.390702274975272, "grad_norm": 0.867350939044516, "learning_rate": 4.953862381736249e-06, "loss": 0.4343, "step": 1580 }, { "epoch": 0.3909495548961424, "grad_norm": 0.8877957331446623, "learning_rate": 4.953800205349446e-06, "loss": 0.488, "step": 1581 }, { "epoch": 0.39119683481701284, "grad_norm": 0.8807877558443802, "learning_rate": 4.953737987486221e-06, "loss": 0.4735, "step": 1582 }, { "epoch": 0.39144411473788326, "grad_norm": 0.8782111156976654, "learning_rate": 4.953675728147625e-06, "loss": 0.448, "step": 1583 }, { "epoch": 0.3916913946587537, "grad_norm": 0.8500467289046308, "learning_rate": 4.953613427334711e-06, "loss": 0.4617, "step": 1584 }, { "epoch": 0.3919386745796241, "grad_norm": 0.8409368715056174, "learning_rate": 4.953551085048531e-06, "loss": 0.4684, "step": 1585 }, { "epoch": 0.39218595450049454, "grad_norm": 0.8522470314278595, "learning_rate": 4.95348870129014e-06, "loss": 0.4592, "step": 1586 }, { "epoch": 0.39243323442136496, "grad_norm": 0.8168922995846802, "learning_rate": 4.953426276060592e-06, "loss": 0.4258, "step": 1587 }, { "epoch": 0.3926805143422354, "grad_norm": 0.8256035919532246, "learning_rate": 4.953363809360942e-06, "loss": 0.4665, "step": 1588 }, { "epoch": 0.3929277942631058, "grad_norm": 0.8541007709621282, "learning_rate": 4.953301301192246e-06, "loss": 0.4363, "step": 1589 }, { "epoch": 0.39317507418397624, "grad_norm": 0.8882561921472727, "learning_rate": 4.95323875155556e-06, "loss": 0.4635, "step": 1590 }, { "epoch": 0.39342235410484666, "grad_norm": 0.8501720096698456, "learning_rate": 4.953176160451942e-06, "loss": 0.4653, "step": 1591 }, { "epoch": 0.3936696340257171, "grad_norm": 0.8721229757098223, "learning_rate": 4.95311352788245e-06, "loss": 0.4806, "step": 1592 }, { "epoch": 0.3939169139465875, "grad_norm": 0.8979202349196487, "learning_rate": 4.953050853848143e-06, "loss": 0.454, "step": 1593 }, { "epoch": 0.39416419386745793, "grad_norm": 0.8760747889780947, "learning_rate": 4.9529881383500785e-06, "loss": 0.4763, "step": 1594 }, { "epoch": 0.3944114737883284, "grad_norm": 0.8452437439381595, "learning_rate": 4.9529253813893185e-06, "loss": 0.4282, "step": 1595 }, { "epoch": 0.39465875370919884, "grad_norm": 0.8396178678462056, "learning_rate": 4.952862582966923e-06, "loss": 0.4531, "step": 1596 }, { "epoch": 0.39490603363006926, "grad_norm": 0.8401292338194442, "learning_rate": 4.9527997430839535e-06, "loss": 0.4677, "step": 1597 }, { "epoch": 0.3951533135509397, "grad_norm": 0.8078584240478519, "learning_rate": 4.952736861741473e-06, "loss": 0.4612, "step": 1598 }, { "epoch": 0.3954005934718101, "grad_norm": 0.8580871690645315, "learning_rate": 4.952673938940543e-06, "loss": 0.4561, "step": 1599 }, { "epoch": 0.39564787339268054, "grad_norm": 0.8309114317557393, "learning_rate": 4.952610974682228e-06, "loss": 0.4587, "step": 1600 }, { "epoch": 0.39589515331355096, "grad_norm": 0.8018195354020252, "learning_rate": 4.952547968967592e-06, "loss": 0.4764, "step": 1601 }, { "epoch": 0.3961424332344214, "grad_norm": 0.8227914040250693, "learning_rate": 4.9524849217977e-06, "loss": 0.5004, "step": 1602 }, { "epoch": 0.3963897131552918, "grad_norm": 0.9344301223745453, "learning_rate": 4.952421833173618e-06, "loss": 0.4284, "step": 1603 }, { "epoch": 0.39663699307616224, "grad_norm": 0.8125773670413535, "learning_rate": 4.952358703096412e-06, "loss": 0.4878, "step": 1604 }, { "epoch": 0.39688427299703266, "grad_norm": 0.8794216357490461, "learning_rate": 4.952295531567149e-06, "loss": 0.4417, "step": 1605 }, { "epoch": 0.3971315529179031, "grad_norm": 0.8785534278955001, "learning_rate": 4.952232318586897e-06, "loss": 0.4572, "step": 1606 }, { "epoch": 0.3973788328387735, "grad_norm": 0.8732103433976532, "learning_rate": 4.952169064156724e-06, "loss": 0.4628, "step": 1607 }, { "epoch": 0.39762611275964393, "grad_norm": 0.8388512265824324, "learning_rate": 4.952105768277701e-06, "loss": 0.4819, "step": 1608 }, { "epoch": 0.39787339268051436, "grad_norm": 0.8531960312588226, "learning_rate": 4.9520424309508954e-06, "loss": 0.4596, "step": 1609 }, { "epoch": 0.3981206726013848, "grad_norm": 0.8990481924298742, "learning_rate": 4.951979052177379e-06, "loss": 0.4679, "step": 1610 }, { "epoch": 0.3983679525222552, "grad_norm": 0.9241672808577421, "learning_rate": 4.9519156319582226e-06, "loss": 0.4409, "step": 1611 }, { "epoch": 0.39861523244312563, "grad_norm": 0.8075821047905285, "learning_rate": 4.9518521702945e-06, "loss": 0.4304, "step": 1612 }, { "epoch": 0.39886251236399606, "grad_norm": 0.8439405328812108, "learning_rate": 4.951788667187281e-06, "loss": 0.4412, "step": 1613 }, { "epoch": 0.3991097922848665, "grad_norm": 0.8936493881653371, "learning_rate": 4.95172512263764e-06, "loss": 0.446, "step": 1614 }, { "epoch": 0.3993570722057369, "grad_norm": 0.8718520805400518, "learning_rate": 4.9516615366466535e-06, "loss": 0.4565, "step": 1615 }, { "epoch": 0.39960435212660733, "grad_norm": 0.8336286918355128, "learning_rate": 4.951597909215393e-06, "loss": 0.4722, "step": 1616 }, { "epoch": 0.39985163204747776, "grad_norm": 0.8757209786566342, "learning_rate": 4.951534240344936e-06, "loss": 0.4569, "step": 1617 }, { "epoch": 0.4000989119683482, "grad_norm": 0.8705182757213266, "learning_rate": 4.951470530036358e-06, "loss": 0.4605, "step": 1618 }, { "epoch": 0.4003461918892186, "grad_norm": 0.8640519203759794, "learning_rate": 4.951406778290735e-06, "loss": 0.4554, "step": 1619 }, { "epoch": 0.40059347181008903, "grad_norm": 0.8362305667086866, "learning_rate": 4.951342985109147e-06, "loss": 0.4673, "step": 1620 }, { "epoch": 0.40084075173095945, "grad_norm": 0.855065016510783, "learning_rate": 4.951279150492669e-06, "loss": 0.4581, "step": 1621 }, { "epoch": 0.4010880316518299, "grad_norm": 0.9169850417037374, "learning_rate": 4.9512152744423836e-06, "loss": 0.4765, "step": 1622 }, { "epoch": 0.4013353115727003, "grad_norm": 0.8578287913225426, "learning_rate": 4.951151356959368e-06, "loss": 0.4479, "step": 1623 }, { "epoch": 0.40158259149357073, "grad_norm": 0.9055998939207615, "learning_rate": 4.951087398044702e-06, "loss": 0.4566, "step": 1624 }, { "epoch": 0.40182987141444115, "grad_norm": 0.8854013010161614, "learning_rate": 4.951023397699469e-06, "loss": 0.4654, "step": 1625 }, { "epoch": 0.4020771513353116, "grad_norm": 0.8676151411605866, "learning_rate": 4.9509593559247505e-06, "loss": 0.4685, "step": 1626 }, { "epoch": 0.402324431256182, "grad_norm": 0.8113407989272491, "learning_rate": 4.950895272721627e-06, "loss": 0.4685, "step": 1627 }, { "epoch": 0.4025717111770524, "grad_norm": 0.8210151970043335, "learning_rate": 4.950831148091184e-06, "loss": 0.4517, "step": 1628 }, { "epoch": 0.40281899109792285, "grad_norm": 0.8853716567053723, "learning_rate": 4.950766982034504e-06, "loss": 0.4227, "step": 1629 }, { "epoch": 0.4030662710187933, "grad_norm": 0.9285034946776956, "learning_rate": 4.950702774552671e-06, "loss": 0.4095, "step": 1630 }, { "epoch": 0.4033135509396637, "grad_norm": 0.8081586648223669, "learning_rate": 4.950638525646773e-06, "loss": 0.4483, "step": 1631 }, { "epoch": 0.4035608308605341, "grad_norm": 0.814407403090029, "learning_rate": 4.9505742353178935e-06, "loss": 0.4395, "step": 1632 }, { "epoch": 0.40380811078140455, "grad_norm": 0.9318384755859314, "learning_rate": 4.9505099035671185e-06, "loss": 0.4528, "step": 1633 }, { "epoch": 0.404055390702275, "grad_norm": 0.8556784555850027, "learning_rate": 4.950445530395539e-06, "loss": 0.4551, "step": 1634 }, { "epoch": 0.4043026706231454, "grad_norm": 0.8236028720202845, "learning_rate": 4.9503811158042394e-06, "loss": 0.4969, "step": 1635 }, { "epoch": 0.4045499505440158, "grad_norm": 0.7955100118692611, "learning_rate": 4.9503166597943105e-06, "loss": 0.4911, "step": 1636 }, { "epoch": 0.40479723046488625, "grad_norm": 0.8321762635331581, "learning_rate": 4.950252162366841e-06, "loss": 0.4743, "step": 1637 }, { "epoch": 0.4050445103857567, "grad_norm": 0.892782367227542, "learning_rate": 4.950187623522922e-06, "loss": 0.4635, "step": 1638 }, { "epoch": 0.4052917903066271, "grad_norm": 0.8290384221114429, "learning_rate": 4.950123043263644e-06, "loss": 0.4701, "step": 1639 }, { "epoch": 0.4055390702274975, "grad_norm": 0.8635462718575356, "learning_rate": 4.9500584215900975e-06, "loss": 0.4315, "step": 1640 }, { "epoch": 0.40578635014836795, "grad_norm": 0.8303892565178683, "learning_rate": 4.949993758503376e-06, "loss": 0.4925, "step": 1641 }, { "epoch": 0.40603363006923837, "grad_norm": 0.840727633311021, "learning_rate": 4.949929054004572e-06, "loss": 0.4629, "step": 1642 }, { "epoch": 0.4062809099901088, "grad_norm": 0.8634706522730431, "learning_rate": 4.949864308094779e-06, "loss": 0.4796, "step": 1643 }, { "epoch": 0.4065281899109792, "grad_norm": 0.8361394254501443, "learning_rate": 4.949799520775092e-06, "loss": 0.4568, "step": 1644 }, { "epoch": 0.40677546983184965, "grad_norm": 0.8139921355363459, "learning_rate": 4.9497346920466074e-06, "loss": 0.4557, "step": 1645 }, { "epoch": 0.40702274975272007, "grad_norm": 0.812250032386861, "learning_rate": 4.949669821910418e-06, "loss": 0.4566, "step": 1646 }, { "epoch": 0.4072700296735905, "grad_norm": 0.828761594585985, "learning_rate": 4.949604910367623e-06, "loss": 0.4862, "step": 1647 }, { "epoch": 0.4075173095944609, "grad_norm": 0.8177756231917882, "learning_rate": 4.949539957419317e-06, "loss": 0.4586, "step": 1648 }, { "epoch": 0.40776458951533134, "grad_norm": 0.8214768220432287, "learning_rate": 4.949474963066599e-06, "loss": 0.449, "step": 1649 }, { "epoch": 0.40801186943620177, "grad_norm": 0.8194371482269276, "learning_rate": 4.9494099273105686e-06, "loss": 0.457, "step": 1650 }, { "epoch": 0.4082591493570722, "grad_norm": 0.8244018158911955, "learning_rate": 4.9493448501523245e-06, "loss": 0.4345, "step": 1651 }, { "epoch": 0.4085064292779426, "grad_norm": 0.8009139765863647, "learning_rate": 4.949279731592967e-06, "loss": 0.4791, "step": 1652 }, { "epoch": 0.40875370919881304, "grad_norm": 0.8116159409968832, "learning_rate": 4.949214571633595e-06, "loss": 0.4369, "step": 1653 }, { "epoch": 0.40900098911968347, "grad_norm": 0.8562285011172734, "learning_rate": 4.949149370275311e-06, "loss": 0.4557, "step": 1654 }, { "epoch": 0.4092482690405539, "grad_norm": 0.8191314049207302, "learning_rate": 4.949084127519219e-06, "loss": 0.4683, "step": 1655 }, { "epoch": 0.4094955489614243, "grad_norm": 0.7778969873901285, "learning_rate": 4.949018843366419e-06, "loss": 0.4639, "step": 1656 }, { "epoch": 0.40974282888229474, "grad_norm": 0.806816984603995, "learning_rate": 4.9489535178180155e-06, "loss": 0.4335, "step": 1657 }, { "epoch": 0.40999010880316517, "grad_norm": 0.8215100542029089, "learning_rate": 4.9488881508751135e-06, "loss": 0.4436, "step": 1658 }, { "epoch": 0.4102373887240356, "grad_norm": 0.8281492795541459, "learning_rate": 4.948822742538817e-06, "loss": 0.4521, "step": 1659 }, { "epoch": 0.410484668644906, "grad_norm": 0.897412440002854, "learning_rate": 4.9487572928102315e-06, "loss": 0.4777, "step": 1660 }, { "epoch": 0.41073194856577644, "grad_norm": 0.8231014853022581, "learning_rate": 4.948691801690464e-06, "loss": 0.4668, "step": 1661 }, { "epoch": 0.41097922848664686, "grad_norm": 0.8309257711014859, "learning_rate": 4.948626269180621e-06, "loss": 0.423, "step": 1662 }, { "epoch": 0.4112265084075173, "grad_norm": 0.788186095486044, "learning_rate": 4.94856069528181e-06, "loss": 0.4461, "step": 1663 }, { "epoch": 0.4114737883283877, "grad_norm": 0.8031013505892846, "learning_rate": 4.948495079995139e-06, "loss": 0.4581, "step": 1664 }, { "epoch": 0.41172106824925814, "grad_norm": 0.8214990535999098, "learning_rate": 4.948429423321719e-06, "loss": 0.4386, "step": 1665 }, { "epoch": 0.41196834817012856, "grad_norm": 0.8697067040191242, "learning_rate": 4.9483637252626585e-06, "loss": 0.4274, "step": 1666 }, { "epoch": 0.412215628090999, "grad_norm": 0.8208094818047257, "learning_rate": 4.948297985819067e-06, "loss": 0.4561, "step": 1667 }, { "epoch": 0.4124629080118694, "grad_norm": 0.8199301154122366, "learning_rate": 4.9482322049920575e-06, "loss": 0.4606, "step": 1668 }, { "epoch": 0.41271018793273984, "grad_norm": 0.7865329706718255, "learning_rate": 4.948166382782741e-06, "loss": 0.4564, "step": 1669 }, { "epoch": 0.41295746785361026, "grad_norm": 0.8352567987592875, "learning_rate": 4.948100519192229e-06, "loss": 0.4558, "step": 1670 }, { "epoch": 0.4132047477744807, "grad_norm": 0.8861005482099316, "learning_rate": 4.9480346142216375e-06, "loss": 0.4658, "step": 1671 }, { "epoch": 0.4134520276953511, "grad_norm": 0.852460995747833, "learning_rate": 4.947968667872079e-06, "loss": 0.4487, "step": 1672 }, { "epoch": 0.4136993076162216, "grad_norm": 0.8173953757463533, "learning_rate": 4.947902680144667e-06, "loss": 0.4554, "step": 1673 }, { "epoch": 0.413946587537092, "grad_norm": 0.8668203863267794, "learning_rate": 4.947836651040519e-06, "loss": 0.4846, "step": 1674 }, { "epoch": 0.41419386745796244, "grad_norm": 0.7885997646188458, "learning_rate": 4.94777058056075e-06, "loss": 0.4815, "step": 1675 }, { "epoch": 0.41444114737883286, "grad_norm": 0.8506062284072559, "learning_rate": 4.947704468706477e-06, "loss": 0.4362, "step": 1676 }, { "epoch": 0.4146884272997033, "grad_norm": 0.8786876540859795, "learning_rate": 4.947638315478817e-06, "loss": 0.4119, "step": 1677 }, { "epoch": 0.4149357072205737, "grad_norm": 0.782177966395444, "learning_rate": 4.9475721208788885e-06, "loss": 0.478, "step": 1678 }, { "epoch": 0.41518298714144414, "grad_norm": 0.8478757384596047, "learning_rate": 4.94750588490781e-06, "loss": 0.4391, "step": 1679 }, { "epoch": 0.41543026706231456, "grad_norm": 0.811351078735783, "learning_rate": 4.947439607566703e-06, "loss": 0.4447, "step": 1680 }, { "epoch": 0.415677546983185, "grad_norm": 0.7843424493602333, "learning_rate": 4.947373288856685e-06, "loss": 0.4617, "step": 1681 }, { "epoch": 0.4159248269040554, "grad_norm": 0.8235806060021909, "learning_rate": 4.947306928778879e-06, "loss": 0.4864, "step": 1682 }, { "epoch": 0.41617210682492584, "grad_norm": 0.847868232417344, "learning_rate": 4.947240527334406e-06, "loss": 0.4773, "step": 1683 }, { "epoch": 0.41641938674579626, "grad_norm": 0.8697925069615653, "learning_rate": 4.947174084524387e-06, "loss": 0.4453, "step": 1684 }, { "epoch": 0.4166666666666667, "grad_norm": 0.8142283134114524, "learning_rate": 4.947107600349948e-06, "loss": 0.4721, "step": 1685 }, { "epoch": 0.4169139465875371, "grad_norm": 0.8511529611862372, "learning_rate": 4.947041074812211e-06, "loss": 0.4462, "step": 1686 }, { "epoch": 0.41716122650840753, "grad_norm": 0.8329691094863567, "learning_rate": 4.946974507912301e-06, "loss": 0.4389, "step": 1687 }, { "epoch": 0.41740850642927796, "grad_norm": 0.8428860586768815, "learning_rate": 4.946907899651342e-06, "loss": 0.4514, "step": 1688 }, { "epoch": 0.4176557863501484, "grad_norm": 0.81243219915672, "learning_rate": 4.946841250030461e-06, "loss": 0.4397, "step": 1689 }, { "epoch": 0.4179030662710188, "grad_norm": 0.8532101717804385, "learning_rate": 4.946774559050785e-06, "loss": 0.5014, "step": 1690 }, { "epoch": 0.41815034619188923, "grad_norm": 0.8488612058084708, "learning_rate": 4.9467078267134396e-06, "loss": 0.4745, "step": 1691 }, { "epoch": 0.41839762611275966, "grad_norm": 0.8429030041129258, "learning_rate": 4.946641053019554e-06, "loss": 0.4668, "step": 1692 }, { "epoch": 0.4186449060336301, "grad_norm": 0.8126448430228969, "learning_rate": 4.9465742379702574e-06, "loss": 0.4771, "step": 1693 }, { "epoch": 0.4188921859545005, "grad_norm": 0.8732774473739731, "learning_rate": 4.946507381566677e-06, "loss": 0.4761, "step": 1694 }, { "epoch": 0.41913946587537093, "grad_norm": 0.8717402341975771, "learning_rate": 4.946440483809946e-06, "loss": 0.4822, "step": 1695 }, { "epoch": 0.41938674579624136, "grad_norm": 0.8502511813109619, "learning_rate": 4.946373544701193e-06, "loss": 0.4152, "step": 1696 }, { "epoch": 0.4196340257171118, "grad_norm": 0.8596676366588083, "learning_rate": 4.9463065642415485e-06, "loss": 0.4362, "step": 1697 }, { "epoch": 0.4198813056379822, "grad_norm": 0.8901457121778148, "learning_rate": 4.9462395424321476e-06, "loss": 0.4418, "step": 1698 }, { "epoch": 0.42012858555885263, "grad_norm": 0.9208361238486875, "learning_rate": 4.946172479274121e-06, "loss": 0.4329, "step": 1699 }, { "epoch": 0.42037586547972305, "grad_norm": 0.8547536386643835, "learning_rate": 4.946105374768603e-06, "loss": 0.4873, "step": 1700 }, { "epoch": 0.4206231454005935, "grad_norm": 0.8508039548865888, "learning_rate": 4.9460382289167284e-06, "loss": 0.4251, "step": 1701 }, { "epoch": 0.4208704253214639, "grad_norm": 0.7970088950959086, "learning_rate": 4.945971041719631e-06, "loss": 0.4671, "step": 1702 }, { "epoch": 0.42111770524233433, "grad_norm": 0.7841689731189674, "learning_rate": 4.945903813178447e-06, "loss": 0.467, "step": 1703 }, { "epoch": 0.42136498516320475, "grad_norm": 0.8173705703188501, "learning_rate": 4.945836543294312e-06, "loss": 0.4546, "step": 1704 }, { "epoch": 0.4216122650840752, "grad_norm": 0.8161859578647371, "learning_rate": 4.945769232068364e-06, "loss": 0.4916, "step": 1705 }, { "epoch": 0.4218595450049456, "grad_norm": 0.8198882926959018, "learning_rate": 4.945701879501742e-06, "loss": 0.4505, "step": 1706 }, { "epoch": 0.422106824925816, "grad_norm": 0.8118644016805705, "learning_rate": 4.945634485595582e-06, "loss": 0.4554, "step": 1707 }, { "epoch": 0.42235410484668645, "grad_norm": 0.8913670827889651, "learning_rate": 4.945567050351024e-06, "loss": 0.4465, "step": 1708 }, { "epoch": 0.4226013847675569, "grad_norm": 0.8459259044723303, "learning_rate": 4.945499573769209e-06, "loss": 0.4573, "step": 1709 }, { "epoch": 0.4228486646884273, "grad_norm": 0.826742105916524, "learning_rate": 4.945432055851276e-06, "loss": 0.4777, "step": 1710 }, { "epoch": 0.4230959446092977, "grad_norm": 0.7946204416088612, "learning_rate": 4.945364496598366e-06, "loss": 0.4572, "step": 1711 }, { "epoch": 0.42334322453016815, "grad_norm": 0.8168683419578028, "learning_rate": 4.9452968960116235e-06, "loss": 0.448, "step": 1712 }, { "epoch": 0.4235905044510386, "grad_norm": 0.8297167888254489, "learning_rate": 4.945229254092188e-06, "loss": 0.4519, "step": 1713 }, { "epoch": 0.423837784371909, "grad_norm": 0.8970476493506931, "learning_rate": 4.945161570841205e-06, "loss": 0.4432, "step": 1714 }, { "epoch": 0.4240850642927794, "grad_norm": 0.8465002130673689, "learning_rate": 4.945093846259817e-06, "loss": 0.4476, "step": 1715 }, { "epoch": 0.42433234421364985, "grad_norm": 0.8347667071381571, "learning_rate": 4.9450260803491705e-06, "loss": 0.4394, "step": 1716 }, { "epoch": 0.4245796241345203, "grad_norm": 0.814417603831734, "learning_rate": 4.94495827311041e-06, "loss": 0.4542, "step": 1717 }, { "epoch": 0.4248269040553907, "grad_norm": 0.9018559702844308, "learning_rate": 4.944890424544681e-06, "loss": 0.4449, "step": 1718 }, { "epoch": 0.4250741839762611, "grad_norm": 0.7992025415966338, "learning_rate": 4.944822534653131e-06, "loss": 0.462, "step": 1719 }, { "epoch": 0.42532146389713155, "grad_norm": 0.8037703415889722, "learning_rate": 4.944754603436908e-06, "loss": 0.4583, "step": 1720 }, { "epoch": 0.42556874381800197, "grad_norm": 0.8439242492521162, "learning_rate": 4.94468663089716e-06, "loss": 0.4465, "step": 1721 }, { "epoch": 0.4258160237388724, "grad_norm": 0.8326257136253545, "learning_rate": 4.944618617035035e-06, "loss": 0.4599, "step": 1722 }, { "epoch": 0.4260633036597428, "grad_norm": 0.8225161900870498, "learning_rate": 4.944550561851685e-06, "loss": 0.4424, "step": 1723 }, { "epoch": 0.42631058358061324, "grad_norm": 0.8117853626022742, "learning_rate": 4.944482465348257e-06, "loss": 0.4518, "step": 1724 }, { "epoch": 0.42655786350148367, "grad_norm": 0.8618327982125517, "learning_rate": 4.944414327525904e-06, "loss": 0.4339, "step": 1725 }, { "epoch": 0.4268051434223541, "grad_norm": 0.8386378955346201, "learning_rate": 4.944346148385777e-06, "loss": 0.4841, "step": 1726 }, { "epoch": 0.4270524233432245, "grad_norm": 0.8772973032751197, "learning_rate": 4.9442779279290295e-06, "loss": 0.4892, "step": 1727 }, { "epoch": 0.42729970326409494, "grad_norm": 0.908072104090296, "learning_rate": 4.944209666156814e-06, "loss": 0.457, "step": 1728 }, { "epoch": 0.42754698318496537, "grad_norm": 0.821449218350982, "learning_rate": 4.944141363070284e-06, "loss": 0.4392, "step": 1729 }, { "epoch": 0.4277942631058358, "grad_norm": 0.8316991236817805, "learning_rate": 4.944073018670594e-06, "loss": 0.4764, "step": 1730 }, { "epoch": 0.4280415430267062, "grad_norm": 0.8694517096681799, "learning_rate": 4.9440046329589e-06, "loss": 0.4539, "step": 1731 }, { "epoch": 0.42828882294757664, "grad_norm": 0.8471764393664999, "learning_rate": 4.943936205936359e-06, "loss": 0.4114, "step": 1732 }, { "epoch": 0.42853610286844707, "grad_norm": 0.8183886076647181, "learning_rate": 4.943867737604123e-06, "loss": 0.4322, "step": 1733 }, { "epoch": 0.4287833827893175, "grad_norm": 0.8579245008449513, "learning_rate": 4.943799227963354e-06, "loss": 0.4497, "step": 1734 }, { "epoch": 0.4290306627101879, "grad_norm": 0.8792696834992108, "learning_rate": 4.943730677015209e-06, "loss": 0.4437, "step": 1735 }, { "epoch": 0.42927794263105834, "grad_norm": 0.8196225113446304, "learning_rate": 4.9436620847608455e-06, "loss": 0.4486, "step": 1736 }, { "epoch": 0.42952522255192876, "grad_norm": 0.792556437218192, "learning_rate": 4.943593451201424e-06, "loss": 0.4464, "step": 1737 }, { "epoch": 0.4297725024727992, "grad_norm": 0.7962854260626189, "learning_rate": 4.943524776338104e-06, "loss": 0.4723, "step": 1738 }, { "epoch": 0.4300197823936696, "grad_norm": 0.8352264957077538, "learning_rate": 4.943456060172046e-06, "loss": 0.4501, "step": 1739 }, { "epoch": 0.43026706231454004, "grad_norm": 0.8400190502171467, "learning_rate": 4.943387302704412e-06, "loss": 0.454, "step": 1740 }, { "epoch": 0.43051434223541046, "grad_norm": 0.8276334232747109, "learning_rate": 4.943318503936364e-06, "loss": 0.4144, "step": 1741 }, { "epoch": 0.4307616221562809, "grad_norm": 0.8302199804463065, "learning_rate": 4.943249663869066e-06, "loss": 0.4686, "step": 1742 }, { "epoch": 0.4310089020771513, "grad_norm": 0.8207197842738865, "learning_rate": 4.94318078250368e-06, "loss": 0.4644, "step": 1743 }, { "epoch": 0.43125618199802174, "grad_norm": 0.834730998310763, "learning_rate": 4.943111859841371e-06, "loss": 0.4695, "step": 1744 }, { "epoch": 0.43150346191889216, "grad_norm": 0.8070569226814367, "learning_rate": 4.943042895883304e-06, "loss": 0.466, "step": 1745 }, { "epoch": 0.4317507418397626, "grad_norm": 0.8163444224681524, "learning_rate": 4.942973890630645e-06, "loss": 0.4782, "step": 1746 }, { "epoch": 0.431998021760633, "grad_norm": 0.83208535669435, "learning_rate": 4.942904844084559e-06, "loss": 0.4534, "step": 1747 }, { "epoch": 0.43224530168150344, "grad_norm": 0.7887636687926843, "learning_rate": 4.942835756246215e-06, "loss": 0.4407, "step": 1748 }, { "epoch": 0.43249258160237386, "grad_norm": 0.8521733444979064, "learning_rate": 4.942766627116779e-06, "loss": 0.459, "step": 1749 }, { "epoch": 0.43273986152324434, "grad_norm": 0.8518317628217601, "learning_rate": 4.942697456697422e-06, "loss": 0.4615, "step": 1750 }, { "epoch": 0.43298714144411476, "grad_norm": 0.9185604966693071, "learning_rate": 4.94262824498931e-06, "loss": 0.4503, "step": 1751 }, { "epoch": 0.4332344213649852, "grad_norm": 0.8103056740307707, "learning_rate": 4.942558991993615e-06, "loss": 0.4554, "step": 1752 }, { "epoch": 0.4334817012858556, "grad_norm": 0.799261239542613, "learning_rate": 4.942489697711508e-06, "loss": 0.4603, "step": 1753 }, { "epoch": 0.43372898120672604, "grad_norm": 0.8425868034525744, "learning_rate": 4.9424203621441585e-06, "loss": 0.4639, "step": 1754 }, { "epoch": 0.43397626112759646, "grad_norm": 0.813736917430641, "learning_rate": 4.9423509852927395e-06, "loss": 0.4504, "step": 1755 }, { "epoch": 0.4342235410484669, "grad_norm": 0.8325032700998303, "learning_rate": 4.942281567158424e-06, "loss": 0.4486, "step": 1756 }, { "epoch": 0.4344708209693373, "grad_norm": 0.8578441243657652, "learning_rate": 4.942212107742384e-06, "loss": 0.4324, "step": 1757 }, { "epoch": 0.43471810089020774, "grad_norm": 0.7950101986986995, "learning_rate": 4.9421426070457946e-06, "loss": 0.455, "step": 1758 }, { "epoch": 0.43496538081107816, "grad_norm": 0.8464725428496769, "learning_rate": 4.94207306506983e-06, "loss": 0.4338, "step": 1759 }, { "epoch": 0.4352126607319486, "grad_norm": 0.8469706639469825, "learning_rate": 4.942003481815666e-06, "loss": 0.4288, "step": 1760 }, { "epoch": 0.435459940652819, "grad_norm": 0.8775115383798611, "learning_rate": 4.94193385728448e-06, "loss": 0.4337, "step": 1761 }, { "epoch": 0.43570722057368944, "grad_norm": 0.7880837358851591, "learning_rate": 4.9418641914774465e-06, "loss": 0.4471, "step": 1762 }, { "epoch": 0.43595450049455986, "grad_norm": 0.8742884553070869, "learning_rate": 4.9417944843957445e-06, "loss": 0.4342, "step": 1763 }, { "epoch": 0.4362017804154303, "grad_norm": 0.8660240978853062, "learning_rate": 4.941724736040552e-06, "loss": 0.4734, "step": 1764 }, { "epoch": 0.4364490603363007, "grad_norm": 0.855246178964046, "learning_rate": 4.941654946413048e-06, "loss": 0.4821, "step": 1765 }, { "epoch": 0.43669634025717113, "grad_norm": 0.8049402509434078, "learning_rate": 4.941585115514412e-06, "loss": 0.4572, "step": 1766 }, { "epoch": 0.43694362017804156, "grad_norm": 0.8102549094132898, "learning_rate": 4.9415152433458245e-06, "loss": 0.4627, "step": 1767 }, { "epoch": 0.437190900098912, "grad_norm": 0.855260779736409, "learning_rate": 4.941445329908466e-06, "loss": 0.4486, "step": 1768 }, { "epoch": 0.4374381800197824, "grad_norm": 0.8398694779387051, "learning_rate": 4.94137537520352e-06, "loss": 0.4443, "step": 1769 }, { "epoch": 0.43768545994065283, "grad_norm": 0.8495857810077256, "learning_rate": 4.941305379232166e-06, "loss": 0.4494, "step": 1770 }, { "epoch": 0.43793273986152326, "grad_norm": 0.8457483281660204, "learning_rate": 4.941235341995589e-06, "loss": 0.4213, "step": 1771 }, { "epoch": 0.4381800197823937, "grad_norm": 0.8416695102723704, "learning_rate": 4.941165263494974e-06, "loss": 0.4329, "step": 1772 }, { "epoch": 0.4384272997032641, "grad_norm": 0.8077854346661958, "learning_rate": 4.9410951437315034e-06, "loss": 0.4563, "step": 1773 }, { "epoch": 0.43867457962413453, "grad_norm": 0.8591862542056117, "learning_rate": 4.941024982706363e-06, "loss": 0.4395, "step": 1774 }, { "epoch": 0.43892185954500496, "grad_norm": 0.8865112531995624, "learning_rate": 4.9409547804207396e-06, "loss": 0.4369, "step": 1775 }, { "epoch": 0.4391691394658754, "grad_norm": 0.79342992597239, "learning_rate": 4.940884536875817e-06, "loss": 0.45, "step": 1776 }, { "epoch": 0.4394164193867458, "grad_norm": 0.7881270666911689, "learning_rate": 4.940814252072787e-06, "loss": 0.4523, "step": 1777 }, { "epoch": 0.43966369930761623, "grad_norm": 0.855046856544146, "learning_rate": 4.9407439260128345e-06, "loss": 0.4397, "step": 1778 }, { "epoch": 0.43991097922848665, "grad_norm": 0.8388877807693309, "learning_rate": 4.940673558697149e-06, "loss": 0.4475, "step": 1779 }, { "epoch": 0.4401582591493571, "grad_norm": 0.8512455561172892, "learning_rate": 4.940603150126919e-06, "loss": 0.4456, "step": 1780 }, { "epoch": 0.4404055390702275, "grad_norm": 0.8861587872050732, "learning_rate": 4.940532700303337e-06, "loss": 0.4676, "step": 1781 }, { "epoch": 0.4406528189910979, "grad_norm": 0.8159176864311103, "learning_rate": 4.940462209227592e-06, "loss": 0.4761, "step": 1782 }, { "epoch": 0.44090009891196835, "grad_norm": 0.8589329700847905, "learning_rate": 4.9403916769008755e-06, "loss": 0.4238, "step": 1783 }, { "epoch": 0.4411473788328388, "grad_norm": 0.8120103864684454, "learning_rate": 4.940321103324379e-06, "loss": 0.4533, "step": 1784 }, { "epoch": 0.4413946587537092, "grad_norm": 0.830021957146338, "learning_rate": 4.940250488499298e-06, "loss": 0.4708, "step": 1785 }, { "epoch": 0.4416419386745796, "grad_norm": 0.8784720779654032, "learning_rate": 4.9401798324268236e-06, "loss": 0.4853, "step": 1786 }, { "epoch": 0.44188921859545005, "grad_norm": 0.8300399974772742, "learning_rate": 4.940109135108152e-06, "loss": 0.4547, "step": 1787 }, { "epoch": 0.4421364985163205, "grad_norm": 0.8155400116963316, "learning_rate": 4.940038396544476e-06, "loss": 0.4464, "step": 1788 }, { "epoch": 0.4423837784371909, "grad_norm": 0.827571249140311, "learning_rate": 4.939967616736994e-06, "loss": 0.4337, "step": 1789 }, { "epoch": 0.4426310583580613, "grad_norm": 0.8167550515040979, "learning_rate": 4.939896795686899e-06, "loss": 0.4486, "step": 1790 }, { "epoch": 0.44287833827893175, "grad_norm": 0.8494832396966039, "learning_rate": 4.939825933395391e-06, "loss": 0.4496, "step": 1791 }, { "epoch": 0.4431256181998022, "grad_norm": 0.8003204538621603, "learning_rate": 4.939755029863667e-06, "loss": 0.4445, "step": 1792 }, { "epoch": 0.4433728981206726, "grad_norm": 0.7824474758817086, "learning_rate": 4.939684085092925e-06, "loss": 0.4465, "step": 1793 }, { "epoch": 0.443620178041543, "grad_norm": 0.8081718945058649, "learning_rate": 4.939613099084365e-06, "loss": 0.4528, "step": 1794 }, { "epoch": 0.44386745796241345, "grad_norm": 0.8495881551915857, "learning_rate": 4.939542071839185e-06, "loss": 0.4403, "step": 1795 }, { "epoch": 0.44411473788328387, "grad_norm": 0.7924639025505061, "learning_rate": 4.939471003358587e-06, "loss": 0.4559, "step": 1796 }, { "epoch": 0.4443620178041543, "grad_norm": 0.8065926677878719, "learning_rate": 4.939399893643773e-06, "loss": 0.4489, "step": 1797 }, { "epoch": 0.4446092977250247, "grad_norm": 0.8231537032809033, "learning_rate": 4.939328742695943e-06, "loss": 0.4511, "step": 1798 }, { "epoch": 0.44485657764589515, "grad_norm": 0.8629947617643108, "learning_rate": 4.939257550516302e-06, "loss": 0.452, "step": 1799 }, { "epoch": 0.44510385756676557, "grad_norm": 0.8452213524607881, "learning_rate": 4.939186317106051e-06, "loss": 0.4724, "step": 1800 }, { "epoch": 0.445351137487636, "grad_norm": 0.8406637491043357, "learning_rate": 4.939115042466397e-06, "loss": 0.435, "step": 1801 }, { "epoch": 0.4455984174085064, "grad_norm": 0.822963252538923, "learning_rate": 4.9390437265985415e-06, "loss": 0.4258, "step": 1802 }, { "epoch": 0.44584569732937684, "grad_norm": 0.7878763520585504, "learning_rate": 4.93897236950369e-06, "loss": 0.4608, "step": 1803 }, { "epoch": 0.44609297725024727, "grad_norm": 0.8349799067133575, "learning_rate": 4.938900971183053e-06, "loss": 0.4327, "step": 1804 }, { "epoch": 0.4463402571711177, "grad_norm": 0.8816160813241095, "learning_rate": 4.9388295316378325e-06, "loss": 0.4639, "step": 1805 }, { "epoch": 0.4465875370919881, "grad_norm": 0.84235020294276, "learning_rate": 4.938758050869238e-06, "loss": 0.4499, "step": 1806 }, { "epoch": 0.44683481701285854, "grad_norm": 0.8242327113382405, "learning_rate": 4.938686528878477e-06, "loss": 0.4735, "step": 1807 }, { "epoch": 0.44708209693372897, "grad_norm": 0.7886103886587141, "learning_rate": 4.93861496566676e-06, "loss": 0.4615, "step": 1808 }, { "epoch": 0.4473293768545994, "grad_norm": 0.8398273563191035, "learning_rate": 4.938543361235295e-06, "loss": 0.4269, "step": 1809 }, { "epoch": 0.4475766567754698, "grad_norm": 0.8573294083629199, "learning_rate": 4.938471715585293e-06, "loss": 0.4333, "step": 1810 }, { "epoch": 0.44782393669634024, "grad_norm": 0.8849773282454866, "learning_rate": 4.938400028717966e-06, "loss": 0.4464, "step": 1811 }, { "epoch": 0.44807121661721067, "grad_norm": 0.8569807993576385, "learning_rate": 4.938328300634524e-06, "loss": 0.4555, "step": 1812 }, { "epoch": 0.4483184965380811, "grad_norm": 0.8038399479751446, "learning_rate": 4.93825653133618e-06, "loss": 0.4432, "step": 1813 }, { "epoch": 0.4485657764589515, "grad_norm": 0.8316422195623451, "learning_rate": 4.938184720824148e-06, "loss": 0.4357, "step": 1814 }, { "epoch": 0.44881305637982194, "grad_norm": 0.7881623847336401, "learning_rate": 4.938112869099641e-06, "loss": 0.4457, "step": 1815 }, { "epoch": 0.44906033630069236, "grad_norm": 0.8095248000011614, "learning_rate": 4.9380409761638725e-06, "loss": 0.4675, "step": 1816 }, { "epoch": 0.4493076162215628, "grad_norm": 0.8142992574456057, "learning_rate": 4.937969042018059e-06, "loss": 0.4382, "step": 1817 }, { "epoch": 0.4495548961424332, "grad_norm": 0.8037598382565191, "learning_rate": 4.937897066663417e-06, "loss": 0.4547, "step": 1818 }, { "epoch": 0.44980217606330364, "grad_norm": 0.8029225397282946, "learning_rate": 4.937825050101162e-06, "loss": 0.4532, "step": 1819 }, { "epoch": 0.45004945598417406, "grad_norm": 0.8525725054243789, "learning_rate": 4.937752992332512e-06, "loss": 0.4452, "step": 1820 }, { "epoch": 0.4502967359050445, "grad_norm": 0.7978513030283886, "learning_rate": 4.937680893358683e-06, "loss": 0.4496, "step": 1821 }, { "epoch": 0.4505440158259149, "grad_norm": 0.8684712638463695, "learning_rate": 4.9376087531808964e-06, "loss": 0.4336, "step": 1822 }, { "epoch": 0.45079129574678534, "grad_norm": 0.8163578329839308, "learning_rate": 4.93753657180037e-06, "loss": 0.4639, "step": 1823 }, { "epoch": 0.45103857566765576, "grad_norm": 0.8225937594696441, "learning_rate": 4.937464349218325e-06, "loss": 0.4566, "step": 1824 }, { "epoch": 0.4512858555885262, "grad_norm": 0.8008255690442442, "learning_rate": 4.93739208543598e-06, "loss": 0.463, "step": 1825 }, { "epoch": 0.4515331355093966, "grad_norm": 0.8360458473751116, "learning_rate": 4.937319780454559e-06, "loss": 0.4221, "step": 1826 }, { "epoch": 0.45178041543026703, "grad_norm": 0.7805314420818854, "learning_rate": 4.937247434275283e-06, "loss": 0.4615, "step": 1827 }, { "epoch": 0.4520276953511375, "grad_norm": 0.8314479860480369, "learning_rate": 4.937175046899375e-06, "loss": 0.4869, "step": 1828 }, { "epoch": 0.45227497527200794, "grad_norm": 0.7954839890830541, "learning_rate": 4.937102618328058e-06, "loss": 0.4717, "step": 1829 }, { "epoch": 0.45252225519287836, "grad_norm": 0.7896791151974906, "learning_rate": 4.937030148562558e-06, "loss": 0.4561, "step": 1830 }, { "epoch": 0.4527695351137488, "grad_norm": 0.8315873837686758, "learning_rate": 4.936957637604097e-06, "loss": 0.4973, "step": 1831 }, { "epoch": 0.4530168150346192, "grad_norm": 0.8302395670775476, "learning_rate": 4.936885085453904e-06, "loss": 0.4115, "step": 1832 }, { "epoch": 0.45326409495548964, "grad_norm": 0.7907392913162058, "learning_rate": 4.936812492113203e-06, "loss": 0.4398, "step": 1833 }, { "epoch": 0.45351137487636006, "grad_norm": 0.7930420093582806, "learning_rate": 4.936739857583222e-06, "loss": 0.4589, "step": 1834 }, { "epoch": 0.4537586547972305, "grad_norm": 0.8307780202557283, "learning_rate": 4.936667181865188e-06, "loss": 0.4349, "step": 1835 }, { "epoch": 0.4540059347181009, "grad_norm": 0.8218166907095654, "learning_rate": 4.93659446496033e-06, "loss": 0.4617, "step": 1836 }, { "epoch": 0.45425321463897134, "grad_norm": 0.7959798297948486, "learning_rate": 4.936521706869876e-06, "loss": 0.4607, "step": 1837 }, { "epoch": 0.45450049455984176, "grad_norm": 0.8398423129692738, "learning_rate": 4.93644890759506e-06, "loss": 0.4252, "step": 1838 }, { "epoch": 0.4547477744807122, "grad_norm": 0.8146261084755446, "learning_rate": 4.936376067137106e-06, "loss": 0.4191, "step": 1839 }, { "epoch": 0.4549950544015826, "grad_norm": 0.8096282195772391, "learning_rate": 4.936303185497251e-06, "loss": 0.4556, "step": 1840 }, { "epoch": 0.45524233432245303, "grad_norm": 0.8226335223421032, "learning_rate": 4.9362302626767236e-06, "loss": 0.4349, "step": 1841 }, { "epoch": 0.45548961424332346, "grad_norm": 0.8387498079572907, "learning_rate": 4.936157298676757e-06, "loss": 0.4613, "step": 1842 }, { "epoch": 0.4557368941641939, "grad_norm": 0.81593835337991, "learning_rate": 4.936084293498585e-06, "loss": 0.4779, "step": 1843 }, { "epoch": 0.4559841740850643, "grad_norm": 0.804173667458378, "learning_rate": 4.936011247143442e-06, "loss": 0.4465, "step": 1844 }, { "epoch": 0.45623145400593473, "grad_norm": 0.8180659963817345, "learning_rate": 4.935938159612562e-06, "loss": 0.4247, "step": 1845 }, { "epoch": 0.45647873392680516, "grad_norm": 0.8245812500907519, "learning_rate": 4.93586503090718e-06, "loss": 0.4857, "step": 1846 }, { "epoch": 0.4567260138476756, "grad_norm": 0.8247624372284065, "learning_rate": 4.9357918610285326e-06, "loss": 0.44, "step": 1847 }, { "epoch": 0.456973293768546, "grad_norm": 0.8117934491093357, "learning_rate": 4.935718649977857e-06, "loss": 0.443, "step": 1848 }, { "epoch": 0.45722057368941643, "grad_norm": 0.831498957769525, "learning_rate": 4.93564539775639e-06, "loss": 0.4285, "step": 1849 }, { "epoch": 0.45746785361028686, "grad_norm": 0.8178845965621625, "learning_rate": 4.9355721043653705e-06, "loss": 0.4579, "step": 1850 }, { "epoch": 0.4577151335311573, "grad_norm": 0.8435823859766596, "learning_rate": 4.935498769806037e-06, "loss": 0.4849, "step": 1851 }, { "epoch": 0.4579624134520277, "grad_norm": 0.8198546493674795, "learning_rate": 4.9354253940796285e-06, "loss": 0.4764, "step": 1852 }, { "epoch": 0.45820969337289813, "grad_norm": 0.8612598869000662, "learning_rate": 4.9353519771873865e-06, "loss": 0.4655, "step": 1853 }, { "epoch": 0.45845697329376855, "grad_norm": 0.8331800596681603, "learning_rate": 4.935278519130551e-06, "loss": 0.4427, "step": 1854 }, { "epoch": 0.458704253214639, "grad_norm": 0.859575706580175, "learning_rate": 4.935205019910363e-06, "loss": 0.4308, "step": 1855 }, { "epoch": 0.4589515331355094, "grad_norm": 0.8304086390526508, "learning_rate": 4.9351314795280665e-06, "loss": 0.4386, "step": 1856 }, { "epoch": 0.45919881305637983, "grad_norm": 0.8526206499245497, "learning_rate": 4.935057897984904e-06, "loss": 0.4171, "step": 1857 }, { "epoch": 0.45944609297725025, "grad_norm": 0.8122129580874308, "learning_rate": 4.934984275282119e-06, "loss": 0.4592, "step": 1858 }, { "epoch": 0.4596933728981207, "grad_norm": 0.8184420237101967, "learning_rate": 4.9349106114209555e-06, "loss": 0.476, "step": 1859 }, { "epoch": 0.4599406528189911, "grad_norm": 0.8276819930577508, "learning_rate": 4.934836906402659e-06, "loss": 0.4389, "step": 1860 }, { "epoch": 0.4601879327398615, "grad_norm": 0.787148630250681, "learning_rate": 4.934763160228476e-06, "loss": 0.4346, "step": 1861 }, { "epoch": 0.46043521266073195, "grad_norm": 0.8128804151684144, "learning_rate": 4.934689372899653e-06, "loss": 0.4728, "step": 1862 }, { "epoch": 0.4606824925816024, "grad_norm": 0.8461977492066648, "learning_rate": 4.934615544417436e-06, "loss": 0.4389, "step": 1863 }, { "epoch": 0.4609297725024728, "grad_norm": 0.843449430804296, "learning_rate": 4.934541674783074e-06, "loss": 0.4457, "step": 1864 }, { "epoch": 0.4611770524233432, "grad_norm": 0.8086082758083908, "learning_rate": 4.934467763997814e-06, "loss": 0.4343, "step": 1865 }, { "epoch": 0.46142433234421365, "grad_norm": 0.8364626491108798, "learning_rate": 4.934393812062907e-06, "loss": 0.4242, "step": 1866 }, { "epoch": 0.4616716122650841, "grad_norm": 0.8250397394229696, "learning_rate": 4.934319818979604e-06, "loss": 0.4468, "step": 1867 }, { "epoch": 0.4619188921859545, "grad_norm": 0.8365693927663361, "learning_rate": 4.9342457847491525e-06, "loss": 0.4374, "step": 1868 }, { "epoch": 0.4621661721068249, "grad_norm": 0.8496261702902699, "learning_rate": 4.934171709372806e-06, "loss": 0.4261, "step": 1869 }, { "epoch": 0.46241345202769535, "grad_norm": 0.8007783961735965, "learning_rate": 4.934097592851817e-06, "loss": 0.4749, "step": 1870 }, { "epoch": 0.4626607319485658, "grad_norm": 0.9167830740082399, "learning_rate": 4.9340234351874375e-06, "loss": 0.4388, "step": 1871 }, { "epoch": 0.4629080118694362, "grad_norm": 0.8811259241380944, "learning_rate": 4.93394923638092e-06, "loss": 0.4256, "step": 1872 }, { "epoch": 0.4631552917903066, "grad_norm": 0.813618018230188, "learning_rate": 4.933874996433521e-06, "loss": 0.4567, "step": 1873 }, { "epoch": 0.46340257171117705, "grad_norm": 0.8132288219977344, "learning_rate": 4.933800715346493e-06, "loss": 0.4449, "step": 1874 }, { "epoch": 0.46364985163204747, "grad_norm": 0.833392110392238, "learning_rate": 4.933726393121092e-06, "loss": 0.4675, "step": 1875 }, { "epoch": 0.4638971315529179, "grad_norm": 0.8353507684223446, "learning_rate": 4.933652029758577e-06, "loss": 0.4734, "step": 1876 }, { "epoch": 0.4641444114737883, "grad_norm": 0.8628430560657625, "learning_rate": 4.933577625260201e-06, "loss": 0.4304, "step": 1877 }, { "epoch": 0.46439169139465875, "grad_norm": 0.8689457252491675, "learning_rate": 4.933503179627224e-06, "loss": 0.449, "step": 1878 }, { "epoch": 0.46463897131552917, "grad_norm": 0.8110050031566923, "learning_rate": 4.933428692860904e-06, "loss": 0.441, "step": 1879 }, { "epoch": 0.4648862512363996, "grad_norm": 0.8292633865002436, "learning_rate": 4.933354164962499e-06, "loss": 0.429, "step": 1880 }, { "epoch": 0.46513353115727, "grad_norm": 0.8725341632231393, "learning_rate": 4.9332795959332715e-06, "loss": 0.4289, "step": 1881 }, { "epoch": 0.46538081107814044, "grad_norm": 0.8500399734842764, "learning_rate": 4.933204985774479e-06, "loss": 0.4512, "step": 1882 }, { "epoch": 0.46562809099901087, "grad_norm": 0.8080510822724074, "learning_rate": 4.933130334487384e-06, "loss": 0.4194, "step": 1883 }, { "epoch": 0.4658753709198813, "grad_norm": 0.8348679766093604, "learning_rate": 4.933055642073247e-06, "loss": 0.4223, "step": 1884 }, { "epoch": 0.4661226508407517, "grad_norm": 0.811940856384884, "learning_rate": 4.932980908533332e-06, "loss": 0.4511, "step": 1885 }, { "epoch": 0.46636993076162214, "grad_norm": 0.8623912087016934, "learning_rate": 4.9329061338689024e-06, "loss": 0.4067, "step": 1886 }, { "epoch": 0.46661721068249257, "grad_norm": 0.8466626050654013, "learning_rate": 4.932831318081222e-06, "loss": 0.4448, "step": 1887 }, { "epoch": 0.466864490603363, "grad_norm": 0.8167851555645209, "learning_rate": 4.932756461171554e-06, "loss": 0.4504, "step": 1888 }, { "epoch": 0.4671117705242334, "grad_norm": 0.8104154270120225, "learning_rate": 4.932681563141164e-06, "loss": 0.4395, "step": 1889 }, { "epoch": 0.46735905044510384, "grad_norm": 0.8655461155693418, "learning_rate": 4.932606623991319e-06, "loss": 0.4591, "step": 1890 }, { "epoch": 0.46760633036597427, "grad_norm": 0.8176989636313634, "learning_rate": 4.932531643723285e-06, "loss": 0.4427, "step": 1891 }, { "epoch": 0.4678536102868447, "grad_norm": 0.7838206162822197, "learning_rate": 4.9324566223383306e-06, "loss": 0.4498, "step": 1892 }, { "epoch": 0.4681008902077151, "grad_norm": 0.8035646415445724, "learning_rate": 4.9323815598377225e-06, "loss": 0.4471, "step": 1893 }, { "epoch": 0.46834817012858554, "grad_norm": 0.8037368620913304, "learning_rate": 4.93230645622273e-06, "loss": 0.4641, "step": 1894 }, { "epoch": 0.46859545004945596, "grad_norm": 0.8025390965922428, "learning_rate": 4.932231311494622e-06, "loss": 0.4373, "step": 1895 }, { "epoch": 0.4688427299703264, "grad_norm": 0.7945979697539853, "learning_rate": 4.932156125654669e-06, "loss": 0.4559, "step": 1896 }, { "epoch": 0.4690900098911968, "grad_norm": 0.8769209411502517, "learning_rate": 4.9320808987041424e-06, "loss": 0.4586, "step": 1897 }, { "epoch": 0.46933728981206724, "grad_norm": 0.8476950420274959, "learning_rate": 4.932005630644314e-06, "loss": 0.4135, "step": 1898 }, { "epoch": 0.46958456973293766, "grad_norm": 0.8244206095276344, "learning_rate": 4.931930321476455e-06, "loss": 0.4446, "step": 1899 }, { "epoch": 0.4698318496538081, "grad_norm": 0.7977893759785066, "learning_rate": 4.931854971201838e-06, "loss": 0.4703, "step": 1900 }, { "epoch": 0.4700791295746785, "grad_norm": 0.8611413119435934, "learning_rate": 4.9317795798217385e-06, "loss": 0.4591, "step": 1901 }, { "epoch": 0.47032640949554894, "grad_norm": 0.8374357250520812, "learning_rate": 4.931704147337428e-06, "loss": 0.4472, "step": 1902 }, { "epoch": 0.47057368941641936, "grad_norm": 0.8410624597911414, "learning_rate": 4.931628673750185e-06, "loss": 0.439, "step": 1903 }, { "epoch": 0.4708209693372898, "grad_norm": 0.8562823214973215, "learning_rate": 4.931553159061283e-06, "loss": 0.4445, "step": 1904 }, { "epoch": 0.4710682492581602, "grad_norm": 0.8224377914499575, "learning_rate": 4.931477603271999e-06, "loss": 0.4306, "step": 1905 }, { "epoch": 0.4713155291790307, "grad_norm": 0.8697597121950194, "learning_rate": 4.93140200638361e-06, "loss": 0.4548, "step": 1906 }, { "epoch": 0.4715628090999011, "grad_norm": 0.824255221165472, "learning_rate": 4.931326368397394e-06, "loss": 0.4951, "step": 1907 }, { "epoch": 0.47181008902077154, "grad_norm": 0.885166598612944, "learning_rate": 4.9312506893146286e-06, "loss": 0.4285, "step": 1908 }, { "epoch": 0.47205736894164196, "grad_norm": 0.8171167921231146, "learning_rate": 4.931174969136594e-06, "loss": 0.446, "step": 1909 }, { "epoch": 0.4723046488625124, "grad_norm": 0.820165159360279, "learning_rate": 4.93109920786457e-06, "loss": 0.4823, "step": 1910 }, { "epoch": 0.4725519287833828, "grad_norm": 0.8153543462857648, "learning_rate": 4.9310234054998375e-06, "loss": 0.4478, "step": 1911 }, { "epoch": 0.47279920870425324, "grad_norm": 0.8367918771121765, "learning_rate": 4.930947562043677e-06, "loss": 0.4695, "step": 1912 }, { "epoch": 0.47304648862512366, "grad_norm": 0.8471649045198084, "learning_rate": 4.930871677497371e-06, "loss": 0.47, "step": 1913 }, { "epoch": 0.4732937685459941, "grad_norm": 0.8076081839098591, "learning_rate": 4.9307957518622006e-06, "loss": 0.4391, "step": 1914 }, { "epoch": 0.4735410484668645, "grad_norm": 0.8264582394780937, "learning_rate": 4.9307197851394514e-06, "loss": 0.4291, "step": 1915 }, { "epoch": 0.47378832838773494, "grad_norm": 0.8178298174303794, "learning_rate": 4.930643777330407e-06, "loss": 0.4387, "step": 1916 }, { "epoch": 0.47403560830860536, "grad_norm": 0.7791823942240508, "learning_rate": 4.930567728436352e-06, "loss": 0.4531, "step": 1917 }, { "epoch": 0.4742828882294758, "grad_norm": 0.8280393676731757, "learning_rate": 4.930491638458571e-06, "loss": 0.4419, "step": 1918 }, { "epoch": 0.4745301681503462, "grad_norm": 0.8241832719290327, "learning_rate": 4.930415507398351e-06, "loss": 0.4381, "step": 1919 }, { "epoch": 0.47477744807121663, "grad_norm": 0.8217261660458774, "learning_rate": 4.930339335256978e-06, "loss": 0.4505, "step": 1920 }, { "epoch": 0.47502472799208706, "grad_norm": 0.8131794869769368, "learning_rate": 4.93026312203574e-06, "loss": 0.4311, "step": 1921 }, { "epoch": 0.4752720079129575, "grad_norm": 0.8411335977862362, "learning_rate": 4.930186867735926e-06, "loss": 0.4783, "step": 1922 }, { "epoch": 0.4755192878338279, "grad_norm": 0.8241214015182279, "learning_rate": 4.930110572358824e-06, "loss": 0.4497, "step": 1923 }, { "epoch": 0.47576656775469833, "grad_norm": 0.839706324810582, "learning_rate": 4.930034235905724e-06, "loss": 0.4802, "step": 1924 }, { "epoch": 0.47601384767556876, "grad_norm": 0.8397548471567146, "learning_rate": 4.929957858377915e-06, "loss": 0.4202, "step": 1925 }, { "epoch": 0.4762611275964392, "grad_norm": 0.8660864925533305, "learning_rate": 4.929881439776691e-06, "loss": 0.4399, "step": 1926 }, { "epoch": 0.4765084075173096, "grad_norm": 0.8347114497369829, "learning_rate": 4.929804980103341e-06, "loss": 0.4631, "step": 1927 }, { "epoch": 0.47675568743818003, "grad_norm": 0.8646650091416341, "learning_rate": 4.929728479359158e-06, "loss": 0.4393, "step": 1928 }, { "epoch": 0.47700296735905046, "grad_norm": 0.8252581921541579, "learning_rate": 4.929651937545436e-06, "loss": 0.4506, "step": 1929 }, { "epoch": 0.4772502472799209, "grad_norm": 0.8094312396500337, "learning_rate": 4.929575354663467e-06, "loss": 0.4942, "step": 1930 }, { "epoch": 0.4774975272007913, "grad_norm": 0.8772495543820467, "learning_rate": 4.929498730714548e-06, "loss": 0.445, "step": 1931 }, { "epoch": 0.47774480712166173, "grad_norm": 0.7923372192092943, "learning_rate": 4.929422065699972e-06, "loss": 0.4862, "step": 1932 }, { "epoch": 0.47799208704253215, "grad_norm": 0.8712668957725559, "learning_rate": 4.929345359621036e-06, "loss": 0.4351, "step": 1933 }, { "epoch": 0.4782393669634026, "grad_norm": 0.8493635338265155, "learning_rate": 4.929268612479036e-06, "loss": 0.4572, "step": 1934 }, { "epoch": 0.478486646884273, "grad_norm": 0.8666548569127566, "learning_rate": 4.929191824275269e-06, "loss": 0.4427, "step": 1935 }, { "epoch": 0.47873392680514343, "grad_norm": 0.9074290287081372, "learning_rate": 4.929114995011034e-06, "loss": 0.4321, "step": 1936 }, { "epoch": 0.47898120672601385, "grad_norm": 0.8352717250490244, "learning_rate": 4.929038124687629e-06, "loss": 0.4348, "step": 1937 }, { "epoch": 0.4792284866468843, "grad_norm": 0.8112065646293543, "learning_rate": 4.9289612133063536e-06, "loss": 0.4658, "step": 1938 }, { "epoch": 0.4794757665677547, "grad_norm": 0.8327013692076111, "learning_rate": 4.928884260868507e-06, "loss": 0.4465, "step": 1939 }, { "epoch": 0.4797230464886251, "grad_norm": 0.8322228019074666, "learning_rate": 4.928807267375391e-06, "loss": 0.439, "step": 1940 }, { "epoch": 0.47997032640949555, "grad_norm": 0.8225399645495624, "learning_rate": 4.928730232828306e-06, "loss": 0.4131, "step": 1941 }, { "epoch": 0.480217606330366, "grad_norm": 0.8287089897738253, "learning_rate": 4.928653157228555e-06, "loss": 0.4513, "step": 1942 }, { "epoch": 0.4804648862512364, "grad_norm": 0.8387018490840795, "learning_rate": 4.928576040577441e-06, "loss": 0.468, "step": 1943 }, { "epoch": 0.4807121661721068, "grad_norm": 0.7983871186620123, "learning_rate": 4.928498882876266e-06, "loss": 0.4606, "step": 1944 }, { "epoch": 0.48095944609297725, "grad_norm": 0.8726955385115273, "learning_rate": 4.928421684126335e-06, "loss": 0.4226, "step": 1945 }, { "epoch": 0.4812067260138477, "grad_norm": 0.8343516175985839, "learning_rate": 4.928344444328954e-06, "loss": 0.4601, "step": 1946 }, { "epoch": 0.4814540059347181, "grad_norm": 0.8262000466383999, "learning_rate": 4.928267163485427e-06, "loss": 0.4742, "step": 1947 }, { "epoch": 0.4817012858555885, "grad_norm": 0.8684322211013671, "learning_rate": 4.928189841597061e-06, "loss": 0.4459, "step": 1948 }, { "epoch": 0.48194856577645895, "grad_norm": 0.8391599834278922, "learning_rate": 4.928112478665163e-06, "loss": 0.4526, "step": 1949 }, { "epoch": 0.4821958456973294, "grad_norm": 0.847620940773091, "learning_rate": 4.92803507469104e-06, "loss": 0.4429, "step": 1950 }, { "epoch": 0.4824431256181998, "grad_norm": 0.8734227149804495, "learning_rate": 4.927957629676001e-06, "loss": 0.4414, "step": 1951 }, { "epoch": 0.4826904055390702, "grad_norm": 0.875124744595111, "learning_rate": 4.927880143621355e-06, "loss": 0.4464, "step": 1952 }, { "epoch": 0.48293768545994065, "grad_norm": 0.872328400694578, "learning_rate": 4.927802616528412e-06, "loss": 0.4484, "step": 1953 }, { "epoch": 0.48318496538081107, "grad_norm": 0.8407440697376363, "learning_rate": 4.927725048398482e-06, "loss": 0.4523, "step": 1954 }, { "epoch": 0.4834322453016815, "grad_norm": 0.7834132267725564, "learning_rate": 4.927647439232876e-06, "loss": 0.4393, "step": 1955 }, { "epoch": 0.4836795252225519, "grad_norm": 0.8234773490129442, "learning_rate": 4.927569789032907e-06, "loss": 0.4595, "step": 1956 }, { "epoch": 0.48392680514342234, "grad_norm": 0.8497947043946262, "learning_rate": 4.927492097799885e-06, "loss": 0.4303, "step": 1957 }, { "epoch": 0.48417408506429277, "grad_norm": 0.8806520611452342, "learning_rate": 4.927414365535126e-06, "loss": 0.4606, "step": 1958 }, { "epoch": 0.4844213649851632, "grad_norm": 0.8113702305265199, "learning_rate": 4.9273365922399416e-06, "loss": 0.445, "step": 1959 }, { "epoch": 0.4846686449060336, "grad_norm": 0.8248851985890626, "learning_rate": 4.927258777915648e-06, "loss": 0.4465, "step": 1960 }, { "epoch": 0.48491592482690404, "grad_norm": 0.8299620493793199, "learning_rate": 4.92718092256356e-06, "loss": 0.4204, "step": 1961 }, { "epoch": 0.48516320474777447, "grad_norm": 0.8052928255301574, "learning_rate": 4.927103026184993e-06, "loss": 0.478, "step": 1962 }, { "epoch": 0.4854104846686449, "grad_norm": 0.8740281640364033, "learning_rate": 4.927025088781265e-06, "loss": 0.426, "step": 1963 }, { "epoch": 0.4856577645895153, "grad_norm": 0.8183836323362705, "learning_rate": 4.926947110353692e-06, "loss": 0.4432, "step": 1964 }, { "epoch": 0.48590504451038574, "grad_norm": 0.8215966525772359, "learning_rate": 4.926869090903593e-06, "loss": 0.4306, "step": 1965 }, { "epoch": 0.48615232443125617, "grad_norm": 0.8491022861836903, "learning_rate": 4.9267910304322865e-06, "loss": 0.464, "step": 1966 }, { "epoch": 0.4863996043521266, "grad_norm": 0.8538882505150086, "learning_rate": 4.926712928941092e-06, "loss": 0.436, "step": 1967 }, { "epoch": 0.486646884272997, "grad_norm": 0.777009034398573, "learning_rate": 4.926634786431329e-06, "loss": 0.4501, "step": 1968 }, { "epoch": 0.48689416419386744, "grad_norm": 0.8005037204433741, "learning_rate": 4.926556602904319e-06, "loss": 0.4343, "step": 1969 }, { "epoch": 0.48714144411473786, "grad_norm": 0.8289955238627059, "learning_rate": 4.9264783783613835e-06, "loss": 0.4614, "step": 1970 }, { "epoch": 0.4873887240356083, "grad_norm": 0.8455414270465915, "learning_rate": 4.926400112803844e-06, "loss": 0.4669, "step": 1971 }, { "epoch": 0.4876360039564787, "grad_norm": 0.7797141965228852, "learning_rate": 4.926321806233024e-06, "loss": 0.49, "step": 1972 }, { "epoch": 0.48788328387734914, "grad_norm": 0.8493989470575196, "learning_rate": 4.926243458650248e-06, "loss": 0.4349, "step": 1973 }, { "epoch": 0.48813056379821956, "grad_norm": 0.8512014778641054, "learning_rate": 4.926165070056839e-06, "loss": 0.4108, "step": 1974 }, { "epoch": 0.48837784371909, "grad_norm": 0.7979992970173828, "learning_rate": 4.926086640454123e-06, "loss": 0.4706, "step": 1975 }, { "epoch": 0.4886251236399604, "grad_norm": 0.8239102783032143, "learning_rate": 4.926008169843424e-06, "loss": 0.4787, "step": 1976 }, { "epoch": 0.48887240356083084, "grad_norm": 0.8016225529555583, "learning_rate": 4.92592965822607e-06, "loss": 0.4547, "step": 1977 }, { "epoch": 0.48911968348170126, "grad_norm": 0.8531911433461715, "learning_rate": 4.925851105603388e-06, "loss": 0.4488, "step": 1978 }, { "epoch": 0.4893669634025717, "grad_norm": 0.8194447366104644, "learning_rate": 4.925772511976705e-06, "loss": 0.4431, "step": 1979 }, { "epoch": 0.4896142433234421, "grad_norm": 0.8296301370115579, "learning_rate": 4.925693877347349e-06, "loss": 0.4461, "step": 1980 }, { "epoch": 0.48986152324431254, "grad_norm": 0.8462798504011503, "learning_rate": 4.925615201716651e-06, "loss": 0.443, "step": 1981 }, { "epoch": 0.49010880316518296, "grad_norm": 0.8426288056577137, "learning_rate": 4.92553648508594e-06, "loss": 0.425, "step": 1982 }, { "epoch": 0.4903560830860534, "grad_norm": 0.829429706126443, "learning_rate": 4.925457727456546e-06, "loss": 0.4508, "step": 1983 }, { "epoch": 0.49060336300692386, "grad_norm": 0.8059370491729095, "learning_rate": 4.9253789288298e-06, "loss": 0.4502, "step": 1984 }, { "epoch": 0.4908506429277943, "grad_norm": 0.7795504551173943, "learning_rate": 4.925300089207035e-06, "loss": 0.4108, "step": 1985 }, { "epoch": 0.4910979228486647, "grad_norm": 0.8145867834841408, "learning_rate": 4.925221208589584e-06, "loss": 0.4514, "step": 1986 }, { "epoch": 0.49134520276953514, "grad_norm": 0.8452473568571481, "learning_rate": 4.925142286978778e-06, "loss": 0.4305, "step": 1987 }, { "epoch": 0.49159248269040556, "grad_norm": 0.8444624739728522, "learning_rate": 4.925063324375953e-06, "loss": 0.4673, "step": 1988 }, { "epoch": 0.491839762611276, "grad_norm": 0.8256672771190708, "learning_rate": 4.9249843207824434e-06, "loss": 0.4642, "step": 1989 }, { "epoch": 0.4920870425321464, "grad_norm": 0.8404370092223706, "learning_rate": 4.924905276199584e-06, "loss": 0.4288, "step": 1990 }, { "epoch": 0.49233432245301684, "grad_norm": 0.7864654632402006, "learning_rate": 4.924826190628711e-06, "loss": 0.4692, "step": 1991 }, { "epoch": 0.49258160237388726, "grad_norm": 0.8434933818377831, "learning_rate": 4.924747064071163e-06, "loss": 0.4158, "step": 1992 }, { "epoch": 0.4928288822947577, "grad_norm": 0.8244874183035269, "learning_rate": 4.924667896528274e-06, "loss": 0.4198, "step": 1993 }, { "epoch": 0.4930761622156281, "grad_norm": 0.8270973921922327, "learning_rate": 4.924588688001385e-06, "loss": 0.4734, "step": 1994 }, { "epoch": 0.49332344213649854, "grad_norm": 0.8058932904957486, "learning_rate": 4.924509438491834e-06, "loss": 0.4381, "step": 1995 }, { "epoch": 0.49357072205736896, "grad_norm": 0.7956261606694742, "learning_rate": 4.924430148000959e-06, "loss": 0.429, "step": 1996 }, { "epoch": 0.4938180019782394, "grad_norm": 0.7939524290994, "learning_rate": 4.924350816530104e-06, "loss": 0.4465, "step": 1997 }, { "epoch": 0.4940652818991098, "grad_norm": 0.7916027057232425, "learning_rate": 4.924271444080606e-06, "loss": 0.4444, "step": 1998 }, { "epoch": 0.49431256181998023, "grad_norm": 0.8173525084366966, "learning_rate": 4.924192030653808e-06, "loss": 0.4663, "step": 1999 }, { "epoch": 0.49455984174085066, "grad_norm": 0.852494236856881, "learning_rate": 4.924112576251054e-06, "loss": 0.454, "step": 2000 }, { "epoch": 0.4948071216617211, "grad_norm": 0.7988637477633536, "learning_rate": 4.924033080873684e-06, "loss": 0.4392, "step": 2001 }, { "epoch": 0.4950544015825915, "grad_norm": 0.8122968158357641, "learning_rate": 4.923953544523044e-06, "loss": 0.4637, "step": 2002 }, { "epoch": 0.49530168150346193, "grad_norm": 0.81761782432353, "learning_rate": 4.923873967200479e-06, "loss": 0.4378, "step": 2003 }, { "epoch": 0.49554896142433236, "grad_norm": 0.8633300475989552, "learning_rate": 4.923794348907331e-06, "loss": 0.4656, "step": 2004 }, { "epoch": 0.4957962413452028, "grad_norm": 0.7944747654872235, "learning_rate": 4.923714689644948e-06, "loss": 0.4484, "step": 2005 }, { "epoch": 0.4960435212660732, "grad_norm": 0.863856436444404, "learning_rate": 4.923634989414676e-06, "loss": 0.4371, "step": 2006 }, { "epoch": 0.49629080118694363, "grad_norm": 0.8659860035555645, "learning_rate": 4.923555248217864e-06, "loss": 0.4283, "step": 2007 }, { "epoch": 0.49653808110781406, "grad_norm": 0.7954268742364704, "learning_rate": 4.923475466055856e-06, "loss": 0.4547, "step": 2008 }, { "epoch": 0.4967853610286845, "grad_norm": 0.8138754857165406, "learning_rate": 4.9233956429300034e-06, "loss": 0.4108, "step": 2009 }, { "epoch": 0.4970326409495549, "grad_norm": 0.8581978670153796, "learning_rate": 4.9233157788416545e-06, "loss": 0.432, "step": 2010 }, { "epoch": 0.49727992087042533, "grad_norm": 0.8241805365555525, "learning_rate": 4.9232358737921585e-06, "loss": 0.4382, "step": 2011 }, { "epoch": 0.49752720079129575, "grad_norm": 0.8065492877310552, "learning_rate": 4.923155927782868e-06, "loss": 0.4384, "step": 2012 }, { "epoch": 0.4977744807121662, "grad_norm": 0.8521852417108808, "learning_rate": 4.923075940815133e-06, "loss": 0.4417, "step": 2013 }, { "epoch": 0.4980217606330366, "grad_norm": 0.8721828748607178, "learning_rate": 4.922995912890306e-06, "loss": 0.41, "step": 2014 }, { "epoch": 0.498269040553907, "grad_norm": 0.8475215975436352, "learning_rate": 4.922915844009739e-06, "loss": 0.4415, "step": 2015 }, { "epoch": 0.49851632047477745, "grad_norm": 0.7821130765064389, "learning_rate": 4.922835734174786e-06, "loss": 0.4429, "step": 2016 }, { "epoch": 0.4987636003956479, "grad_norm": 0.8511117917014502, "learning_rate": 4.922755583386801e-06, "loss": 0.4497, "step": 2017 }, { "epoch": 0.4990108803165183, "grad_norm": 0.831275796708298, "learning_rate": 4.92267539164714e-06, "loss": 0.4578, "step": 2018 }, { "epoch": 0.4992581602373887, "grad_norm": 0.8509135804106028, "learning_rate": 4.922595158957155e-06, "loss": 0.4569, "step": 2019 }, { "epoch": 0.49950544015825915, "grad_norm": 0.8582138029948618, "learning_rate": 4.922514885318206e-06, "loss": 0.4386, "step": 2020 }, { "epoch": 0.4997527200791296, "grad_norm": 0.8182293486915994, "learning_rate": 4.922434570731648e-06, "loss": 0.4499, "step": 2021 }, { "epoch": 0.5, "grad_norm": 0.8777366413472809, "learning_rate": 4.922354215198838e-06, "loss": 0.4371, "step": 2022 }, { "epoch": 0.5002472799208705, "grad_norm": 0.811791173923638, "learning_rate": 4.922273818721136e-06, "loss": 0.4672, "step": 2023 }, { "epoch": 0.5004945598417408, "grad_norm": 0.8274747192480223, "learning_rate": 4.922193381299899e-06, "loss": 0.427, "step": 2024 }, { "epoch": 0.5007418397626113, "grad_norm": 0.8065694090822414, "learning_rate": 4.922112902936489e-06, "loss": 0.4736, "step": 2025 }, { "epoch": 0.5009891196834817, "grad_norm": 0.8128372001404772, "learning_rate": 4.922032383632263e-06, "loss": 0.44, "step": 2026 }, { "epoch": 0.5012363996043522, "grad_norm": 0.8287731557078928, "learning_rate": 4.9219518233885856e-06, "loss": 0.4414, "step": 2027 }, { "epoch": 0.5014836795252225, "grad_norm": 0.8063495368442325, "learning_rate": 4.921871222206817e-06, "loss": 0.4736, "step": 2028 }, { "epoch": 0.501730959446093, "grad_norm": 0.8380024116633067, "learning_rate": 4.921790580088318e-06, "loss": 0.4712, "step": 2029 }, { "epoch": 0.5019782393669634, "grad_norm": 0.8284317158685391, "learning_rate": 4.921709897034454e-06, "loss": 0.4497, "step": 2030 }, { "epoch": 0.5022255192878339, "grad_norm": 0.8187102724304874, "learning_rate": 4.921629173046588e-06, "loss": 0.4551, "step": 2031 }, { "epoch": 0.5024727992087042, "grad_norm": 0.8512236754910734, "learning_rate": 4.921548408126085e-06, "loss": 0.4412, "step": 2032 }, { "epoch": 0.5027200791295747, "grad_norm": 0.7788881223000658, "learning_rate": 4.921467602274308e-06, "loss": 0.4104, "step": 2033 }, { "epoch": 0.5029673590504451, "grad_norm": 0.8141480380678998, "learning_rate": 4.921386755492625e-06, "loss": 0.4474, "step": 2034 }, { "epoch": 0.5032146389713156, "grad_norm": 0.8407140685541208, "learning_rate": 4.921305867782402e-06, "loss": 0.4082, "step": 2035 }, { "epoch": 0.503461918892186, "grad_norm": 0.8720791912431307, "learning_rate": 4.9212249391450065e-06, "loss": 0.4013, "step": 2036 }, { "epoch": 0.5037091988130564, "grad_norm": 0.8183434796093931, "learning_rate": 4.9211439695818065e-06, "loss": 0.4316, "step": 2037 }, { "epoch": 0.5039564787339268, "grad_norm": 0.8449401193598226, "learning_rate": 4.921062959094169e-06, "loss": 0.426, "step": 2038 }, { "epoch": 0.5042037586547973, "grad_norm": 0.8509257751850934, "learning_rate": 4.9209819076834655e-06, "loss": 0.4525, "step": 2039 }, { "epoch": 0.5044510385756676, "grad_norm": 0.8519430084094141, "learning_rate": 4.920900815351065e-06, "loss": 0.4146, "step": 2040 }, { "epoch": 0.5046983184965381, "grad_norm": 0.8456813326281569, "learning_rate": 4.920819682098338e-06, "loss": 0.4324, "step": 2041 }, { "epoch": 0.5049455984174085, "grad_norm": 0.8287711902840623, "learning_rate": 4.920738507926657e-06, "loss": 0.4613, "step": 2042 }, { "epoch": 0.505192878338279, "grad_norm": 0.8257370297562611, "learning_rate": 4.920657292837392e-06, "loss": 0.4234, "step": 2043 }, { "epoch": 0.5054401582591493, "grad_norm": 0.8961393796184989, "learning_rate": 4.9205760368319175e-06, "loss": 0.4434, "step": 2044 }, { "epoch": 0.5056874381800198, "grad_norm": 0.8204890246004772, "learning_rate": 4.920494739911607e-06, "loss": 0.4794, "step": 2045 }, { "epoch": 0.5059347181008902, "grad_norm": 0.8376177288707035, "learning_rate": 4.9204134020778335e-06, "loss": 0.4282, "step": 2046 }, { "epoch": 0.5061819980217607, "grad_norm": 0.834472891929355, "learning_rate": 4.920332023331973e-06, "loss": 0.4327, "step": 2047 }, { "epoch": 0.506429277942631, "grad_norm": 0.8406099377992237, "learning_rate": 4.9202506036754e-06, "loss": 0.4441, "step": 2048 }, { "epoch": 0.5066765578635015, "grad_norm": 0.8617534109839463, "learning_rate": 4.920169143109491e-06, "loss": 0.4324, "step": 2049 }, { "epoch": 0.5069238377843719, "grad_norm": 0.8676859852723221, "learning_rate": 4.920087641635624e-06, "loss": 0.4398, "step": 2050 }, { "epoch": 0.5071711177052424, "grad_norm": 0.8385705486777898, "learning_rate": 4.920006099255176e-06, "loss": 0.45, "step": 2051 }, { "epoch": 0.5074183976261127, "grad_norm": 0.8654084128937142, "learning_rate": 4.919924515969524e-06, "loss": 0.4203, "step": 2052 }, { "epoch": 0.5076656775469832, "grad_norm": 0.8643853671809885, "learning_rate": 4.919842891780049e-06, "loss": 0.4562, "step": 2053 }, { "epoch": 0.5079129574678536, "grad_norm": 0.8071820873638121, "learning_rate": 4.919761226688129e-06, "loss": 0.4174, "step": 2054 }, { "epoch": 0.5081602373887241, "grad_norm": 0.8008245242716596, "learning_rate": 4.9196795206951455e-06, "loss": 0.4124, "step": 2055 }, { "epoch": 0.5084075173095944, "grad_norm": 0.8074972712657118, "learning_rate": 4.919597773802479e-06, "loss": 0.448, "step": 2056 }, { "epoch": 0.5086547972304649, "grad_norm": 0.8302305293316452, "learning_rate": 4.919515986011512e-06, "loss": 0.4729, "step": 2057 }, { "epoch": 0.5089020771513353, "grad_norm": 0.8081365268996791, "learning_rate": 4.919434157323627e-06, "loss": 0.4572, "step": 2058 }, { "epoch": 0.5091493570722058, "grad_norm": 0.8033712684412141, "learning_rate": 4.919352287740205e-06, "loss": 0.4382, "step": 2059 }, { "epoch": 0.5093966369930761, "grad_norm": 0.7587735127229113, "learning_rate": 4.919270377262633e-06, "loss": 0.473, "step": 2060 }, { "epoch": 0.5096439169139466, "grad_norm": 0.8619234301971191, "learning_rate": 4.9191884258922926e-06, "loss": 0.443, "step": 2061 }, { "epoch": 0.509891196834817, "grad_norm": 0.8365792373646538, "learning_rate": 4.919106433630572e-06, "loss": 0.4133, "step": 2062 }, { "epoch": 0.5101384767556875, "grad_norm": 0.7856790139850826, "learning_rate": 4.919024400478854e-06, "loss": 0.4528, "step": 2063 }, { "epoch": 0.5103857566765578, "grad_norm": 0.7858186305207652, "learning_rate": 4.918942326438527e-06, "loss": 0.4014, "step": 2064 }, { "epoch": 0.5106330365974283, "grad_norm": 0.8088878916274455, "learning_rate": 4.918860211510979e-06, "loss": 0.4551, "step": 2065 }, { "epoch": 0.5108803165182987, "grad_norm": 0.7872550776179847, "learning_rate": 4.918778055697596e-06, "loss": 0.4684, "step": 2066 }, { "epoch": 0.5111275964391692, "grad_norm": 0.817077611575408, "learning_rate": 4.918695858999767e-06, "loss": 0.414, "step": 2067 }, { "epoch": 0.5113748763600395, "grad_norm": 0.8378442452576724, "learning_rate": 4.918613621418883e-06, "loss": 0.4576, "step": 2068 }, { "epoch": 0.51162215628091, "grad_norm": 0.8744527942820566, "learning_rate": 4.918531342956333e-06, "loss": 0.4543, "step": 2069 }, { "epoch": 0.5118694362017804, "grad_norm": 0.8732192451807397, "learning_rate": 4.9184490236135075e-06, "loss": 0.472, "step": 2070 }, { "epoch": 0.5121167161226509, "grad_norm": 0.8461949177925873, "learning_rate": 4.9183666633917986e-06, "loss": 0.4555, "step": 2071 }, { "epoch": 0.5123639960435212, "grad_norm": 0.8418875439184351, "learning_rate": 4.918284262292597e-06, "loss": 0.4366, "step": 2072 }, { "epoch": 0.5126112759643917, "grad_norm": 0.82720798639419, "learning_rate": 4.9182018203172986e-06, "loss": 0.4407, "step": 2073 }, { "epoch": 0.5128585558852621, "grad_norm": 0.8025497377538527, "learning_rate": 4.918119337467293e-06, "loss": 0.4483, "step": 2074 }, { "epoch": 0.5131058358061326, "grad_norm": 0.8042377781404518, "learning_rate": 4.918036813743978e-06, "loss": 0.4375, "step": 2075 }, { "epoch": 0.5133531157270029, "grad_norm": 0.8480814582795039, "learning_rate": 4.9179542491487455e-06, "loss": 0.4151, "step": 2076 }, { "epoch": 0.5136003956478734, "grad_norm": 0.8057648300526595, "learning_rate": 4.917871643682993e-06, "loss": 0.433, "step": 2077 }, { "epoch": 0.5138476755687438, "grad_norm": 0.8360351985687441, "learning_rate": 4.917788997348116e-06, "loss": 0.4263, "step": 2078 }, { "epoch": 0.5140949554896143, "grad_norm": 0.7756523740903111, "learning_rate": 4.9177063101455115e-06, "loss": 0.462, "step": 2079 }, { "epoch": 0.5143422354104846, "grad_norm": 0.8654229974106498, "learning_rate": 4.917623582076577e-06, "loss": 0.4391, "step": 2080 }, { "epoch": 0.5145895153313551, "grad_norm": 0.8568313817015912, "learning_rate": 4.917540813142712e-06, "loss": 0.4283, "step": 2081 }, { "epoch": 0.5148367952522255, "grad_norm": 0.8649603479345634, "learning_rate": 4.917458003345314e-06, "loss": 0.4261, "step": 2082 }, { "epoch": 0.515084075173096, "grad_norm": 0.8187465423672693, "learning_rate": 4.9173751526857835e-06, "loss": 0.4441, "step": 2083 }, { "epoch": 0.5153313550939663, "grad_norm": 0.8573292004045983, "learning_rate": 4.9172922611655205e-06, "loss": 0.4337, "step": 2084 }, { "epoch": 0.5155786350148368, "grad_norm": 0.8663104974580745, "learning_rate": 4.917209328785927e-06, "loss": 0.448, "step": 2085 }, { "epoch": 0.5158259149357072, "grad_norm": 0.8473083515505613, "learning_rate": 4.917126355548404e-06, "loss": 0.4226, "step": 2086 }, { "epoch": 0.5160731948565777, "grad_norm": 0.8896880901970612, "learning_rate": 4.9170433414543545e-06, "loss": 0.4093, "step": 2087 }, { "epoch": 0.516320474777448, "grad_norm": 0.8806978447051235, "learning_rate": 4.916960286505181e-06, "loss": 0.4322, "step": 2088 }, { "epoch": 0.5165677546983185, "grad_norm": 0.8451289370207562, "learning_rate": 4.9168771907022885e-06, "loss": 0.4176, "step": 2089 }, { "epoch": 0.5168150346191889, "grad_norm": 0.7887918587386917, "learning_rate": 4.91679405404708e-06, "loss": 0.4333, "step": 2090 }, { "epoch": 0.5170623145400594, "grad_norm": 0.8093101065598383, "learning_rate": 4.916710876540962e-06, "loss": 0.4482, "step": 2091 }, { "epoch": 0.5173095944609297, "grad_norm": 0.8145596529223303, "learning_rate": 4.916627658185339e-06, "loss": 0.4291, "step": 2092 }, { "epoch": 0.5175568743818002, "grad_norm": 0.8464311393382885, "learning_rate": 4.9165443989816195e-06, "loss": 0.4458, "step": 2093 }, { "epoch": 0.5178041543026706, "grad_norm": 0.8442575884890843, "learning_rate": 4.91646109893121e-06, "loss": 0.4598, "step": 2094 }, { "epoch": 0.518051434223541, "grad_norm": 0.8413652586604167, "learning_rate": 4.916377758035519e-06, "loss": 0.4618, "step": 2095 }, { "epoch": 0.5182987141444114, "grad_norm": 0.8364097355333588, "learning_rate": 4.916294376295954e-06, "loss": 0.4465, "step": 2096 }, { "epoch": 0.5185459940652819, "grad_norm": 0.8400100432544643, "learning_rate": 4.916210953713926e-06, "loss": 0.4313, "step": 2097 }, { "epoch": 0.5187932739861523, "grad_norm": 0.8128995366146822, "learning_rate": 4.916127490290843e-06, "loss": 0.459, "step": 2098 }, { "epoch": 0.5190405539070228, "grad_norm": 0.8073646482891128, "learning_rate": 4.916043986028117e-06, "loss": 0.4503, "step": 2099 }, { "epoch": 0.5192878338278932, "grad_norm": 0.8355382070231939, "learning_rate": 4.91596044092716e-06, "loss": 0.4238, "step": 2100 }, { "epoch": 0.5195351137487636, "grad_norm": 0.8290409040475074, "learning_rate": 4.915876854989384e-06, "loss": 0.4331, "step": 2101 }, { "epoch": 0.5197823936696341, "grad_norm": 0.8473558657695541, "learning_rate": 4.915793228216201e-06, "loss": 0.4177, "step": 2102 }, { "epoch": 0.5200296735905044, "grad_norm": 0.8168178012275521, "learning_rate": 4.915709560609025e-06, "loss": 0.4207, "step": 2103 }, { "epoch": 0.5202769535113749, "grad_norm": 0.8349750801854026, "learning_rate": 4.91562585216927e-06, "loss": 0.4387, "step": 2104 }, { "epoch": 0.5205242334322453, "grad_norm": 0.8332513834402295, "learning_rate": 4.9155421028983515e-06, "loss": 0.4475, "step": 2105 }, { "epoch": 0.5207715133531158, "grad_norm": 0.823978542351043, "learning_rate": 4.915458312797684e-06, "loss": 0.4987, "step": 2106 }, { "epoch": 0.5210187932739861, "grad_norm": 0.8171613069077716, "learning_rate": 4.915374481868685e-06, "loss": 0.4533, "step": 2107 }, { "epoch": 0.5212660731948566, "grad_norm": 0.7912376433723926, "learning_rate": 4.915290610112772e-06, "loss": 0.4411, "step": 2108 }, { "epoch": 0.521513353115727, "grad_norm": 0.8223882924185978, "learning_rate": 4.915206697531361e-06, "loss": 0.4469, "step": 2109 }, { "epoch": 0.5217606330365975, "grad_norm": 0.8122382638898773, "learning_rate": 4.91512274412587e-06, "loss": 0.4555, "step": 2110 }, { "epoch": 0.5220079129574678, "grad_norm": 0.8145767118335325, "learning_rate": 4.9150387498977205e-06, "loss": 0.4355, "step": 2111 }, { "epoch": 0.5222551928783383, "grad_norm": 0.8041076958743947, "learning_rate": 4.91495471484833e-06, "loss": 0.436, "step": 2112 }, { "epoch": 0.5225024727992087, "grad_norm": 0.8253205028728051, "learning_rate": 4.91487063897912e-06, "loss": 0.4334, "step": 2113 }, { "epoch": 0.5227497527200792, "grad_norm": 0.8192406762636769, "learning_rate": 4.9147865222915114e-06, "loss": 0.4581, "step": 2114 }, { "epoch": 0.5229970326409495, "grad_norm": 0.8354577329747063, "learning_rate": 4.914702364786926e-06, "loss": 0.4213, "step": 2115 }, { "epoch": 0.52324431256182, "grad_norm": 0.8290837226364314, "learning_rate": 4.914618166466787e-06, "loss": 0.4234, "step": 2116 }, { "epoch": 0.5234915924826904, "grad_norm": 0.827501095420979, "learning_rate": 4.914533927332516e-06, "loss": 0.3891, "step": 2117 }, { "epoch": 0.5237388724035609, "grad_norm": 0.8342828261706431, "learning_rate": 4.91444964738554e-06, "loss": 0.4336, "step": 2118 }, { "epoch": 0.5239861523244312, "grad_norm": 0.7885249793199698, "learning_rate": 4.914365326627279e-06, "loss": 0.4374, "step": 2119 }, { "epoch": 0.5242334322453017, "grad_norm": 0.8226055988032711, "learning_rate": 4.914280965059162e-06, "loss": 0.4655, "step": 2120 }, { "epoch": 0.5244807121661721, "grad_norm": 0.8010022488750518, "learning_rate": 4.914196562682613e-06, "loss": 0.4765, "step": 2121 }, { "epoch": 0.5247279920870426, "grad_norm": 0.8721911721071652, "learning_rate": 4.91411211949906e-06, "loss": 0.4082, "step": 2122 }, { "epoch": 0.5249752720079129, "grad_norm": 0.8054096169771939, "learning_rate": 4.914027635509929e-06, "loss": 0.4274, "step": 2123 }, { "epoch": 0.5252225519287834, "grad_norm": 0.8140381801310196, "learning_rate": 4.913943110716649e-06, "loss": 0.4405, "step": 2124 }, { "epoch": 0.5254698318496538, "grad_norm": 0.8264133414135034, "learning_rate": 4.913858545120648e-06, "loss": 0.4604, "step": 2125 }, { "epoch": 0.5257171117705243, "grad_norm": 0.8296155953337557, "learning_rate": 4.913773938723356e-06, "loss": 0.431, "step": 2126 }, { "epoch": 0.5259643916913946, "grad_norm": 0.8390771475583482, "learning_rate": 4.913689291526203e-06, "loss": 0.4416, "step": 2127 }, { "epoch": 0.5262116716122651, "grad_norm": 0.7978475489974522, "learning_rate": 4.91360460353062e-06, "loss": 0.4449, "step": 2128 }, { "epoch": 0.5264589515331355, "grad_norm": 0.829407786061133, "learning_rate": 4.913519874738038e-06, "loss": 0.4076, "step": 2129 }, { "epoch": 0.526706231454006, "grad_norm": 0.8555564981475666, "learning_rate": 4.913435105149889e-06, "loss": 0.4267, "step": 2130 }, { "epoch": 0.5269535113748763, "grad_norm": 0.7951753815561883, "learning_rate": 4.913350294767606e-06, "loss": 0.4283, "step": 2131 }, { "epoch": 0.5272007912957468, "grad_norm": 0.7800972057420057, "learning_rate": 4.913265443592623e-06, "loss": 0.4148, "step": 2132 }, { "epoch": 0.5274480712166172, "grad_norm": 0.8085138173904091, "learning_rate": 4.913180551626375e-06, "loss": 0.4119, "step": 2133 }, { "epoch": 0.5276953511374877, "grad_norm": 0.8267372632890972, "learning_rate": 4.913095618870295e-06, "loss": 0.4228, "step": 2134 }, { "epoch": 0.527942631058358, "grad_norm": 0.8510446154835541, "learning_rate": 4.913010645325819e-06, "loss": 0.4278, "step": 2135 }, { "epoch": 0.5281899109792285, "grad_norm": 0.7552565986314601, "learning_rate": 4.912925630994384e-06, "loss": 0.4367, "step": 2136 }, { "epoch": 0.5284371909000989, "grad_norm": 0.8004850586370468, "learning_rate": 4.912840575877427e-06, "loss": 0.4401, "step": 2137 }, { "epoch": 0.5286844708209694, "grad_norm": 0.8428019963490445, "learning_rate": 4.912755479976386e-06, "loss": 0.4457, "step": 2138 }, { "epoch": 0.5289317507418397, "grad_norm": 0.7976182228965645, "learning_rate": 4.912670343292698e-06, "loss": 0.453, "step": 2139 }, { "epoch": 0.5291790306627102, "grad_norm": 0.8429254450002306, "learning_rate": 4.912585165827803e-06, "loss": 0.456, "step": 2140 }, { "epoch": 0.5294263105835806, "grad_norm": 0.7952504632214655, "learning_rate": 4.9124999475831406e-06, "loss": 0.455, "step": 2141 }, { "epoch": 0.5296735905044511, "grad_norm": 0.8351255206913245, "learning_rate": 4.912414688560152e-06, "loss": 0.45, "step": 2142 }, { "epoch": 0.5299208704253214, "grad_norm": 0.8073379583341719, "learning_rate": 4.912329388760277e-06, "loss": 0.4188, "step": 2143 }, { "epoch": 0.5301681503461919, "grad_norm": 0.8233470088787288, "learning_rate": 4.912244048184958e-06, "loss": 0.4293, "step": 2144 }, { "epoch": 0.5304154302670623, "grad_norm": 0.8453326881951997, "learning_rate": 4.912158666835638e-06, "loss": 0.417, "step": 2145 }, { "epoch": 0.5306627101879328, "grad_norm": 0.8241068934917218, "learning_rate": 4.912073244713759e-06, "loss": 0.4182, "step": 2146 }, { "epoch": 0.5309099901088031, "grad_norm": 0.8519056160776881, "learning_rate": 4.911987781820766e-06, "loss": 0.4313, "step": 2147 }, { "epoch": 0.5311572700296736, "grad_norm": 0.853659838700209, "learning_rate": 4.911902278158104e-06, "loss": 0.4107, "step": 2148 }, { "epoch": 0.531404549950544, "grad_norm": 0.801594697582263, "learning_rate": 4.911816733727216e-06, "loss": 0.4403, "step": 2149 }, { "epoch": 0.5316518298714145, "grad_norm": 0.8333840414724853, "learning_rate": 4.9117311485295504e-06, "loss": 0.4948, "step": 2150 }, { "epoch": 0.5318991097922848, "grad_norm": 0.8255020361468853, "learning_rate": 4.911645522566553e-06, "loss": 0.4524, "step": 2151 }, { "epoch": 0.5321463897131553, "grad_norm": 0.8213445420795159, "learning_rate": 4.91155985583967e-06, "loss": 0.4122, "step": 2152 }, { "epoch": 0.5323936696340257, "grad_norm": 0.8492299494064399, "learning_rate": 4.911474148350351e-06, "loss": 0.4305, "step": 2153 }, { "epoch": 0.5326409495548962, "grad_norm": 0.7898914841030381, "learning_rate": 4.9113884001000434e-06, "loss": 0.4021, "step": 2154 }, { "epoch": 0.5328882294757665, "grad_norm": 0.854393304558985, "learning_rate": 4.911302611090198e-06, "loss": 0.4462, "step": 2155 }, { "epoch": 0.533135509396637, "grad_norm": 0.8418573865966376, "learning_rate": 4.911216781322264e-06, "loss": 0.4522, "step": 2156 }, { "epoch": 0.5333827893175074, "grad_norm": 0.8668734938595644, "learning_rate": 4.911130910797693e-06, "loss": 0.422, "step": 2157 }, { "epoch": 0.5336300692383779, "grad_norm": 0.8161443060828762, "learning_rate": 4.911044999517936e-06, "loss": 0.4137, "step": 2158 }, { "epoch": 0.5338773491592482, "grad_norm": 0.812891841442536, "learning_rate": 4.910959047484443e-06, "loss": 0.482, "step": 2159 }, { "epoch": 0.5341246290801187, "grad_norm": 0.8152358678019482, "learning_rate": 4.910873054698671e-06, "loss": 0.4771, "step": 2160 }, { "epoch": 0.5343719090009891, "grad_norm": 0.8338420312345085, "learning_rate": 4.91078702116207e-06, "loss": 0.4435, "step": 2161 }, { "epoch": 0.5346191889218596, "grad_norm": 0.8266232547366531, "learning_rate": 4.910700946876096e-06, "loss": 0.4302, "step": 2162 }, { "epoch": 0.5348664688427299, "grad_norm": 0.8521482137328993, "learning_rate": 4.910614831842203e-06, "loss": 0.4587, "step": 2163 }, { "epoch": 0.5351137487636004, "grad_norm": 0.7961612413527702, "learning_rate": 4.910528676061848e-06, "loss": 0.4333, "step": 2164 }, { "epoch": 0.5353610286844708, "grad_norm": 0.7767270958799087, "learning_rate": 4.910442479536486e-06, "loss": 0.4744, "step": 2165 }, { "epoch": 0.5356083086053413, "grad_norm": 0.85859905757044, "learning_rate": 4.910356242267573e-06, "loss": 0.4421, "step": 2166 }, { "epoch": 0.5358555885262116, "grad_norm": 0.8270715526912454, "learning_rate": 4.91026996425657e-06, "loss": 0.4013, "step": 2167 }, { "epoch": 0.5361028684470821, "grad_norm": 0.8315477443532387, "learning_rate": 4.910183645504932e-06, "loss": 0.4191, "step": 2168 }, { "epoch": 0.5363501483679525, "grad_norm": 0.8379944677971474, "learning_rate": 4.91009728601412e-06, "loss": 0.4465, "step": 2169 }, { "epoch": 0.536597428288823, "grad_norm": 0.8729045253998553, "learning_rate": 4.910010885785593e-06, "loss": 0.3901, "step": 2170 }, { "epoch": 0.5368447082096933, "grad_norm": 0.8667862657203399, "learning_rate": 4.909924444820812e-06, "loss": 0.4309, "step": 2171 }, { "epoch": 0.5370919881305638, "grad_norm": 0.8580076116689658, "learning_rate": 4.909837963121236e-06, "loss": 0.459, "step": 2172 }, { "epoch": 0.5373392680514342, "grad_norm": 0.8721818555952473, "learning_rate": 4.90975144068833e-06, "loss": 0.4297, "step": 2173 }, { "epoch": 0.5375865479723047, "grad_norm": 0.8281536856693168, "learning_rate": 4.9096648775235555e-06, "loss": 0.4258, "step": 2174 }, { "epoch": 0.537833827893175, "grad_norm": 0.8542025748193519, "learning_rate": 4.909578273628374e-06, "loss": 0.4518, "step": 2175 }, { "epoch": 0.5380811078140455, "grad_norm": 0.8190991431494749, "learning_rate": 4.909491629004251e-06, "loss": 0.4541, "step": 2176 }, { "epoch": 0.5383283877349159, "grad_norm": 0.8253186688235863, "learning_rate": 4.909404943652649e-06, "loss": 0.4359, "step": 2177 }, { "epoch": 0.5385756676557863, "grad_norm": 0.7976389178773332, "learning_rate": 4.909318217575036e-06, "loss": 0.4315, "step": 2178 }, { "epoch": 0.5388229475766568, "grad_norm": 0.8230735329961125, "learning_rate": 4.909231450772877e-06, "loss": 0.4417, "step": 2179 }, { "epoch": 0.5390702274975272, "grad_norm": 0.8584664253796905, "learning_rate": 4.909144643247637e-06, "loss": 0.4229, "step": 2180 }, { "epoch": 0.5393175074183977, "grad_norm": 0.8369916036027719, "learning_rate": 4.909057795000786e-06, "loss": 0.4209, "step": 2181 }, { "epoch": 0.539564787339268, "grad_norm": 0.8213458816819943, "learning_rate": 4.90897090603379e-06, "loss": 0.4552, "step": 2182 }, { "epoch": 0.5398120672601385, "grad_norm": 0.7906532652889825, "learning_rate": 4.908883976348118e-06, "loss": 0.4441, "step": 2183 }, { "epoch": 0.5400593471810089, "grad_norm": 0.807640026645016, "learning_rate": 4.908797005945239e-06, "loss": 0.4002, "step": 2184 }, { "epoch": 0.5403066271018794, "grad_norm": 0.8040101227089689, "learning_rate": 4.908709994826625e-06, "loss": 0.4424, "step": 2185 }, { "epoch": 0.5405539070227497, "grad_norm": 0.7864043573853555, "learning_rate": 4.9086229429937445e-06, "loss": 0.438, "step": 2186 }, { "epoch": 0.5408011869436202, "grad_norm": 0.8279325371607597, "learning_rate": 4.908535850448071e-06, "loss": 0.4508, "step": 2187 }, { "epoch": 0.5410484668644906, "grad_norm": 0.8115008524086239, "learning_rate": 4.908448717191074e-06, "loss": 0.4323, "step": 2188 }, { "epoch": 0.5412957467853611, "grad_norm": 0.9052085553109384, "learning_rate": 4.9083615432242285e-06, "loss": 0.4108, "step": 2189 }, { "epoch": 0.5415430267062314, "grad_norm": 0.8303476204316234, "learning_rate": 4.908274328549006e-06, "loss": 0.4361, "step": 2190 }, { "epoch": 0.5417903066271019, "grad_norm": 0.7947377340525147, "learning_rate": 4.908187073166883e-06, "loss": 0.4392, "step": 2191 }, { "epoch": 0.5420375865479723, "grad_norm": 0.7966996712846387, "learning_rate": 4.908099777079334e-06, "loss": 0.4436, "step": 2192 }, { "epoch": 0.5422848664688428, "grad_norm": 0.7775724874301819, "learning_rate": 4.908012440287833e-06, "loss": 0.446, "step": 2193 }, { "epoch": 0.5425321463897131, "grad_norm": 0.8067580307740871, "learning_rate": 4.907925062793858e-06, "loss": 0.4667, "step": 2194 }, { "epoch": 0.5427794263105836, "grad_norm": 0.8398863268604441, "learning_rate": 4.907837644598884e-06, "loss": 0.4295, "step": 2195 }, { "epoch": 0.543026706231454, "grad_norm": 0.8404337735535851, "learning_rate": 4.90775018570439e-06, "loss": 0.4308, "step": 2196 }, { "epoch": 0.5432739861523245, "grad_norm": 0.8528144316022201, "learning_rate": 4.907662686111854e-06, "loss": 0.4287, "step": 2197 }, { "epoch": 0.5435212660731948, "grad_norm": 0.7777032861160702, "learning_rate": 4.907575145822755e-06, "loss": 0.4426, "step": 2198 }, { "epoch": 0.5437685459940653, "grad_norm": 0.8135680163087625, "learning_rate": 4.907487564838573e-06, "loss": 0.448, "step": 2199 }, { "epoch": 0.5440158259149357, "grad_norm": 0.800459169825187, "learning_rate": 4.907399943160787e-06, "loss": 0.4259, "step": 2200 }, { "epoch": 0.5442631058358062, "grad_norm": 0.8160972567128584, "learning_rate": 4.9073122807908815e-06, "loss": 0.4278, "step": 2201 }, { "epoch": 0.5445103857566765, "grad_norm": 0.8310785106901327, "learning_rate": 4.907224577730334e-06, "loss": 0.4305, "step": 2202 }, { "epoch": 0.544757665677547, "grad_norm": 0.828287917795897, "learning_rate": 4.907136833980629e-06, "loss": 0.4288, "step": 2203 }, { "epoch": 0.5450049455984174, "grad_norm": 0.8524021235419231, "learning_rate": 4.907049049543249e-06, "loss": 0.4648, "step": 2204 }, { "epoch": 0.5452522255192879, "grad_norm": 0.8054678015571812, "learning_rate": 4.906961224419679e-06, "loss": 0.3964, "step": 2205 }, { "epoch": 0.5454995054401582, "grad_norm": 0.870197504575575, "learning_rate": 4.9068733586114025e-06, "loss": 0.4401, "step": 2206 }, { "epoch": 0.5457467853610287, "grad_norm": 0.7999008358756727, "learning_rate": 4.9067854521199055e-06, "loss": 0.4216, "step": 2207 }, { "epoch": 0.5459940652818991, "grad_norm": 0.7757865860444793, "learning_rate": 4.906697504946672e-06, "loss": 0.4631, "step": 2208 }, { "epoch": 0.5462413452027696, "grad_norm": 0.8179813608668347, "learning_rate": 4.906609517093192e-06, "loss": 0.4291, "step": 2209 }, { "epoch": 0.5464886251236399, "grad_norm": 0.8254017964289843, "learning_rate": 4.906521488560949e-06, "loss": 0.4165, "step": 2210 }, { "epoch": 0.5467359050445104, "grad_norm": 0.8365497028422612, "learning_rate": 4.906433419351433e-06, "loss": 0.414, "step": 2211 }, { "epoch": 0.5469831849653808, "grad_norm": 0.8249052368071237, "learning_rate": 4.906345309466131e-06, "loss": 0.4337, "step": 2212 }, { "epoch": 0.5472304648862513, "grad_norm": 0.793602182328121, "learning_rate": 4.906257158906536e-06, "loss": 0.4529, "step": 2213 }, { "epoch": 0.5474777448071216, "grad_norm": 0.7932116491251698, "learning_rate": 4.9061689676741335e-06, "loss": 0.4417, "step": 2214 }, { "epoch": 0.5477250247279921, "grad_norm": 0.8341760080668459, "learning_rate": 4.906080735770417e-06, "loss": 0.4301, "step": 2215 }, { "epoch": 0.5479723046488625, "grad_norm": 0.8190689937567308, "learning_rate": 4.905992463196877e-06, "loss": 0.4548, "step": 2216 }, { "epoch": 0.548219584569733, "grad_norm": 0.8905987162330603, "learning_rate": 4.9059041499550055e-06, "loss": 0.4423, "step": 2217 }, { "epoch": 0.5484668644906033, "grad_norm": 0.8042704183629057, "learning_rate": 4.905815796046296e-06, "loss": 0.4159, "step": 2218 }, { "epoch": 0.5487141444114738, "grad_norm": 0.7900037057341976, "learning_rate": 4.905727401472241e-06, "loss": 0.4726, "step": 2219 }, { "epoch": 0.5489614243323442, "grad_norm": 0.8418688230703245, "learning_rate": 4.905638966234335e-06, "loss": 0.4332, "step": 2220 }, { "epoch": 0.5492087042532147, "grad_norm": 0.8111111477828057, "learning_rate": 4.905550490334072e-06, "loss": 0.4698, "step": 2221 }, { "epoch": 0.549455984174085, "grad_norm": 0.7790535493069579, "learning_rate": 4.90546197377295e-06, "loss": 0.4388, "step": 2222 }, { "epoch": 0.5497032640949555, "grad_norm": 0.8159173255576104, "learning_rate": 4.905373416552463e-06, "loss": 0.4341, "step": 2223 }, { "epoch": 0.5499505440158259, "grad_norm": 0.8068905887185784, "learning_rate": 4.905284818674107e-06, "loss": 0.4507, "step": 2224 }, { "epoch": 0.5501978239366964, "grad_norm": 0.7975516282689951, "learning_rate": 4.905196180139382e-06, "loss": 0.4275, "step": 2225 }, { "epoch": 0.5504451038575667, "grad_norm": 0.8363033551597234, "learning_rate": 4.905107500949785e-06, "loss": 0.4033, "step": 2226 }, { "epoch": 0.5506923837784372, "grad_norm": 0.8446055816925565, "learning_rate": 4.905018781106815e-06, "loss": 0.4158, "step": 2227 }, { "epoch": 0.5509396636993076, "grad_norm": 0.8059400557615801, "learning_rate": 4.904930020611972e-06, "loss": 0.4892, "step": 2228 }, { "epoch": 0.5511869436201781, "grad_norm": 0.8230403015167589, "learning_rate": 4.904841219466756e-06, "loss": 0.4299, "step": 2229 }, { "epoch": 0.5514342235410484, "grad_norm": 0.8136682923193483, "learning_rate": 4.904752377672668e-06, "loss": 0.4462, "step": 2230 }, { "epoch": 0.5516815034619189, "grad_norm": 0.8024451412346982, "learning_rate": 4.90466349523121e-06, "loss": 0.4318, "step": 2231 }, { "epoch": 0.5519287833827893, "grad_norm": 0.8662454751543324, "learning_rate": 4.904574572143883e-06, "loss": 0.4286, "step": 2232 }, { "epoch": 0.5521760633036598, "grad_norm": 0.7864315924989522, "learning_rate": 4.904485608412193e-06, "loss": 0.4594, "step": 2233 }, { "epoch": 0.5524233432245301, "grad_norm": 0.7968996130135781, "learning_rate": 4.90439660403764e-06, "loss": 0.4429, "step": 2234 }, { "epoch": 0.5526706231454006, "grad_norm": 0.8056598325190589, "learning_rate": 4.904307559021731e-06, "loss": 0.4406, "step": 2235 }, { "epoch": 0.552917903066271, "grad_norm": 0.7968152385905476, "learning_rate": 4.9042184733659716e-06, "loss": 0.4491, "step": 2236 }, { "epoch": 0.5531651829871415, "grad_norm": 0.8240548698911649, "learning_rate": 4.904129347071866e-06, "loss": 0.4467, "step": 2237 }, { "epoch": 0.5534124629080118, "grad_norm": 0.8459774707886466, "learning_rate": 4.904040180140921e-06, "loss": 0.4174, "step": 2238 }, { "epoch": 0.5536597428288823, "grad_norm": 0.7962096726579939, "learning_rate": 4.903950972574644e-06, "loss": 0.4378, "step": 2239 }, { "epoch": 0.5539070227497527, "grad_norm": 0.7956323439925366, "learning_rate": 4.903861724374542e-06, "loss": 0.4728, "step": 2240 }, { "epoch": 0.5541543026706232, "grad_norm": 0.874177788619472, "learning_rate": 4.903772435542126e-06, "loss": 0.4302, "step": 2241 }, { "epoch": 0.5544015825914935, "grad_norm": 0.8428091902882662, "learning_rate": 4.9036831060789025e-06, "loss": 0.4645, "step": 2242 }, { "epoch": 0.554648862512364, "grad_norm": 0.8492857888020867, "learning_rate": 4.903593735986383e-06, "loss": 0.4369, "step": 2243 }, { "epoch": 0.5548961424332344, "grad_norm": 0.8303933191114768, "learning_rate": 4.903504325266077e-06, "loss": 0.4068, "step": 2244 }, { "epoch": 0.5551434223541049, "grad_norm": 0.827774144006841, "learning_rate": 4.903414873919497e-06, "loss": 0.4302, "step": 2245 }, { "epoch": 0.5553907022749752, "grad_norm": 0.8032810656584819, "learning_rate": 4.903325381948154e-06, "loss": 0.4324, "step": 2246 }, { "epoch": 0.5556379821958457, "grad_norm": 0.7855184005897174, "learning_rate": 4.903235849353562e-06, "loss": 0.4413, "step": 2247 }, { "epoch": 0.5558852621167161, "grad_norm": 0.8569371908994023, "learning_rate": 4.903146276137233e-06, "loss": 0.4295, "step": 2248 }, { "epoch": 0.5561325420375866, "grad_norm": 0.8228709188562383, "learning_rate": 4.903056662300682e-06, "loss": 0.4093, "step": 2249 }, { "epoch": 0.5563798219584569, "grad_norm": 0.7978752521898264, "learning_rate": 4.9029670078454225e-06, "loss": 0.4483, "step": 2250 }, { "epoch": 0.5566271018793274, "grad_norm": 0.8386855590201181, "learning_rate": 4.902877312772973e-06, "loss": 0.4367, "step": 2251 }, { "epoch": 0.5568743818001978, "grad_norm": 0.8097659858522324, "learning_rate": 4.902787577084844e-06, "loss": 0.4458, "step": 2252 }, { "epoch": 0.5571216617210683, "grad_norm": 0.7981580393798596, "learning_rate": 4.902697800782558e-06, "loss": 0.4582, "step": 2253 }, { "epoch": 0.5573689416419386, "grad_norm": 0.8063797456708763, "learning_rate": 4.9026079838676295e-06, "loss": 0.4467, "step": 2254 }, { "epoch": 0.5576162215628091, "grad_norm": 0.8263331039140084, "learning_rate": 4.902518126341577e-06, "loss": 0.4623, "step": 2255 }, { "epoch": 0.5578635014836796, "grad_norm": 0.7836176844916037, "learning_rate": 4.90242822820592e-06, "loss": 0.4364, "step": 2256 }, { "epoch": 0.55811078140455, "grad_norm": 0.8055018134426273, "learning_rate": 4.9023382894621775e-06, "loss": 0.4334, "step": 2257 }, { "epoch": 0.5583580613254204, "grad_norm": 0.8561517123386011, "learning_rate": 4.90224831011187e-06, "loss": 0.4232, "step": 2258 }, { "epoch": 0.5586053412462908, "grad_norm": 0.857649910835037, "learning_rate": 4.902158290156518e-06, "loss": 0.4254, "step": 2259 }, { "epoch": 0.5588526211671613, "grad_norm": 0.847810648250041, "learning_rate": 4.902068229597644e-06, "loss": 0.4346, "step": 2260 }, { "epoch": 0.5590999010880316, "grad_norm": 0.8041283790378243, "learning_rate": 4.901978128436769e-06, "loss": 0.4451, "step": 2261 }, { "epoch": 0.5593471810089021, "grad_norm": 0.8463729671090765, "learning_rate": 4.901887986675418e-06, "loss": 0.4629, "step": 2262 }, { "epoch": 0.5595944609297725, "grad_norm": 0.8255668378936192, "learning_rate": 4.901797804315112e-06, "loss": 0.4526, "step": 2263 }, { "epoch": 0.559841740850643, "grad_norm": 0.8397646315208941, "learning_rate": 4.901707581357377e-06, "loss": 0.4475, "step": 2264 }, { "epoch": 0.5600890207715133, "grad_norm": 0.8508504303490839, "learning_rate": 4.901617317803738e-06, "loss": 0.4254, "step": 2265 }, { "epoch": 0.5603363006923838, "grad_norm": 0.7740850838820026, "learning_rate": 4.9015270136557204e-06, "loss": 0.4467, "step": 2266 }, { "epoch": 0.5605835806132542, "grad_norm": 0.8227529902841786, "learning_rate": 4.9014366689148504e-06, "loss": 0.4195, "step": 2267 }, { "epoch": 0.5608308605341247, "grad_norm": 0.777112849292443, "learning_rate": 4.9013462835826564e-06, "loss": 0.436, "step": 2268 }, { "epoch": 0.561078140454995, "grad_norm": 0.802035885633998, "learning_rate": 4.901255857660664e-06, "loss": 0.4443, "step": 2269 }, { "epoch": 0.5613254203758655, "grad_norm": 0.7937144984071973, "learning_rate": 4.9011653911504035e-06, "loss": 0.4623, "step": 2270 }, { "epoch": 0.5615727002967359, "grad_norm": 0.8332439563357789, "learning_rate": 4.901074884053403e-06, "loss": 0.4373, "step": 2271 }, { "epoch": 0.5618199802176064, "grad_norm": 0.826393197627037, "learning_rate": 4.900984336371192e-06, "loss": 0.4567, "step": 2272 }, { "epoch": 0.5620672601384767, "grad_norm": 0.8725228788732688, "learning_rate": 4.900893748105303e-06, "loss": 0.4052, "step": 2273 }, { "epoch": 0.5623145400593472, "grad_norm": 0.8350692058768774, "learning_rate": 4.900803119257265e-06, "loss": 0.4229, "step": 2274 }, { "epoch": 0.5625618199802176, "grad_norm": 0.7833697423172672, "learning_rate": 4.900712449828611e-06, "loss": 0.4712, "step": 2275 }, { "epoch": 0.5628090999010881, "grad_norm": 0.8084407515407549, "learning_rate": 4.9006217398208735e-06, "loss": 0.4767, "step": 2276 }, { "epoch": 0.5630563798219584, "grad_norm": 0.8163996352400188, "learning_rate": 4.900530989235586e-06, "loss": 0.4593, "step": 2277 }, { "epoch": 0.5633036597428289, "grad_norm": 0.8359981175249533, "learning_rate": 4.9004401980742814e-06, "loss": 0.4633, "step": 2278 }, { "epoch": 0.5635509396636993, "grad_norm": 0.8165262581141532, "learning_rate": 4.900349366338495e-06, "loss": 0.4334, "step": 2279 }, { "epoch": 0.5637982195845698, "grad_norm": 0.8640529713261024, "learning_rate": 4.900258494029763e-06, "loss": 0.4489, "step": 2280 }, { "epoch": 0.5640454995054401, "grad_norm": 0.8400644684388208, "learning_rate": 4.90016758114962e-06, "loss": 0.4168, "step": 2281 }, { "epoch": 0.5642927794263106, "grad_norm": 0.8594985484661135, "learning_rate": 4.9000766276996025e-06, "loss": 0.443, "step": 2282 }, { "epoch": 0.564540059347181, "grad_norm": 0.7941467731546799, "learning_rate": 4.8999856336812495e-06, "loss": 0.4597, "step": 2283 }, { "epoch": 0.5647873392680515, "grad_norm": 0.8334239105274157, "learning_rate": 4.899894599096098e-06, "loss": 0.4527, "step": 2284 }, { "epoch": 0.5650346191889218, "grad_norm": 0.8680664900329874, "learning_rate": 4.899803523945688e-06, "loss": 0.4288, "step": 2285 }, { "epoch": 0.5652818991097923, "grad_norm": 0.827260333304532, "learning_rate": 4.899712408231556e-06, "loss": 0.4289, "step": 2286 }, { "epoch": 0.5655291790306627, "grad_norm": 0.8183857986951192, "learning_rate": 4.899621251955245e-06, "loss": 0.4149, "step": 2287 }, { "epoch": 0.5657764589515332, "grad_norm": 0.8184921245944378, "learning_rate": 4.899530055118295e-06, "loss": 0.4581, "step": 2288 }, { "epoch": 0.5660237388724035, "grad_norm": 0.846124403399315, "learning_rate": 4.899438817722248e-06, "loss": 0.4104, "step": 2289 }, { "epoch": 0.566271018793274, "grad_norm": 0.8560378103575694, "learning_rate": 4.899347539768644e-06, "loss": 0.4358, "step": 2290 }, { "epoch": 0.5665182987141444, "grad_norm": 0.8072714154604986, "learning_rate": 4.899256221259028e-06, "loss": 0.4087, "step": 2291 }, { "epoch": 0.5667655786350149, "grad_norm": 0.7711814123606188, "learning_rate": 4.899164862194943e-06, "loss": 0.4623, "step": 2292 }, { "epoch": 0.5670128585558852, "grad_norm": 0.7892717211222391, "learning_rate": 4.899073462577933e-06, "loss": 0.4566, "step": 2293 }, { "epoch": 0.5672601384767557, "grad_norm": 0.8024640398701117, "learning_rate": 4.898982022409543e-06, "loss": 0.4082, "step": 2294 }, { "epoch": 0.5675074183976261, "grad_norm": 0.8267656493786144, "learning_rate": 4.898890541691319e-06, "loss": 0.4373, "step": 2295 }, { "epoch": 0.5677546983184966, "grad_norm": 0.804452429351204, "learning_rate": 4.898799020424806e-06, "loss": 0.4303, "step": 2296 }, { "epoch": 0.5680019782393669, "grad_norm": 0.8341753209395127, "learning_rate": 4.8987074586115535e-06, "loss": 0.4246, "step": 2297 }, { "epoch": 0.5682492581602374, "grad_norm": 0.8332330741798474, "learning_rate": 4.898615856253107e-06, "loss": 0.4177, "step": 2298 }, { "epoch": 0.5684965380811078, "grad_norm": 0.7813581360985322, "learning_rate": 4.898524213351015e-06, "loss": 0.4176, "step": 2299 }, { "epoch": 0.5687438180019783, "grad_norm": 0.822524089394135, "learning_rate": 4.898432529906827e-06, "loss": 0.465, "step": 2300 }, { "epoch": 0.5689910979228486, "grad_norm": 0.9011958348262217, "learning_rate": 4.8983408059220935e-06, "loss": 0.4204, "step": 2301 }, { "epoch": 0.5692383778437191, "grad_norm": 0.8306695477074517, "learning_rate": 4.898249041398363e-06, "loss": 0.4838, "step": 2302 }, { "epoch": 0.5694856577645895, "grad_norm": 0.8522958342818597, "learning_rate": 4.898157236337189e-06, "loss": 0.4461, "step": 2303 }, { "epoch": 0.56973293768546, "grad_norm": 0.8436724215518849, "learning_rate": 4.898065390740121e-06, "loss": 0.4555, "step": 2304 }, { "epoch": 0.5699802176063303, "grad_norm": 0.7847752288452432, "learning_rate": 4.8979735046087126e-06, "loss": 0.4805, "step": 2305 }, { "epoch": 0.5702274975272008, "grad_norm": 0.8250677875140624, "learning_rate": 4.897881577944517e-06, "loss": 0.4447, "step": 2306 }, { "epoch": 0.5704747774480712, "grad_norm": 0.8081462932632263, "learning_rate": 4.897789610749088e-06, "loss": 0.4424, "step": 2307 }, { "epoch": 0.5707220573689417, "grad_norm": 0.8776056253920698, "learning_rate": 4.89769760302398e-06, "loss": 0.4045, "step": 2308 }, { "epoch": 0.570969337289812, "grad_norm": 0.8319100971376697, "learning_rate": 4.897605554770747e-06, "loss": 0.4583, "step": 2309 }, { "epoch": 0.5712166172106825, "grad_norm": 0.8083704554589096, "learning_rate": 4.897513465990947e-06, "loss": 0.4305, "step": 2310 }, { "epoch": 0.5714638971315529, "grad_norm": 0.8477139545852911, "learning_rate": 4.897421336686136e-06, "loss": 0.4139, "step": 2311 }, { "epoch": 0.5717111770524234, "grad_norm": 0.7994312192421875, "learning_rate": 4.8973291668578705e-06, "loss": 0.4224, "step": 2312 }, { "epoch": 0.5719584569732937, "grad_norm": 0.834166390095418, "learning_rate": 4.897236956507708e-06, "loss": 0.441, "step": 2313 }, { "epoch": 0.5722057368941642, "grad_norm": 0.791463316947777, "learning_rate": 4.897144705637209e-06, "loss": 0.4207, "step": 2314 }, { "epoch": 0.5724530168150346, "grad_norm": 0.8337894517778417, "learning_rate": 4.897052414247931e-06, "loss": 0.432, "step": 2315 }, { "epoch": 0.5727002967359051, "grad_norm": 0.8463594355939713, "learning_rate": 4.8969600823414344e-06, "loss": 0.4503, "step": 2316 }, { "epoch": 0.5729475766567754, "grad_norm": 0.8018986013541408, "learning_rate": 4.896867709919281e-06, "loss": 0.4249, "step": 2317 }, { "epoch": 0.5731948565776459, "grad_norm": 0.7905890182712009, "learning_rate": 4.896775296983031e-06, "loss": 0.4289, "step": 2318 }, { "epoch": 0.5734421364985163, "grad_norm": 0.8617968858880003, "learning_rate": 4.896682843534247e-06, "loss": 0.4388, "step": 2319 }, { "epoch": 0.5736894164193868, "grad_norm": 0.7938863568430938, "learning_rate": 4.896590349574492e-06, "loss": 0.4278, "step": 2320 }, { "epoch": 0.5739366963402571, "grad_norm": 0.8029197121608271, "learning_rate": 4.8964978151053275e-06, "loss": 0.4108, "step": 2321 }, { "epoch": 0.5741839762611276, "grad_norm": 0.8299560729574813, "learning_rate": 4.89640524012832e-06, "loss": 0.4709, "step": 2322 }, { "epoch": 0.574431256181998, "grad_norm": 0.796430638715101, "learning_rate": 4.8963126246450335e-06, "loss": 0.4285, "step": 2323 }, { "epoch": 0.5746785361028685, "grad_norm": 0.8555593774864729, "learning_rate": 4.8962199686570335e-06, "loss": 0.4079, "step": 2324 }, { "epoch": 0.5749258160237388, "grad_norm": 0.803728746348809, "learning_rate": 4.896127272165886e-06, "loss": 0.4427, "step": 2325 }, { "epoch": 0.5751730959446093, "grad_norm": 0.8023320061091146, "learning_rate": 4.896034535173158e-06, "loss": 0.426, "step": 2326 }, { "epoch": 0.5754203758654797, "grad_norm": 0.7665915360838667, "learning_rate": 4.895941757680415e-06, "loss": 0.4767, "step": 2327 }, { "epoch": 0.5756676557863502, "grad_norm": 0.8435315407885349, "learning_rate": 4.8958489396892286e-06, "loss": 0.4291, "step": 2328 }, { "epoch": 0.5759149357072205, "grad_norm": 0.804335725986931, "learning_rate": 4.895756081201166e-06, "loss": 0.4368, "step": 2329 }, { "epoch": 0.576162215628091, "grad_norm": 0.8100890310943418, "learning_rate": 4.895663182217797e-06, "loss": 0.4465, "step": 2330 }, { "epoch": 0.5764094955489614, "grad_norm": 0.8408293669759955, "learning_rate": 4.895570242740692e-06, "loss": 0.4538, "step": 2331 }, { "epoch": 0.5766567754698319, "grad_norm": 0.8239576081794222, "learning_rate": 4.895477262771422e-06, "loss": 0.4191, "step": 2332 }, { "epoch": 0.5769040553907022, "grad_norm": 0.8557709570378603, "learning_rate": 4.895384242311557e-06, "loss": 0.4175, "step": 2333 }, { "epoch": 0.5771513353115727, "grad_norm": 0.8297806420249763, "learning_rate": 4.895291181362673e-06, "loss": 0.4594, "step": 2334 }, { "epoch": 0.5773986152324432, "grad_norm": 0.8394220393681534, "learning_rate": 4.895198079926339e-06, "loss": 0.433, "step": 2335 }, { "epoch": 0.5776458951533135, "grad_norm": 0.8445647987604912, "learning_rate": 4.895104938004131e-06, "loss": 0.4482, "step": 2336 }, { "epoch": 0.577893175074184, "grad_norm": 0.764743578386273, "learning_rate": 4.895011755597622e-06, "loss": 0.4393, "step": 2337 }, { "epoch": 0.5781404549950544, "grad_norm": 0.7726672754820177, "learning_rate": 4.894918532708388e-06, "loss": 0.4574, "step": 2338 }, { "epoch": 0.5783877349159249, "grad_norm": 0.8421556128246851, "learning_rate": 4.894825269338005e-06, "loss": 0.4334, "step": 2339 }, { "epoch": 0.5786350148367952, "grad_norm": 0.8046307352593715, "learning_rate": 4.894731965488049e-06, "loss": 0.46, "step": 2340 }, { "epoch": 0.5788822947576657, "grad_norm": 0.8135881552965002, "learning_rate": 4.894638621160097e-06, "loss": 0.4574, "step": 2341 }, { "epoch": 0.5791295746785361, "grad_norm": 0.8889421866638874, "learning_rate": 4.894545236355728e-06, "loss": 0.4498, "step": 2342 }, { "epoch": 0.5793768545994066, "grad_norm": 0.801204639738931, "learning_rate": 4.894451811076518e-06, "loss": 0.4134, "step": 2343 }, { "epoch": 0.579624134520277, "grad_norm": 0.7606419385924601, "learning_rate": 4.894358345324047e-06, "loss": 0.4414, "step": 2344 }, { "epoch": 0.5798714144411474, "grad_norm": 0.7991068318546964, "learning_rate": 4.894264839099897e-06, "loss": 0.4416, "step": 2345 }, { "epoch": 0.5801186943620178, "grad_norm": 0.7771589957974486, "learning_rate": 4.894171292405646e-06, "loss": 0.4342, "step": 2346 }, { "epoch": 0.5803659742828883, "grad_norm": 0.7968997920942941, "learning_rate": 4.894077705242877e-06, "loss": 0.4354, "step": 2347 }, { "epoch": 0.5806132542037586, "grad_norm": 0.8548778972922664, "learning_rate": 4.8939840776131695e-06, "loss": 0.4334, "step": 2348 }, { "epoch": 0.5808605341246291, "grad_norm": 0.7816682107099382, "learning_rate": 4.893890409518108e-06, "loss": 0.4431, "step": 2349 }, { "epoch": 0.5811078140454995, "grad_norm": 0.799351712968776, "learning_rate": 4.893796700959277e-06, "loss": 0.4222, "step": 2350 }, { "epoch": 0.58135509396637, "grad_norm": 0.8482320335522718, "learning_rate": 4.893702951938257e-06, "loss": 0.4419, "step": 2351 }, { "epoch": 0.5816023738872403, "grad_norm": 0.8284555077453484, "learning_rate": 4.8936091624566355e-06, "loss": 0.4476, "step": 2352 }, { "epoch": 0.5818496538081108, "grad_norm": 0.8275396827906788, "learning_rate": 4.893515332515996e-06, "loss": 0.4343, "step": 2353 }, { "epoch": 0.5820969337289812, "grad_norm": 0.7981461681609753, "learning_rate": 4.893421462117926e-06, "loss": 0.4716, "step": 2354 }, { "epoch": 0.5823442136498517, "grad_norm": 0.8157820886340437, "learning_rate": 4.893327551264011e-06, "loss": 0.4425, "step": 2355 }, { "epoch": 0.582591493570722, "grad_norm": 0.8458746953030688, "learning_rate": 4.893233599955839e-06, "loss": 0.4378, "step": 2356 }, { "epoch": 0.5828387734915925, "grad_norm": 0.7723732034558244, "learning_rate": 4.8931396081949975e-06, "loss": 0.4288, "step": 2357 }, { "epoch": 0.5830860534124629, "grad_norm": 0.8096170301395595, "learning_rate": 4.893045575983076e-06, "loss": 0.4779, "step": 2358 }, { "epoch": 0.5833333333333334, "grad_norm": 0.9007402286717402, "learning_rate": 4.892951503321664e-06, "loss": 0.437, "step": 2359 }, { "epoch": 0.5835806132542037, "grad_norm": 0.8482481245802722, "learning_rate": 4.89285739021235e-06, "loss": 0.4514, "step": 2360 }, { "epoch": 0.5838278931750742, "grad_norm": 0.8062299158701528, "learning_rate": 4.8927632366567275e-06, "loss": 0.4478, "step": 2361 }, { "epoch": 0.5840751730959446, "grad_norm": 0.8283318915862347, "learning_rate": 4.892669042656385e-06, "loss": 0.4219, "step": 2362 }, { "epoch": 0.5843224530168151, "grad_norm": 0.8426695019394065, "learning_rate": 4.892574808212917e-06, "loss": 0.4448, "step": 2363 }, { "epoch": 0.5845697329376854, "grad_norm": 0.8086019012718995, "learning_rate": 4.892480533327915e-06, "loss": 0.4214, "step": 2364 }, { "epoch": 0.5848170128585559, "grad_norm": 0.7901079758203985, "learning_rate": 4.892386218002973e-06, "loss": 0.4535, "step": 2365 }, { "epoch": 0.5850642927794263, "grad_norm": 0.8167317156898983, "learning_rate": 4.892291862239684e-06, "loss": 0.4087, "step": 2366 }, { "epoch": 0.5853115727002968, "grad_norm": 0.8526609048941977, "learning_rate": 4.892197466039646e-06, "loss": 0.4308, "step": 2367 }, { "epoch": 0.5855588526211671, "grad_norm": 0.8253932828103235, "learning_rate": 4.8921030294044515e-06, "loss": 0.403, "step": 2368 }, { "epoch": 0.5858061325420376, "grad_norm": 0.8005452158190326, "learning_rate": 4.892008552335697e-06, "loss": 0.4069, "step": 2369 }, { "epoch": 0.586053412462908, "grad_norm": 0.7694913501113785, "learning_rate": 4.891914034834982e-06, "loss": 0.4201, "step": 2370 }, { "epoch": 0.5863006923837785, "grad_norm": 0.8099442709125199, "learning_rate": 4.891819476903902e-06, "loss": 0.4575, "step": 2371 }, { "epoch": 0.5865479723046488, "grad_norm": 0.8306582160233137, "learning_rate": 4.891724878544054e-06, "loss": 0.4259, "step": 2372 }, { "epoch": 0.5867952522255193, "grad_norm": 0.7982546667998933, "learning_rate": 4.891630239757041e-06, "loss": 0.4317, "step": 2373 }, { "epoch": 0.5870425321463897, "grad_norm": 0.8258893090255622, "learning_rate": 4.891535560544459e-06, "loss": 0.4472, "step": 2374 }, { "epoch": 0.5872898120672602, "grad_norm": 0.7941013746518771, "learning_rate": 4.89144084090791e-06, "loss": 0.422, "step": 2375 }, { "epoch": 0.5875370919881305, "grad_norm": 0.8618283605659312, "learning_rate": 4.891346080848995e-06, "loss": 0.4099, "step": 2376 }, { "epoch": 0.587784371909001, "grad_norm": 0.8359174158930102, "learning_rate": 4.891251280369316e-06, "loss": 0.4186, "step": 2377 }, { "epoch": 0.5880316518298714, "grad_norm": 0.8476514457524892, "learning_rate": 4.891156439470473e-06, "loss": 0.4319, "step": 2378 }, { "epoch": 0.5882789317507419, "grad_norm": 0.8448744222305574, "learning_rate": 4.891061558154073e-06, "loss": 0.4568, "step": 2379 }, { "epoch": 0.5885262116716122, "grad_norm": 0.843451311072349, "learning_rate": 4.890966636421717e-06, "loss": 0.4319, "step": 2380 }, { "epoch": 0.5887734915924827, "grad_norm": 0.8339217168520412, "learning_rate": 4.890871674275011e-06, "loss": 0.4351, "step": 2381 }, { "epoch": 0.5890207715133531, "grad_norm": 0.833107783986654, "learning_rate": 4.890776671715558e-06, "loss": 0.4035, "step": 2382 }, { "epoch": 0.5892680514342236, "grad_norm": 0.8137168641583187, "learning_rate": 4.890681628744966e-06, "loss": 0.4259, "step": 2383 }, { "epoch": 0.5895153313550939, "grad_norm": 0.805892766745161, "learning_rate": 4.890586545364841e-06, "loss": 0.4333, "step": 2384 }, { "epoch": 0.5897626112759644, "grad_norm": 0.8375188065735549, "learning_rate": 4.890491421576788e-06, "loss": 0.4335, "step": 2385 }, { "epoch": 0.5900098911968348, "grad_norm": 0.8230336151396773, "learning_rate": 4.8903962573824185e-06, "loss": 0.4406, "step": 2386 }, { "epoch": 0.5902571711177053, "grad_norm": 0.8155143896059736, "learning_rate": 4.890301052783339e-06, "loss": 0.4545, "step": 2387 }, { "epoch": 0.5905044510385756, "grad_norm": 0.7717233653578202, "learning_rate": 4.890205807781159e-06, "loss": 0.4469, "step": 2388 }, { "epoch": 0.5907517309594461, "grad_norm": 0.8009676088638478, "learning_rate": 4.8901105223774885e-06, "loss": 0.4335, "step": 2389 }, { "epoch": 0.5909990108803165, "grad_norm": 0.8379245612231933, "learning_rate": 4.890015196573938e-06, "loss": 0.4205, "step": 2390 }, { "epoch": 0.591246290801187, "grad_norm": 0.8086859042796692, "learning_rate": 4.889919830372118e-06, "loss": 0.4363, "step": 2391 }, { "epoch": 0.5914935707220573, "grad_norm": 0.8130939334847568, "learning_rate": 4.889824423773642e-06, "loss": 0.4109, "step": 2392 }, { "epoch": 0.5917408506429278, "grad_norm": 0.8476467595351704, "learning_rate": 4.8897289767801225e-06, "loss": 0.4031, "step": 2393 }, { "epoch": 0.5919881305637982, "grad_norm": 0.8461748435957103, "learning_rate": 4.889633489393173e-06, "loss": 0.4091, "step": 2394 }, { "epoch": 0.5922354104846687, "grad_norm": 0.8376353137944713, "learning_rate": 4.889537961614405e-06, "loss": 0.4146, "step": 2395 }, { "epoch": 0.592482690405539, "grad_norm": 0.8239443055595109, "learning_rate": 4.889442393445435e-06, "loss": 0.4447, "step": 2396 }, { "epoch": 0.5927299703264095, "grad_norm": 0.849161150005281, "learning_rate": 4.88934678488788e-06, "loss": 0.4547, "step": 2397 }, { "epoch": 0.5929772502472799, "grad_norm": 0.790329735581731, "learning_rate": 4.889251135943353e-06, "loss": 0.4401, "step": 2398 }, { "epoch": 0.5932245301681504, "grad_norm": 0.8193562709687785, "learning_rate": 4.889155446613473e-06, "loss": 0.4093, "step": 2399 }, { "epoch": 0.5934718100890207, "grad_norm": 0.8170696936647929, "learning_rate": 4.889059716899857e-06, "loss": 0.4233, "step": 2400 }, { "epoch": 0.5937190900098912, "grad_norm": 0.8168596273728271, "learning_rate": 4.888963946804122e-06, "loss": 0.3982, "step": 2401 }, { "epoch": 0.5939663699307616, "grad_norm": 0.8114324383935596, "learning_rate": 4.888868136327888e-06, "loss": 0.4336, "step": 2402 }, { "epoch": 0.594213649851632, "grad_norm": 0.814161224922398, "learning_rate": 4.888772285472773e-06, "loss": 0.4445, "step": 2403 }, { "epoch": 0.5944609297725024, "grad_norm": 0.8203037927462447, "learning_rate": 4.888676394240399e-06, "loss": 0.4143, "step": 2404 }, { "epoch": 0.5947082096933729, "grad_norm": 0.7833477571195605, "learning_rate": 4.888580462632386e-06, "loss": 0.4369, "step": 2405 }, { "epoch": 0.5949554896142433, "grad_norm": 0.8367330917074817, "learning_rate": 4.888484490650355e-06, "loss": 0.423, "step": 2406 }, { "epoch": 0.5952027695351138, "grad_norm": 0.7996837665018429, "learning_rate": 4.888388478295929e-06, "loss": 0.4215, "step": 2407 }, { "epoch": 0.5954500494559841, "grad_norm": 0.8099673447111903, "learning_rate": 4.888292425570731e-06, "loss": 0.4565, "step": 2408 }, { "epoch": 0.5956973293768546, "grad_norm": 0.8315706071687368, "learning_rate": 4.888196332476385e-06, "loss": 0.4251, "step": 2409 }, { "epoch": 0.595944609297725, "grad_norm": 0.8641851324877825, "learning_rate": 4.8881001990145125e-06, "loss": 0.431, "step": 2410 }, { "epoch": 0.5961918892185954, "grad_norm": 0.8151888626416508, "learning_rate": 4.888004025186742e-06, "loss": 0.4106, "step": 2411 }, { "epoch": 0.5964391691394659, "grad_norm": 0.8143057678231387, "learning_rate": 4.887907810994697e-06, "loss": 0.434, "step": 2412 }, { "epoch": 0.5966864490603363, "grad_norm": 0.8169851007290792, "learning_rate": 4.887811556440004e-06, "loss": 0.4379, "step": 2413 }, { "epoch": 0.5969337289812068, "grad_norm": 0.8156004947234917, "learning_rate": 4.887715261524291e-06, "loss": 0.4556, "step": 2414 }, { "epoch": 0.5971810089020771, "grad_norm": 0.8226055472128943, "learning_rate": 4.887618926249185e-06, "loss": 0.4323, "step": 2415 }, { "epoch": 0.5974282888229476, "grad_norm": 0.8130042612264833, "learning_rate": 4.887522550616314e-06, "loss": 0.4472, "step": 2416 }, { "epoch": 0.597675568743818, "grad_norm": 0.7757441744985806, "learning_rate": 4.887426134627308e-06, "loss": 0.4422, "step": 2417 }, { "epoch": 0.5979228486646885, "grad_norm": 0.8293693320678572, "learning_rate": 4.887329678283795e-06, "loss": 0.4411, "step": 2418 }, { "epoch": 0.5981701285855588, "grad_norm": 0.7909863669793495, "learning_rate": 4.887233181587407e-06, "loss": 0.4363, "step": 2419 }, { "epoch": 0.5984174085064293, "grad_norm": 0.8034735828656278, "learning_rate": 4.887136644539775e-06, "loss": 0.4383, "step": 2420 }, { "epoch": 0.5986646884272997, "grad_norm": 0.8340913456941164, "learning_rate": 4.887040067142529e-06, "loss": 0.4121, "step": 2421 }, { "epoch": 0.5989119683481702, "grad_norm": 0.7963542690809786, "learning_rate": 4.886943449397304e-06, "loss": 0.4293, "step": 2422 }, { "epoch": 0.5991592482690405, "grad_norm": 0.8308598116178003, "learning_rate": 4.886846791305732e-06, "loss": 0.4524, "step": 2423 }, { "epoch": 0.599406528189911, "grad_norm": 0.8014680489986464, "learning_rate": 4.886750092869446e-06, "loss": 0.4267, "step": 2424 }, { "epoch": 0.5996538081107814, "grad_norm": 0.7930535832840587, "learning_rate": 4.88665335409008e-06, "loss": 0.4369, "step": 2425 }, { "epoch": 0.5999010880316519, "grad_norm": 0.8039444096609584, "learning_rate": 4.886556574969273e-06, "loss": 0.4326, "step": 2426 }, { "epoch": 0.6001483679525222, "grad_norm": 0.8152125898259113, "learning_rate": 4.886459755508657e-06, "loss": 0.395, "step": 2427 }, { "epoch": 0.6003956478733927, "grad_norm": 0.7958138668006676, "learning_rate": 4.88636289570987e-06, "loss": 0.4426, "step": 2428 }, { "epoch": 0.6006429277942631, "grad_norm": 0.8406325006773193, "learning_rate": 4.886265995574548e-06, "loss": 0.4242, "step": 2429 }, { "epoch": 0.6008902077151336, "grad_norm": 0.7785418865541055, "learning_rate": 4.886169055104331e-06, "loss": 0.4458, "step": 2430 }, { "epoch": 0.6011374876360039, "grad_norm": 0.819082154131098, "learning_rate": 4.886072074300855e-06, "loss": 0.4547, "step": 2431 }, { "epoch": 0.6013847675568744, "grad_norm": 0.8524435533885798, "learning_rate": 4.885975053165762e-06, "loss": 0.4217, "step": 2432 }, { "epoch": 0.6016320474777448, "grad_norm": 0.7912817382969713, "learning_rate": 4.88587799170069e-06, "loss": 0.4079, "step": 2433 }, { "epoch": 0.6018793273986153, "grad_norm": 0.7786233064046465, "learning_rate": 4.88578088990728e-06, "loss": 0.4147, "step": 2434 }, { "epoch": 0.6021266073194856, "grad_norm": 0.7967905199446587, "learning_rate": 4.885683747787174e-06, "loss": 0.4346, "step": 2435 }, { "epoch": 0.6023738872403561, "grad_norm": 0.8044827879750851, "learning_rate": 4.885586565342014e-06, "loss": 0.4604, "step": 2436 }, { "epoch": 0.6026211671612265, "grad_norm": 0.8160589521379803, "learning_rate": 4.885489342573441e-06, "loss": 0.4083, "step": 2437 }, { "epoch": 0.602868447082097, "grad_norm": 0.8213002853671916, "learning_rate": 4.885392079483101e-06, "loss": 0.4524, "step": 2438 }, { "epoch": 0.6031157270029673, "grad_norm": 0.8522852301398287, "learning_rate": 4.885294776072636e-06, "loss": 0.4345, "step": 2439 }, { "epoch": 0.6033630069238378, "grad_norm": 0.859578041404694, "learning_rate": 4.88519743234369e-06, "loss": 0.437, "step": 2440 }, { "epoch": 0.6036102868447082, "grad_norm": 0.9126217387901533, "learning_rate": 4.885100048297911e-06, "loss": 0.4386, "step": 2441 }, { "epoch": 0.6038575667655787, "grad_norm": 0.8240466430659698, "learning_rate": 4.8850026239369435e-06, "loss": 0.4156, "step": 2442 }, { "epoch": 0.604104846686449, "grad_norm": 0.8498653504755754, "learning_rate": 4.884905159262435e-06, "loss": 0.4311, "step": 2443 }, { "epoch": 0.6043521266073195, "grad_norm": 0.8118568331305821, "learning_rate": 4.884807654276031e-06, "loss": 0.4453, "step": 2444 }, { "epoch": 0.6045994065281899, "grad_norm": 0.8184288967709394, "learning_rate": 4.884710108979383e-06, "loss": 0.4434, "step": 2445 }, { "epoch": 0.6048466864490604, "grad_norm": 0.8282720476073526, "learning_rate": 4.884612523374137e-06, "loss": 0.466, "step": 2446 }, { "epoch": 0.6050939663699307, "grad_norm": 0.8750263587818898, "learning_rate": 4.8845148974619435e-06, "loss": 0.4173, "step": 2447 }, { "epoch": 0.6053412462908012, "grad_norm": 0.8459312708646205, "learning_rate": 4.884417231244452e-06, "loss": 0.4133, "step": 2448 }, { "epoch": 0.6055885262116716, "grad_norm": 0.8712529102031519, "learning_rate": 4.8843195247233145e-06, "loss": 0.43, "step": 2449 }, { "epoch": 0.6058358061325421, "grad_norm": 0.8252571385372829, "learning_rate": 4.884221777900182e-06, "loss": 0.4319, "step": 2450 }, { "epoch": 0.6060830860534124, "grad_norm": 0.7656825993441855, "learning_rate": 4.884123990776706e-06, "loss": 0.4252, "step": 2451 }, { "epoch": 0.6063303659742829, "grad_norm": 0.8633963817614291, "learning_rate": 4.88402616335454e-06, "loss": 0.4558, "step": 2452 }, { "epoch": 0.6065776458951533, "grad_norm": 0.8335583680412979, "learning_rate": 4.883928295635338e-06, "loss": 0.4372, "step": 2453 }, { "epoch": 0.6068249258160238, "grad_norm": 0.8826702767603091, "learning_rate": 4.883830387620754e-06, "loss": 0.426, "step": 2454 }, { "epoch": 0.6070722057368941, "grad_norm": 0.8559725178211727, "learning_rate": 4.8837324393124425e-06, "loss": 0.4267, "step": 2455 }, { "epoch": 0.6073194856577646, "grad_norm": 0.7989825274835176, "learning_rate": 4.8836344507120595e-06, "loss": 0.4341, "step": 2456 }, { "epoch": 0.607566765578635, "grad_norm": 0.8038816796422794, "learning_rate": 4.883536421821261e-06, "loss": 0.4224, "step": 2457 }, { "epoch": 0.6078140454995055, "grad_norm": 0.8681157087578908, "learning_rate": 4.883438352641704e-06, "loss": 0.4334, "step": 2458 }, { "epoch": 0.6080613254203758, "grad_norm": 0.799734995402737, "learning_rate": 4.883340243175047e-06, "loss": 0.4226, "step": 2459 }, { "epoch": 0.6083086053412463, "grad_norm": 0.8547585200505449, "learning_rate": 4.883242093422947e-06, "loss": 0.461, "step": 2460 }, { "epoch": 0.6085558852621167, "grad_norm": 0.8419176540033854, "learning_rate": 4.883143903387063e-06, "loss": 0.4581, "step": 2461 }, { "epoch": 0.6088031651829872, "grad_norm": 0.8190904513189141, "learning_rate": 4.8830456730690565e-06, "loss": 0.4149, "step": 2462 }, { "epoch": 0.6090504451038575, "grad_norm": 0.8075726467540836, "learning_rate": 4.882947402470586e-06, "loss": 0.4328, "step": 2463 }, { "epoch": 0.609297725024728, "grad_norm": 0.8180651312891082, "learning_rate": 4.882849091593314e-06, "loss": 0.4176, "step": 2464 }, { "epoch": 0.6095450049455984, "grad_norm": 0.8270321732282798, "learning_rate": 4.882750740438902e-06, "loss": 0.4385, "step": 2465 }, { "epoch": 0.6097922848664689, "grad_norm": 0.813487741003457, "learning_rate": 4.8826523490090104e-06, "loss": 0.4152, "step": 2466 }, { "epoch": 0.6100395647873392, "grad_norm": 0.8395319099633974, "learning_rate": 4.882553917305305e-06, "loss": 0.3927, "step": 2467 }, { "epoch": 0.6102868447082097, "grad_norm": 0.8595231311190774, "learning_rate": 4.882455445329448e-06, "loss": 0.4379, "step": 2468 }, { "epoch": 0.6105341246290801, "grad_norm": 0.8683672743262101, "learning_rate": 4.8823569330831045e-06, "loss": 0.4235, "step": 2469 }, { "epoch": 0.6107814045499506, "grad_norm": 0.8659235169816588, "learning_rate": 4.882258380567939e-06, "loss": 0.3991, "step": 2470 }, { "epoch": 0.6110286844708209, "grad_norm": 0.8208473928771434, "learning_rate": 4.882159787785618e-06, "loss": 0.4522, "step": 2471 }, { "epoch": 0.6112759643916914, "grad_norm": 0.8733025812767434, "learning_rate": 4.882061154737809e-06, "loss": 0.4098, "step": 2472 }, { "epoch": 0.6115232443125618, "grad_norm": 0.8931585574147225, "learning_rate": 4.881962481426176e-06, "loss": 0.4389, "step": 2473 }, { "epoch": 0.6117705242334323, "grad_norm": 0.834574300801114, "learning_rate": 4.88186376785239e-06, "loss": 0.4195, "step": 2474 }, { "epoch": 0.6120178041543026, "grad_norm": 0.8370196298648357, "learning_rate": 4.881765014018118e-06, "loss": 0.4347, "step": 2475 }, { "epoch": 0.6122650840751731, "grad_norm": 0.8634205008935408, "learning_rate": 4.88166621992503e-06, "loss": 0.4259, "step": 2476 }, { "epoch": 0.6125123639960435, "grad_norm": 0.8360755499657381, "learning_rate": 4.881567385574795e-06, "loss": 0.4374, "step": 2477 }, { "epoch": 0.612759643916914, "grad_norm": 0.8426066875433872, "learning_rate": 4.8814685109690846e-06, "loss": 0.4193, "step": 2478 }, { "epoch": 0.6130069238377843, "grad_norm": 0.87903730703909, "learning_rate": 4.8813695961095694e-06, "loss": 0.4389, "step": 2479 }, { "epoch": 0.6132542037586548, "grad_norm": 0.8943708464365521, "learning_rate": 4.881270640997921e-06, "loss": 0.4317, "step": 2480 }, { "epoch": 0.6135014836795252, "grad_norm": 0.8261054283912324, "learning_rate": 4.881171645635814e-06, "loss": 0.4407, "step": 2481 }, { "epoch": 0.6137487636003957, "grad_norm": 0.8064405085915288, "learning_rate": 4.88107261002492e-06, "loss": 0.4425, "step": 2482 }, { "epoch": 0.613996043521266, "grad_norm": 0.784802011195429, "learning_rate": 4.880973534166912e-06, "loss": 0.4488, "step": 2483 }, { "epoch": 0.6142433234421365, "grad_norm": 0.8176221625872651, "learning_rate": 4.880874418063467e-06, "loss": 0.4129, "step": 2484 }, { "epoch": 0.6144906033630069, "grad_norm": 0.8343618719243155, "learning_rate": 4.880775261716259e-06, "loss": 0.3911, "step": 2485 }, { "epoch": 0.6147378832838774, "grad_norm": 0.8644375812951496, "learning_rate": 4.880676065126965e-06, "loss": 0.4122, "step": 2486 }, { "epoch": 0.6149851632047477, "grad_norm": 0.7800771831644979, "learning_rate": 4.88057682829726e-06, "loss": 0.4497, "step": 2487 }, { "epoch": 0.6152324431256182, "grad_norm": 0.850924422669018, "learning_rate": 4.880477551228823e-06, "loss": 0.4184, "step": 2488 }, { "epoch": 0.6154797230464887, "grad_norm": 0.8561928941850189, "learning_rate": 4.880378233923332e-06, "loss": 0.4311, "step": 2489 }, { "epoch": 0.615727002967359, "grad_norm": 0.8350502975323144, "learning_rate": 4.880278876382465e-06, "loss": 0.4259, "step": 2490 }, { "epoch": 0.6159742828882295, "grad_norm": 0.7847878516103233, "learning_rate": 4.8801794786079e-06, "loss": 0.4432, "step": 2491 }, { "epoch": 0.6162215628090999, "grad_norm": 0.7967723156172767, "learning_rate": 4.880080040601322e-06, "loss": 0.4076, "step": 2492 }, { "epoch": 0.6164688427299704, "grad_norm": 0.7983503377091417, "learning_rate": 4.879980562364406e-06, "loss": 0.4051, "step": 2493 }, { "epoch": 0.6167161226508407, "grad_norm": 0.8022288887944488, "learning_rate": 4.879881043898838e-06, "loss": 0.4443, "step": 2494 }, { "epoch": 0.6169634025717112, "grad_norm": 0.8080184803379807, "learning_rate": 4.8797814852062965e-06, "loss": 0.4544, "step": 2495 }, { "epoch": 0.6172106824925816, "grad_norm": 0.8432338832511567, "learning_rate": 4.879681886288467e-06, "loss": 0.4269, "step": 2496 }, { "epoch": 0.6174579624134521, "grad_norm": 0.8274845517353767, "learning_rate": 4.8795822471470326e-06, "loss": 0.4548, "step": 2497 }, { "epoch": 0.6177052423343224, "grad_norm": 0.8422453088573836, "learning_rate": 4.879482567783675e-06, "loss": 0.4357, "step": 2498 }, { "epoch": 0.6179525222551929, "grad_norm": 0.8313181938884414, "learning_rate": 4.8793828482000834e-06, "loss": 0.4106, "step": 2499 }, { "epoch": 0.6181998021760633, "grad_norm": 0.7956229961490899, "learning_rate": 4.87928308839794e-06, "loss": 0.44, "step": 2500 }, { "epoch": 0.6184470820969338, "grad_norm": 0.8021572150909114, "learning_rate": 4.879183288378932e-06, "loss": 0.4505, "step": 2501 }, { "epoch": 0.6186943620178041, "grad_norm": 0.870409141916946, "learning_rate": 4.879083448144747e-06, "loss": 0.4065, "step": 2502 }, { "epoch": 0.6189416419386746, "grad_norm": 0.8154769479555604, "learning_rate": 4.878983567697071e-06, "loss": 0.4026, "step": 2503 }, { "epoch": 0.619188921859545, "grad_norm": 0.7614555739597262, "learning_rate": 4.8788836470375935e-06, "loss": 0.433, "step": 2504 }, { "epoch": 0.6194362017804155, "grad_norm": 0.8216234222194428, "learning_rate": 4.878783686168004e-06, "loss": 0.4329, "step": 2505 }, { "epoch": 0.6196834817012858, "grad_norm": 0.847598350182186, "learning_rate": 4.878683685089991e-06, "loss": 0.4035, "step": 2506 }, { "epoch": 0.6199307616221563, "grad_norm": 0.8281932559908807, "learning_rate": 4.878583643805244e-06, "loss": 0.4334, "step": 2507 }, { "epoch": 0.6201780415430267, "grad_norm": 0.8013841803051386, "learning_rate": 4.878483562315456e-06, "loss": 0.4092, "step": 2508 }, { "epoch": 0.6204253214638972, "grad_norm": 0.7827821396675764, "learning_rate": 4.878383440622318e-06, "loss": 0.4216, "step": 2509 }, { "epoch": 0.6206726013847675, "grad_norm": 0.811958550694751, "learning_rate": 4.878283278727522e-06, "loss": 0.4272, "step": 2510 }, { "epoch": 0.620919881305638, "grad_norm": 0.8295542833041976, "learning_rate": 4.878183076632761e-06, "loss": 0.4248, "step": 2511 }, { "epoch": 0.6211671612265084, "grad_norm": 0.7848392977501718, "learning_rate": 4.878082834339729e-06, "loss": 0.4474, "step": 2512 }, { "epoch": 0.6214144411473789, "grad_norm": 0.7896984047945792, "learning_rate": 4.87798255185012e-06, "loss": 0.4375, "step": 2513 }, { "epoch": 0.6216617210682492, "grad_norm": 0.786506360369637, "learning_rate": 4.87788222916563e-06, "loss": 0.4779, "step": 2514 }, { "epoch": 0.6219090009891197, "grad_norm": 0.7869355956930445, "learning_rate": 4.877781866287953e-06, "loss": 0.4449, "step": 2515 }, { "epoch": 0.6221562809099901, "grad_norm": 0.7693278810302654, "learning_rate": 4.877681463218787e-06, "loss": 0.4643, "step": 2516 }, { "epoch": 0.6224035608308606, "grad_norm": 0.8335221592746881, "learning_rate": 4.877581019959829e-06, "loss": 0.4018, "step": 2517 }, { "epoch": 0.6226508407517309, "grad_norm": 0.8045163991097942, "learning_rate": 4.877480536512777e-06, "loss": 0.4178, "step": 2518 }, { "epoch": 0.6228981206726014, "grad_norm": 0.7830727482060992, "learning_rate": 4.877380012879328e-06, "loss": 0.448, "step": 2519 }, { "epoch": 0.6231454005934718, "grad_norm": 0.7963686363614251, "learning_rate": 4.877279449061182e-06, "loss": 0.4804, "step": 2520 }, { "epoch": 0.6233926805143423, "grad_norm": 0.7780549531823838, "learning_rate": 4.8771788450600384e-06, "loss": 0.4472, "step": 2521 }, { "epoch": 0.6236399604352126, "grad_norm": 0.7994742514234306, "learning_rate": 4.877078200877599e-06, "loss": 0.4585, "step": 2522 }, { "epoch": 0.6238872403560831, "grad_norm": 0.7852106883065006, "learning_rate": 4.876977516515564e-06, "loss": 0.4613, "step": 2523 }, { "epoch": 0.6241345202769535, "grad_norm": 0.7876985072103233, "learning_rate": 4.876876791975635e-06, "loss": 0.4222, "step": 2524 }, { "epoch": 0.624381800197824, "grad_norm": 0.7671611530115617, "learning_rate": 4.876776027259516e-06, "loss": 0.451, "step": 2525 }, { "epoch": 0.6246290801186943, "grad_norm": 0.7946021739256844, "learning_rate": 4.876675222368907e-06, "loss": 0.446, "step": 2526 }, { "epoch": 0.6248763600395648, "grad_norm": 0.8459845780214671, "learning_rate": 4.876574377305516e-06, "loss": 0.4532, "step": 2527 }, { "epoch": 0.6251236399604352, "grad_norm": 0.8008984159682268, "learning_rate": 4.876473492071045e-06, "loss": 0.4374, "step": 2528 }, { "epoch": 0.6253709198813057, "grad_norm": 0.7682503991359931, "learning_rate": 4.876372566667199e-06, "loss": 0.4116, "step": 2529 }, { "epoch": 0.625618199802176, "grad_norm": 0.8258691494063493, "learning_rate": 4.876271601095686e-06, "loss": 0.42, "step": 2530 }, { "epoch": 0.6258654797230465, "grad_norm": 0.7927208100659665, "learning_rate": 4.876170595358211e-06, "loss": 0.434, "step": 2531 }, { "epoch": 0.6261127596439169, "grad_norm": 0.8137626732834563, "learning_rate": 4.8760695494564815e-06, "loss": 0.4102, "step": 2532 }, { "epoch": 0.6263600395647874, "grad_norm": 0.779955387753274, "learning_rate": 4.875968463392206e-06, "loss": 0.4557, "step": 2533 }, { "epoch": 0.6266073194856577, "grad_norm": 0.8225478650613147, "learning_rate": 4.875867337167093e-06, "loss": 0.4651, "step": 2534 }, { "epoch": 0.6268545994065282, "grad_norm": 0.7874547156708713, "learning_rate": 4.875766170782852e-06, "loss": 0.4218, "step": 2535 }, { "epoch": 0.6271018793273986, "grad_norm": 0.8484394458254143, "learning_rate": 4.875664964241191e-06, "loss": 0.4554, "step": 2536 }, { "epoch": 0.6273491592482691, "grad_norm": 0.8084521995284009, "learning_rate": 4.875563717543824e-06, "loss": 0.4319, "step": 2537 }, { "epoch": 0.6275964391691394, "grad_norm": 0.7910596133893639, "learning_rate": 4.87546243069246e-06, "loss": 0.4598, "step": 2538 }, { "epoch": 0.6278437190900099, "grad_norm": 0.796909735607753, "learning_rate": 4.875361103688812e-06, "loss": 0.4447, "step": 2539 }, { "epoch": 0.6280909990108803, "grad_norm": 0.7894299591362924, "learning_rate": 4.875259736534593e-06, "loss": 0.4143, "step": 2540 }, { "epoch": 0.6283382789317508, "grad_norm": 0.8215464045091878, "learning_rate": 4.8751583292315156e-06, "loss": 0.4337, "step": 2541 }, { "epoch": 0.6285855588526211, "grad_norm": 0.8097485457219312, "learning_rate": 4.875056881781294e-06, "loss": 0.423, "step": 2542 }, { "epoch": 0.6288328387734916, "grad_norm": 0.832716101729499, "learning_rate": 4.874955394185643e-06, "loss": 0.4168, "step": 2543 }, { "epoch": 0.629080118694362, "grad_norm": 0.778723112231423, "learning_rate": 4.874853866446279e-06, "loss": 0.4576, "step": 2544 }, { "epoch": 0.6293273986152325, "grad_norm": 0.8635182250262682, "learning_rate": 4.874752298564916e-06, "loss": 0.4457, "step": 2545 }, { "epoch": 0.6295746785361028, "grad_norm": 0.7917765753480467, "learning_rate": 4.874650690543273e-06, "loss": 0.4059, "step": 2546 }, { "epoch": 0.6298219584569733, "grad_norm": 0.8248753697058309, "learning_rate": 4.874549042383066e-06, "loss": 0.3954, "step": 2547 }, { "epoch": 0.6300692383778437, "grad_norm": 0.8478879231826125, "learning_rate": 4.8744473540860136e-06, "loss": 0.4136, "step": 2548 }, { "epoch": 0.6303165182987142, "grad_norm": 0.8212763456739502, "learning_rate": 4.874345625653836e-06, "loss": 0.4684, "step": 2549 }, { "epoch": 0.6305637982195845, "grad_norm": 0.757161231064773, "learning_rate": 4.874243857088251e-06, "loss": 0.4436, "step": 2550 }, { "epoch": 0.630811078140455, "grad_norm": 0.872176501099029, "learning_rate": 4.874142048390978e-06, "loss": 0.4187, "step": 2551 }, { "epoch": 0.6310583580613254, "grad_norm": 0.8151427391928595, "learning_rate": 4.87404019956374e-06, "loss": 0.4146, "step": 2552 }, { "epoch": 0.6313056379821959, "grad_norm": 0.8257722842621575, "learning_rate": 4.873938310608258e-06, "loss": 0.4088, "step": 2553 }, { "epoch": 0.6315529179030662, "grad_norm": 0.824778895269139, "learning_rate": 4.8738363815262535e-06, "loss": 0.4103, "step": 2554 }, { "epoch": 0.6318001978239367, "grad_norm": 0.8206243653518269, "learning_rate": 4.8737344123194495e-06, "loss": 0.443, "step": 2555 }, { "epoch": 0.6320474777448071, "grad_norm": 0.833260027971966, "learning_rate": 4.873632402989571e-06, "loss": 0.4403, "step": 2556 }, { "epoch": 0.6322947576656776, "grad_norm": 0.8392388817195789, "learning_rate": 4.87353035353834e-06, "loss": 0.4167, "step": 2557 }, { "epoch": 0.6325420375865479, "grad_norm": 0.8045955688235943, "learning_rate": 4.873428263967483e-06, "loss": 0.4345, "step": 2558 }, { "epoch": 0.6327893175074184, "grad_norm": 0.8073482847741306, "learning_rate": 4.873326134278725e-06, "loss": 0.4465, "step": 2559 }, { "epoch": 0.6330365974282888, "grad_norm": 0.8417721947718284, "learning_rate": 4.873223964473792e-06, "loss": 0.437, "step": 2560 }, { "epoch": 0.6332838773491593, "grad_norm": 0.835567113423516, "learning_rate": 4.873121754554413e-06, "loss": 0.4601, "step": 2561 }, { "epoch": 0.6335311572700296, "grad_norm": 0.7979539900749805, "learning_rate": 4.873019504522313e-06, "loss": 0.4195, "step": 2562 }, { "epoch": 0.6337784371909001, "grad_norm": 0.7912474454955465, "learning_rate": 4.872917214379221e-06, "loss": 0.4487, "step": 2563 }, { "epoch": 0.6340257171117705, "grad_norm": 0.8307214918215886, "learning_rate": 4.872814884126867e-06, "loss": 0.4173, "step": 2564 }, { "epoch": 0.634272997032641, "grad_norm": 0.8101781344145439, "learning_rate": 4.87271251376698e-06, "loss": 0.4501, "step": 2565 }, { "epoch": 0.6345202769535113, "grad_norm": 0.8204980030353368, "learning_rate": 4.872610103301289e-06, "loss": 0.4757, "step": 2566 }, { "epoch": 0.6347675568743818, "grad_norm": 0.8005074209067652, "learning_rate": 4.872507652731529e-06, "loss": 0.4078, "step": 2567 }, { "epoch": 0.6350148367952523, "grad_norm": 0.7997515924249456, "learning_rate": 4.872405162059428e-06, "loss": 0.45, "step": 2568 }, { "epoch": 0.6352621167161226, "grad_norm": 0.8137264810694476, "learning_rate": 4.87230263128672e-06, "loss": 0.4241, "step": 2569 }, { "epoch": 0.6355093966369931, "grad_norm": 0.824910213201859, "learning_rate": 4.872200060415136e-06, "loss": 0.4292, "step": 2570 }, { "epoch": 0.6357566765578635, "grad_norm": 0.8033275021348851, "learning_rate": 4.872097449446413e-06, "loss": 0.4323, "step": 2571 }, { "epoch": 0.636003956478734, "grad_norm": 0.7746126846914729, "learning_rate": 4.871994798382284e-06, "loss": 0.4206, "step": 2572 }, { "epoch": 0.6362512363996043, "grad_norm": 0.8549318207086319, "learning_rate": 4.871892107224483e-06, "loss": 0.4053, "step": 2573 }, { "epoch": 0.6364985163204748, "grad_norm": 0.7938662480945529, "learning_rate": 4.8717893759747475e-06, "loss": 0.4344, "step": 2574 }, { "epoch": 0.6367457962413452, "grad_norm": 0.7915283947882275, "learning_rate": 4.8716866046348135e-06, "loss": 0.4422, "step": 2575 }, { "epoch": 0.6369930761622157, "grad_norm": 0.7688073874395939, "learning_rate": 4.871583793206417e-06, "loss": 0.4506, "step": 2576 }, { "epoch": 0.637240356083086, "grad_norm": 0.7912905811315122, "learning_rate": 4.871480941691297e-06, "loss": 0.4166, "step": 2577 }, { "epoch": 0.6374876360039565, "grad_norm": 0.8120497456748971, "learning_rate": 4.871378050091191e-06, "loss": 0.4181, "step": 2578 }, { "epoch": 0.6377349159248269, "grad_norm": 0.8028994592647819, "learning_rate": 4.871275118407839e-06, "loss": 0.4311, "step": 2579 }, { "epoch": 0.6379821958456974, "grad_norm": 0.7834446209323545, "learning_rate": 4.871172146642981e-06, "loss": 0.4279, "step": 2580 }, { "epoch": 0.6382294757665677, "grad_norm": 0.8348542999908406, "learning_rate": 4.871069134798357e-06, "loss": 0.4329, "step": 2581 }, { "epoch": 0.6384767556874382, "grad_norm": 0.8272271677793834, "learning_rate": 4.8709660828757084e-06, "loss": 0.4182, "step": 2582 }, { "epoch": 0.6387240356083086, "grad_norm": 0.8689124353130055, "learning_rate": 4.8708629908767765e-06, "loss": 0.4006, "step": 2583 }, { "epoch": 0.6389713155291791, "grad_norm": 0.7859560070726559, "learning_rate": 4.870759858803306e-06, "loss": 0.4099, "step": 2584 }, { "epoch": 0.6392185954500494, "grad_norm": 0.8065301022490496, "learning_rate": 4.870656686657037e-06, "loss": 0.4429, "step": 2585 }, { "epoch": 0.6394658753709199, "grad_norm": 0.8046008098865215, "learning_rate": 4.870553474439715e-06, "loss": 0.4355, "step": 2586 }, { "epoch": 0.6397131552917903, "grad_norm": 0.8173022893337369, "learning_rate": 4.870450222153086e-06, "loss": 0.4541, "step": 2587 }, { "epoch": 0.6399604352126608, "grad_norm": 0.8241623751017143, "learning_rate": 4.870346929798893e-06, "loss": 0.4322, "step": 2588 }, { "epoch": 0.6402077151335311, "grad_norm": 0.7625457826822414, "learning_rate": 4.870243597378882e-06, "loss": 0.4425, "step": 2589 }, { "epoch": 0.6404549950544016, "grad_norm": 0.8650909181528662, "learning_rate": 4.870140224894801e-06, "loss": 0.4208, "step": 2590 }, { "epoch": 0.640702274975272, "grad_norm": 0.807605893764015, "learning_rate": 4.870036812348397e-06, "loss": 0.4301, "step": 2591 }, { "epoch": 0.6409495548961425, "grad_norm": 0.8261404636614745, "learning_rate": 4.8699333597414166e-06, "loss": 0.4278, "step": 2592 }, { "epoch": 0.6411968348170128, "grad_norm": 0.8284238869122891, "learning_rate": 4.869829867075611e-06, "loss": 0.3984, "step": 2593 }, { "epoch": 0.6414441147378833, "grad_norm": 0.7975722051769433, "learning_rate": 4.869726334352727e-06, "loss": 0.4709, "step": 2594 }, { "epoch": 0.6416913946587537, "grad_norm": 0.8507481794439908, "learning_rate": 4.869622761574516e-06, "loss": 0.4238, "step": 2595 }, { "epoch": 0.6419386745796242, "grad_norm": 0.8131973370387947, "learning_rate": 4.869519148742728e-06, "loss": 0.4163, "step": 2596 }, { "epoch": 0.6421859545004945, "grad_norm": 0.7971351475313032, "learning_rate": 4.8694154958591145e-06, "loss": 0.4265, "step": 2597 }, { "epoch": 0.642433234421365, "grad_norm": 0.8116345108123881, "learning_rate": 4.869311802925428e-06, "loss": 0.4123, "step": 2598 }, { "epoch": 0.6426805143422354, "grad_norm": 0.8095032265641844, "learning_rate": 4.8692080699434205e-06, "loss": 0.4268, "step": 2599 }, { "epoch": 0.6429277942631059, "grad_norm": 0.8217190775852606, "learning_rate": 4.869104296914847e-06, "loss": 0.4485, "step": 2600 }, { "epoch": 0.6431750741839762, "grad_norm": 0.8025278787335687, "learning_rate": 4.869000483841459e-06, "loss": 0.4606, "step": 2601 }, { "epoch": 0.6434223541048467, "grad_norm": 0.8116294256653767, "learning_rate": 4.868896630725014e-06, "loss": 0.4358, "step": 2602 }, { "epoch": 0.6436696340257171, "grad_norm": 0.7967259432878298, "learning_rate": 4.868792737567266e-06, "loss": 0.4422, "step": 2603 }, { "epoch": 0.6439169139465876, "grad_norm": 0.8050752940748637, "learning_rate": 4.86868880436997e-06, "loss": 0.4417, "step": 2604 }, { "epoch": 0.6441641938674579, "grad_norm": 0.7917551367908968, "learning_rate": 4.868584831134885e-06, "loss": 0.4185, "step": 2605 }, { "epoch": 0.6444114737883284, "grad_norm": 0.8045620985739201, "learning_rate": 4.868480817863766e-06, "loss": 0.4031, "step": 2606 }, { "epoch": 0.6446587537091988, "grad_norm": 0.7708976802059638, "learning_rate": 4.868376764558374e-06, "loss": 0.4827, "step": 2607 }, { "epoch": 0.6449060336300693, "grad_norm": 0.8344243360154323, "learning_rate": 4.868272671220465e-06, "loss": 0.4184, "step": 2608 }, { "epoch": 0.6451533135509396, "grad_norm": 0.7928287190522306, "learning_rate": 4.868168537851801e-06, "loss": 0.4405, "step": 2609 }, { "epoch": 0.6454005934718101, "grad_norm": 0.8301762127855165, "learning_rate": 4.868064364454141e-06, "loss": 0.4264, "step": 2610 }, { "epoch": 0.6456478733926805, "grad_norm": 0.8234622479401585, "learning_rate": 4.867960151029245e-06, "loss": 0.4101, "step": 2611 }, { "epoch": 0.645895153313551, "grad_norm": 0.8042270905776066, "learning_rate": 4.867855897578876e-06, "loss": 0.4088, "step": 2612 }, { "epoch": 0.6461424332344213, "grad_norm": 0.8118602734915629, "learning_rate": 4.867751604104795e-06, "loss": 0.4279, "step": 2613 }, { "epoch": 0.6463897131552918, "grad_norm": 0.8157638003872915, "learning_rate": 4.8676472706087655e-06, "loss": 0.3937, "step": 2614 }, { "epoch": 0.6466369930761622, "grad_norm": 0.7960341855478246, "learning_rate": 4.867542897092551e-06, "loss": 0.4683, "step": 2615 }, { "epoch": 0.6468842729970327, "grad_norm": 0.8650800961392032, "learning_rate": 4.867438483557916e-06, "loss": 0.41, "step": 2616 }, { "epoch": 0.647131552917903, "grad_norm": 0.7994797297627116, "learning_rate": 4.867334030006624e-06, "loss": 0.3952, "step": 2617 }, { "epoch": 0.6473788328387735, "grad_norm": 0.8182129634502129, "learning_rate": 4.867229536440442e-06, "loss": 0.4538, "step": 2618 }, { "epoch": 0.6476261127596439, "grad_norm": 0.8291856382801164, "learning_rate": 4.867125002861136e-06, "loss": 0.421, "step": 2619 }, { "epoch": 0.6478733926805144, "grad_norm": 0.8143024931629848, "learning_rate": 4.867020429270473e-06, "loss": 0.4306, "step": 2620 }, { "epoch": 0.6481206726013847, "grad_norm": 0.7831998758499393, "learning_rate": 4.866915815670221e-06, "loss": 0.4269, "step": 2621 }, { "epoch": 0.6483679525222552, "grad_norm": 0.7830711376483871, "learning_rate": 4.866811162062146e-06, "loss": 0.4514, "step": 2622 }, { "epoch": 0.6486152324431256, "grad_norm": 0.8088024343741351, "learning_rate": 4.86670646844802e-06, "loss": 0.4059, "step": 2623 }, { "epoch": 0.6488625123639961, "grad_norm": 0.8646016295169404, "learning_rate": 4.86660173482961e-06, "loss": 0.4262, "step": 2624 }, { "epoch": 0.6491097922848664, "grad_norm": 0.832596400950109, "learning_rate": 4.866496961208689e-06, "loss": 0.4289, "step": 2625 }, { "epoch": 0.6493570722057369, "grad_norm": 0.8067755589491356, "learning_rate": 4.866392147587026e-06, "loss": 0.4026, "step": 2626 }, { "epoch": 0.6496043521266073, "grad_norm": 0.7575527345016138, "learning_rate": 4.8662872939663925e-06, "loss": 0.4915, "step": 2627 }, { "epoch": 0.6498516320474778, "grad_norm": 0.882603184161495, "learning_rate": 4.866182400348562e-06, "loss": 0.4028, "step": 2628 }, { "epoch": 0.6500989119683481, "grad_norm": 0.820325274503156, "learning_rate": 4.866077466735307e-06, "loss": 0.4334, "step": 2629 }, { "epoch": 0.6503461918892186, "grad_norm": 0.8311177188830288, "learning_rate": 4.8659724931284014e-06, "loss": 0.4505, "step": 2630 }, { "epoch": 0.650593471810089, "grad_norm": 0.7836144268998563, "learning_rate": 4.865867479529619e-06, "loss": 0.4185, "step": 2631 }, { "epoch": 0.6508407517309595, "grad_norm": 0.769144550632546, "learning_rate": 4.865762425940735e-06, "loss": 0.4447, "step": 2632 }, { "epoch": 0.6510880316518298, "grad_norm": 0.7983531419796737, "learning_rate": 4.865657332363526e-06, "loss": 0.4451, "step": 2633 }, { "epoch": 0.6513353115727003, "grad_norm": 0.8263434045298292, "learning_rate": 4.865552198799767e-06, "loss": 0.4073, "step": 2634 }, { "epoch": 0.6515825914935707, "grad_norm": 0.8110069368531261, "learning_rate": 4.865447025251237e-06, "loss": 0.4525, "step": 2635 }, { "epoch": 0.6518298714144412, "grad_norm": 0.8143820165970546, "learning_rate": 4.86534181171971e-06, "loss": 0.412, "step": 2636 }, { "epoch": 0.6520771513353115, "grad_norm": 0.8320001375672842, "learning_rate": 4.865236558206969e-06, "loss": 0.4605, "step": 2637 }, { "epoch": 0.652324431256182, "grad_norm": 0.8097042928776802, "learning_rate": 4.865131264714791e-06, "loss": 0.3915, "step": 2638 }, { "epoch": 0.6525717111770524, "grad_norm": 0.7994068536215166, "learning_rate": 4.865025931244955e-06, "loss": 0.4416, "step": 2639 }, { "epoch": 0.6528189910979229, "grad_norm": 0.833191717549721, "learning_rate": 4.864920557799243e-06, "loss": 0.3927, "step": 2640 }, { "epoch": 0.6530662710187932, "grad_norm": 0.8287763555318937, "learning_rate": 4.864815144379435e-06, "loss": 0.4285, "step": 2641 }, { "epoch": 0.6533135509396637, "grad_norm": 0.8376260262533985, "learning_rate": 4.864709690987313e-06, "loss": 0.4058, "step": 2642 }, { "epoch": 0.6535608308605341, "grad_norm": 0.8012222473334901, "learning_rate": 4.8646041976246595e-06, "loss": 0.4342, "step": 2643 }, { "epoch": 0.6538081107814046, "grad_norm": 0.7915340668570944, "learning_rate": 4.864498664293258e-06, "loss": 0.4375, "step": 2644 }, { "epoch": 0.654055390702275, "grad_norm": 0.7914492576635661, "learning_rate": 4.864393090994892e-06, "loss": 0.4365, "step": 2645 }, { "epoch": 0.6543026706231454, "grad_norm": 0.7693345141545859, "learning_rate": 4.864287477731346e-06, "loss": 0.4217, "step": 2646 }, { "epoch": 0.6545499505440159, "grad_norm": 0.7878375532840123, "learning_rate": 4.8641818245044065e-06, "loss": 0.4313, "step": 2647 }, { "epoch": 0.6547972304648862, "grad_norm": 0.776760393600863, "learning_rate": 4.8640761313158565e-06, "loss": 0.4121, "step": 2648 }, { "epoch": 0.6550445103857567, "grad_norm": 0.8010628469676324, "learning_rate": 4.8639703981674854e-06, "loss": 0.4049, "step": 2649 }, { "epoch": 0.6552917903066271, "grad_norm": 0.8374209078452766, "learning_rate": 4.863864625061079e-06, "loss": 0.3779, "step": 2650 }, { "epoch": 0.6555390702274976, "grad_norm": 0.8174278279572348, "learning_rate": 4.8637588119984245e-06, "loss": 0.42, "step": 2651 }, { "epoch": 0.655786350148368, "grad_norm": 0.7978953213607124, "learning_rate": 4.863652958981312e-06, "loss": 0.4014, "step": 2652 }, { "epoch": 0.6560336300692384, "grad_norm": 0.839369529972231, "learning_rate": 4.863547066011529e-06, "loss": 0.4557, "step": 2653 }, { "epoch": 0.6562809099901088, "grad_norm": 0.8486133636067605, "learning_rate": 4.863441133090867e-06, "loss": 0.4262, "step": 2654 }, { "epoch": 0.6565281899109793, "grad_norm": 0.8001265604884367, "learning_rate": 4.863335160221116e-06, "loss": 0.3965, "step": 2655 }, { "epoch": 0.6567754698318496, "grad_norm": 0.7686223929805192, "learning_rate": 4.863229147404067e-06, "loss": 0.4275, "step": 2656 }, { "epoch": 0.6570227497527201, "grad_norm": 0.7772075862282518, "learning_rate": 4.863123094641513e-06, "loss": 0.4033, "step": 2657 }, { "epoch": 0.6572700296735905, "grad_norm": 0.7775090647355424, "learning_rate": 4.8630170019352455e-06, "loss": 0.4308, "step": 2658 }, { "epoch": 0.657517309594461, "grad_norm": 0.7985056923918036, "learning_rate": 4.862910869287058e-06, "loss": 0.4574, "step": 2659 }, { "epoch": 0.6577645895153313, "grad_norm": 0.8176547204705846, "learning_rate": 4.862804696698743e-06, "loss": 0.4177, "step": 2660 }, { "epoch": 0.6580118694362018, "grad_norm": 0.7913133572047072, "learning_rate": 4.8626984841720985e-06, "loss": 0.4181, "step": 2661 }, { "epoch": 0.6582591493570722, "grad_norm": 0.8017262292735567, "learning_rate": 4.862592231708917e-06, "loss": 0.4138, "step": 2662 }, { "epoch": 0.6585064292779427, "grad_norm": 0.823785805842316, "learning_rate": 4.862485939310996e-06, "loss": 0.4178, "step": 2663 }, { "epoch": 0.658753709198813, "grad_norm": 0.8011536361874997, "learning_rate": 4.862379606980131e-06, "loss": 0.42, "step": 2664 }, { "epoch": 0.6590009891196835, "grad_norm": 0.8131762554088784, "learning_rate": 4.86227323471812e-06, "loss": 0.4171, "step": 2665 }, { "epoch": 0.6592482690405539, "grad_norm": 0.7743146594982807, "learning_rate": 4.86216682252676e-06, "loss": 0.4358, "step": 2666 }, { "epoch": 0.6594955489614244, "grad_norm": 0.7984111657102294, "learning_rate": 4.862060370407852e-06, "loss": 0.4085, "step": 2667 }, { "epoch": 0.6597428288822947, "grad_norm": 0.8081662281215509, "learning_rate": 4.861953878363193e-06, "loss": 0.406, "step": 2668 }, { "epoch": 0.6599901088031652, "grad_norm": 0.847834361380235, "learning_rate": 4.8618473463945846e-06, "loss": 0.4487, "step": 2669 }, { "epoch": 0.6602373887240356, "grad_norm": 0.8062903347818331, "learning_rate": 4.861740774503827e-06, "loss": 0.4064, "step": 2670 }, { "epoch": 0.6604846686449061, "grad_norm": 0.8131245152077919, "learning_rate": 4.861634162692721e-06, "loss": 0.4103, "step": 2671 }, { "epoch": 0.6607319485657764, "grad_norm": 0.780375927043985, "learning_rate": 4.86152751096307e-06, "loss": 0.4342, "step": 2672 }, { "epoch": 0.6609792284866469, "grad_norm": 0.8053037494452797, "learning_rate": 4.861420819316674e-06, "loss": 0.4044, "step": 2673 }, { "epoch": 0.6612265084075173, "grad_norm": 0.7860517182947623, "learning_rate": 4.861314087755339e-06, "loss": 0.4325, "step": 2674 }, { "epoch": 0.6614737883283878, "grad_norm": 0.8359112628884461, "learning_rate": 4.8612073162808685e-06, "loss": 0.4046, "step": 2675 }, { "epoch": 0.6617210682492581, "grad_norm": 0.8207892956870797, "learning_rate": 4.861100504895067e-06, "loss": 0.4135, "step": 2676 }, { "epoch": 0.6619683481701286, "grad_norm": 0.825628094557653, "learning_rate": 4.86099365359974e-06, "loss": 0.4151, "step": 2677 }, { "epoch": 0.662215628090999, "grad_norm": 0.8562140165251803, "learning_rate": 4.860886762396694e-06, "loss": 0.4417, "step": 2678 }, { "epoch": 0.6624629080118695, "grad_norm": 0.8366896031458023, "learning_rate": 4.860779831287735e-06, "loss": 0.3959, "step": 2679 }, { "epoch": 0.6627101879327398, "grad_norm": 0.862785681802581, "learning_rate": 4.86067286027467e-06, "loss": 0.4187, "step": 2680 }, { "epoch": 0.6629574678536103, "grad_norm": 0.7714723304346427, "learning_rate": 4.860565849359309e-06, "loss": 0.4396, "step": 2681 }, { "epoch": 0.6632047477744807, "grad_norm": 0.8142081920122305, "learning_rate": 4.860458798543459e-06, "loss": 0.428, "step": 2682 }, { "epoch": 0.6634520276953512, "grad_norm": 0.7798010043722234, "learning_rate": 4.8603517078289305e-06, "loss": 0.4388, "step": 2683 }, { "epoch": 0.6636993076162215, "grad_norm": 0.7755345393059091, "learning_rate": 4.860244577217533e-06, "loss": 0.4107, "step": 2684 }, { "epoch": 0.663946587537092, "grad_norm": 0.7674983481753626, "learning_rate": 4.860137406711079e-06, "loss": 0.4143, "step": 2685 }, { "epoch": 0.6641938674579624, "grad_norm": 0.8273272184248035, "learning_rate": 4.860030196311377e-06, "loss": 0.414, "step": 2686 }, { "epoch": 0.6644411473788329, "grad_norm": 0.817916038266538, "learning_rate": 4.859922946020241e-06, "loss": 0.4047, "step": 2687 }, { "epoch": 0.6646884272997032, "grad_norm": 0.8207357627703306, "learning_rate": 4.8598156558394835e-06, "loss": 0.4294, "step": 2688 }, { "epoch": 0.6649357072205737, "grad_norm": 0.8194821245448161, "learning_rate": 4.859708325770919e-06, "loss": 0.4013, "step": 2689 }, { "epoch": 0.6651829871414441, "grad_norm": 0.7698555674508966, "learning_rate": 4.859600955816361e-06, "loss": 0.4349, "step": 2690 }, { "epoch": 0.6654302670623146, "grad_norm": 0.776934367789398, "learning_rate": 4.859493545977624e-06, "loss": 0.4098, "step": 2691 }, { "epoch": 0.6656775469831849, "grad_norm": 0.8173710162756613, "learning_rate": 4.859386096256523e-06, "loss": 0.3915, "step": 2692 }, { "epoch": 0.6659248269040554, "grad_norm": 0.8276330948906291, "learning_rate": 4.859278606654876e-06, "loss": 0.4198, "step": 2693 }, { "epoch": 0.6661721068249258, "grad_norm": 0.775277988894492, "learning_rate": 4.859171077174498e-06, "loss": 0.4151, "step": 2694 }, { "epoch": 0.6664193867457963, "grad_norm": 0.8367562396642206, "learning_rate": 4.8590635078172086e-06, "loss": 0.4578, "step": 2695 }, { "epoch": 0.6666666666666666, "grad_norm": 0.8159711563660788, "learning_rate": 4.858955898584824e-06, "loss": 0.4159, "step": 2696 }, { "epoch": 0.6669139465875371, "grad_norm": 0.8408357991825046, "learning_rate": 4.858848249479165e-06, "loss": 0.4011, "step": 2697 }, { "epoch": 0.6671612265084075, "grad_norm": 0.841572463582256, "learning_rate": 4.858740560502049e-06, "loss": 0.4123, "step": 2698 }, { "epoch": 0.667408506429278, "grad_norm": 0.8179256974131708, "learning_rate": 4.8586328316552974e-06, "loss": 0.439, "step": 2699 }, { "epoch": 0.6676557863501483, "grad_norm": 0.8055382933055434, "learning_rate": 4.858525062940732e-06, "loss": 0.4276, "step": 2700 }, { "epoch": 0.6679030662710188, "grad_norm": 0.8132449133301869, "learning_rate": 4.858417254360173e-06, "loss": 0.4247, "step": 2701 }, { "epoch": 0.6681503461918892, "grad_norm": 0.8017582573504225, "learning_rate": 4.858309405915443e-06, "loss": 0.4138, "step": 2702 }, { "epoch": 0.6683976261127597, "grad_norm": 0.8102328296173832, "learning_rate": 4.858201517608366e-06, "loss": 0.4615, "step": 2703 }, { "epoch": 0.66864490603363, "grad_norm": 0.8565741417342391, "learning_rate": 4.858093589440765e-06, "loss": 0.4398, "step": 2704 }, { "epoch": 0.6688921859545005, "grad_norm": 0.8203380198987199, "learning_rate": 4.8579856214144635e-06, "loss": 0.4477, "step": 2705 }, { "epoch": 0.6691394658753709, "grad_norm": 0.8154367679562924, "learning_rate": 4.8578776135312876e-06, "loss": 0.4182, "step": 2706 }, { "epoch": 0.6693867457962414, "grad_norm": 0.796586375252954, "learning_rate": 4.8577695657930625e-06, "loss": 0.417, "step": 2707 }, { "epoch": 0.6696340257171117, "grad_norm": 0.7857952936500736, "learning_rate": 4.857661478201614e-06, "loss": 0.425, "step": 2708 }, { "epoch": 0.6698813056379822, "grad_norm": 0.7839475383973936, "learning_rate": 4.85755335075877e-06, "loss": 0.4323, "step": 2709 }, { "epoch": 0.6701285855588526, "grad_norm": 0.8317686188823157, "learning_rate": 4.857445183466357e-06, "loss": 0.4038, "step": 2710 }, { "epoch": 0.670375865479723, "grad_norm": 0.7735299641172824, "learning_rate": 4.857336976326205e-06, "loss": 0.4349, "step": 2711 }, { "epoch": 0.6706231454005934, "grad_norm": 0.7993198548331951, "learning_rate": 4.857228729340142e-06, "loss": 0.4505, "step": 2712 }, { "epoch": 0.6708704253214639, "grad_norm": 0.8005486544012455, "learning_rate": 4.8571204425099976e-06, "loss": 0.4316, "step": 2713 }, { "epoch": 0.6711177052423343, "grad_norm": 0.8235107641996137, "learning_rate": 4.857012115837602e-06, "loss": 0.414, "step": 2714 }, { "epoch": 0.6713649851632048, "grad_norm": 0.7544707697511732, "learning_rate": 4.856903749324787e-06, "loss": 0.4061, "step": 2715 }, { "epoch": 0.6716122650840751, "grad_norm": 0.837063786540837, "learning_rate": 4.856795342973385e-06, "loss": 0.4236, "step": 2716 }, { "epoch": 0.6718595450049456, "grad_norm": 0.7794131174029756, "learning_rate": 4.856686896785226e-06, "loss": 0.4053, "step": 2717 }, { "epoch": 0.672106824925816, "grad_norm": 0.8167949776353572, "learning_rate": 4.856578410762145e-06, "loss": 0.3939, "step": 2718 }, { "epoch": 0.6723541048466865, "grad_norm": 0.8159754290439838, "learning_rate": 4.856469884905974e-06, "loss": 0.4235, "step": 2719 }, { "epoch": 0.6726013847675568, "grad_norm": 0.7811653270798669, "learning_rate": 4.8563613192185495e-06, "loss": 0.3975, "step": 2720 }, { "epoch": 0.6728486646884273, "grad_norm": 0.7749258733290558, "learning_rate": 4.856252713701706e-06, "loss": 0.4407, "step": 2721 }, { "epoch": 0.6730959446092978, "grad_norm": 0.8416011342065808, "learning_rate": 4.856144068357279e-06, "loss": 0.4084, "step": 2722 }, { "epoch": 0.6733432245301681, "grad_norm": 0.7965495607636626, "learning_rate": 4.8560353831871035e-06, "loss": 0.4325, "step": 2723 }, { "epoch": 0.6735905044510386, "grad_norm": 0.8046182312914216, "learning_rate": 4.855926658193019e-06, "loss": 0.4498, "step": 2724 }, { "epoch": 0.673837784371909, "grad_norm": 0.7897180049006142, "learning_rate": 4.855817893376862e-06, "loss": 0.4226, "step": 2725 }, { "epoch": 0.6740850642927795, "grad_norm": 0.7673796995879293, "learning_rate": 4.85570908874047e-06, "loss": 0.4749, "step": 2726 }, { "epoch": 0.6743323442136498, "grad_norm": 0.8066313586664126, "learning_rate": 4.855600244285684e-06, "loss": 0.4353, "step": 2727 }, { "epoch": 0.6745796241345203, "grad_norm": 0.8043440775563024, "learning_rate": 4.855491360014343e-06, "loss": 0.4377, "step": 2728 }, { "epoch": 0.6748269040553907, "grad_norm": 0.8052759257323537, "learning_rate": 4.855382435928287e-06, "loss": 0.4242, "step": 2729 }, { "epoch": 0.6750741839762612, "grad_norm": 0.816064941591361, "learning_rate": 4.855273472029358e-06, "loss": 0.4073, "step": 2730 }, { "epoch": 0.6753214638971315, "grad_norm": 0.7606831743034189, "learning_rate": 4.855164468319398e-06, "loss": 0.4127, "step": 2731 }, { "epoch": 0.675568743818002, "grad_norm": 0.8075604491375517, "learning_rate": 4.855055424800249e-06, "loss": 0.4494, "step": 2732 }, { "epoch": 0.6758160237388724, "grad_norm": 0.7534279728737382, "learning_rate": 4.854946341473753e-06, "loss": 0.4519, "step": 2733 }, { "epoch": 0.6760633036597429, "grad_norm": 0.8069270952474595, "learning_rate": 4.8548372183417556e-06, "loss": 0.4065, "step": 2734 }, { "epoch": 0.6763105835806132, "grad_norm": 0.8506776561935846, "learning_rate": 4.854728055406101e-06, "loss": 0.4306, "step": 2735 }, { "epoch": 0.6765578635014837, "grad_norm": 0.8035438540666233, "learning_rate": 4.854618852668632e-06, "loss": 0.4606, "step": 2736 }, { "epoch": 0.6768051434223541, "grad_norm": 0.7934236069117082, "learning_rate": 4.854509610131198e-06, "loss": 0.4424, "step": 2737 }, { "epoch": 0.6770524233432246, "grad_norm": 0.7654027889805268, "learning_rate": 4.854400327795644e-06, "loss": 0.4361, "step": 2738 }, { "epoch": 0.6772997032640949, "grad_norm": 0.8258240340169412, "learning_rate": 4.854291005663816e-06, "loss": 0.4033, "step": 2739 }, { "epoch": 0.6775469831849654, "grad_norm": 0.8126929867632539, "learning_rate": 4.854181643737564e-06, "loss": 0.432, "step": 2740 }, { "epoch": 0.6777942631058358, "grad_norm": 0.8059082743497984, "learning_rate": 4.854072242018734e-06, "loss": 0.422, "step": 2741 }, { "epoch": 0.6780415430267063, "grad_norm": 0.8181609925538584, "learning_rate": 4.853962800509179e-06, "loss": 0.4423, "step": 2742 }, { "epoch": 0.6782888229475766, "grad_norm": 0.7901271764796842, "learning_rate": 4.853853319210745e-06, "loss": 0.4251, "step": 2743 }, { "epoch": 0.6785361028684471, "grad_norm": 0.8030211456123973, "learning_rate": 4.853743798125285e-06, "loss": 0.4312, "step": 2744 }, { "epoch": 0.6787833827893175, "grad_norm": 0.7965447474952801, "learning_rate": 4.8536342372546494e-06, "loss": 0.4439, "step": 2745 }, { "epoch": 0.679030662710188, "grad_norm": 0.7792761987365541, "learning_rate": 4.85352463660069e-06, "loss": 0.4361, "step": 2746 }, { "epoch": 0.6792779426310583, "grad_norm": 0.8066811438330524, "learning_rate": 4.853414996165258e-06, "loss": 0.4504, "step": 2747 }, { "epoch": 0.6795252225519288, "grad_norm": 0.8388942890539145, "learning_rate": 4.85330531595021e-06, "loss": 0.4208, "step": 2748 }, { "epoch": 0.6797725024727992, "grad_norm": 0.8154470508323961, "learning_rate": 4.853195595957398e-06, "loss": 0.4192, "step": 2749 }, { "epoch": 0.6800197823936697, "grad_norm": 0.7756267102531477, "learning_rate": 4.853085836188676e-06, "loss": 0.3995, "step": 2750 }, { "epoch": 0.68026706231454, "grad_norm": 0.8401001254872341, "learning_rate": 4.852976036645899e-06, "loss": 0.4341, "step": 2751 }, { "epoch": 0.6805143422354105, "grad_norm": 0.8190568217830237, "learning_rate": 4.852866197330925e-06, "loss": 0.432, "step": 2752 }, { "epoch": 0.6807616221562809, "grad_norm": 0.776414872949731, "learning_rate": 4.852756318245609e-06, "loss": 0.412, "step": 2753 }, { "epoch": 0.6810089020771514, "grad_norm": 0.7750759553622607, "learning_rate": 4.852646399391808e-06, "loss": 0.4521, "step": 2754 }, { "epoch": 0.6812561819980217, "grad_norm": 0.8000941607465487, "learning_rate": 4.8525364407713825e-06, "loss": 0.4095, "step": 2755 }, { "epoch": 0.6815034619188922, "grad_norm": 0.8196128109500314, "learning_rate": 4.852426442386188e-06, "loss": 0.441, "step": 2756 }, { "epoch": 0.6817507418397626, "grad_norm": 0.8147986557860766, "learning_rate": 4.852316404238085e-06, "loss": 0.4571, "step": 2757 }, { "epoch": 0.6819980217606331, "grad_norm": 0.8017639356034257, "learning_rate": 4.8522063263289336e-06, "loss": 0.4231, "step": 2758 }, { "epoch": 0.6822453016815034, "grad_norm": 0.8257170171004764, "learning_rate": 4.8520962086605945e-06, "loss": 0.442, "step": 2759 }, { "epoch": 0.6824925816023739, "grad_norm": 0.8078217424320431, "learning_rate": 4.8519860512349295e-06, "loss": 0.4357, "step": 2760 }, { "epoch": 0.6827398615232443, "grad_norm": 0.8127981123442076, "learning_rate": 4.851875854053799e-06, "loss": 0.4018, "step": 2761 }, { "epoch": 0.6829871414441148, "grad_norm": 0.808121246312467, "learning_rate": 4.8517656171190665e-06, "loss": 0.4395, "step": 2762 }, { "epoch": 0.6832344213649851, "grad_norm": 0.8616502396837019, "learning_rate": 4.8516553404325965e-06, "loss": 0.421, "step": 2763 }, { "epoch": 0.6834817012858556, "grad_norm": 0.8211179328198835, "learning_rate": 4.851545023996252e-06, "loss": 0.4151, "step": 2764 }, { "epoch": 0.683728981206726, "grad_norm": 0.7971065636543179, "learning_rate": 4.851434667811896e-06, "loss": 0.3961, "step": 2765 }, { "epoch": 0.6839762611275965, "grad_norm": 0.7959678571700568, "learning_rate": 4.851324271881397e-06, "loss": 0.4542, "step": 2766 }, { "epoch": 0.6842235410484668, "grad_norm": 0.8297470002867344, "learning_rate": 4.8512138362066185e-06, "loss": 0.3831, "step": 2767 }, { "epoch": 0.6844708209693373, "grad_norm": 0.8012939597110195, "learning_rate": 4.851103360789428e-06, "loss": 0.4234, "step": 2768 }, { "epoch": 0.6847181008902077, "grad_norm": 0.8087329469986386, "learning_rate": 4.850992845631694e-06, "loss": 0.4243, "step": 2769 }, { "epoch": 0.6849653808110782, "grad_norm": 0.8236396320864248, "learning_rate": 4.850882290735283e-06, "loss": 0.4246, "step": 2770 }, { "epoch": 0.6852126607319485, "grad_norm": 0.8167830829538719, "learning_rate": 4.850771696102066e-06, "loss": 0.4284, "step": 2771 }, { "epoch": 0.685459940652819, "grad_norm": 0.7654752845074516, "learning_rate": 4.850661061733909e-06, "loss": 0.4589, "step": 2772 }, { "epoch": 0.6857072205736894, "grad_norm": 0.8547620176700507, "learning_rate": 4.850550387632683e-06, "loss": 0.4125, "step": 2773 }, { "epoch": 0.6859545004945599, "grad_norm": 0.7917980650045635, "learning_rate": 4.85043967380026e-06, "loss": 0.4348, "step": 2774 }, { "epoch": 0.6862017804154302, "grad_norm": 0.8345478477879663, "learning_rate": 4.850328920238512e-06, "loss": 0.4178, "step": 2775 }, { "epoch": 0.6864490603363007, "grad_norm": 0.8332777404049719, "learning_rate": 4.8502181269493084e-06, "loss": 0.4122, "step": 2776 }, { "epoch": 0.6866963402571711, "grad_norm": 0.7759039805494313, "learning_rate": 4.850107293934524e-06, "loss": 0.4256, "step": 2777 }, { "epoch": 0.6869436201780416, "grad_norm": 0.8252611335287267, "learning_rate": 4.849996421196031e-06, "loss": 0.416, "step": 2778 }, { "epoch": 0.6871909000989119, "grad_norm": 0.788591873666151, "learning_rate": 4.849885508735704e-06, "loss": 0.4711, "step": 2779 }, { "epoch": 0.6874381800197824, "grad_norm": 0.8189554759316249, "learning_rate": 4.849774556555419e-06, "loss": 0.4125, "step": 2780 }, { "epoch": 0.6876854599406528, "grad_norm": 0.7836784844389106, "learning_rate": 4.849663564657049e-06, "loss": 0.4231, "step": 2781 }, { "epoch": 0.6879327398615233, "grad_norm": 0.8058795307766686, "learning_rate": 4.849552533042472e-06, "loss": 0.4194, "step": 2782 }, { "epoch": 0.6881800197823936, "grad_norm": 0.7907983381026115, "learning_rate": 4.8494414617135635e-06, "loss": 0.4243, "step": 2783 }, { "epoch": 0.6884272997032641, "grad_norm": 0.8183733832559971, "learning_rate": 4.8493303506722025e-06, "loss": 0.4179, "step": 2784 }, { "epoch": 0.6886745796241345, "grad_norm": 0.8188930609183032, "learning_rate": 4.849219199920266e-06, "loss": 0.4405, "step": 2785 }, { "epoch": 0.688921859545005, "grad_norm": 0.8305368966836002, "learning_rate": 4.849108009459632e-06, "loss": 0.4278, "step": 2786 }, { "epoch": 0.6891691394658753, "grad_norm": 0.8366002879385498, "learning_rate": 4.8489967792921806e-06, "loss": 0.4391, "step": 2787 }, { "epoch": 0.6894164193867458, "grad_norm": 0.7871906369909654, "learning_rate": 4.848885509419793e-06, "loss": 0.4691, "step": 2788 }, { "epoch": 0.6896636993076162, "grad_norm": 0.8251697333494993, "learning_rate": 4.848774199844348e-06, "loss": 0.4225, "step": 2789 }, { "epoch": 0.6899109792284867, "grad_norm": 0.8012321325556169, "learning_rate": 4.848662850567729e-06, "loss": 0.3791, "step": 2790 }, { "epoch": 0.690158259149357, "grad_norm": 0.7895786794853111, "learning_rate": 4.848551461591817e-06, "loss": 0.4197, "step": 2791 }, { "epoch": 0.6904055390702275, "grad_norm": 0.8122224358345084, "learning_rate": 4.848440032918496e-06, "loss": 0.4122, "step": 2792 }, { "epoch": 0.6906528189910979, "grad_norm": 0.8011124583088066, "learning_rate": 4.848328564549648e-06, "loss": 0.3933, "step": 2793 }, { "epoch": 0.6909000989119684, "grad_norm": 0.8069567267692628, "learning_rate": 4.848217056487158e-06, "loss": 0.4225, "step": 2794 }, { "epoch": 0.6911473788328387, "grad_norm": 0.8025647116960141, "learning_rate": 4.84810550873291e-06, "loss": 0.4056, "step": 2795 }, { "epoch": 0.6913946587537092, "grad_norm": 0.7795941271097871, "learning_rate": 4.84799392128879e-06, "loss": 0.4069, "step": 2796 }, { "epoch": 0.6916419386745796, "grad_norm": 0.8206549077084073, "learning_rate": 4.847882294156684e-06, "loss": 0.4144, "step": 2797 }, { "epoch": 0.69188921859545, "grad_norm": 0.7993709593343162, "learning_rate": 4.847770627338479e-06, "loss": 0.4189, "step": 2798 }, { "epoch": 0.6921364985163204, "grad_norm": 0.7983736260715664, "learning_rate": 4.847658920836063e-06, "loss": 0.4471, "step": 2799 }, { "epoch": 0.6923837784371909, "grad_norm": 0.8042491425514566, "learning_rate": 4.847547174651325e-06, "loss": 0.4155, "step": 2800 }, { "epoch": 0.6926310583580614, "grad_norm": 0.8234388367328737, "learning_rate": 4.84743538878615e-06, "loss": 0.4272, "step": 2801 }, { "epoch": 0.6928783382789317, "grad_norm": 0.7744695018229808, "learning_rate": 4.847323563242431e-06, "loss": 0.4224, "step": 2802 }, { "epoch": 0.6931256181998022, "grad_norm": 0.7872893346801324, "learning_rate": 4.847211698022058e-06, "loss": 0.4265, "step": 2803 }, { "epoch": 0.6933728981206726, "grad_norm": 0.8187757340572908, "learning_rate": 4.84709979312692e-06, "loss": 0.44, "step": 2804 }, { "epoch": 0.6936201780415431, "grad_norm": 0.801267739135154, "learning_rate": 4.84698784855891e-06, "loss": 0.4532, "step": 2805 }, { "epoch": 0.6938674579624134, "grad_norm": 0.8573956823059902, "learning_rate": 4.84687586431992e-06, "loss": 0.4569, "step": 2806 }, { "epoch": 0.6941147378832839, "grad_norm": 0.810081502663985, "learning_rate": 4.846763840411842e-06, "loss": 0.4453, "step": 2807 }, { "epoch": 0.6943620178041543, "grad_norm": 0.7970434492165938, "learning_rate": 4.8466517768365705e-06, "loss": 0.4149, "step": 2808 }, { "epoch": 0.6946092977250248, "grad_norm": 0.7767778121773069, "learning_rate": 4.846539673595999e-06, "loss": 0.4063, "step": 2809 }, { "epoch": 0.6948565776458951, "grad_norm": 0.8376570930059907, "learning_rate": 4.846427530692023e-06, "loss": 0.4213, "step": 2810 }, { "epoch": 0.6951038575667656, "grad_norm": 0.8110481108261645, "learning_rate": 4.846315348126538e-06, "loss": 0.4211, "step": 2811 }, { "epoch": 0.695351137487636, "grad_norm": 0.8193719014283674, "learning_rate": 4.84620312590144e-06, "loss": 0.4341, "step": 2812 }, { "epoch": 0.6955984174085065, "grad_norm": 0.7941431266613678, "learning_rate": 4.846090864018625e-06, "loss": 0.4064, "step": 2813 }, { "epoch": 0.6958456973293768, "grad_norm": 0.7931169995062347, "learning_rate": 4.845978562479993e-06, "loss": 0.4086, "step": 2814 }, { "epoch": 0.6960929772502473, "grad_norm": 0.831336166924252, "learning_rate": 4.84586622128744e-06, "loss": 0.4216, "step": 2815 }, { "epoch": 0.6963402571711177, "grad_norm": 0.7731150224481598, "learning_rate": 4.845753840442865e-06, "loss": 0.4164, "step": 2816 }, { "epoch": 0.6965875370919882, "grad_norm": 0.7920674915282415, "learning_rate": 4.845641419948168e-06, "loss": 0.4356, "step": 2817 }, { "epoch": 0.6968348170128585, "grad_norm": 0.875976281556326, "learning_rate": 4.84552895980525e-06, "loss": 0.4334, "step": 2818 }, { "epoch": 0.697082096933729, "grad_norm": 0.8001556368006945, "learning_rate": 4.845416460016011e-06, "loss": 0.4052, "step": 2819 }, { "epoch": 0.6973293768545994, "grad_norm": 0.7875811128771384, "learning_rate": 4.845303920582353e-06, "loss": 0.4402, "step": 2820 }, { "epoch": 0.6975766567754699, "grad_norm": 0.8171665238112196, "learning_rate": 4.845191341506178e-06, "loss": 0.4224, "step": 2821 }, { "epoch": 0.6978239366963402, "grad_norm": 0.7908125499885357, "learning_rate": 4.845078722789388e-06, "loss": 0.4352, "step": 2822 }, { "epoch": 0.6980712166172107, "grad_norm": 0.823467984833555, "learning_rate": 4.844966064433889e-06, "loss": 0.4124, "step": 2823 }, { "epoch": 0.6983184965380811, "grad_norm": 0.7508391242666943, "learning_rate": 4.844853366441583e-06, "loss": 0.4066, "step": 2824 }, { "epoch": 0.6985657764589516, "grad_norm": 0.7815866665699481, "learning_rate": 4.844740628814376e-06, "loss": 0.4236, "step": 2825 }, { "epoch": 0.6988130563798219, "grad_norm": 0.797117432450336, "learning_rate": 4.8446278515541735e-06, "loss": 0.3852, "step": 2826 }, { "epoch": 0.6990603363006924, "grad_norm": 0.7708383132040468, "learning_rate": 4.844515034662882e-06, "loss": 0.4233, "step": 2827 }, { "epoch": 0.6993076162215628, "grad_norm": 0.8234459455209941, "learning_rate": 4.844402178142408e-06, "loss": 0.4306, "step": 2828 }, { "epoch": 0.6995548961424333, "grad_norm": 0.7891526183943096, "learning_rate": 4.844289281994659e-06, "loss": 0.43, "step": 2829 }, { "epoch": 0.6998021760633036, "grad_norm": 0.8269880587344088, "learning_rate": 4.844176346221543e-06, "loss": 0.382, "step": 2830 }, { "epoch": 0.7000494559841741, "grad_norm": 0.7979649115794843, "learning_rate": 4.844063370824969e-06, "loss": 0.4233, "step": 2831 }, { "epoch": 0.7002967359050445, "grad_norm": 0.7802581744052987, "learning_rate": 4.843950355806848e-06, "loss": 0.4148, "step": 2832 }, { "epoch": 0.700544015825915, "grad_norm": 0.7770547238028475, "learning_rate": 4.84383730116909e-06, "loss": 0.4468, "step": 2833 }, { "epoch": 0.7007912957467853, "grad_norm": 0.79419809474251, "learning_rate": 4.843724206913604e-06, "loss": 0.4328, "step": 2834 }, { "epoch": 0.7010385756676558, "grad_norm": 0.8038231205976583, "learning_rate": 4.843611073042303e-06, "loss": 0.4266, "step": 2835 }, { "epoch": 0.7012858555885262, "grad_norm": 0.7842622977893237, "learning_rate": 4.843497899557099e-06, "loss": 0.4553, "step": 2836 }, { "epoch": 0.7015331355093967, "grad_norm": 0.8707104909836916, "learning_rate": 4.843384686459906e-06, "loss": 0.4047, "step": 2837 }, { "epoch": 0.701780415430267, "grad_norm": 0.8075300737210989, "learning_rate": 4.843271433752635e-06, "loss": 0.3966, "step": 2838 }, { "epoch": 0.7020276953511375, "grad_norm": 0.7771082005270675, "learning_rate": 4.843158141437204e-06, "loss": 0.4482, "step": 2839 }, { "epoch": 0.7022749752720079, "grad_norm": 0.826855389864659, "learning_rate": 4.843044809515525e-06, "loss": 0.4077, "step": 2840 }, { "epoch": 0.7025222551928784, "grad_norm": 0.8162412200875937, "learning_rate": 4.842931437989515e-06, "loss": 0.4583, "step": 2841 }, { "epoch": 0.7027695351137487, "grad_norm": 0.8050089615685367, "learning_rate": 4.84281802686109e-06, "loss": 0.3962, "step": 2842 }, { "epoch": 0.7030168150346192, "grad_norm": 0.8163750044094787, "learning_rate": 4.8427045761321675e-06, "loss": 0.4327, "step": 2843 }, { "epoch": 0.7032640949554896, "grad_norm": 0.8563912067654783, "learning_rate": 4.842591085804664e-06, "loss": 0.4566, "step": 2844 }, { "epoch": 0.7035113748763601, "grad_norm": 0.8223661537798799, "learning_rate": 4.842477555880498e-06, "loss": 0.4242, "step": 2845 }, { "epoch": 0.7037586547972304, "grad_norm": 0.7730639792659905, "learning_rate": 4.84236398636159e-06, "loss": 0.4461, "step": 2846 }, { "epoch": 0.7040059347181009, "grad_norm": 0.7769243044865499, "learning_rate": 4.842250377249858e-06, "loss": 0.4306, "step": 2847 }, { "epoch": 0.7042532146389713, "grad_norm": 0.8167018892846426, "learning_rate": 4.842136728547223e-06, "loss": 0.4237, "step": 2848 }, { "epoch": 0.7045004945598418, "grad_norm": 0.8385873076201484, "learning_rate": 4.842023040255606e-06, "loss": 0.41, "step": 2849 }, { "epoch": 0.7047477744807121, "grad_norm": 0.794449770554456, "learning_rate": 4.841909312376928e-06, "loss": 0.3948, "step": 2850 }, { "epoch": 0.7049950544015826, "grad_norm": 0.8085818080942666, "learning_rate": 4.841795544913112e-06, "loss": 0.4333, "step": 2851 }, { "epoch": 0.705242334322453, "grad_norm": 0.8347190989084778, "learning_rate": 4.841681737866082e-06, "loss": 0.4061, "step": 2852 }, { "epoch": 0.7054896142433235, "grad_norm": 0.7808124598357045, "learning_rate": 4.84156789123776e-06, "loss": 0.4412, "step": 2853 }, { "epoch": 0.7057368941641938, "grad_norm": 0.8393493189297654, "learning_rate": 4.841454005030071e-06, "loss": 0.415, "step": 2854 }, { "epoch": 0.7059841740850643, "grad_norm": 0.786373432859627, "learning_rate": 4.84134007924494e-06, "loss": 0.4337, "step": 2855 }, { "epoch": 0.7062314540059347, "grad_norm": 0.7791712896571225, "learning_rate": 4.841226113884292e-06, "loss": 0.4308, "step": 2856 }, { "epoch": 0.7064787339268052, "grad_norm": 0.7758379505569559, "learning_rate": 4.841112108950055e-06, "loss": 0.4105, "step": 2857 }, { "epoch": 0.7067260138476755, "grad_norm": 0.7336906709851186, "learning_rate": 4.840998064444154e-06, "loss": 0.4304, "step": 2858 }, { "epoch": 0.706973293768546, "grad_norm": 0.7709631481004076, "learning_rate": 4.840883980368518e-06, "loss": 0.4417, "step": 2859 }, { "epoch": 0.7072205736894164, "grad_norm": 0.861736396380686, "learning_rate": 4.840769856725076e-06, "loss": 0.426, "step": 2860 }, { "epoch": 0.7074678536102869, "grad_norm": 0.8644517295452442, "learning_rate": 4.840655693515754e-06, "loss": 0.3996, "step": 2861 }, { "epoch": 0.7077151335311572, "grad_norm": 0.7940413003498069, "learning_rate": 4.840541490742485e-06, "loss": 0.4244, "step": 2862 }, { "epoch": 0.7079624134520277, "grad_norm": 0.8098229584210634, "learning_rate": 4.840427248407199e-06, "loss": 0.4284, "step": 2863 }, { "epoch": 0.7082096933728981, "grad_norm": 0.8133450773470225, "learning_rate": 4.840312966511825e-06, "loss": 0.4395, "step": 2864 }, { "epoch": 0.7084569732937686, "grad_norm": 0.7919331528428261, "learning_rate": 4.840198645058296e-06, "loss": 0.447, "step": 2865 }, { "epoch": 0.7087042532146389, "grad_norm": 0.7965762781992587, "learning_rate": 4.840084284048544e-06, "loss": 0.4315, "step": 2866 }, { "epoch": 0.7089515331355094, "grad_norm": 0.8215063582230092, "learning_rate": 4.839969883484502e-06, "loss": 0.4236, "step": 2867 }, { "epoch": 0.7091988130563798, "grad_norm": 0.8198839600468166, "learning_rate": 4.8398554433681056e-06, "loss": 0.4358, "step": 2868 }, { "epoch": 0.7094460929772503, "grad_norm": 0.7736256143569525, "learning_rate": 4.839740963701286e-06, "loss": 0.4558, "step": 2869 }, { "epoch": 0.7096933728981206, "grad_norm": 0.7829134148515651, "learning_rate": 4.83962644448598e-06, "loss": 0.4205, "step": 2870 }, { "epoch": 0.7099406528189911, "grad_norm": 0.8252334612510036, "learning_rate": 4.839511885724123e-06, "loss": 0.4337, "step": 2871 }, { "epoch": 0.7101879327398615, "grad_norm": 0.8022554245927136, "learning_rate": 4.839397287417652e-06, "loss": 0.457, "step": 2872 }, { "epoch": 0.710435212660732, "grad_norm": 0.8192708918207695, "learning_rate": 4.8392826495685036e-06, "loss": 0.4167, "step": 2873 }, { "epoch": 0.7106824925816023, "grad_norm": 0.803077397303975, "learning_rate": 4.839167972178615e-06, "loss": 0.4098, "step": 2874 }, { "epoch": 0.7109297725024728, "grad_norm": 0.8468082968841901, "learning_rate": 4.839053255249925e-06, "loss": 0.4242, "step": 2875 }, { "epoch": 0.7111770524233432, "grad_norm": 0.8148986206105447, "learning_rate": 4.838938498784373e-06, "loss": 0.4286, "step": 2876 }, { "epoch": 0.7114243323442137, "grad_norm": 0.8170745630142042, "learning_rate": 4.838823702783898e-06, "loss": 0.423, "step": 2877 }, { "epoch": 0.7116716122650841, "grad_norm": 0.7996079185522241, "learning_rate": 4.838708867250441e-06, "loss": 0.4333, "step": 2878 }, { "epoch": 0.7119188921859545, "grad_norm": 0.7439191783301492, "learning_rate": 4.838593992185942e-06, "loss": 0.4235, "step": 2879 }, { "epoch": 0.712166172106825, "grad_norm": 0.7948869447365353, "learning_rate": 4.838479077592345e-06, "loss": 0.4185, "step": 2880 }, { "epoch": 0.7124134520276953, "grad_norm": 0.7963172123490325, "learning_rate": 4.83836412347159e-06, "loss": 0.4368, "step": 2881 }, { "epoch": 0.7126607319485658, "grad_norm": 0.7621389475987713, "learning_rate": 4.838249129825622e-06, "loss": 0.4544, "step": 2882 }, { "epoch": 0.7129080118694362, "grad_norm": 0.8008573013469324, "learning_rate": 4.838134096656383e-06, "loss": 0.4303, "step": 2883 }, { "epoch": 0.7131552917903067, "grad_norm": 0.7845189586215126, "learning_rate": 4.838019023965818e-06, "loss": 0.4131, "step": 2884 }, { "epoch": 0.713402571711177, "grad_norm": 0.8049398377356689, "learning_rate": 4.837903911755872e-06, "loss": 0.4288, "step": 2885 }, { "epoch": 0.7136498516320475, "grad_norm": 0.804018737817397, "learning_rate": 4.837788760028491e-06, "loss": 0.422, "step": 2886 }, { "epoch": 0.7138971315529179, "grad_norm": 0.8148238252599712, "learning_rate": 4.8376735687856215e-06, "loss": 0.4527, "step": 2887 }, { "epoch": 0.7141444114737884, "grad_norm": 0.8173379766329714, "learning_rate": 4.837558338029211e-06, "loss": 0.4449, "step": 2888 }, { "epoch": 0.7143916913946587, "grad_norm": 0.8174244199346332, "learning_rate": 4.837443067761206e-06, "loss": 0.4345, "step": 2889 }, { "epoch": 0.7146389713155292, "grad_norm": 0.7358219327556542, "learning_rate": 4.837327757983556e-06, "loss": 0.4379, "step": 2890 }, { "epoch": 0.7148862512363996, "grad_norm": 0.7666685418017337, "learning_rate": 4.837212408698209e-06, "loss": 0.4546, "step": 2891 }, { "epoch": 0.7151335311572701, "grad_norm": 0.7880217194687349, "learning_rate": 4.837097019907116e-06, "loss": 0.3937, "step": 2892 }, { "epoch": 0.7153808110781404, "grad_norm": 0.7914607948486829, "learning_rate": 4.836981591612226e-06, "loss": 0.4528, "step": 2893 }, { "epoch": 0.7156280909990109, "grad_norm": 0.8349074820163436, "learning_rate": 4.836866123815492e-06, "loss": 0.4242, "step": 2894 }, { "epoch": 0.7158753709198813, "grad_norm": 0.7726278159771962, "learning_rate": 4.836750616518864e-06, "loss": 0.4623, "step": 2895 }, { "epoch": 0.7161226508407518, "grad_norm": 0.8124863076626552, "learning_rate": 4.836635069724295e-06, "loss": 0.3905, "step": 2896 }, { "epoch": 0.7163699307616221, "grad_norm": 0.7841970322379082, "learning_rate": 4.836519483433738e-06, "loss": 0.42, "step": 2897 }, { "epoch": 0.7166172106824926, "grad_norm": 0.8169770200971669, "learning_rate": 4.8364038576491465e-06, "loss": 0.4281, "step": 2898 }, { "epoch": 0.716864490603363, "grad_norm": 0.7934550788486586, "learning_rate": 4.836288192372476e-06, "loss": 0.4258, "step": 2899 }, { "epoch": 0.7171117705242335, "grad_norm": 0.7811046579681694, "learning_rate": 4.8361724876056804e-06, "loss": 0.4334, "step": 2900 }, { "epoch": 0.7173590504451038, "grad_norm": 0.8288956689415943, "learning_rate": 4.836056743350717e-06, "loss": 0.4114, "step": 2901 }, { "epoch": 0.7176063303659743, "grad_norm": 0.8100905415131514, "learning_rate": 4.83594095960954e-06, "loss": 0.4193, "step": 2902 }, { "epoch": 0.7178536102868447, "grad_norm": 0.8349181394617355, "learning_rate": 4.835825136384107e-06, "loss": 0.4089, "step": 2903 }, { "epoch": 0.7181008902077152, "grad_norm": 0.8189708968152819, "learning_rate": 4.835709273676377e-06, "loss": 0.3892, "step": 2904 }, { "epoch": 0.7183481701285855, "grad_norm": 0.8103038676070768, "learning_rate": 4.835593371488308e-06, "loss": 0.4453, "step": 2905 }, { "epoch": 0.718595450049456, "grad_norm": 0.8690922003528119, "learning_rate": 4.835477429821859e-06, "loss": 0.4004, "step": 2906 }, { "epoch": 0.7188427299703264, "grad_norm": 0.8919261323122275, "learning_rate": 4.835361448678989e-06, "loss": 0.3887, "step": 2907 }, { "epoch": 0.7190900098911969, "grad_norm": 0.8034616323291977, "learning_rate": 4.835245428061659e-06, "loss": 0.4287, "step": 2908 }, { "epoch": 0.7193372898120672, "grad_norm": 0.8147872678673408, "learning_rate": 4.8351293679718305e-06, "loss": 0.4226, "step": 2909 }, { "epoch": 0.7195845697329377, "grad_norm": 0.819089910267115, "learning_rate": 4.835013268411465e-06, "loss": 0.4631, "step": 2910 }, { "epoch": 0.7198318496538081, "grad_norm": 0.7978691069618967, "learning_rate": 4.8348971293825245e-06, "loss": 0.4237, "step": 2911 }, { "epoch": 0.7200791295746786, "grad_norm": 0.8174621419121835, "learning_rate": 4.834780950886973e-06, "loss": 0.4376, "step": 2912 }, { "epoch": 0.7203264094955489, "grad_norm": 0.8309583432333282, "learning_rate": 4.834664732926773e-06, "loss": 0.4429, "step": 2913 }, { "epoch": 0.7205736894164194, "grad_norm": 0.8000154938904277, "learning_rate": 4.8345484755038895e-06, "loss": 0.4185, "step": 2914 }, { "epoch": 0.7208209693372898, "grad_norm": 0.7947941622328786, "learning_rate": 4.834432178620288e-06, "loss": 0.414, "step": 2915 }, { "epoch": 0.7210682492581603, "grad_norm": 0.7923969199565712, "learning_rate": 4.834315842277934e-06, "loss": 0.4357, "step": 2916 }, { "epoch": 0.7213155291790306, "grad_norm": 0.7960039620189656, "learning_rate": 4.834199466478793e-06, "loss": 0.438, "step": 2917 }, { "epoch": 0.7215628090999011, "grad_norm": 0.8361359088575793, "learning_rate": 4.8340830512248335e-06, "loss": 0.3967, "step": 2918 }, { "epoch": 0.7218100890207715, "grad_norm": 0.8108901771349343, "learning_rate": 4.833966596518023e-06, "loss": 0.4363, "step": 2919 }, { "epoch": 0.722057368941642, "grad_norm": 0.8549440580224905, "learning_rate": 4.833850102360329e-06, "loss": 0.41, "step": 2920 }, { "epoch": 0.7223046488625123, "grad_norm": 0.8003186776319219, "learning_rate": 4.833733568753721e-06, "loss": 0.4216, "step": 2921 }, { "epoch": 0.7225519287833828, "grad_norm": 0.8035872812352234, "learning_rate": 4.83361699570017e-06, "loss": 0.4182, "step": 2922 }, { "epoch": 0.7227992087042532, "grad_norm": 0.8297400236558385, "learning_rate": 4.8335003832016444e-06, "loss": 0.4316, "step": 2923 }, { "epoch": 0.7230464886251237, "grad_norm": 0.8790681235417003, "learning_rate": 4.833383731260118e-06, "loss": 0.4063, "step": 2924 }, { "epoch": 0.723293768545994, "grad_norm": 0.8268063083338992, "learning_rate": 4.833267039877559e-06, "loss": 0.3856, "step": 2925 }, { "epoch": 0.7235410484668645, "grad_norm": 0.8406419166486392, "learning_rate": 4.833150309055942e-06, "loss": 0.4236, "step": 2926 }, { "epoch": 0.7237883283877349, "grad_norm": 0.7926519696834897, "learning_rate": 4.833033538797241e-06, "loss": 0.4174, "step": 2927 }, { "epoch": 0.7240356083086054, "grad_norm": 0.8187942841023771, "learning_rate": 4.832916729103427e-06, "loss": 0.4387, "step": 2928 }, { "epoch": 0.7242828882294757, "grad_norm": 0.8221501951469953, "learning_rate": 4.832799879976476e-06, "loss": 0.3999, "step": 2929 }, { "epoch": 0.7245301681503462, "grad_norm": 0.7934800047804927, "learning_rate": 4.832682991418364e-06, "loss": 0.4058, "step": 2930 }, { "epoch": 0.7247774480712166, "grad_norm": 0.8330724261819362, "learning_rate": 4.832566063431066e-06, "loss": 0.4321, "step": 2931 }, { "epoch": 0.7250247279920871, "grad_norm": 0.8228536121668583, "learning_rate": 4.832449096016557e-06, "loss": 0.4173, "step": 2932 }, { "epoch": 0.7252720079129574, "grad_norm": 0.837175853076657, "learning_rate": 4.8323320891768166e-06, "loss": 0.4395, "step": 2933 }, { "epoch": 0.7255192878338279, "grad_norm": 0.8578406462579389, "learning_rate": 4.83221504291382e-06, "loss": 0.4025, "step": 2934 }, { "epoch": 0.7257665677546983, "grad_norm": 0.8276438543678113, "learning_rate": 4.832097957229548e-06, "loss": 0.4128, "step": 2935 }, { "epoch": 0.7260138476755688, "grad_norm": 0.7945420798891535, "learning_rate": 4.831980832125978e-06, "loss": 0.4318, "step": 2936 }, { "epoch": 0.7262611275964391, "grad_norm": 0.8624450431948255, "learning_rate": 4.8318636676050906e-06, "loss": 0.4077, "step": 2937 }, { "epoch": 0.7265084075173096, "grad_norm": 0.7943945249796411, "learning_rate": 4.831746463668866e-06, "loss": 0.4147, "step": 2938 }, { "epoch": 0.72675568743818, "grad_norm": 0.7865076359278911, "learning_rate": 4.831629220319285e-06, "loss": 0.4437, "step": 2939 }, { "epoch": 0.7270029673590505, "grad_norm": 0.8245008586056316, "learning_rate": 4.83151193755833e-06, "loss": 0.4172, "step": 2940 }, { "epoch": 0.7272502472799208, "grad_norm": 0.8682372634570225, "learning_rate": 4.831394615387983e-06, "loss": 0.423, "step": 2941 }, { "epoch": 0.7274975272007913, "grad_norm": 0.8425489215333106, "learning_rate": 4.831277253810227e-06, "loss": 0.4299, "step": 2942 }, { "epoch": 0.7277448071216617, "grad_norm": 0.8057793479253231, "learning_rate": 4.831159852827046e-06, "loss": 0.4122, "step": 2943 }, { "epoch": 0.7279920870425322, "grad_norm": 0.7887003730131898, "learning_rate": 4.831042412440424e-06, "loss": 0.4345, "step": 2944 }, { "epoch": 0.7282393669634025, "grad_norm": 0.8003641551533328, "learning_rate": 4.8309249326523475e-06, "loss": 0.4256, "step": 2945 }, { "epoch": 0.728486646884273, "grad_norm": 0.8050096172563999, "learning_rate": 4.8308074134648e-06, "loss": 0.4363, "step": 2946 }, { "epoch": 0.7287339268051434, "grad_norm": 0.8501449867311948, "learning_rate": 4.83068985487977e-06, "loss": 0.4167, "step": 2947 }, { "epoch": 0.7289812067260139, "grad_norm": 0.7875128051256901, "learning_rate": 4.830572256899243e-06, "loss": 0.4462, "step": 2948 }, { "epoch": 0.7292284866468842, "grad_norm": 0.8374013219223678, "learning_rate": 4.830454619525207e-06, "loss": 0.4518, "step": 2949 }, { "epoch": 0.7294757665677547, "grad_norm": 0.817008859188431, "learning_rate": 4.830336942759651e-06, "loss": 0.3903, "step": 2950 }, { "epoch": 0.7297230464886251, "grad_norm": 0.8014171810642367, "learning_rate": 4.830219226604565e-06, "loss": 0.4285, "step": 2951 }, { "epoch": 0.7299703264094956, "grad_norm": 0.7856834039638679, "learning_rate": 4.830101471061936e-06, "loss": 0.3934, "step": 2952 }, { "epoch": 0.7302176063303659, "grad_norm": 0.7748087866010082, "learning_rate": 4.829983676133758e-06, "loss": 0.4545, "step": 2953 }, { "epoch": 0.7304648862512364, "grad_norm": 0.7989513159267758, "learning_rate": 4.829865841822019e-06, "loss": 0.4253, "step": 2954 }, { "epoch": 0.7307121661721068, "grad_norm": 0.7764358392610177, "learning_rate": 4.829747968128712e-06, "loss": 0.4249, "step": 2955 }, { "epoch": 0.7309594460929772, "grad_norm": 0.8279849821092882, "learning_rate": 4.829630055055829e-06, "loss": 0.4512, "step": 2956 }, { "epoch": 0.7312067260138477, "grad_norm": 0.8260366275350098, "learning_rate": 4.8295121026053644e-06, "loss": 0.4295, "step": 2957 }, { "epoch": 0.7314540059347181, "grad_norm": 0.8117838736807556, "learning_rate": 4.82939411077931e-06, "loss": 0.4171, "step": 2958 }, { "epoch": 0.7317012858555886, "grad_norm": 0.7806874920384149, "learning_rate": 4.829276079579662e-06, "loss": 0.4076, "step": 2959 }, { "epoch": 0.731948565776459, "grad_norm": 0.8156644012373938, "learning_rate": 4.829158009008414e-06, "loss": 0.4367, "step": 2960 }, { "epoch": 0.7321958456973294, "grad_norm": 0.8361228747154713, "learning_rate": 4.829039899067563e-06, "loss": 0.4495, "step": 2961 }, { "epoch": 0.7324431256181998, "grad_norm": 0.7970591862132284, "learning_rate": 4.828921749759104e-06, "loss": 0.4057, "step": 2962 }, { "epoch": 0.7326904055390703, "grad_norm": 0.799202037507185, "learning_rate": 4.828803561085034e-06, "loss": 0.4048, "step": 2963 }, { "epoch": 0.7329376854599406, "grad_norm": 0.7759952798464693, "learning_rate": 4.8286853330473535e-06, "loss": 0.4005, "step": 2964 }, { "epoch": 0.7331849653808111, "grad_norm": 0.8369606917142974, "learning_rate": 4.828567065648057e-06, "loss": 0.4266, "step": 2965 }, { "epoch": 0.7334322453016815, "grad_norm": 0.8127543973780628, "learning_rate": 4.828448758889147e-06, "loss": 0.3879, "step": 2966 }, { "epoch": 0.733679525222552, "grad_norm": 0.834168778505788, "learning_rate": 4.828330412772622e-06, "loss": 0.4255, "step": 2967 }, { "epoch": 0.7339268051434223, "grad_norm": 0.7851856770790845, "learning_rate": 4.828212027300481e-06, "loss": 0.4608, "step": 2968 }, { "epoch": 0.7341740850642928, "grad_norm": 0.7632714078688472, "learning_rate": 4.828093602474727e-06, "loss": 0.4416, "step": 2969 }, { "epoch": 0.7344213649851632, "grad_norm": 0.8347666891173107, "learning_rate": 4.827975138297361e-06, "loss": 0.4122, "step": 2970 }, { "epoch": 0.7346686449060337, "grad_norm": 0.8067829929634867, "learning_rate": 4.827856634770385e-06, "loss": 0.4411, "step": 2971 }, { "epoch": 0.734915924826904, "grad_norm": 0.8017276159688024, "learning_rate": 4.8277380918958015e-06, "loss": 0.3973, "step": 2972 }, { "epoch": 0.7351632047477745, "grad_norm": 0.812076660288236, "learning_rate": 4.827619509675616e-06, "loss": 0.4197, "step": 2973 }, { "epoch": 0.7354104846686449, "grad_norm": 0.7788714236767956, "learning_rate": 4.827500888111833e-06, "loss": 0.4337, "step": 2974 }, { "epoch": 0.7356577645895154, "grad_norm": 0.8090103487550155, "learning_rate": 4.8273822272064555e-06, "loss": 0.419, "step": 2975 }, { "epoch": 0.7359050445103857, "grad_norm": 0.8265966587257293, "learning_rate": 4.8272635269614895e-06, "loss": 0.4082, "step": 2976 }, { "epoch": 0.7361523244312562, "grad_norm": 0.7846445990321484, "learning_rate": 4.827144787378944e-06, "loss": 0.437, "step": 2977 }, { "epoch": 0.7363996043521266, "grad_norm": 0.8312527404300895, "learning_rate": 4.827026008460823e-06, "loss": 0.4016, "step": 2978 }, { "epoch": 0.7366468842729971, "grad_norm": 0.7910678678346987, "learning_rate": 4.826907190209136e-06, "loss": 0.427, "step": 2979 }, { "epoch": 0.7368941641938674, "grad_norm": 0.8081223109051165, "learning_rate": 4.82678833262589e-06, "loss": 0.3933, "step": 2980 }, { "epoch": 0.7371414441147379, "grad_norm": 0.8047433161446556, "learning_rate": 4.826669435713096e-06, "loss": 0.4079, "step": 2981 }, { "epoch": 0.7373887240356083, "grad_norm": 0.7999842935873025, "learning_rate": 4.826550499472761e-06, "loss": 0.4615, "step": 2982 }, { "epoch": 0.7376360039564788, "grad_norm": 0.792716091172535, "learning_rate": 4.826431523906898e-06, "loss": 0.391, "step": 2983 }, { "epoch": 0.7378832838773491, "grad_norm": 0.8303626433497433, "learning_rate": 4.826312509017517e-06, "loss": 0.404, "step": 2984 }, { "epoch": 0.7381305637982196, "grad_norm": 0.8374450240264267, "learning_rate": 4.826193454806629e-06, "loss": 0.4143, "step": 2985 }, { "epoch": 0.73837784371909, "grad_norm": 0.7785793171820266, "learning_rate": 4.826074361276247e-06, "loss": 0.4179, "step": 2986 }, { "epoch": 0.7386251236399605, "grad_norm": 0.7920577899878737, "learning_rate": 4.825955228428385e-06, "loss": 0.4276, "step": 2987 }, { "epoch": 0.7388724035608308, "grad_norm": 0.8200588077966792, "learning_rate": 4.825836056265055e-06, "loss": 0.4317, "step": 2988 }, { "epoch": 0.7391196834817013, "grad_norm": 0.8116664468428142, "learning_rate": 4.8257168447882725e-06, "loss": 0.4042, "step": 2989 }, { "epoch": 0.7393669634025717, "grad_norm": 0.776120292987068, "learning_rate": 4.825597594000052e-06, "loss": 0.4418, "step": 2990 }, { "epoch": 0.7396142433234422, "grad_norm": 0.8081847545003181, "learning_rate": 4.825478303902409e-06, "loss": 0.412, "step": 2991 }, { "epoch": 0.7398615232443125, "grad_norm": 0.7950133188013835, "learning_rate": 4.825358974497361e-06, "loss": 0.4246, "step": 2992 }, { "epoch": 0.740108803165183, "grad_norm": 0.7993263391124805, "learning_rate": 4.825239605786924e-06, "loss": 0.4061, "step": 2993 }, { "epoch": 0.7403560830860534, "grad_norm": 0.7997981436722923, "learning_rate": 4.825120197773114e-06, "loss": 0.4563, "step": 2994 }, { "epoch": 0.7406033630069239, "grad_norm": 0.7705130115086049, "learning_rate": 4.825000750457953e-06, "loss": 0.4198, "step": 2995 }, { "epoch": 0.7408506429277942, "grad_norm": 0.8002552475075952, "learning_rate": 4.824881263843458e-06, "loss": 0.4432, "step": 2996 }, { "epoch": 0.7410979228486647, "grad_norm": 0.8018310738296338, "learning_rate": 4.824761737931649e-06, "loss": 0.4402, "step": 2997 }, { "epoch": 0.7413452027695351, "grad_norm": 0.8593903578168341, "learning_rate": 4.8246421727245465e-06, "loss": 0.4054, "step": 2998 }, { "epoch": 0.7415924826904056, "grad_norm": 0.7889330438300733, "learning_rate": 4.8245225682241705e-06, "loss": 0.4422, "step": 2999 }, { "epoch": 0.7418397626112759, "grad_norm": 0.7938169220464205, "learning_rate": 4.824402924432543e-06, "loss": 0.441, "step": 3000 }, { "epoch": 0.7420870425321464, "grad_norm": 0.7844611533247873, "learning_rate": 4.8242832413516874e-06, "loss": 0.4336, "step": 3001 }, { "epoch": 0.7423343224530168, "grad_norm": 0.7749679668683314, "learning_rate": 4.824163518983627e-06, "loss": 0.4142, "step": 3002 }, { "epoch": 0.7425816023738873, "grad_norm": 0.8197901469393682, "learning_rate": 4.824043757330384e-06, "loss": 0.4245, "step": 3003 }, { "epoch": 0.7428288822947576, "grad_norm": 0.8261013183178937, "learning_rate": 4.823923956393982e-06, "loss": 0.4037, "step": 3004 }, { "epoch": 0.7430761622156281, "grad_norm": 0.7558923203936677, "learning_rate": 4.8238041161764475e-06, "loss": 0.4175, "step": 3005 }, { "epoch": 0.7433234421364985, "grad_norm": 0.8320092925247135, "learning_rate": 4.823684236679807e-06, "loss": 0.4256, "step": 3006 }, { "epoch": 0.743570722057369, "grad_norm": 0.8217439102346995, "learning_rate": 4.823564317906085e-06, "loss": 0.427, "step": 3007 }, { "epoch": 0.7438180019782393, "grad_norm": 0.8199819751186989, "learning_rate": 4.823444359857308e-06, "loss": 0.382, "step": 3008 }, { "epoch": 0.7440652818991098, "grad_norm": 0.817959773474465, "learning_rate": 4.823324362535506e-06, "loss": 0.4329, "step": 3009 }, { "epoch": 0.7443125618199802, "grad_norm": 0.8187584290387997, "learning_rate": 4.823204325942706e-06, "loss": 0.4485, "step": 3010 }, { "epoch": 0.7445598417408507, "grad_norm": 0.7898625944514868, "learning_rate": 4.823084250080937e-06, "loss": 0.4069, "step": 3011 }, { "epoch": 0.744807121661721, "grad_norm": 0.7940276326242832, "learning_rate": 4.822964134952229e-06, "loss": 0.4197, "step": 3012 }, { "epoch": 0.7450544015825915, "grad_norm": 0.7927876642320121, "learning_rate": 4.822843980558611e-06, "loss": 0.4299, "step": 3013 }, { "epoch": 0.7453016815034619, "grad_norm": 0.7994978248822859, "learning_rate": 4.8227237869021165e-06, "loss": 0.4234, "step": 3014 }, { "epoch": 0.7455489614243324, "grad_norm": 0.7543327971185407, "learning_rate": 4.822603553984775e-06, "loss": 0.4427, "step": 3015 }, { "epoch": 0.7457962413452027, "grad_norm": 0.8259426315239943, "learning_rate": 4.822483281808619e-06, "loss": 0.4323, "step": 3016 }, { "epoch": 0.7460435212660732, "grad_norm": 0.7824668111245527, "learning_rate": 4.822362970375682e-06, "loss": 0.4243, "step": 3017 }, { "epoch": 0.7462908011869436, "grad_norm": 0.8090132078040282, "learning_rate": 4.822242619687997e-06, "loss": 0.4681, "step": 3018 }, { "epoch": 0.746538081107814, "grad_norm": 0.7841620643031427, "learning_rate": 4.8221222297476e-06, "loss": 0.4296, "step": 3019 }, { "epoch": 0.7467853610286844, "grad_norm": 0.7995190592508252, "learning_rate": 4.822001800556523e-06, "loss": 0.4018, "step": 3020 }, { "epoch": 0.7470326409495549, "grad_norm": 0.7860214365572894, "learning_rate": 4.821881332116804e-06, "loss": 0.4409, "step": 3021 }, { "epoch": 0.7472799208704253, "grad_norm": 0.8040174870471135, "learning_rate": 4.8217608244304794e-06, "loss": 0.4187, "step": 3022 }, { "epoch": 0.7475272007912958, "grad_norm": 0.8042489812626473, "learning_rate": 4.821640277499584e-06, "loss": 0.4127, "step": 3023 }, { "epoch": 0.7477744807121661, "grad_norm": 0.834905466029092, "learning_rate": 4.8215196913261575e-06, "loss": 0.4198, "step": 3024 }, { "epoch": 0.7480217606330366, "grad_norm": 0.7667416755370126, "learning_rate": 4.821399065912237e-06, "loss": 0.4247, "step": 3025 }, { "epoch": 0.748269040553907, "grad_norm": 0.7910830801831669, "learning_rate": 4.821278401259861e-06, "loss": 0.4077, "step": 3026 }, { "epoch": 0.7485163204747775, "grad_norm": 0.7382810269836959, "learning_rate": 4.8211576973710714e-06, "loss": 0.4525, "step": 3027 }, { "epoch": 0.7487636003956478, "grad_norm": 0.8121105617204082, "learning_rate": 4.8210369542479055e-06, "loss": 0.4052, "step": 3028 }, { "epoch": 0.7490108803165183, "grad_norm": 0.8058548126440102, "learning_rate": 4.820916171892407e-06, "loss": 0.4351, "step": 3029 }, { "epoch": 0.7492581602373887, "grad_norm": 0.8577264933262061, "learning_rate": 4.820795350306615e-06, "loss": 0.3981, "step": 3030 }, { "epoch": 0.7495054401582592, "grad_norm": 0.7453444539845234, "learning_rate": 4.820674489492573e-06, "loss": 0.4223, "step": 3031 }, { "epoch": 0.7497527200791295, "grad_norm": 0.8466748330823123, "learning_rate": 4.820553589452323e-06, "loss": 0.3995, "step": 3032 }, { "epoch": 0.75, "grad_norm": 0.7790821202863654, "learning_rate": 4.820432650187911e-06, "loss": 0.404, "step": 3033 }, { "epoch": 0.7502472799208705, "grad_norm": 0.8153973710234346, "learning_rate": 4.820311671701379e-06, "loss": 0.4276, "step": 3034 }, { "epoch": 0.7504945598417408, "grad_norm": 0.7753722323128266, "learning_rate": 4.8201906539947715e-06, "loss": 0.4306, "step": 3035 }, { "epoch": 0.7507418397626113, "grad_norm": 0.7744896340611823, "learning_rate": 4.8200695970701356e-06, "loss": 0.4356, "step": 3036 }, { "epoch": 0.7509891196834817, "grad_norm": 0.7690189042676627, "learning_rate": 4.8199485009295166e-06, "loss": 0.44, "step": 3037 }, { "epoch": 0.7512363996043522, "grad_norm": 0.8033947179211088, "learning_rate": 4.819827365574963e-06, "loss": 0.4139, "step": 3038 }, { "epoch": 0.7514836795252225, "grad_norm": 0.7576932851777864, "learning_rate": 4.819706191008519e-06, "loss": 0.4081, "step": 3039 }, { "epoch": 0.751730959446093, "grad_norm": 0.7967928320460952, "learning_rate": 4.819584977232236e-06, "loss": 0.4257, "step": 3040 }, { "epoch": 0.7519782393669634, "grad_norm": 0.8392913645000412, "learning_rate": 4.8194637242481615e-06, "loss": 0.4197, "step": 3041 }, { "epoch": 0.7522255192878339, "grad_norm": 0.7730924432116476, "learning_rate": 4.819342432058345e-06, "loss": 0.4321, "step": 3042 }, { "epoch": 0.7524727992087042, "grad_norm": 0.8068109563132863, "learning_rate": 4.819221100664836e-06, "loss": 0.3994, "step": 3043 }, { "epoch": 0.7527200791295747, "grad_norm": 0.8260660558085546, "learning_rate": 4.819099730069688e-06, "loss": 0.4048, "step": 3044 }, { "epoch": 0.7529673590504451, "grad_norm": 0.770330180817139, "learning_rate": 4.8189783202749495e-06, "loss": 0.4362, "step": 3045 }, { "epoch": 0.7532146389713156, "grad_norm": 0.831087174442937, "learning_rate": 4.818856871282674e-06, "loss": 0.402, "step": 3046 }, { "epoch": 0.753461918892186, "grad_norm": 0.8055486831247699, "learning_rate": 4.818735383094915e-06, "loss": 0.4169, "step": 3047 }, { "epoch": 0.7537091988130564, "grad_norm": 0.8349348167930836, "learning_rate": 4.818613855713725e-06, "loss": 0.4063, "step": 3048 }, { "epoch": 0.7539564787339268, "grad_norm": 0.7832541815851745, "learning_rate": 4.818492289141159e-06, "loss": 0.42, "step": 3049 }, { "epoch": 0.7542037586547973, "grad_norm": 0.8058628554987075, "learning_rate": 4.818370683379271e-06, "loss": 0.4038, "step": 3050 }, { "epoch": 0.7544510385756676, "grad_norm": 0.7869318129819252, "learning_rate": 4.818249038430117e-06, "loss": 0.4022, "step": 3051 }, { "epoch": 0.7546983184965381, "grad_norm": 0.7429522741807416, "learning_rate": 4.818127354295752e-06, "loss": 0.433, "step": 3052 }, { "epoch": 0.7549455984174085, "grad_norm": 0.8058306975517127, "learning_rate": 4.818005630978235e-06, "loss": 0.4429, "step": 3053 }, { "epoch": 0.755192878338279, "grad_norm": 0.8114294962955277, "learning_rate": 4.817883868479622e-06, "loss": 0.4374, "step": 3054 }, { "epoch": 0.7554401582591493, "grad_norm": 0.7666783664854765, "learning_rate": 4.817762066801971e-06, "loss": 0.4114, "step": 3055 }, { "epoch": 0.7556874381800198, "grad_norm": 0.8336654948850036, "learning_rate": 4.817640225947341e-06, "loss": 0.3976, "step": 3056 }, { "epoch": 0.7559347181008902, "grad_norm": 0.8036876409711966, "learning_rate": 4.817518345917792e-06, "loss": 0.4335, "step": 3057 }, { "epoch": 0.7561819980217607, "grad_norm": 0.8720285437742171, "learning_rate": 4.817396426715384e-06, "loss": 0.4038, "step": 3058 }, { "epoch": 0.756429277942631, "grad_norm": 0.8182607660403057, "learning_rate": 4.8172744683421765e-06, "loss": 0.427, "step": 3059 }, { "epoch": 0.7566765578635015, "grad_norm": 0.8027583929885022, "learning_rate": 4.8171524708002335e-06, "loss": 0.4123, "step": 3060 }, { "epoch": 0.7569238377843719, "grad_norm": 0.8232173834519446, "learning_rate": 4.817030434091615e-06, "loss": 0.413, "step": 3061 }, { "epoch": 0.7571711177052424, "grad_norm": 0.805678272221054, "learning_rate": 4.816908358218384e-06, "loss": 0.4007, "step": 3062 }, { "epoch": 0.7574183976261127, "grad_norm": 0.7726560827200997, "learning_rate": 4.8167862431826054e-06, "loss": 0.3941, "step": 3063 }, { "epoch": 0.7576656775469832, "grad_norm": 0.8068589530095359, "learning_rate": 4.816664088986342e-06, "loss": 0.4074, "step": 3064 }, { "epoch": 0.7579129574678536, "grad_norm": 0.8193830699266273, "learning_rate": 4.816541895631659e-06, "loss": 0.4048, "step": 3065 }, { "epoch": 0.7581602373887241, "grad_norm": 0.802439000408714, "learning_rate": 4.816419663120621e-06, "loss": 0.4228, "step": 3066 }, { "epoch": 0.7584075173095944, "grad_norm": 0.7831520315560794, "learning_rate": 4.816297391455296e-06, "loss": 0.4416, "step": 3067 }, { "epoch": 0.7586547972304649, "grad_norm": 0.8013772553913207, "learning_rate": 4.816175080637748e-06, "loss": 0.4063, "step": 3068 }, { "epoch": 0.7589020771513353, "grad_norm": 0.8069423659312832, "learning_rate": 4.816052730670047e-06, "loss": 0.4101, "step": 3069 }, { "epoch": 0.7591493570722058, "grad_norm": 0.8042760358148239, "learning_rate": 4.815930341554259e-06, "loss": 0.437, "step": 3070 }, { "epoch": 0.7593966369930761, "grad_norm": 0.8528508748773457, "learning_rate": 4.815807913292454e-06, "loss": 0.4203, "step": 3071 }, { "epoch": 0.7596439169139466, "grad_norm": 0.7788038832278472, "learning_rate": 4.815685445886702e-06, "loss": 0.4122, "step": 3072 }, { "epoch": 0.759891196834817, "grad_norm": 0.7895557689997091, "learning_rate": 4.815562939339072e-06, "loss": 0.439, "step": 3073 }, { "epoch": 0.7601384767556875, "grad_norm": 0.7665727269321336, "learning_rate": 4.815440393651635e-06, "loss": 0.4272, "step": 3074 }, { "epoch": 0.7603857566765578, "grad_norm": 0.8193909964159897, "learning_rate": 4.815317808826462e-06, "loss": 0.3975, "step": 3075 }, { "epoch": 0.7606330365974283, "grad_norm": 0.7904789376121926, "learning_rate": 4.815195184865625e-06, "loss": 0.4269, "step": 3076 }, { "epoch": 0.7608803165182987, "grad_norm": 0.8179670797014685, "learning_rate": 4.815072521771197e-06, "loss": 0.3968, "step": 3077 }, { "epoch": 0.7611275964391692, "grad_norm": 0.7733754206107383, "learning_rate": 4.814949819545252e-06, "loss": 0.4223, "step": 3078 }, { "epoch": 0.7613748763600395, "grad_norm": 0.7996203606510405, "learning_rate": 4.8148270781898635e-06, "loss": 0.4292, "step": 3079 }, { "epoch": 0.76162215628091, "grad_norm": 0.8190133067708448, "learning_rate": 4.814704297707105e-06, "loss": 0.4145, "step": 3080 }, { "epoch": 0.7618694362017804, "grad_norm": 0.7937567763174175, "learning_rate": 4.814581478099054e-06, "loss": 0.4103, "step": 3081 }, { "epoch": 0.7621167161226509, "grad_norm": 0.7713599014763641, "learning_rate": 4.814458619367785e-06, "loss": 0.4263, "step": 3082 }, { "epoch": 0.7623639960435212, "grad_norm": 0.819891250654944, "learning_rate": 4.814335721515376e-06, "loss": 0.4097, "step": 3083 }, { "epoch": 0.7626112759643917, "grad_norm": 0.7885235017605721, "learning_rate": 4.814212784543902e-06, "loss": 0.449, "step": 3084 }, { "epoch": 0.7628585558852621, "grad_norm": 0.7893477319871918, "learning_rate": 4.814089808455444e-06, "loss": 0.4257, "step": 3085 }, { "epoch": 0.7631058358061326, "grad_norm": 0.7928417601407314, "learning_rate": 4.813966793252079e-06, "loss": 0.4282, "step": 3086 }, { "epoch": 0.7633531157270029, "grad_norm": 0.7930496478110192, "learning_rate": 4.813843738935886e-06, "loss": 0.4129, "step": 3087 }, { "epoch": 0.7636003956478734, "grad_norm": 0.7492915045783554, "learning_rate": 4.813720645508946e-06, "loss": 0.424, "step": 3088 }, { "epoch": 0.7638476755687438, "grad_norm": 0.7939923318098281, "learning_rate": 4.8135975129733385e-06, "loss": 0.4203, "step": 3089 }, { "epoch": 0.7640949554896143, "grad_norm": 0.8054567000276287, "learning_rate": 4.813474341331145e-06, "loss": 0.4082, "step": 3090 }, { "epoch": 0.7643422354104846, "grad_norm": 0.7615495246999505, "learning_rate": 4.813351130584448e-06, "loss": 0.4363, "step": 3091 }, { "epoch": 0.7645895153313551, "grad_norm": 0.7487931230093278, "learning_rate": 4.813227880735331e-06, "loss": 0.4208, "step": 3092 }, { "epoch": 0.7648367952522255, "grad_norm": 0.7989302908264841, "learning_rate": 4.8131045917858754e-06, "loss": 0.4568, "step": 3093 }, { "epoch": 0.765084075173096, "grad_norm": 0.8150902813166511, "learning_rate": 4.812981263738165e-06, "loss": 0.4299, "step": 3094 }, { "epoch": 0.7653313550939663, "grad_norm": 0.795813383219662, "learning_rate": 4.8128578965942875e-06, "loss": 0.444, "step": 3095 }, { "epoch": 0.7655786350148368, "grad_norm": 0.768998047077691, "learning_rate": 4.812734490356326e-06, "loss": 0.411, "step": 3096 }, { "epoch": 0.7658259149357072, "grad_norm": 0.7876009500354682, "learning_rate": 4.812611045026365e-06, "loss": 0.4065, "step": 3097 }, { "epoch": 0.7660731948565777, "grad_norm": 0.8228181027521226, "learning_rate": 4.812487560606493e-06, "loss": 0.4257, "step": 3098 }, { "epoch": 0.766320474777448, "grad_norm": 0.7899654098672674, "learning_rate": 4.812364037098798e-06, "loss": 0.4281, "step": 3099 }, { "epoch": 0.7665677546983185, "grad_norm": 0.8263770103922008, "learning_rate": 4.812240474505366e-06, "loss": 0.4172, "step": 3100 }, { "epoch": 0.7668150346191889, "grad_norm": 0.8223565750218941, "learning_rate": 4.812116872828285e-06, "loss": 0.4094, "step": 3101 }, { "epoch": 0.7670623145400594, "grad_norm": 0.7882256242313526, "learning_rate": 4.811993232069647e-06, "loss": 0.4405, "step": 3102 }, { "epoch": 0.7673095944609297, "grad_norm": 0.7673647038001873, "learning_rate": 4.81186955223154e-06, "loss": 0.4043, "step": 3103 }, { "epoch": 0.7675568743818002, "grad_norm": 0.7748167909851514, "learning_rate": 4.811745833316056e-06, "loss": 0.4149, "step": 3104 }, { "epoch": 0.7678041543026706, "grad_norm": 0.7804500490847732, "learning_rate": 4.811622075325284e-06, "loss": 0.4149, "step": 3105 }, { "epoch": 0.768051434223541, "grad_norm": 0.7692586391443403, "learning_rate": 4.811498278261318e-06, "loss": 0.4366, "step": 3106 }, { "epoch": 0.7682987141444114, "grad_norm": 0.7842598306966564, "learning_rate": 4.811374442126248e-06, "loss": 0.4212, "step": 3107 }, { "epoch": 0.7685459940652819, "grad_norm": 0.7708815469931595, "learning_rate": 4.8112505669221695e-06, "loss": 0.4464, "step": 3108 }, { "epoch": 0.7687932739861523, "grad_norm": 0.8253602424248783, "learning_rate": 4.811126652651177e-06, "loss": 0.4068, "step": 3109 }, { "epoch": 0.7690405539070228, "grad_norm": 0.8309488561255076, "learning_rate": 4.811002699315362e-06, "loss": 0.4139, "step": 3110 }, { "epoch": 0.7692878338278932, "grad_norm": 0.8085658208306157, "learning_rate": 4.810878706916823e-06, "loss": 0.4377, "step": 3111 }, { "epoch": 0.7695351137487636, "grad_norm": 0.7741149699878886, "learning_rate": 4.8107546754576525e-06, "loss": 0.4274, "step": 3112 }, { "epoch": 0.7697823936696341, "grad_norm": 0.7663408982316708, "learning_rate": 4.81063060493995e-06, "loss": 0.4153, "step": 3113 }, { "epoch": 0.7700296735905044, "grad_norm": 0.8112801558552337, "learning_rate": 4.81050649536581e-06, "loss": 0.4095, "step": 3114 }, { "epoch": 0.7702769535113749, "grad_norm": 0.775543508476325, "learning_rate": 4.810382346737333e-06, "loss": 0.4495, "step": 3115 }, { "epoch": 0.7705242334322453, "grad_norm": 0.7901145505532202, "learning_rate": 4.8102581590566156e-06, "loss": 0.4322, "step": 3116 }, { "epoch": 0.7707715133531158, "grad_norm": 0.7821623933520957, "learning_rate": 4.810133932325758e-06, "loss": 0.4069, "step": 3117 }, { "epoch": 0.7710187932739861, "grad_norm": 0.8265570200176376, "learning_rate": 4.810009666546858e-06, "loss": 0.402, "step": 3118 }, { "epoch": 0.7712660731948566, "grad_norm": 0.8154505625826904, "learning_rate": 4.8098853617220186e-06, "loss": 0.4606, "step": 3119 }, { "epoch": 0.771513353115727, "grad_norm": 0.8235703743873171, "learning_rate": 4.8097610178533396e-06, "loss": 0.4021, "step": 3120 }, { "epoch": 0.7717606330365975, "grad_norm": 0.7884753518627691, "learning_rate": 4.809636634942923e-06, "loss": 0.4118, "step": 3121 }, { "epoch": 0.7720079129574678, "grad_norm": 0.8084593721515126, "learning_rate": 4.809512212992872e-06, "loss": 0.431, "step": 3122 }, { "epoch": 0.7722551928783383, "grad_norm": 0.779666053910998, "learning_rate": 4.809387752005288e-06, "loss": 0.4187, "step": 3123 }, { "epoch": 0.7725024727992087, "grad_norm": 0.82077016879112, "learning_rate": 4.809263251982276e-06, "loss": 0.4131, "step": 3124 }, { "epoch": 0.7727497527200792, "grad_norm": 0.8032424551504816, "learning_rate": 4.80913871292594e-06, "loss": 0.3919, "step": 3125 }, { "epoch": 0.7729970326409495, "grad_norm": 0.7832784986154248, "learning_rate": 4.8090141348383854e-06, "loss": 0.4286, "step": 3126 }, { "epoch": 0.77324431256182, "grad_norm": 0.8535658999730766, "learning_rate": 4.808889517721718e-06, "loss": 0.4152, "step": 3127 }, { "epoch": 0.7734915924826904, "grad_norm": 0.8068467344043826, "learning_rate": 4.808764861578043e-06, "loss": 0.4368, "step": 3128 }, { "epoch": 0.7737388724035609, "grad_norm": 0.8411797207814421, "learning_rate": 4.808640166409469e-06, "loss": 0.4149, "step": 3129 }, { "epoch": 0.7739861523244312, "grad_norm": 0.797745138292174, "learning_rate": 4.808515432218102e-06, "loss": 0.4046, "step": 3130 }, { "epoch": 0.7742334322453017, "grad_norm": 0.7810552927361032, "learning_rate": 4.808390659006053e-06, "loss": 0.4298, "step": 3131 }, { "epoch": 0.7744807121661721, "grad_norm": 0.7977546221024441, "learning_rate": 4.808265846775429e-06, "loss": 0.4287, "step": 3132 }, { "epoch": 0.7747279920870426, "grad_norm": 0.7827566413934264, "learning_rate": 4.8081409955283405e-06, "loss": 0.4265, "step": 3133 }, { "epoch": 0.7749752720079129, "grad_norm": 0.7900617509765616, "learning_rate": 4.808016105266897e-06, "loss": 0.4154, "step": 3134 }, { "epoch": 0.7752225519287834, "grad_norm": 0.7925920557238082, "learning_rate": 4.80789117599321e-06, "loss": 0.4282, "step": 3135 }, { "epoch": 0.7754698318496538, "grad_norm": 0.8133811417479949, "learning_rate": 4.807766207709392e-06, "loss": 0.4471, "step": 3136 }, { "epoch": 0.7757171117705243, "grad_norm": 0.7828796782998949, "learning_rate": 4.807641200417554e-06, "loss": 0.4132, "step": 3137 }, { "epoch": 0.7759643916913946, "grad_norm": 0.8190662126335538, "learning_rate": 4.807516154119809e-06, "loss": 0.4192, "step": 3138 }, { "epoch": 0.7762116716122651, "grad_norm": 0.7894618621921312, "learning_rate": 4.807391068818272e-06, "loss": 0.4287, "step": 3139 }, { "epoch": 0.7764589515331355, "grad_norm": 0.7990613399601455, "learning_rate": 4.807265944515056e-06, "loss": 0.4185, "step": 3140 }, { "epoch": 0.776706231454006, "grad_norm": 0.7940117259476284, "learning_rate": 4.807140781212277e-06, "loss": 0.4296, "step": 3141 }, { "epoch": 0.7769535113748763, "grad_norm": 0.8090210021535224, "learning_rate": 4.80701557891205e-06, "loss": 0.4166, "step": 3142 }, { "epoch": 0.7772007912957468, "grad_norm": 0.798970265827542, "learning_rate": 4.806890337616491e-06, "loss": 0.4122, "step": 3143 }, { "epoch": 0.7774480712166172, "grad_norm": 0.8108410945027389, "learning_rate": 4.806765057327718e-06, "loss": 0.4216, "step": 3144 }, { "epoch": 0.7776953511374877, "grad_norm": 0.7695340798568441, "learning_rate": 4.806639738047847e-06, "loss": 0.4478, "step": 3145 }, { "epoch": 0.777942631058358, "grad_norm": 0.8425189435774728, "learning_rate": 4.806514379778998e-06, "loss": 0.3915, "step": 3146 }, { "epoch": 0.7781899109792285, "grad_norm": 0.7909871752665145, "learning_rate": 4.806388982523289e-06, "loss": 0.4145, "step": 3147 }, { "epoch": 0.7784371909000989, "grad_norm": 0.8395535395280559, "learning_rate": 4.806263546282839e-06, "loss": 0.4229, "step": 3148 }, { "epoch": 0.7786844708209694, "grad_norm": 0.7946048118578783, "learning_rate": 4.806138071059769e-06, "loss": 0.416, "step": 3149 }, { "epoch": 0.7789317507418397, "grad_norm": 0.7696555887999623, "learning_rate": 4.806012556856201e-06, "loss": 0.3952, "step": 3150 }, { "epoch": 0.7791790306627102, "grad_norm": 0.8071199984857714, "learning_rate": 4.805887003674255e-06, "loss": 0.415, "step": 3151 }, { "epoch": 0.7794263105835806, "grad_norm": 0.7989656420229575, "learning_rate": 4.805761411516054e-06, "loss": 0.403, "step": 3152 }, { "epoch": 0.7796735905044511, "grad_norm": 0.8035327928385132, "learning_rate": 4.805635780383719e-06, "loss": 0.387, "step": 3153 }, { "epoch": 0.7799208704253214, "grad_norm": 0.8139828219701931, "learning_rate": 4.805510110279376e-06, "loss": 0.4157, "step": 3154 }, { "epoch": 0.7801681503461919, "grad_norm": 0.8595116770693756, "learning_rate": 4.805384401205147e-06, "loss": 0.42, "step": 3155 }, { "epoch": 0.7804154302670623, "grad_norm": 0.7638842723959627, "learning_rate": 4.80525865316316e-06, "loss": 0.4054, "step": 3156 }, { "epoch": 0.7806627101879328, "grad_norm": 0.7788059644498618, "learning_rate": 4.805132866155538e-06, "loss": 0.4065, "step": 3157 }, { "epoch": 0.7809099901088031, "grad_norm": 0.7917946328097877, "learning_rate": 4.805007040184407e-06, "loss": 0.4034, "step": 3158 }, { "epoch": 0.7811572700296736, "grad_norm": 0.794606043110883, "learning_rate": 4.804881175251895e-06, "loss": 0.4459, "step": 3159 }, { "epoch": 0.781404549950544, "grad_norm": 0.7855478903283287, "learning_rate": 4.804755271360129e-06, "loss": 0.4043, "step": 3160 }, { "epoch": 0.7816518298714145, "grad_norm": 0.8003629127364914, "learning_rate": 4.804629328511238e-06, "loss": 0.4148, "step": 3161 }, { "epoch": 0.7818991097922848, "grad_norm": 0.7983155545329887, "learning_rate": 4.804503346707349e-06, "loss": 0.4337, "step": 3162 }, { "epoch": 0.7821463897131553, "grad_norm": 0.8021511440132186, "learning_rate": 4.804377325950593e-06, "loss": 0.409, "step": 3163 }, { "epoch": 0.7823936696340257, "grad_norm": 0.760133073100139, "learning_rate": 4.804251266243099e-06, "loss": 0.423, "step": 3164 }, { "epoch": 0.7826409495548962, "grad_norm": 0.7848135629022405, "learning_rate": 4.8041251675869996e-06, "loss": 0.4158, "step": 3165 }, { "epoch": 0.7828882294757665, "grad_norm": 0.7925730082390904, "learning_rate": 4.803999029984423e-06, "loss": 0.4211, "step": 3166 }, { "epoch": 0.783135509396637, "grad_norm": 0.7764546739610668, "learning_rate": 4.803872853437506e-06, "loss": 0.4444, "step": 3167 }, { "epoch": 0.7833827893175074, "grad_norm": 0.8499268393363937, "learning_rate": 4.803746637948377e-06, "loss": 0.4121, "step": 3168 }, { "epoch": 0.7836300692383779, "grad_norm": 0.796267303570014, "learning_rate": 4.803620383519171e-06, "loss": 0.4579, "step": 3169 }, { "epoch": 0.7838773491592482, "grad_norm": 0.8586483960136988, "learning_rate": 4.803494090152022e-06, "loss": 0.4167, "step": 3170 }, { "epoch": 0.7841246290801187, "grad_norm": 0.7846703688927068, "learning_rate": 4.803367757849065e-06, "loss": 0.4256, "step": 3171 }, { "epoch": 0.7843719090009891, "grad_norm": 0.8152792507339273, "learning_rate": 4.803241386612436e-06, "loss": 0.3683, "step": 3172 }, { "epoch": 0.7846191889218596, "grad_norm": 0.7998726293610626, "learning_rate": 4.8031149764442695e-06, "loss": 0.4073, "step": 3173 }, { "epoch": 0.7848664688427299, "grad_norm": 0.8425199760576926, "learning_rate": 4.802988527346703e-06, "loss": 0.4096, "step": 3174 }, { "epoch": 0.7851137487636004, "grad_norm": 0.8087625745415404, "learning_rate": 4.802862039321875e-06, "loss": 0.4172, "step": 3175 }, { "epoch": 0.7853610286844708, "grad_norm": 0.8103640303916235, "learning_rate": 4.802735512371922e-06, "loss": 0.4016, "step": 3176 }, { "epoch": 0.7856083086053413, "grad_norm": 0.7885395510106586, "learning_rate": 4.8026089464989825e-06, "loss": 0.4076, "step": 3177 }, { "epoch": 0.7858555885262116, "grad_norm": 0.8642951165136358, "learning_rate": 4.802482341705197e-06, "loss": 0.4139, "step": 3178 }, { "epoch": 0.7861028684470821, "grad_norm": 0.7819223306835482, "learning_rate": 4.8023556979927045e-06, "loss": 0.4341, "step": 3179 }, { "epoch": 0.7863501483679525, "grad_norm": 0.8193735193209689, "learning_rate": 4.802229015363646e-06, "loss": 0.4271, "step": 3180 }, { "epoch": 0.786597428288823, "grad_norm": 0.7692047832916445, "learning_rate": 4.802102293820162e-06, "loss": 0.4266, "step": 3181 }, { "epoch": 0.7868447082096933, "grad_norm": 0.7580998926928701, "learning_rate": 4.801975533364397e-06, "loss": 0.4232, "step": 3182 }, { "epoch": 0.7870919881305638, "grad_norm": 0.7952151355924447, "learning_rate": 4.801848733998491e-06, "loss": 0.4287, "step": 3183 }, { "epoch": 0.7873392680514342, "grad_norm": 0.768566684447632, "learning_rate": 4.801721895724588e-06, "loss": 0.4352, "step": 3184 }, { "epoch": 0.7875865479723047, "grad_norm": 0.8392183142924099, "learning_rate": 4.801595018544834e-06, "loss": 0.4161, "step": 3185 }, { "epoch": 0.787833827893175, "grad_norm": 0.8229971702026877, "learning_rate": 4.80146810246137e-06, "loss": 0.4035, "step": 3186 }, { "epoch": 0.7880811078140455, "grad_norm": 0.7669879130914552, "learning_rate": 4.801341147476343e-06, "loss": 0.3952, "step": 3187 }, { "epoch": 0.7883283877349159, "grad_norm": 0.7891804474395242, "learning_rate": 4.801214153591899e-06, "loss": 0.3975, "step": 3188 }, { "epoch": 0.7885756676557863, "grad_norm": 0.7986833755586805, "learning_rate": 4.801087120810185e-06, "loss": 0.413, "step": 3189 }, { "epoch": 0.7888229475766568, "grad_norm": 0.7952880723142738, "learning_rate": 4.800960049133347e-06, "loss": 0.4372, "step": 3190 }, { "epoch": 0.7890702274975272, "grad_norm": 0.7757455316227299, "learning_rate": 4.800832938563534e-06, "loss": 0.4249, "step": 3191 }, { "epoch": 0.7893175074183977, "grad_norm": 0.7949907790570365, "learning_rate": 4.800705789102894e-06, "loss": 0.431, "step": 3192 }, { "epoch": 0.789564787339268, "grad_norm": 0.7868569621757431, "learning_rate": 4.800578600753577e-06, "loss": 0.424, "step": 3193 }, { "epoch": 0.7898120672601385, "grad_norm": 0.7864424676820765, "learning_rate": 4.800451373517732e-06, "loss": 0.3849, "step": 3194 }, { "epoch": 0.7900593471810089, "grad_norm": 0.784595550816719, "learning_rate": 4.800324107397509e-06, "loss": 0.4098, "step": 3195 }, { "epoch": 0.7903066271018794, "grad_norm": 0.830043984168889, "learning_rate": 4.800196802395061e-06, "loss": 0.3888, "step": 3196 }, { "epoch": 0.7905539070227497, "grad_norm": 0.790118097460834, "learning_rate": 4.800069458512538e-06, "loss": 0.4102, "step": 3197 }, { "epoch": 0.7908011869436202, "grad_norm": 0.8227085580927859, "learning_rate": 4.799942075752093e-06, "loss": 0.4375, "step": 3198 }, { "epoch": 0.7910484668644906, "grad_norm": 0.7982196753808344, "learning_rate": 4.799814654115879e-06, "loss": 0.4529, "step": 3199 }, { "epoch": 0.7912957467853611, "grad_norm": 0.7866905634175901, "learning_rate": 4.799687193606052e-06, "loss": 0.4251, "step": 3200 }, { "epoch": 0.7915430267062314, "grad_norm": 0.7685080791035871, "learning_rate": 4.799559694224763e-06, "loss": 0.4215, "step": 3201 }, { "epoch": 0.7917903066271019, "grad_norm": 0.7599904496578714, "learning_rate": 4.799432155974168e-06, "loss": 0.4219, "step": 3202 }, { "epoch": 0.7920375865479723, "grad_norm": 0.8263878111249411, "learning_rate": 4.799304578856425e-06, "loss": 0.4235, "step": 3203 }, { "epoch": 0.7922848664688428, "grad_norm": 0.7741646735638096, "learning_rate": 4.799176962873689e-06, "loss": 0.4094, "step": 3204 }, { "epoch": 0.7925321463897131, "grad_norm": 0.7842075482726589, "learning_rate": 4.799049308028116e-06, "loss": 0.4265, "step": 3205 }, { "epoch": 0.7927794263105836, "grad_norm": 0.8011953412570326, "learning_rate": 4.7989216143218655e-06, "loss": 0.4101, "step": 3206 }, { "epoch": 0.793026706231454, "grad_norm": 0.8305619346107188, "learning_rate": 4.798793881757095e-06, "loss": 0.4166, "step": 3207 }, { "epoch": 0.7932739861523245, "grad_norm": 0.8065231582621645, "learning_rate": 4.798666110335963e-06, "loss": 0.4233, "step": 3208 }, { "epoch": 0.7935212660731948, "grad_norm": 0.7874595373516532, "learning_rate": 4.798538300060631e-06, "loss": 0.4178, "step": 3209 }, { "epoch": 0.7937685459940653, "grad_norm": 0.7907591698271821, "learning_rate": 4.798410450933258e-06, "loss": 0.4226, "step": 3210 }, { "epoch": 0.7940158259149357, "grad_norm": 0.7625097668467142, "learning_rate": 4.798282562956005e-06, "loss": 0.415, "step": 3211 }, { "epoch": 0.7942631058358062, "grad_norm": 0.7561667754981604, "learning_rate": 4.798154636131033e-06, "loss": 0.4355, "step": 3212 }, { "epoch": 0.7945103857566765, "grad_norm": 0.7809583776002582, "learning_rate": 4.7980266704605064e-06, "loss": 0.4113, "step": 3213 }, { "epoch": 0.794757665677547, "grad_norm": 0.784211514845161, "learning_rate": 4.797898665946587e-06, "loss": 0.4288, "step": 3214 }, { "epoch": 0.7950049455984174, "grad_norm": 0.7780178015749081, "learning_rate": 4.797770622591439e-06, "loss": 0.4187, "step": 3215 }, { "epoch": 0.7952522255192879, "grad_norm": 0.8068492708723259, "learning_rate": 4.797642540397226e-06, "loss": 0.4221, "step": 3216 }, { "epoch": 0.7954995054401582, "grad_norm": 0.8164724169733689, "learning_rate": 4.797514419366112e-06, "loss": 0.4121, "step": 3217 }, { "epoch": 0.7957467853610287, "grad_norm": 0.7890501236318611, "learning_rate": 4.7973862595002655e-06, "loss": 0.4102, "step": 3218 }, { "epoch": 0.7959940652818991, "grad_norm": 0.8065399895842733, "learning_rate": 4.79725806080185e-06, "loss": 0.4579, "step": 3219 }, { "epoch": 0.7962413452027696, "grad_norm": 0.7813246830312838, "learning_rate": 4.797129823273035e-06, "loss": 0.4026, "step": 3220 }, { "epoch": 0.7964886251236399, "grad_norm": 0.7719196802952721, "learning_rate": 4.797001546915985e-06, "loss": 0.4604, "step": 3221 }, { "epoch": 0.7967359050445104, "grad_norm": 0.7951454051368005, "learning_rate": 4.796873231732871e-06, "loss": 0.4071, "step": 3222 }, { "epoch": 0.7969831849653808, "grad_norm": 0.8307970839902238, "learning_rate": 4.796744877725861e-06, "loss": 0.4101, "step": 3223 }, { "epoch": 0.7972304648862513, "grad_norm": 0.7701120556585122, "learning_rate": 4.796616484897123e-06, "loss": 0.4492, "step": 3224 }, { "epoch": 0.7974777448071216, "grad_norm": 0.7773449035476312, "learning_rate": 4.79648805324883e-06, "loss": 0.378, "step": 3225 }, { "epoch": 0.7977250247279921, "grad_norm": 0.8036758045319646, "learning_rate": 4.796359582783151e-06, "loss": 0.4119, "step": 3226 }, { "epoch": 0.7979723046488625, "grad_norm": 0.8288205702589608, "learning_rate": 4.796231073502258e-06, "loss": 0.4325, "step": 3227 }, { "epoch": 0.798219584569733, "grad_norm": 0.8388175569444808, "learning_rate": 4.796102525408323e-06, "loss": 0.3907, "step": 3228 }, { "epoch": 0.7984668644906033, "grad_norm": 0.7803655660453329, "learning_rate": 4.795973938503518e-06, "loss": 0.3959, "step": 3229 }, { "epoch": 0.7987141444114738, "grad_norm": 0.7853139273322179, "learning_rate": 4.79584531279002e-06, "loss": 0.3897, "step": 3230 }, { "epoch": 0.7989614243323442, "grad_norm": 0.7871116216692523, "learning_rate": 4.7957166482699985e-06, "loss": 0.4291, "step": 3231 }, { "epoch": 0.7992087042532147, "grad_norm": 0.7671489638405405, "learning_rate": 4.795587944945631e-06, "loss": 0.4324, "step": 3232 }, { "epoch": 0.799455984174085, "grad_norm": 0.8063052507791727, "learning_rate": 4.795459202819093e-06, "loss": 0.3814, "step": 3233 }, { "epoch": 0.7997032640949555, "grad_norm": 0.7942943583781706, "learning_rate": 4.795330421892559e-06, "loss": 0.4134, "step": 3234 }, { "epoch": 0.7999505440158259, "grad_norm": 0.8014136850289755, "learning_rate": 4.795201602168208e-06, "loss": 0.4275, "step": 3235 }, { "epoch": 0.8001978239366964, "grad_norm": 0.793107629545374, "learning_rate": 4.795072743648216e-06, "loss": 0.4141, "step": 3236 }, { "epoch": 0.8004451038575667, "grad_norm": 0.8333140544980959, "learning_rate": 4.794943846334761e-06, "loss": 0.4016, "step": 3237 }, { "epoch": 0.8006923837784372, "grad_norm": 0.7937306545181533, "learning_rate": 4.7948149102300214e-06, "loss": 0.3894, "step": 3238 }, { "epoch": 0.8009396636993076, "grad_norm": 0.7752640899581088, "learning_rate": 4.794685935336178e-06, "loss": 0.4351, "step": 3239 }, { "epoch": 0.8011869436201781, "grad_norm": 0.792963683311261, "learning_rate": 4.79455692165541e-06, "loss": 0.4454, "step": 3240 }, { "epoch": 0.8014342235410484, "grad_norm": 0.8299219831294848, "learning_rate": 4.794427869189898e-06, "loss": 0.3952, "step": 3241 }, { "epoch": 0.8016815034619189, "grad_norm": 0.8157943140762436, "learning_rate": 4.7942987779418245e-06, "loss": 0.4332, "step": 3242 }, { "epoch": 0.8019287833827893, "grad_norm": 0.7986078907851436, "learning_rate": 4.79416964791337e-06, "loss": 0.4322, "step": 3243 }, { "epoch": 0.8021760633036598, "grad_norm": 0.786261870460221, "learning_rate": 4.794040479106718e-06, "loss": 0.4235, "step": 3244 }, { "epoch": 0.8024233432245301, "grad_norm": 0.8283195848120665, "learning_rate": 4.7939112715240515e-06, "loss": 0.4319, "step": 3245 }, { "epoch": 0.8026706231454006, "grad_norm": 0.8287242798951121, "learning_rate": 4.793782025167555e-06, "loss": 0.4509, "step": 3246 }, { "epoch": 0.802917903066271, "grad_norm": 0.7817295347972962, "learning_rate": 4.793652740039412e-06, "loss": 0.405, "step": 3247 }, { "epoch": 0.8031651829871415, "grad_norm": 0.7838023242726265, "learning_rate": 4.79352341614181e-06, "loss": 0.4224, "step": 3248 }, { "epoch": 0.8034124629080118, "grad_norm": 0.7761306457370105, "learning_rate": 4.793394053476932e-06, "loss": 0.4359, "step": 3249 }, { "epoch": 0.8036597428288823, "grad_norm": 0.8218425977051614, "learning_rate": 4.793264652046967e-06, "loss": 0.4233, "step": 3250 }, { "epoch": 0.8039070227497527, "grad_norm": 0.7921841515228916, "learning_rate": 4.7931352118541e-06, "loss": 0.4177, "step": 3251 }, { "epoch": 0.8041543026706232, "grad_norm": 0.8773579262315243, "learning_rate": 4.793005732900522e-06, "loss": 0.3918, "step": 3252 }, { "epoch": 0.8044015825914935, "grad_norm": 0.829400566392459, "learning_rate": 4.792876215188419e-06, "loss": 0.4072, "step": 3253 }, { "epoch": 0.804648862512364, "grad_norm": 0.7823791901433861, "learning_rate": 4.792746658719982e-06, "loss": 0.4101, "step": 3254 }, { "epoch": 0.8048961424332344, "grad_norm": 0.7881281107043909, "learning_rate": 4.792617063497399e-06, "loss": 0.4145, "step": 3255 }, { "epoch": 0.8051434223541049, "grad_norm": 0.8476540235058267, "learning_rate": 4.792487429522862e-06, "loss": 0.4036, "step": 3256 }, { "epoch": 0.8053907022749752, "grad_norm": 0.8152889870566801, "learning_rate": 4.792357756798561e-06, "loss": 0.418, "step": 3257 }, { "epoch": 0.8056379821958457, "grad_norm": 0.8076974577039581, "learning_rate": 4.79222804532669e-06, "loss": 0.4021, "step": 3258 }, { "epoch": 0.8058852621167161, "grad_norm": 0.8058805425863745, "learning_rate": 4.792098295109439e-06, "loss": 0.3917, "step": 3259 }, { "epoch": 0.8061325420375866, "grad_norm": 0.8234389705832926, "learning_rate": 4.791968506149003e-06, "loss": 0.4173, "step": 3260 }, { "epoch": 0.8063798219584569, "grad_norm": 0.7886416606971659, "learning_rate": 4.791838678447574e-06, "loss": 0.405, "step": 3261 }, { "epoch": 0.8066271018793274, "grad_norm": 0.779637253269339, "learning_rate": 4.7917088120073484e-06, "loss": 0.4245, "step": 3262 }, { "epoch": 0.8068743818001978, "grad_norm": 0.8095012054220153, "learning_rate": 4.79157890683052e-06, "loss": 0.4503, "step": 3263 }, { "epoch": 0.8071216617210683, "grad_norm": 0.8194829000253359, "learning_rate": 4.791448962919285e-06, "loss": 0.4007, "step": 3264 }, { "epoch": 0.8073689416419386, "grad_norm": 0.8104089346079402, "learning_rate": 4.7913189802758405e-06, "loss": 0.4139, "step": 3265 }, { "epoch": 0.8076162215628091, "grad_norm": 0.8149788810165006, "learning_rate": 4.791188958902382e-06, "loss": 0.4001, "step": 3266 }, { "epoch": 0.8078635014836796, "grad_norm": 0.7880461660692727, "learning_rate": 4.791058898801109e-06, "loss": 0.4242, "step": 3267 }, { "epoch": 0.80811078140455, "grad_norm": 0.8440145345829424, "learning_rate": 4.790928799974219e-06, "loss": 0.4047, "step": 3268 }, { "epoch": 0.8083580613254204, "grad_norm": 0.8081825088885769, "learning_rate": 4.790798662423911e-06, "loss": 0.4055, "step": 3269 }, { "epoch": 0.8086053412462908, "grad_norm": 0.7968073833564706, "learning_rate": 4.790668486152385e-06, "loss": 0.4071, "step": 3270 }, { "epoch": 0.8088526211671613, "grad_norm": 0.7841070913783371, "learning_rate": 4.790538271161841e-06, "loss": 0.4171, "step": 3271 }, { "epoch": 0.8090999010880316, "grad_norm": 0.8286577863722515, "learning_rate": 4.79040801745448e-06, "loss": 0.3833, "step": 3272 }, { "epoch": 0.8093471810089021, "grad_norm": 0.7644216257502693, "learning_rate": 4.790277725032504e-06, "loss": 0.4089, "step": 3273 }, { "epoch": 0.8095944609297725, "grad_norm": 0.8298216917114288, "learning_rate": 4.790147393898116e-06, "loss": 0.4184, "step": 3274 }, { "epoch": 0.809841740850643, "grad_norm": 0.8373160134004612, "learning_rate": 4.790017024053517e-06, "loss": 0.4097, "step": 3275 }, { "epoch": 0.8100890207715133, "grad_norm": 0.8003961462949286, "learning_rate": 4.789886615500912e-06, "loss": 0.4208, "step": 3276 }, { "epoch": 0.8103363006923838, "grad_norm": 0.8084367466040833, "learning_rate": 4.789756168242506e-06, "loss": 0.4008, "step": 3277 }, { "epoch": 0.8105835806132542, "grad_norm": 0.8328646094691385, "learning_rate": 4.789625682280503e-06, "loss": 0.3932, "step": 3278 }, { "epoch": 0.8108308605341247, "grad_norm": 0.817103508185311, "learning_rate": 4.789495157617108e-06, "loss": 0.4241, "step": 3279 }, { "epoch": 0.811078140454995, "grad_norm": 0.7878968437266687, "learning_rate": 4.789364594254529e-06, "loss": 0.4245, "step": 3280 }, { "epoch": 0.8113254203758655, "grad_norm": 0.7723691927519781, "learning_rate": 4.78923399219497e-06, "loss": 0.3915, "step": 3281 }, { "epoch": 0.8115727002967359, "grad_norm": 0.8168370329750326, "learning_rate": 4.789103351440641e-06, "loss": 0.3639, "step": 3282 }, { "epoch": 0.8118199802176064, "grad_norm": 0.7968216032656887, "learning_rate": 4.788972671993751e-06, "loss": 0.3938, "step": 3283 }, { "epoch": 0.8120672601384767, "grad_norm": 0.8185040720148407, "learning_rate": 4.788841953856506e-06, "loss": 0.4247, "step": 3284 }, { "epoch": 0.8123145400593472, "grad_norm": 0.7781045150914397, "learning_rate": 4.788711197031118e-06, "loss": 0.4036, "step": 3285 }, { "epoch": 0.8125618199802176, "grad_norm": 0.7785731429716326, "learning_rate": 4.788580401519794e-06, "loss": 0.3986, "step": 3286 }, { "epoch": 0.8128090999010881, "grad_norm": 0.8212032505997505, "learning_rate": 4.7884495673247496e-06, "loss": 0.4266, "step": 3287 }, { "epoch": 0.8130563798219584, "grad_norm": 0.7484700477722402, "learning_rate": 4.788318694448192e-06, "loss": 0.4596, "step": 3288 }, { "epoch": 0.8133036597428289, "grad_norm": 0.8209584736189361, "learning_rate": 4.788187782892336e-06, "loss": 0.3874, "step": 3289 }, { "epoch": 0.8135509396636993, "grad_norm": 0.7842763948585224, "learning_rate": 4.788056832659392e-06, "loss": 0.4281, "step": 3290 }, { "epoch": 0.8137982195845698, "grad_norm": 0.809014432481598, "learning_rate": 4.787925843751576e-06, "loss": 0.4021, "step": 3291 }, { "epoch": 0.8140454995054401, "grad_norm": 0.8093191327442567, "learning_rate": 4.787794816171101e-06, "loss": 0.4046, "step": 3292 }, { "epoch": 0.8142927794263106, "grad_norm": 0.7877046692252317, "learning_rate": 4.7876637499201815e-06, "loss": 0.4486, "step": 3293 }, { "epoch": 0.814540059347181, "grad_norm": 0.798840334202066, "learning_rate": 4.787532645001033e-06, "loss": 0.4032, "step": 3294 }, { "epoch": 0.8147873392680515, "grad_norm": 0.8255728729719902, "learning_rate": 4.787401501415871e-06, "loss": 0.452, "step": 3295 }, { "epoch": 0.8150346191889218, "grad_norm": 0.797243236400735, "learning_rate": 4.787270319166913e-06, "loss": 0.4275, "step": 3296 }, { "epoch": 0.8152818991097923, "grad_norm": 0.8349074546118905, "learning_rate": 4.787139098256377e-06, "loss": 0.3907, "step": 3297 }, { "epoch": 0.8155291790306627, "grad_norm": 0.8110622701045463, "learning_rate": 4.7870078386864795e-06, "loss": 0.4357, "step": 3298 }, { "epoch": 0.8157764589515332, "grad_norm": 0.7921550904844632, "learning_rate": 4.78687654045944e-06, "loss": 0.4277, "step": 3299 }, { "epoch": 0.8160237388724035, "grad_norm": 0.7754244489726932, "learning_rate": 4.7867452035774774e-06, "loss": 0.412, "step": 3300 }, { "epoch": 0.816271018793274, "grad_norm": 0.791617530083558, "learning_rate": 4.786613828042813e-06, "loss": 0.4174, "step": 3301 }, { "epoch": 0.8165182987141444, "grad_norm": 0.7944566773450151, "learning_rate": 4.786482413857666e-06, "loss": 0.4104, "step": 3302 }, { "epoch": 0.8167655786350149, "grad_norm": 0.8172864231650154, "learning_rate": 4.786350961024257e-06, "loss": 0.4232, "step": 3303 }, { "epoch": 0.8170128585558852, "grad_norm": 0.844980815358175, "learning_rate": 4.78621946954481e-06, "loss": 0.3849, "step": 3304 }, { "epoch": 0.8172601384767557, "grad_norm": 0.7757666993402713, "learning_rate": 4.786087939421547e-06, "loss": 0.4008, "step": 3305 }, { "epoch": 0.8175074183976261, "grad_norm": 0.794111476583098, "learning_rate": 4.7859563706566914e-06, "loss": 0.3986, "step": 3306 }, { "epoch": 0.8177546983184966, "grad_norm": 0.7773637704984339, "learning_rate": 4.785824763252466e-06, "loss": 0.3883, "step": 3307 }, { "epoch": 0.8180019782393669, "grad_norm": 0.798335262514005, "learning_rate": 4.785693117211095e-06, "loss": 0.4097, "step": 3308 }, { "epoch": 0.8182492581602374, "grad_norm": 0.8023022414258852, "learning_rate": 4.785561432534806e-06, "loss": 0.3993, "step": 3309 }, { "epoch": 0.8184965380811078, "grad_norm": 0.8301219030490221, "learning_rate": 4.7854297092258216e-06, "loss": 0.3828, "step": 3310 }, { "epoch": 0.8187438180019783, "grad_norm": 0.8153453055334471, "learning_rate": 4.785297947286372e-06, "loss": 0.4305, "step": 3311 }, { "epoch": 0.8189910979228486, "grad_norm": 0.8111562590093313, "learning_rate": 4.785166146718681e-06, "loss": 0.4078, "step": 3312 }, { "epoch": 0.8192383778437191, "grad_norm": 0.7826670595098245, "learning_rate": 4.785034307524979e-06, "loss": 0.4281, "step": 3313 }, { "epoch": 0.8194856577645895, "grad_norm": 0.7793094315853807, "learning_rate": 4.784902429707493e-06, "loss": 0.4373, "step": 3314 }, { "epoch": 0.81973293768546, "grad_norm": 0.7994195562682976, "learning_rate": 4.784770513268452e-06, "loss": 0.4028, "step": 3315 }, { "epoch": 0.8199802176063303, "grad_norm": 0.777986973844296, "learning_rate": 4.784638558210086e-06, "loss": 0.4135, "step": 3316 }, { "epoch": 0.8202274975272008, "grad_norm": 0.8011858291824131, "learning_rate": 4.784506564534627e-06, "loss": 0.4556, "step": 3317 }, { "epoch": 0.8204747774480712, "grad_norm": 0.7695970936651433, "learning_rate": 4.784374532244304e-06, "loss": 0.4264, "step": 3318 }, { "epoch": 0.8207220573689417, "grad_norm": 0.7854179279542661, "learning_rate": 4.78424246134135e-06, "loss": 0.3941, "step": 3319 }, { "epoch": 0.820969337289812, "grad_norm": 0.8145456782664094, "learning_rate": 4.784110351827996e-06, "loss": 0.4303, "step": 3320 }, { "epoch": 0.8212166172106825, "grad_norm": 0.7778866148610745, "learning_rate": 4.783978203706476e-06, "loss": 0.3849, "step": 3321 }, { "epoch": 0.8214638971315529, "grad_norm": 0.8002029621999994, "learning_rate": 4.783846016979024e-06, "loss": 0.3992, "step": 3322 }, { "epoch": 0.8217111770524234, "grad_norm": 0.8349311151236508, "learning_rate": 4.7837137916478745e-06, "loss": 0.3903, "step": 3323 }, { "epoch": 0.8219584569732937, "grad_norm": 0.7790180389747006, "learning_rate": 4.783581527715261e-06, "loss": 0.3822, "step": 3324 }, { "epoch": 0.8222057368941642, "grad_norm": 0.7629026266310897, "learning_rate": 4.783449225183421e-06, "loss": 0.4195, "step": 3325 }, { "epoch": 0.8224530168150346, "grad_norm": 0.7649237556248503, "learning_rate": 4.783316884054589e-06, "loss": 0.4185, "step": 3326 }, { "epoch": 0.8227002967359051, "grad_norm": 0.7809836127526398, "learning_rate": 4.7831845043310034e-06, "loss": 0.4325, "step": 3327 }, { "epoch": 0.8229475766567754, "grad_norm": 0.771282433024842, "learning_rate": 4.783052086014901e-06, "loss": 0.4005, "step": 3328 }, { "epoch": 0.8231948565776459, "grad_norm": 0.7837387184470643, "learning_rate": 4.7829196291085205e-06, "loss": 0.4038, "step": 3329 }, { "epoch": 0.8234421364985163, "grad_norm": 0.7763923077993041, "learning_rate": 4.7827871336141006e-06, "loss": 0.4172, "step": 3330 }, { "epoch": 0.8236894164193868, "grad_norm": 0.7888187786186903, "learning_rate": 4.782654599533881e-06, "loss": 0.4086, "step": 3331 }, { "epoch": 0.8239366963402571, "grad_norm": 0.7964113323354067, "learning_rate": 4.7825220268701015e-06, "loss": 0.3942, "step": 3332 }, { "epoch": 0.8241839762611276, "grad_norm": 0.7949089727526033, "learning_rate": 4.782389415625003e-06, "loss": 0.427, "step": 3333 }, { "epoch": 0.824431256181998, "grad_norm": 0.7898222003899379, "learning_rate": 4.782256765800828e-06, "loss": 0.4198, "step": 3334 }, { "epoch": 0.8246785361028685, "grad_norm": 0.7713468303220107, "learning_rate": 4.782124077399818e-06, "loss": 0.4082, "step": 3335 }, { "epoch": 0.8249258160237388, "grad_norm": 0.760638496140234, "learning_rate": 4.7819913504242156e-06, "loss": 0.4271, "step": 3336 }, { "epoch": 0.8251730959446093, "grad_norm": 0.7949387100448536, "learning_rate": 4.7818585848762645e-06, "loss": 0.4114, "step": 3337 }, { "epoch": 0.8254203758654797, "grad_norm": 0.7580505449504422, "learning_rate": 4.781725780758208e-06, "loss": 0.4131, "step": 3338 }, { "epoch": 0.8256676557863502, "grad_norm": 0.8045167857797156, "learning_rate": 4.781592938072292e-06, "loss": 0.4223, "step": 3339 }, { "epoch": 0.8259149357072205, "grad_norm": 0.8159002530056874, "learning_rate": 4.781460056820763e-06, "loss": 0.4332, "step": 3340 }, { "epoch": 0.826162215628091, "grad_norm": 0.7873685673757427, "learning_rate": 4.781327137005865e-06, "loss": 0.4053, "step": 3341 }, { "epoch": 0.8264094955489614, "grad_norm": 0.8123786085064364, "learning_rate": 4.781194178629844e-06, "loss": 0.417, "step": 3342 }, { "epoch": 0.8266567754698319, "grad_norm": 0.819766015202432, "learning_rate": 4.781061181694949e-06, "loss": 0.4162, "step": 3343 }, { "epoch": 0.8269040553907022, "grad_norm": 0.7979483243492991, "learning_rate": 4.78092814620343e-06, "loss": 0.4317, "step": 3344 }, { "epoch": 0.8271513353115727, "grad_norm": 0.7688666283808752, "learning_rate": 4.780795072157532e-06, "loss": 0.43, "step": 3345 }, { "epoch": 0.8273986152324432, "grad_norm": 0.7990950578586927, "learning_rate": 4.780661959559506e-06, "loss": 0.372, "step": 3346 }, { "epoch": 0.8276458951533135, "grad_norm": 0.7721501590793813, "learning_rate": 4.780528808411602e-06, "loss": 0.4185, "step": 3347 }, { "epoch": 0.827893175074184, "grad_norm": 0.7854619913809469, "learning_rate": 4.780395618716071e-06, "loss": 0.3988, "step": 3348 }, { "epoch": 0.8281404549950544, "grad_norm": 0.8290833887661643, "learning_rate": 4.7802623904751626e-06, "loss": 0.4358, "step": 3349 }, { "epoch": 0.8283877349159249, "grad_norm": 0.8158026726230498, "learning_rate": 4.780129123691131e-06, "loss": 0.3959, "step": 3350 }, { "epoch": 0.8286350148367952, "grad_norm": 0.8003195625474808, "learning_rate": 4.779995818366227e-06, "loss": 0.4162, "step": 3351 }, { "epoch": 0.8288822947576657, "grad_norm": 0.7900036436217165, "learning_rate": 4.779862474502705e-06, "loss": 0.4293, "step": 3352 }, { "epoch": 0.8291295746785361, "grad_norm": 0.8004447888167544, "learning_rate": 4.779729092102818e-06, "loss": 0.4007, "step": 3353 }, { "epoch": 0.8293768545994066, "grad_norm": 0.7964423636908463, "learning_rate": 4.779595671168822e-06, "loss": 0.3976, "step": 3354 }, { "epoch": 0.829624134520277, "grad_norm": 0.8067022591755529, "learning_rate": 4.779462211702971e-06, "loss": 0.4154, "step": 3355 }, { "epoch": 0.8298714144411474, "grad_norm": 0.8050964956297638, "learning_rate": 4.77932871370752e-06, "loss": 0.417, "step": 3356 }, { "epoch": 0.8301186943620178, "grad_norm": 0.7859285185827618, "learning_rate": 4.779195177184728e-06, "loss": 0.4388, "step": 3357 }, { "epoch": 0.8303659742828883, "grad_norm": 0.8209308533484606, "learning_rate": 4.779061602136851e-06, "loss": 0.4343, "step": 3358 }, { "epoch": 0.8306132542037586, "grad_norm": 0.7901126945376796, "learning_rate": 4.778927988566146e-06, "loss": 0.4181, "step": 3359 }, { "epoch": 0.8308605341246291, "grad_norm": 0.8069670725815871, "learning_rate": 4.778794336474873e-06, "loss": 0.3819, "step": 3360 }, { "epoch": 0.8311078140454995, "grad_norm": 0.7769685258141426, "learning_rate": 4.778660645865288e-06, "loss": 0.4084, "step": 3361 }, { "epoch": 0.83135509396637, "grad_norm": 0.8246160503291277, "learning_rate": 4.7785269167396545e-06, "loss": 0.3947, "step": 3362 }, { "epoch": 0.8316023738872403, "grad_norm": 0.8136590431488328, "learning_rate": 4.778393149100231e-06, "loss": 0.4101, "step": 3363 }, { "epoch": 0.8318496538081108, "grad_norm": 0.7911097863165703, "learning_rate": 4.778259342949279e-06, "loss": 0.4493, "step": 3364 }, { "epoch": 0.8320969337289812, "grad_norm": 0.812883523661831, "learning_rate": 4.77812549828906e-06, "loss": 0.3993, "step": 3365 }, { "epoch": 0.8323442136498517, "grad_norm": 0.8022234638234113, "learning_rate": 4.777991615121837e-06, "loss": 0.4131, "step": 3366 }, { "epoch": 0.832591493570722, "grad_norm": 0.7795760679299366, "learning_rate": 4.777857693449871e-06, "loss": 0.4185, "step": 3367 }, { "epoch": 0.8328387734915925, "grad_norm": 0.7837567396640641, "learning_rate": 4.777723733275429e-06, "loss": 0.4314, "step": 3368 }, { "epoch": 0.8330860534124629, "grad_norm": 0.7892606037338461, "learning_rate": 4.7775897346007726e-06, "loss": 0.4224, "step": 3369 }, { "epoch": 0.8333333333333334, "grad_norm": 0.7831937189039634, "learning_rate": 4.7774556974281685e-06, "loss": 0.4399, "step": 3370 }, { "epoch": 0.8335806132542037, "grad_norm": 0.8124136973914873, "learning_rate": 4.77732162175988e-06, "loss": 0.4537, "step": 3371 }, { "epoch": 0.8338278931750742, "grad_norm": 0.7723322289863942, "learning_rate": 4.777187507598177e-06, "loss": 0.388, "step": 3372 }, { "epoch": 0.8340751730959446, "grad_norm": 0.7743046452175267, "learning_rate": 4.777053354945322e-06, "loss": 0.416, "step": 3373 }, { "epoch": 0.8343224530168151, "grad_norm": 0.8135790787271157, "learning_rate": 4.776919163803587e-06, "loss": 0.4058, "step": 3374 }, { "epoch": 0.8345697329376854, "grad_norm": 0.7661956419487184, "learning_rate": 4.776784934175237e-06, "loss": 0.4246, "step": 3375 }, { "epoch": 0.8348170128585559, "grad_norm": 0.80368496419414, "learning_rate": 4.7766506660625414e-06, "loss": 0.4271, "step": 3376 }, { "epoch": 0.8350642927794263, "grad_norm": 0.7714493561021236, "learning_rate": 4.776516359467771e-06, "loss": 0.4371, "step": 3377 }, { "epoch": 0.8353115727002968, "grad_norm": 0.8100023760100212, "learning_rate": 4.776382014393195e-06, "loss": 0.4019, "step": 3378 }, { "epoch": 0.8355588526211671, "grad_norm": 0.7884436871457682, "learning_rate": 4.776247630841085e-06, "loss": 0.3934, "step": 3379 }, { "epoch": 0.8358061325420376, "grad_norm": 0.7671160651595207, "learning_rate": 4.776113208813712e-06, "loss": 0.4291, "step": 3380 }, { "epoch": 0.836053412462908, "grad_norm": 0.7938160957249958, "learning_rate": 4.775978748313348e-06, "loss": 0.4187, "step": 3381 }, { "epoch": 0.8363006923837785, "grad_norm": 0.8224792228789037, "learning_rate": 4.775844249342265e-06, "loss": 0.4066, "step": 3382 }, { "epoch": 0.8365479723046488, "grad_norm": 0.7905005579082747, "learning_rate": 4.775709711902738e-06, "loss": 0.4017, "step": 3383 }, { "epoch": 0.8367952522255193, "grad_norm": 0.8057947131158326, "learning_rate": 4.7755751359970405e-06, "loss": 0.4054, "step": 3384 }, { "epoch": 0.8370425321463897, "grad_norm": 0.8175423715679635, "learning_rate": 4.775440521627447e-06, "loss": 0.409, "step": 3385 }, { "epoch": 0.8372898120672602, "grad_norm": 0.8008681519700376, "learning_rate": 4.7753058687962325e-06, "loss": 0.4221, "step": 3386 }, { "epoch": 0.8375370919881305, "grad_norm": 0.7850063473346038, "learning_rate": 4.775171177505674e-06, "loss": 0.377, "step": 3387 }, { "epoch": 0.837784371909001, "grad_norm": 0.7947371747318924, "learning_rate": 4.775036447758048e-06, "loss": 0.4143, "step": 3388 }, { "epoch": 0.8380316518298714, "grad_norm": 0.7655744610178552, "learning_rate": 4.774901679555631e-06, "loss": 0.3918, "step": 3389 }, { "epoch": 0.8382789317507419, "grad_norm": 0.7762446116793785, "learning_rate": 4.774766872900702e-06, "loss": 0.4381, "step": 3390 }, { "epoch": 0.8385262116716122, "grad_norm": 0.8303410835002127, "learning_rate": 4.7746320277955395e-06, "loss": 0.3758, "step": 3391 }, { "epoch": 0.8387734915924827, "grad_norm": 0.7985702070353878, "learning_rate": 4.774497144242421e-06, "loss": 0.3818, "step": 3392 }, { "epoch": 0.8390207715133531, "grad_norm": 0.7920132547874397, "learning_rate": 4.774362222243629e-06, "loss": 0.4347, "step": 3393 }, { "epoch": 0.8392680514342236, "grad_norm": 0.8030043793173209, "learning_rate": 4.774227261801442e-06, "loss": 0.3946, "step": 3394 }, { "epoch": 0.8395153313550939, "grad_norm": 0.747732048647203, "learning_rate": 4.774092262918143e-06, "loss": 0.4537, "step": 3395 }, { "epoch": 0.8397626112759644, "grad_norm": 0.840523303115213, "learning_rate": 4.773957225596013e-06, "loss": 0.4157, "step": 3396 }, { "epoch": 0.8400098911968348, "grad_norm": 0.8110229327341179, "learning_rate": 4.773822149837334e-06, "loss": 0.4228, "step": 3397 }, { "epoch": 0.8402571711177053, "grad_norm": 0.7683317822796379, "learning_rate": 4.77368703564439e-06, "loss": 0.4239, "step": 3398 }, { "epoch": 0.8405044510385756, "grad_norm": 0.782689216594726, "learning_rate": 4.7735518830194635e-06, "loss": 0.4216, "step": 3399 }, { "epoch": 0.8407517309594461, "grad_norm": 0.7907944083692497, "learning_rate": 4.773416691964842e-06, "loss": 0.3986, "step": 3400 }, { "epoch": 0.8409990108803165, "grad_norm": 0.7655934114807923, "learning_rate": 4.7732814624828075e-06, "loss": 0.4165, "step": 3401 }, { "epoch": 0.841246290801187, "grad_norm": 0.8110535411816441, "learning_rate": 4.773146194575647e-06, "loss": 0.4225, "step": 3402 }, { "epoch": 0.8414935707220573, "grad_norm": 0.8100327921657837, "learning_rate": 4.773010888245647e-06, "loss": 0.42, "step": 3403 }, { "epoch": 0.8417408506429278, "grad_norm": 0.7530456461140547, "learning_rate": 4.772875543495094e-06, "loss": 0.4248, "step": 3404 }, { "epoch": 0.8419881305637982, "grad_norm": 0.7879760448848543, "learning_rate": 4.772740160326276e-06, "loss": 0.4314, "step": 3405 }, { "epoch": 0.8422354104846687, "grad_norm": 0.8606818943078248, "learning_rate": 4.772604738741482e-06, "loss": 0.3978, "step": 3406 }, { "epoch": 0.842482690405539, "grad_norm": 0.7549716602552334, "learning_rate": 4.7724692787430006e-06, "loss": 0.4289, "step": 3407 }, { "epoch": 0.8427299703264095, "grad_norm": 0.7633266980017795, "learning_rate": 4.772333780333121e-06, "loss": 0.4274, "step": 3408 }, { "epoch": 0.8429772502472799, "grad_norm": 0.765061521639252, "learning_rate": 4.772198243514135e-06, "loss": 0.41, "step": 3409 }, { "epoch": 0.8432245301681504, "grad_norm": 0.7783131637239651, "learning_rate": 4.772062668288332e-06, "loss": 0.4222, "step": 3410 }, { "epoch": 0.8434718100890207, "grad_norm": 0.7613563968237722, "learning_rate": 4.771927054658003e-06, "loss": 0.4069, "step": 3411 }, { "epoch": 0.8437190900098912, "grad_norm": 0.8018125862329029, "learning_rate": 4.771791402625442e-06, "loss": 0.411, "step": 3412 }, { "epoch": 0.8439663699307616, "grad_norm": 0.8350909510173182, "learning_rate": 4.771655712192942e-06, "loss": 0.4001, "step": 3413 }, { "epoch": 0.844213649851632, "grad_norm": 0.7891030802950871, "learning_rate": 4.771519983362795e-06, "loss": 0.4467, "step": 3414 }, { "epoch": 0.8444609297725024, "grad_norm": 0.8525872477343289, "learning_rate": 4.771384216137297e-06, "loss": 0.3848, "step": 3415 }, { "epoch": 0.8447082096933729, "grad_norm": 0.8166630385029069, "learning_rate": 4.771248410518742e-06, "loss": 0.4117, "step": 3416 }, { "epoch": 0.8449554896142433, "grad_norm": 0.8127217435899223, "learning_rate": 4.771112566509424e-06, "loss": 0.4449, "step": 3417 }, { "epoch": 0.8452027695351138, "grad_norm": 0.7989077116900508, "learning_rate": 4.770976684111643e-06, "loss": 0.4309, "step": 3418 }, { "epoch": 0.8454500494559841, "grad_norm": 0.7742207773656186, "learning_rate": 4.770840763327691e-06, "loss": 0.4169, "step": 3419 }, { "epoch": 0.8456973293768546, "grad_norm": 0.8009671807975369, "learning_rate": 4.770704804159869e-06, "loss": 0.4272, "step": 3420 }, { "epoch": 0.845944609297725, "grad_norm": 0.7762396091246215, "learning_rate": 4.770568806610474e-06, "loss": 0.4071, "step": 3421 }, { "epoch": 0.8461918892185954, "grad_norm": 0.79048982739237, "learning_rate": 4.770432770681804e-06, "loss": 0.4142, "step": 3422 }, { "epoch": 0.8464391691394659, "grad_norm": 0.7925764780479834, "learning_rate": 4.7702966963761595e-06, "loss": 0.4406, "step": 3423 }, { "epoch": 0.8466864490603363, "grad_norm": 0.8229734761276529, "learning_rate": 4.770160583695841e-06, "loss": 0.4051, "step": 3424 }, { "epoch": 0.8469337289812068, "grad_norm": 0.7812549831681974, "learning_rate": 4.7700244326431485e-06, "loss": 0.426, "step": 3425 }, { "epoch": 0.8471810089020771, "grad_norm": 0.8153456675383441, "learning_rate": 4.769888243220382e-06, "loss": 0.4306, "step": 3426 }, { "epoch": 0.8474282888229476, "grad_norm": 0.8228319181988596, "learning_rate": 4.769752015429846e-06, "loss": 0.4103, "step": 3427 }, { "epoch": 0.847675568743818, "grad_norm": 0.8350865864963404, "learning_rate": 4.769615749273842e-06, "loss": 0.4163, "step": 3428 }, { "epoch": 0.8479228486646885, "grad_norm": 0.8015176047552672, "learning_rate": 4.769479444754672e-06, "loss": 0.4199, "step": 3429 }, { "epoch": 0.8481701285855588, "grad_norm": 0.8301892125593503, "learning_rate": 4.769343101874643e-06, "loss": 0.4164, "step": 3430 }, { "epoch": 0.8484174085064293, "grad_norm": 0.8073898224130004, "learning_rate": 4.769206720636056e-06, "loss": 0.4482, "step": 3431 }, { "epoch": 0.8486646884272997, "grad_norm": 0.8309199793371376, "learning_rate": 4.769070301041219e-06, "loss": 0.4566, "step": 3432 }, { "epoch": 0.8489119683481702, "grad_norm": 0.7678874274807607, "learning_rate": 4.768933843092436e-06, "loss": 0.3946, "step": 3433 }, { "epoch": 0.8491592482690405, "grad_norm": 0.7549429356875449, "learning_rate": 4.768797346792015e-06, "loss": 0.4287, "step": 3434 }, { "epoch": 0.849406528189911, "grad_norm": 0.7876208123929652, "learning_rate": 4.768660812142263e-06, "loss": 0.405, "step": 3435 }, { "epoch": 0.8496538081107814, "grad_norm": 0.8291311437122808, "learning_rate": 4.768524239145487e-06, "loss": 0.4087, "step": 3436 }, { "epoch": 0.8499010880316519, "grad_norm": 0.8268178012129144, "learning_rate": 4.768387627803996e-06, "loss": 0.4097, "step": 3437 }, { "epoch": 0.8501483679525222, "grad_norm": 0.7851962802571755, "learning_rate": 4.7682509781200995e-06, "loss": 0.435, "step": 3438 }, { "epoch": 0.8503956478733927, "grad_norm": 0.8224586447080902, "learning_rate": 4.768114290096106e-06, "loss": 0.407, "step": 3439 }, { "epoch": 0.8506429277942631, "grad_norm": 0.8156576914563988, "learning_rate": 4.7679775637343275e-06, "loss": 0.4067, "step": 3440 }, { "epoch": 0.8508902077151336, "grad_norm": 0.8416785505706572, "learning_rate": 4.767840799037074e-06, "loss": 0.4297, "step": 3441 }, { "epoch": 0.8511374876360039, "grad_norm": 0.7581910072086763, "learning_rate": 4.767703996006658e-06, "loss": 0.439, "step": 3442 }, { "epoch": 0.8513847675568744, "grad_norm": 0.8036041084178673, "learning_rate": 4.767567154645392e-06, "loss": 0.3876, "step": 3443 }, { "epoch": 0.8516320474777448, "grad_norm": 0.8077582774150418, "learning_rate": 4.767430274955587e-06, "loss": 0.4529, "step": 3444 }, { "epoch": 0.8518793273986153, "grad_norm": 0.7940995416916322, "learning_rate": 4.767293356939559e-06, "loss": 0.4045, "step": 3445 }, { "epoch": 0.8521266073194856, "grad_norm": 0.8380267975480605, "learning_rate": 4.7671564005996215e-06, "loss": 0.4236, "step": 3446 }, { "epoch": 0.8523738872403561, "grad_norm": 0.7782724418448519, "learning_rate": 4.767019405938089e-06, "loss": 0.4095, "step": 3447 }, { "epoch": 0.8526211671612265, "grad_norm": 0.8634191317576759, "learning_rate": 4.766882372957278e-06, "loss": 0.4273, "step": 3448 }, { "epoch": 0.852868447082097, "grad_norm": 0.7886128298243266, "learning_rate": 4.7667453016595044e-06, "loss": 0.4336, "step": 3449 }, { "epoch": 0.8531157270029673, "grad_norm": 0.7839418502660845, "learning_rate": 4.766608192047084e-06, "loss": 0.4042, "step": 3450 }, { "epoch": 0.8533630069238378, "grad_norm": 0.779315107663836, "learning_rate": 4.766471044122337e-06, "loss": 0.4116, "step": 3451 }, { "epoch": 0.8536102868447082, "grad_norm": 0.8130332426670765, "learning_rate": 4.766333857887579e-06, "loss": 0.3986, "step": 3452 }, { "epoch": 0.8538575667655787, "grad_norm": 0.8316154784681559, "learning_rate": 4.7661966333451305e-06, "loss": 0.3984, "step": 3453 }, { "epoch": 0.854104846686449, "grad_norm": 0.7697149493976957, "learning_rate": 4.766059370497309e-06, "loss": 0.4037, "step": 3454 }, { "epoch": 0.8543521266073195, "grad_norm": 0.8222479951816859, "learning_rate": 4.765922069346437e-06, "loss": 0.3778, "step": 3455 }, { "epoch": 0.8545994065281899, "grad_norm": 0.8444888274019681, "learning_rate": 4.765784729894834e-06, "loss": 0.3792, "step": 3456 }, { "epoch": 0.8548466864490604, "grad_norm": 0.8094354195563492, "learning_rate": 4.765647352144822e-06, "loss": 0.4236, "step": 3457 }, { "epoch": 0.8550939663699307, "grad_norm": 0.7945699779942299, "learning_rate": 4.7655099360987225e-06, "loss": 0.419, "step": 3458 }, { "epoch": 0.8553412462908012, "grad_norm": 0.7623631949030496, "learning_rate": 4.765372481758859e-06, "loss": 0.4047, "step": 3459 }, { "epoch": 0.8555885262116716, "grad_norm": 0.8006217233235627, "learning_rate": 4.7652349891275525e-06, "loss": 0.4199, "step": 3460 }, { "epoch": 0.8558358061325421, "grad_norm": 0.8071519894620478, "learning_rate": 4.765097458207131e-06, "loss": 0.429, "step": 3461 }, { "epoch": 0.8560830860534124, "grad_norm": 0.7998455842155523, "learning_rate": 4.764959888999917e-06, "loss": 0.3849, "step": 3462 }, { "epoch": 0.8563303659742829, "grad_norm": 0.7739611268657531, "learning_rate": 4.7648222815082345e-06, "loss": 0.4204, "step": 3463 }, { "epoch": 0.8565776458951533, "grad_norm": 0.7927935553548467, "learning_rate": 4.764684635734412e-06, "loss": 0.4075, "step": 3464 }, { "epoch": 0.8568249258160238, "grad_norm": 0.7803766875608863, "learning_rate": 4.764546951680775e-06, "loss": 0.4338, "step": 3465 }, { "epoch": 0.8570722057368941, "grad_norm": 0.8016805038029136, "learning_rate": 4.76440922934965e-06, "loss": 0.4044, "step": 3466 }, { "epoch": 0.8573194856577646, "grad_norm": 0.8185021142595483, "learning_rate": 4.764271468743367e-06, "loss": 0.4072, "step": 3467 }, { "epoch": 0.857566765578635, "grad_norm": 0.7925774571128128, "learning_rate": 4.764133669864253e-06, "loss": 0.4161, "step": 3468 }, { "epoch": 0.8578140454995055, "grad_norm": 0.8062989314810408, "learning_rate": 4.763995832714636e-06, "loss": 0.3727, "step": 3469 }, { "epoch": 0.8580613254203758, "grad_norm": 0.7957403676082958, "learning_rate": 4.763857957296849e-06, "loss": 0.4329, "step": 3470 }, { "epoch": 0.8583086053412463, "grad_norm": 0.8384575802088189, "learning_rate": 4.7637200436132194e-06, "loss": 0.4043, "step": 3471 }, { "epoch": 0.8585558852621167, "grad_norm": 0.807113219782392, "learning_rate": 4.76358209166608e-06, "loss": 0.4014, "step": 3472 }, { "epoch": 0.8588031651829872, "grad_norm": 0.8043008375712767, "learning_rate": 4.7634441014577635e-06, "loss": 0.4005, "step": 3473 }, { "epoch": 0.8590504451038575, "grad_norm": 0.7842916597723525, "learning_rate": 4.763306072990601e-06, "loss": 0.391, "step": 3474 }, { "epoch": 0.859297725024728, "grad_norm": 0.7986848229547799, "learning_rate": 4.763168006266925e-06, "loss": 0.4219, "step": 3475 }, { "epoch": 0.8595450049455984, "grad_norm": 0.8198764103686891, "learning_rate": 4.76302990128907e-06, "loss": 0.3762, "step": 3476 }, { "epoch": 0.8597922848664689, "grad_norm": 0.7972917241817145, "learning_rate": 4.76289175805937e-06, "loss": 0.4477, "step": 3477 }, { "epoch": 0.8600395647873392, "grad_norm": 0.8004509954487877, "learning_rate": 4.762753576580161e-06, "loss": 0.4219, "step": 3478 }, { "epoch": 0.8602868447082097, "grad_norm": 0.7815966556815089, "learning_rate": 4.762615356853779e-06, "loss": 0.4269, "step": 3479 }, { "epoch": 0.8605341246290801, "grad_norm": 0.8140688940745657, "learning_rate": 4.762477098882558e-06, "loss": 0.4224, "step": 3480 }, { "epoch": 0.8607814045499506, "grad_norm": 0.8085670395416449, "learning_rate": 4.762338802668838e-06, "loss": 0.4033, "step": 3481 }, { "epoch": 0.8610286844708209, "grad_norm": 0.8114398196934406, "learning_rate": 4.762200468214953e-06, "loss": 0.405, "step": 3482 }, { "epoch": 0.8612759643916914, "grad_norm": 0.7957454762289632, "learning_rate": 4.7620620955232435e-06, "loss": 0.3963, "step": 3483 }, { "epoch": 0.8615232443125618, "grad_norm": 0.7716297802639464, "learning_rate": 4.7619236845960495e-06, "loss": 0.3725, "step": 3484 }, { "epoch": 0.8617705242334323, "grad_norm": 0.7498275856769105, "learning_rate": 4.7617852354357085e-06, "loss": 0.408, "step": 3485 }, { "epoch": 0.8620178041543026, "grad_norm": 0.8209720763151387, "learning_rate": 4.761646748044561e-06, "loss": 0.4222, "step": 3486 }, { "epoch": 0.8622650840751731, "grad_norm": 0.7811869145474315, "learning_rate": 4.761508222424948e-06, "loss": 0.4029, "step": 3487 }, { "epoch": 0.8625123639960435, "grad_norm": 0.774668109393603, "learning_rate": 4.761369658579213e-06, "loss": 0.4089, "step": 3488 }, { "epoch": 0.862759643916914, "grad_norm": 0.774508807733018, "learning_rate": 4.761231056509694e-06, "loss": 0.4087, "step": 3489 }, { "epoch": 0.8630069238377843, "grad_norm": 0.8043812063644213, "learning_rate": 4.761092416218737e-06, "loss": 0.4133, "step": 3490 }, { "epoch": 0.8632542037586548, "grad_norm": 0.8291349458889816, "learning_rate": 4.760953737708685e-06, "loss": 0.4119, "step": 3491 }, { "epoch": 0.8635014836795252, "grad_norm": 0.7920356279520397, "learning_rate": 4.7608150209818815e-06, "loss": 0.3771, "step": 3492 }, { "epoch": 0.8637487636003957, "grad_norm": 0.8072523281275508, "learning_rate": 4.760676266040671e-06, "loss": 0.4068, "step": 3493 }, { "epoch": 0.863996043521266, "grad_norm": 0.7899951360565894, "learning_rate": 4.7605374728874e-06, "loss": 0.4221, "step": 3494 }, { "epoch": 0.8642433234421365, "grad_norm": 0.7947865396993798, "learning_rate": 4.760398641524413e-06, "loss": 0.4459, "step": 3495 }, { "epoch": 0.8644906033630069, "grad_norm": 0.7709649897446718, "learning_rate": 4.760259771954058e-06, "loss": 0.4485, "step": 3496 }, { "epoch": 0.8647378832838774, "grad_norm": 0.8107187352574584, "learning_rate": 4.7601208641786814e-06, "loss": 0.4296, "step": 3497 }, { "epoch": 0.8649851632047477, "grad_norm": 0.808173993566204, "learning_rate": 4.759981918200632e-06, "loss": 0.3832, "step": 3498 }, { "epoch": 0.8652324431256182, "grad_norm": 0.7733226232262745, "learning_rate": 4.7598429340222565e-06, "loss": 0.4412, "step": 3499 }, { "epoch": 0.8654797230464887, "grad_norm": 0.7802454421928897, "learning_rate": 4.7597039116459065e-06, "loss": 0.44, "step": 3500 }, { "epoch": 0.865727002967359, "grad_norm": 0.7551425262142368, "learning_rate": 4.75956485107393e-06, "loss": 0.4164, "step": 3501 }, { "epoch": 0.8659742828882295, "grad_norm": 0.7607363103302663, "learning_rate": 4.75942575230868e-06, "loss": 0.4278, "step": 3502 }, { "epoch": 0.8662215628090999, "grad_norm": 0.7928219599691498, "learning_rate": 4.759286615352504e-06, "loss": 0.4044, "step": 3503 }, { "epoch": 0.8664688427299704, "grad_norm": 0.7746463131564298, "learning_rate": 4.759147440207758e-06, "loss": 0.4119, "step": 3504 }, { "epoch": 0.8667161226508407, "grad_norm": 0.8030529000110455, "learning_rate": 4.7590082268767906e-06, "loss": 0.3837, "step": 3505 }, { "epoch": 0.8669634025717112, "grad_norm": 0.8373758297950461, "learning_rate": 4.758868975361958e-06, "loss": 0.4134, "step": 3506 }, { "epoch": 0.8672106824925816, "grad_norm": 0.847300972466465, "learning_rate": 4.758729685665612e-06, "loss": 0.4137, "step": 3507 }, { "epoch": 0.8674579624134521, "grad_norm": 0.822308602045918, "learning_rate": 4.758590357790107e-06, "loss": 0.4266, "step": 3508 }, { "epoch": 0.8677052423343224, "grad_norm": 0.7751883119985707, "learning_rate": 4.7584509917378e-06, "loss": 0.4068, "step": 3509 }, { "epoch": 0.8679525222551929, "grad_norm": 0.7472605460681306, "learning_rate": 4.758311587511044e-06, "loss": 0.4192, "step": 3510 }, { "epoch": 0.8681998021760633, "grad_norm": 0.7846163021154071, "learning_rate": 4.758172145112198e-06, "loss": 0.4236, "step": 3511 }, { "epoch": 0.8684470820969338, "grad_norm": 0.8040444889492215, "learning_rate": 4.758032664543617e-06, "loss": 0.4068, "step": 3512 }, { "epoch": 0.8686943620178041, "grad_norm": 0.7854285165758984, "learning_rate": 4.757893145807659e-06, "loss": 0.4087, "step": 3513 }, { "epoch": 0.8689416419386746, "grad_norm": 0.8123417665178312, "learning_rate": 4.757753588906684e-06, "loss": 0.3667, "step": 3514 }, { "epoch": 0.869188921859545, "grad_norm": 0.783236901262894, "learning_rate": 4.757613993843048e-06, "loss": 0.4303, "step": 3515 }, { "epoch": 0.8694362017804155, "grad_norm": 0.7909692214395833, "learning_rate": 4.757474360619113e-06, "loss": 0.4018, "step": 3516 }, { "epoch": 0.8696834817012858, "grad_norm": 0.8481627332283962, "learning_rate": 4.757334689237239e-06, "loss": 0.4067, "step": 3517 }, { "epoch": 0.8699307616221563, "grad_norm": 0.7615068125391466, "learning_rate": 4.757194979699784e-06, "loss": 0.4368, "step": 3518 }, { "epoch": 0.8701780415430267, "grad_norm": 0.801705773275248, "learning_rate": 4.757055232009113e-06, "loss": 0.4375, "step": 3519 }, { "epoch": 0.8704253214638972, "grad_norm": 0.8221944799768854, "learning_rate": 4.756915446167587e-06, "loss": 0.389, "step": 3520 }, { "epoch": 0.8706726013847675, "grad_norm": 0.7862370813701264, "learning_rate": 4.756775622177568e-06, "loss": 0.42, "step": 3521 }, { "epoch": 0.870919881305638, "grad_norm": 0.7597195820726511, "learning_rate": 4.756635760041421e-06, "loss": 0.3909, "step": 3522 }, { "epoch": 0.8711671612265084, "grad_norm": 0.8076111401656502, "learning_rate": 4.7564958597615085e-06, "loss": 0.4135, "step": 3523 }, { "epoch": 0.8714144411473789, "grad_norm": 0.8192089416161316, "learning_rate": 4.756355921340197e-06, "loss": 0.3929, "step": 3524 }, { "epoch": 0.8716617210682492, "grad_norm": 0.8545678851827884, "learning_rate": 4.7562159447798485e-06, "loss": 0.3968, "step": 3525 }, { "epoch": 0.8719090009891197, "grad_norm": 0.7846539307702833, "learning_rate": 4.756075930082833e-06, "loss": 0.4232, "step": 3526 }, { "epoch": 0.8721562809099901, "grad_norm": 0.7645948649778886, "learning_rate": 4.755935877251515e-06, "loss": 0.3992, "step": 3527 }, { "epoch": 0.8724035608308606, "grad_norm": 0.8118160536750945, "learning_rate": 4.755795786288262e-06, "loss": 0.3972, "step": 3528 }, { "epoch": 0.8726508407517309, "grad_norm": 0.8161177141362831, "learning_rate": 4.7556556571954414e-06, "loss": 0.4113, "step": 3529 }, { "epoch": 0.8728981206726014, "grad_norm": 0.7550227042035801, "learning_rate": 4.755515489975424e-06, "loss": 0.4113, "step": 3530 }, { "epoch": 0.8731454005934718, "grad_norm": 0.7841549066488641, "learning_rate": 4.755375284630577e-06, "loss": 0.3964, "step": 3531 }, { "epoch": 0.8733926805143423, "grad_norm": 0.7504063160687925, "learning_rate": 4.75523504116327e-06, "loss": 0.4069, "step": 3532 }, { "epoch": 0.8736399604352126, "grad_norm": 0.8184342628628202, "learning_rate": 4.755094759575875e-06, "loss": 0.4015, "step": 3533 }, { "epoch": 0.8738872403560831, "grad_norm": 0.7998457424375706, "learning_rate": 4.754954439870763e-06, "loss": 0.3947, "step": 3534 }, { "epoch": 0.8741345202769535, "grad_norm": 0.7918492983576808, "learning_rate": 4.754814082050305e-06, "loss": 0.4025, "step": 3535 }, { "epoch": 0.874381800197824, "grad_norm": 0.826262336173255, "learning_rate": 4.7546736861168745e-06, "loss": 0.4033, "step": 3536 }, { "epoch": 0.8746290801186943, "grad_norm": 0.8139016489751498, "learning_rate": 4.754533252072843e-06, "loss": 0.4127, "step": 3537 }, { "epoch": 0.8748763600395648, "grad_norm": 0.8212185358376062, "learning_rate": 4.754392779920585e-06, "loss": 0.4218, "step": 3538 }, { "epoch": 0.8751236399604352, "grad_norm": 0.8123701589959741, "learning_rate": 4.754252269662476e-06, "loss": 0.4212, "step": 3539 }, { "epoch": 0.8753709198813057, "grad_norm": 0.7360744756469361, "learning_rate": 4.754111721300889e-06, "loss": 0.4146, "step": 3540 }, { "epoch": 0.875618199802176, "grad_norm": 0.7851546187922345, "learning_rate": 4.753971134838202e-06, "loss": 0.4179, "step": 3541 }, { "epoch": 0.8758654797230465, "grad_norm": 0.7721531761045248, "learning_rate": 4.753830510276789e-06, "loss": 0.4435, "step": 3542 }, { "epoch": 0.8761127596439169, "grad_norm": 0.7785287323560217, "learning_rate": 4.7536898476190295e-06, "loss": 0.4226, "step": 3543 }, { "epoch": 0.8763600395647874, "grad_norm": 0.7967610882789702, "learning_rate": 4.753549146867299e-06, "loss": 0.4254, "step": 3544 }, { "epoch": 0.8766073194856577, "grad_norm": 0.8096712743989584, "learning_rate": 4.753408408023976e-06, "loss": 0.3831, "step": 3545 }, { "epoch": 0.8768545994065282, "grad_norm": 0.7636869739295397, "learning_rate": 4.75326763109144e-06, "loss": 0.427, "step": 3546 }, { "epoch": 0.8771018793273986, "grad_norm": 0.771914882644073, "learning_rate": 4.753126816072071e-06, "loss": 0.3803, "step": 3547 }, { "epoch": 0.8773491592482691, "grad_norm": 0.7653991534397886, "learning_rate": 4.752985962968247e-06, "loss": 0.4219, "step": 3548 }, { "epoch": 0.8775964391691394, "grad_norm": 0.8189550150303511, "learning_rate": 4.752845071782352e-06, "loss": 0.4325, "step": 3549 }, { "epoch": 0.8778437190900099, "grad_norm": 0.7692889761933356, "learning_rate": 4.752704142516765e-06, "loss": 0.3801, "step": 3550 }, { "epoch": 0.8780909990108803, "grad_norm": 0.7996162297522486, "learning_rate": 4.7525631751738696e-06, "loss": 0.4072, "step": 3551 }, { "epoch": 0.8783382789317508, "grad_norm": 0.7928390591889003, "learning_rate": 4.752422169756048e-06, "loss": 0.4187, "step": 3552 }, { "epoch": 0.8785855588526211, "grad_norm": 0.8121066718850152, "learning_rate": 4.7522811262656835e-06, "loss": 0.4002, "step": 3553 }, { "epoch": 0.8788328387734916, "grad_norm": 0.7825370933836572, "learning_rate": 4.752140044705161e-06, "loss": 0.4406, "step": 3554 }, { "epoch": 0.879080118694362, "grad_norm": 0.7621264478238917, "learning_rate": 4.751998925076863e-06, "loss": 0.417, "step": 3555 }, { "epoch": 0.8793273986152325, "grad_norm": 0.7962830855667626, "learning_rate": 4.7518577673831765e-06, "loss": 0.4318, "step": 3556 }, { "epoch": 0.8795746785361028, "grad_norm": 0.7761619234912889, "learning_rate": 4.7517165716264866e-06, "loss": 0.4136, "step": 3557 }, { "epoch": 0.8798219584569733, "grad_norm": 0.8059167848034989, "learning_rate": 4.751575337809183e-06, "loss": 0.4084, "step": 3558 }, { "epoch": 0.8800692383778437, "grad_norm": 0.8124913029375966, "learning_rate": 4.751434065933648e-06, "loss": 0.4045, "step": 3559 }, { "epoch": 0.8803165182987142, "grad_norm": 0.7814128265870901, "learning_rate": 4.751292756002273e-06, "loss": 0.4258, "step": 3560 }, { "epoch": 0.8805637982195845, "grad_norm": 0.8039331824474487, "learning_rate": 4.751151408017445e-06, "loss": 0.4056, "step": 3561 }, { "epoch": 0.880811078140455, "grad_norm": 0.8269124609448832, "learning_rate": 4.751010021981555e-06, "loss": 0.4022, "step": 3562 }, { "epoch": 0.8810583580613254, "grad_norm": 0.8059296222218977, "learning_rate": 4.75086859789699e-06, "loss": 0.4137, "step": 3563 }, { "epoch": 0.8813056379821959, "grad_norm": 0.8257234417066797, "learning_rate": 4.750727135766143e-06, "loss": 0.4017, "step": 3564 }, { "epoch": 0.8815529179030662, "grad_norm": 0.7868621973148561, "learning_rate": 4.750585635591404e-06, "loss": 0.4026, "step": 3565 }, { "epoch": 0.8818001978239367, "grad_norm": 0.8049536206026485, "learning_rate": 4.750444097375165e-06, "loss": 0.3992, "step": 3566 }, { "epoch": 0.8820474777448071, "grad_norm": 0.8126870559722053, "learning_rate": 4.750302521119819e-06, "loss": 0.4046, "step": 3567 }, { "epoch": 0.8822947576656776, "grad_norm": 0.8647264312706785, "learning_rate": 4.750160906827758e-06, "loss": 0.4248, "step": 3568 }, { "epoch": 0.8825420375865479, "grad_norm": 0.742871129693659, "learning_rate": 4.750019254501376e-06, "loss": 0.4345, "step": 3569 }, { "epoch": 0.8827893175074184, "grad_norm": 0.8191164799952924, "learning_rate": 4.749877564143067e-06, "loss": 0.424, "step": 3570 }, { "epoch": 0.8830365974282888, "grad_norm": 0.781549768862716, "learning_rate": 4.749735835755227e-06, "loss": 0.4298, "step": 3571 }, { "epoch": 0.8832838773491593, "grad_norm": 0.8053711023623178, "learning_rate": 4.749594069340252e-06, "loss": 0.413, "step": 3572 }, { "epoch": 0.8835311572700296, "grad_norm": 0.8262873093017418, "learning_rate": 4.749452264900536e-06, "loss": 0.427, "step": 3573 }, { "epoch": 0.8837784371909001, "grad_norm": 0.7651153848178928, "learning_rate": 4.749310422438478e-06, "loss": 0.4328, "step": 3574 }, { "epoch": 0.8840257171117705, "grad_norm": 0.821565389817644, "learning_rate": 4.749168541956475e-06, "loss": 0.4025, "step": 3575 }, { "epoch": 0.884272997032641, "grad_norm": 0.8771003427825715, "learning_rate": 4.749026623456925e-06, "loss": 0.3983, "step": 3576 }, { "epoch": 0.8845202769535113, "grad_norm": 0.8193490682302352, "learning_rate": 4.748884666942226e-06, "loss": 0.4175, "step": 3577 }, { "epoch": 0.8847675568743818, "grad_norm": 0.7881409124419267, "learning_rate": 4.748742672414779e-06, "loss": 0.4199, "step": 3578 }, { "epoch": 0.8850148367952523, "grad_norm": 0.7939165086785582, "learning_rate": 4.748600639876983e-06, "loss": 0.4059, "step": 3579 }, { "epoch": 0.8852621167161226, "grad_norm": 0.7968599992051688, "learning_rate": 4.748458569331239e-06, "loss": 0.4462, "step": 3580 }, { "epoch": 0.8855093966369931, "grad_norm": 0.8478289232046818, "learning_rate": 4.7483164607799495e-06, "loss": 0.3866, "step": 3581 }, { "epoch": 0.8857566765578635, "grad_norm": 0.7865185184922773, "learning_rate": 4.748174314225515e-06, "loss": 0.415, "step": 3582 }, { "epoch": 0.886003956478734, "grad_norm": 0.7813174862734311, "learning_rate": 4.748032129670339e-06, "loss": 0.4436, "step": 3583 }, { "epoch": 0.8862512363996043, "grad_norm": 0.8084377607109554, "learning_rate": 4.747889907116826e-06, "loss": 0.3953, "step": 3584 }, { "epoch": 0.8864985163204748, "grad_norm": 0.8036795735133182, "learning_rate": 4.747747646567378e-06, "loss": 0.3895, "step": 3585 }, { "epoch": 0.8867457962413452, "grad_norm": 0.8098549722269822, "learning_rate": 4.747605348024399e-06, "loss": 0.4254, "step": 3586 }, { "epoch": 0.8869930761622157, "grad_norm": 0.8081490604273397, "learning_rate": 4.747463011490297e-06, "loss": 0.4039, "step": 3587 }, { "epoch": 0.887240356083086, "grad_norm": 0.7912104338630691, "learning_rate": 4.747320636967476e-06, "loss": 0.4045, "step": 3588 }, { "epoch": 0.8874876360039565, "grad_norm": 0.8185827574341613, "learning_rate": 4.747178224458343e-06, "loss": 0.3962, "step": 3589 }, { "epoch": 0.8877349159248269, "grad_norm": 0.804633825051263, "learning_rate": 4.7470357739653055e-06, "loss": 0.4151, "step": 3590 }, { "epoch": 0.8879821958456974, "grad_norm": 0.8270157295952916, "learning_rate": 4.746893285490771e-06, "loss": 0.3745, "step": 3591 }, { "epoch": 0.8882294757665677, "grad_norm": 0.794707377298943, "learning_rate": 4.746750759037148e-06, "loss": 0.4317, "step": 3592 }, { "epoch": 0.8884767556874382, "grad_norm": 0.815353758945955, "learning_rate": 4.746608194606845e-06, "loss": 0.4009, "step": 3593 }, { "epoch": 0.8887240356083086, "grad_norm": 0.7851323106201907, "learning_rate": 4.746465592202273e-06, "loss": 0.3853, "step": 3594 }, { "epoch": 0.8889713155291791, "grad_norm": 0.7726796238264945, "learning_rate": 4.7463229518258424e-06, "loss": 0.4068, "step": 3595 }, { "epoch": 0.8892185954500494, "grad_norm": 0.842200959923491, "learning_rate": 4.746180273479963e-06, "loss": 0.4096, "step": 3596 }, { "epoch": 0.8894658753709199, "grad_norm": 0.7998155688698845, "learning_rate": 4.746037557167047e-06, "loss": 0.4087, "step": 3597 }, { "epoch": 0.8897131552917903, "grad_norm": 0.7937182610227635, "learning_rate": 4.745894802889507e-06, "loss": 0.422, "step": 3598 }, { "epoch": 0.8899604352126608, "grad_norm": 0.8111751844441354, "learning_rate": 4.745752010649755e-06, "loss": 0.39, "step": 3599 }, { "epoch": 0.8902077151335311, "grad_norm": 0.7714646667870872, "learning_rate": 4.745609180450207e-06, "loss": 0.4192, "step": 3600 }, { "epoch": 0.8904549950544016, "grad_norm": 0.7554803232211826, "learning_rate": 4.745466312293275e-06, "loss": 0.4306, "step": 3601 }, { "epoch": 0.890702274975272, "grad_norm": 0.7757947210978946, "learning_rate": 4.745323406181375e-06, "loss": 0.4142, "step": 3602 }, { "epoch": 0.8909495548961425, "grad_norm": 0.760204249748166, "learning_rate": 4.7451804621169214e-06, "loss": 0.4446, "step": 3603 }, { "epoch": 0.8911968348170128, "grad_norm": 0.7870209440781624, "learning_rate": 4.745037480102332e-06, "loss": 0.3776, "step": 3604 }, { "epoch": 0.8914441147378833, "grad_norm": 0.7948033100309609, "learning_rate": 4.744894460140021e-06, "loss": 0.4145, "step": 3605 }, { "epoch": 0.8916913946587537, "grad_norm": 0.8094052552150242, "learning_rate": 4.7447514022324085e-06, "loss": 0.3901, "step": 3606 }, { "epoch": 0.8919386745796242, "grad_norm": 0.8210557157284226, "learning_rate": 4.744608306381912e-06, "loss": 0.444, "step": 3607 }, { "epoch": 0.8921859545004945, "grad_norm": 0.8307590466318364, "learning_rate": 4.744465172590949e-06, "loss": 0.3834, "step": 3608 }, { "epoch": 0.892433234421365, "grad_norm": 0.7843833688120964, "learning_rate": 4.7443220008619405e-06, "loss": 0.4132, "step": 3609 }, { "epoch": 0.8926805143422354, "grad_norm": 0.7944958514405386, "learning_rate": 4.744178791197305e-06, "loss": 0.4175, "step": 3610 }, { "epoch": 0.8929277942631059, "grad_norm": 0.7998369942157381, "learning_rate": 4.744035543599464e-06, "loss": 0.3737, "step": 3611 }, { "epoch": 0.8931750741839762, "grad_norm": 0.8009239130803604, "learning_rate": 4.74389225807084e-06, "loss": 0.4203, "step": 3612 }, { "epoch": 0.8934223541048467, "grad_norm": 0.800511982201152, "learning_rate": 4.743748934613853e-06, "loss": 0.4466, "step": 3613 }, { "epoch": 0.8936696340257171, "grad_norm": 0.7686053384089964, "learning_rate": 4.743605573230926e-06, "loss": 0.4101, "step": 3614 }, { "epoch": 0.8939169139465876, "grad_norm": 0.8060434016909004, "learning_rate": 4.7434621739244826e-06, "loss": 0.4232, "step": 3615 }, { "epoch": 0.8941641938674579, "grad_norm": 0.8230882644339693, "learning_rate": 4.7433187366969465e-06, "loss": 0.4046, "step": 3616 }, { "epoch": 0.8944114737883284, "grad_norm": 0.7989957008420914, "learning_rate": 4.743175261550743e-06, "loss": 0.4167, "step": 3617 }, { "epoch": 0.8946587537091988, "grad_norm": 0.8394401197348738, "learning_rate": 4.7430317484882956e-06, "loss": 0.4009, "step": 3618 }, { "epoch": 0.8949060336300693, "grad_norm": 0.8094052684678341, "learning_rate": 4.7428881975120325e-06, "loss": 0.3933, "step": 3619 }, { "epoch": 0.8951533135509396, "grad_norm": 0.7851229819463934, "learning_rate": 4.742744608624377e-06, "loss": 0.4113, "step": 3620 }, { "epoch": 0.8954005934718101, "grad_norm": 0.7662309230132146, "learning_rate": 4.742600981827759e-06, "loss": 0.4065, "step": 3621 }, { "epoch": 0.8956478733926805, "grad_norm": 0.7710743555606467, "learning_rate": 4.7424573171246045e-06, "loss": 0.4206, "step": 3622 }, { "epoch": 0.895895153313551, "grad_norm": 0.8099130000859085, "learning_rate": 4.742313614517342e-06, "loss": 0.3821, "step": 3623 }, { "epoch": 0.8961424332344213, "grad_norm": 0.8223717726070423, "learning_rate": 4.7421698740084024e-06, "loss": 0.4351, "step": 3624 }, { "epoch": 0.8963897131552918, "grad_norm": 0.785125594303034, "learning_rate": 4.742026095600213e-06, "loss": 0.3946, "step": 3625 }, { "epoch": 0.8966369930761622, "grad_norm": 0.8069026278651512, "learning_rate": 4.741882279295204e-06, "loss": 0.3847, "step": 3626 }, { "epoch": 0.8968842729970327, "grad_norm": 0.8105176440474819, "learning_rate": 4.7417384250958085e-06, "loss": 0.4036, "step": 3627 }, { "epoch": 0.897131552917903, "grad_norm": 0.8156431875624227, "learning_rate": 4.741594533004455e-06, "loss": 0.3943, "step": 3628 }, { "epoch": 0.8973788328387735, "grad_norm": 0.8325389032978646, "learning_rate": 4.74145060302358e-06, "loss": 0.3985, "step": 3629 }, { "epoch": 0.8976261127596439, "grad_norm": 0.7812029183112011, "learning_rate": 4.741306635155613e-06, "loss": 0.4174, "step": 3630 }, { "epoch": 0.8978733926805144, "grad_norm": 0.7443379316555324, "learning_rate": 4.741162629402987e-06, "loss": 0.4244, "step": 3631 }, { "epoch": 0.8981206726013847, "grad_norm": 0.7918379844224285, "learning_rate": 4.741018585768139e-06, "loss": 0.4298, "step": 3632 }, { "epoch": 0.8983679525222552, "grad_norm": 0.8155608902187095, "learning_rate": 4.740874504253501e-06, "loss": 0.4136, "step": 3633 }, { "epoch": 0.8986152324431256, "grad_norm": 0.798431437148323, "learning_rate": 4.740730384861511e-06, "loss": 0.4152, "step": 3634 }, { "epoch": 0.8988625123639961, "grad_norm": 0.7817921688685995, "learning_rate": 4.740586227594602e-06, "loss": 0.4322, "step": 3635 }, { "epoch": 0.8991097922848664, "grad_norm": 0.801265782510891, "learning_rate": 4.740442032455213e-06, "loss": 0.3742, "step": 3636 }, { "epoch": 0.8993570722057369, "grad_norm": 0.774678482565866, "learning_rate": 4.740297799445781e-06, "loss": 0.4431, "step": 3637 }, { "epoch": 0.8996043521266073, "grad_norm": 0.7968222612747385, "learning_rate": 4.740153528568743e-06, "loss": 0.4176, "step": 3638 }, { "epoch": 0.8998516320474778, "grad_norm": 0.7887370276926967, "learning_rate": 4.740009219826538e-06, "loss": 0.4488, "step": 3639 }, { "epoch": 0.9000989119683481, "grad_norm": 0.8051668166188332, "learning_rate": 4.739864873221607e-06, "loss": 0.4105, "step": 3640 }, { "epoch": 0.9003461918892186, "grad_norm": 0.7819691265369665, "learning_rate": 4.739720488756387e-06, "loss": 0.4241, "step": 3641 }, { "epoch": 0.900593471810089, "grad_norm": 0.834960143043667, "learning_rate": 4.73957606643332e-06, "loss": 0.4472, "step": 3642 }, { "epoch": 0.9008407517309595, "grad_norm": 0.8080501371876072, "learning_rate": 4.739431606254847e-06, "loss": 0.3909, "step": 3643 }, { "epoch": 0.9010880316518298, "grad_norm": 0.7919301864894568, "learning_rate": 4.73928710822341e-06, "loss": 0.412, "step": 3644 }, { "epoch": 0.9013353115727003, "grad_norm": 0.7911907598771045, "learning_rate": 4.739142572341451e-06, "loss": 0.437, "step": 3645 }, { "epoch": 0.9015825914935707, "grad_norm": 0.7705392923487695, "learning_rate": 4.738997998611413e-06, "loss": 0.4042, "step": 3646 }, { "epoch": 0.9018298714144412, "grad_norm": 0.8233674027397053, "learning_rate": 4.7388533870357415e-06, "loss": 0.414, "step": 3647 }, { "epoch": 0.9020771513353115, "grad_norm": 0.7503969461053938, "learning_rate": 4.738708737616879e-06, "loss": 0.4066, "step": 3648 }, { "epoch": 0.902324431256182, "grad_norm": 0.8049770365300944, "learning_rate": 4.73856405035727e-06, "loss": 0.41, "step": 3649 }, { "epoch": 0.9025717111770524, "grad_norm": 0.8109425037455092, "learning_rate": 4.7384193252593606e-06, "loss": 0.4445, "step": 3650 }, { "epoch": 0.9028189910979229, "grad_norm": 0.8369589101352245, "learning_rate": 4.7382745623255985e-06, "loss": 0.4313, "step": 3651 }, { "epoch": 0.9030662710187932, "grad_norm": 0.8046568527922425, "learning_rate": 4.73812976155843e-06, "loss": 0.3856, "step": 3652 }, { "epoch": 0.9033135509396637, "grad_norm": 0.8099061520890892, "learning_rate": 4.737984922960301e-06, "loss": 0.4034, "step": 3653 }, { "epoch": 0.9035608308605341, "grad_norm": 0.7989778433300206, "learning_rate": 4.737840046533662e-06, "loss": 0.3918, "step": 3654 }, { "epoch": 0.9038081107814046, "grad_norm": 0.7793440943753789, "learning_rate": 4.737695132280961e-06, "loss": 0.3995, "step": 3655 }, { "epoch": 0.904055390702275, "grad_norm": 0.7676663113860338, "learning_rate": 4.737550180204646e-06, "loss": 0.3792, "step": 3656 }, { "epoch": 0.9043026706231454, "grad_norm": 0.8045089888506259, "learning_rate": 4.737405190307169e-06, "loss": 0.4119, "step": 3657 }, { "epoch": 0.9045499505440159, "grad_norm": 0.7690497853722972, "learning_rate": 4.7372601625909805e-06, "loss": 0.4096, "step": 3658 }, { "epoch": 0.9047972304648862, "grad_norm": 0.8071716454958764, "learning_rate": 4.737115097058532e-06, "loss": 0.4048, "step": 3659 }, { "epoch": 0.9050445103857567, "grad_norm": 0.7895776592417868, "learning_rate": 4.736969993712275e-06, "loss": 0.4167, "step": 3660 }, { "epoch": 0.9052917903066271, "grad_norm": 0.8240625463753163, "learning_rate": 4.736824852554661e-06, "loss": 0.403, "step": 3661 }, { "epoch": 0.9055390702274976, "grad_norm": 0.8195618957323566, "learning_rate": 4.736679673588146e-06, "loss": 0.3815, "step": 3662 }, { "epoch": 0.905786350148368, "grad_norm": 0.7834351939345308, "learning_rate": 4.736534456815182e-06, "loss": 0.4219, "step": 3663 }, { "epoch": 0.9060336300692384, "grad_norm": 0.8324618159473429, "learning_rate": 4.736389202238224e-06, "loss": 0.3964, "step": 3664 }, { "epoch": 0.9062809099901088, "grad_norm": 0.8600961054014756, "learning_rate": 4.736243909859727e-06, "loss": 0.3904, "step": 3665 }, { "epoch": 0.9065281899109793, "grad_norm": 0.7577065431947011, "learning_rate": 4.736098579682148e-06, "loss": 0.4157, "step": 3666 }, { "epoch": 0.9067754698318496, "grad_norm": 0.7866848892631093, "learning_rate": 4.735953211707942e-06, "loss": 0.403, "step": 3667 }, { "epoch": 0.9070227497527201, "grad_norm": 0.7505834429164107, "learning_rate": 4.735807805939568e-06, "loss": 0.4294, "step": 3668 }, { "epoch": 0.9072700296735905, "grad_norm": 0.8346896722270837, "learning_rate": 4.735662362379482e-06, "loss": 0.4088, "step": 3669 }, { "epoch": 0.907517309594461, "grad_norm": 0.8556942291684705, "learning_rate": 4.735516881030143e-06, "loss": 0.3931, "step": 3670 }, { "epoch": 0.9077645895153313, "grad_norm": 0.8109171826553419, "learning_rate": 4.7353713618940104e-06, "loss": 0.3865, "step": 3671 }, { "epoch": 0.9080118694362018, "grad_norm": 0.7873661354765638, "learning_rate": 4.735225804973543e-06, "loss": 0.3986, "step": 3672 }, { "epoch": 0.9082591493570722, "grad_norm": 0.8094256714029084, "learning_rate": 4.735080210271202e-06, "loss": 0.4157, "step": 3673 }, { "epoch": 0.9085064292779427, "grad_norm": 0.8159543106898893, "learning_rate": 4.734934577789449e-06, "loss": 0.3597, "step": 3674 }, { "epoch": 0.908753709198813, "grad_norm": 0.826502911376005, "learning_rate": 4.734788907530744e-06, "loss": 0.3858, "step": 3675 }, { "epoch": 0.9090009891196835, "grad_norm": 0.7757107363293632, "learning_rate": 4.734643199497551e-06, "loss": 0.4146, "step": 3676 }, { "epoch": 0.9092482690405539, "grad_norm": 0.7897304584611788, "learning_rate": 4.73449745369233e-06, "loss": 0.3961, "step": 3677 }, { "epoch": 0.9094955489614244, "grad_norm": 0.8082471594665909, "learning_rate": 4.734351670117548e-06, "loss": 0.3993, "step": 3678 }, { "epoch": 0.9097428288822947, "grad_norm": 0.8032744698939259, "learning_rate": 4.734205848775667e-06, "loss": 0.4244, "step": 3679 }, { "epoch": 0.9099901088031652, "grad_norm": 0.7602920123766033, "learning_rate": 4.734059989669153e-06, "loss": 0.3965, "step": 3680 }, { "epoch": 0.9102373887240356, "grad_norm": 0.8009664738803186, "learning_rate": 4.73391409280047e-06, "loss": 0.4085, "step": 3681 }, { "epoch": 0.9104846686449061, "grad_norm": 0.8213732647282298, "learning_rate": 4.733768158172086e-06, "loss": 0.3911, "step": 3682 }, { "epoch": 0.9107319485657764, "grad_norm": 0.7848207476521984, "learning_rate": 4.733622185786466e-06, "loss": 0.4079, "step": 3683 }, { "epoch": 0.9109792284866469, "grad_norm": 0.7988798539247747, "learning_rate": 4.733476175646079e-06, "loss": 0.4248, "step": 3684 }, { "epoch": 0.9112265084075173, "grad_norm": 0.791433923608785, "learning_rate": 4.733330127753391e-06, "loss": 0.374, "step": 3685 }, { "epoch": 0.9114737883283878, "grad_norm": 0.8508840280452902, "learning_rate": 4.733184042110872e-06, "loss": 0.3851, "step": 3686 }, { "epoch": 0.9117210682492581, "grad_norm": 0.7563054793404609, "learning_rate": 4.733037918720991e-06, "loss": 0.4036, "step": 3687 }, { "epoch": 0.9119683481701286, "grad_norm": 0.7806311523663997, "learning_rate": 4.732891757586217e-06, "loss": 0.4269, "step": 3688 }, { "epoch": 0.912215628090999, "grad_norm": 0.8016731889784404, "learning_rate": 4.732745558709022e-06, "loss": 0.4159, "step": 3689 }, { "epoch": 0.9124629080118695, "grad_norm": 0.841535918475194, "learning_rate": 4.732599322091878e-06, "loss": 0.3978, "step": 3690 }, { "epoch": 0.9127101879327398, "grad_norm": 0.8294101182941553, "learning_rate": 4.732453047737254e-06, "loss": 0.3808, "step": 3691 }, { "epoch": 0.9129574678536103, "grad_norm": 0.7925075807432463, "learning_rate": 4.7323067356476236e-06, "loss": 0.39, "step": 3692 }, { "epoch": 0.9132047477744807, "grad_norm": 0.8122022329751005, "learning_rate": 4.7321603858254615e-06, "loss": 0.3956, "step": 3693 }, { "epoch": 0.9134520276953512, "grad_norm": 0.8872703411191297, "learning_rate": 4.732013998273239e-06, "loss": 0.3825, "step": 3694 }, { "epoch": 0.9136993076162215, "grad_norm": 0.8172203043153907, "learning_rate": 4.7318675729934325e-06, "loss": 0.4327, "step": 3695 }, { "epoch": 0.913946587537092, "grad_norm": 0.7698700210872599, "learning_rate": 4.731721109988516e-06, "loss": 0.4249, "step": 3696 }, { "epoch": 0.9141938674579624, "grad_norm": 0.7909208463040441, "learning_rate": 4.731574609260965e-06, "loss": 0.413, "step": 3697 }, { "epoch": 0.9144411473788329, "grad_norm": 0.85111940345262, "learning_rate": 4.7314280708132555e-06, "loss": 0.3766, "step": 3698 }, { "epoch": 0.9146884272997032, "grad_norm": 0.8267173832504716, "learning_rate": 4.731281494647866e-06, "loss": 0.4056, "step": 3699 }, { "epoch": 0.9149357072205737, "grad_norm": 0.781799158173567, "learning_rate": 4.731134880767273e-06, "loss": 0.3793, "step": 3700 }, { "epoch": 0.9151829871414441, "grad_norm": 0.7796258192015334, "learning_rate": 4.730988229173955e-06, "loss": 0.4309, "step": 3701 }, { "epoch": 0.9154302670623146, "grad_norm": 0.8493662394700314, "learning_rate": 4.7308415398703896e-06, "loss": 0.4401, "step": 3702 }, { "epoch": 0.9156775469831849, "grad_norm": 0.8252101322373908, "learning_rate": 4.730694812859058e-06, "loss": 0.425, "step": 3703 }, { "epoch": 0.9159248269040554, "grad_norm": 0.8436892482893468, "learning_rate": 4.73054804814244e-06, "loss": 0.4001, "step": 3704 }, { "epoch": 0.9161721068249258, "grad_norm": 0.7990937103631333, "learning_rate": 4.730401245723015e-06, "loss": 0.4528, "step": 3705 }, { "epoch": 0.9164193867457963, "grad_norm": 0.7426409943836156, "learning_rate": 4.730254405603266e-06, "loss": 0.4139, "step": 3706 }, { "epoch": 0.9166666666666666, "grad_norm": 0.8458020418541371, "learning_rate": 4.730107527785675e-06, "loss": 0.3864, "step": 3707 }, { "epoch": 0.9169139465875371, "grad_norm": 0.7763715601421739, "learning_rate": 4.729960612272724e-06, "loss": 0.413, "step": 3708 }, { "epoch": 0.9171612265084075, "grad_norm": 0.772693666164887, "learning_rate": 4.729813659066895e-06, "loss": 0.3964, "step": 3709 }, { "epoch": 0.917408506429278, "grad_norm": 0.7765688668068649, "learning_rate": 4.729666668170675e-06, "loss": 0.3879, "step": 3710 }, { "epoch": 0.9176557863501483, "grad_norm": 0.7526284361207995, "learning_rate": 4.729519639586546e-06, "loss": 0.4087, "step": 3711 }, { "epoch": 0.9179030662710188, "grad_norm": 0.7880056084680264, "learning_rate": 4.729372573316994e-06, "loss": 0.3952, "step": 3712 }, { "epoch": 0.9181503461918892, "grad_norm": 0.8119548569197702, "learning_rate": 4.729225469364506e-06, "loss": 0.4106, "step": 3713 }, { "epoch": 0.9183976261127597, "grad_norm": 0.8080727821692407, "learning_rate": 4.729078327731566e-06, "loss": 0.4168, "step": 3714 }, { "epoch": 0.91864490603363, "grad_norm": 0.7656106325715241, "learning_rate": 4.728931148420663e-06, "loss": 0.4209, "step": 3715 }, { "epoch": 0.9188921859545005, "grad_norm": 0.7482334472892916, "learning_rate": 4.728783931434285e-06, "loss": 0.427, "step": 3716 }, { "epoch": 0.9191394658753709, "grad_norm": 0.7750948068078986, "learning_rate": 4.7286366767749195e-06, "loss": 0.3864, "step": 3717 }, { "epoch": 0.9193867457962414, "grad_norm": 0.802420863654844, "learning_rate": 4.728489384445055e-06, "loss": 0.4016, "step": 3718 }, { "epoch": 0.9196340257171117, "grad_norm": 0.7601304461080065, "learning_rate": 4.728342054447183e-06, "loss": 0.4446, "step": 3719 }, { "epoch": 0.9198813056379822, "grad_norm": 0.8089882839309193, "learning_rate": 4.728194686783792e-06, "loss": 0.3926, "step": 3720 }, { "epoch": 0.9201285855588526, "grad_norm": 0.7875812610990902, "learning_rate": 4.728047281457374e-06, "loss": 0.3865, "step": 3721 }, { "epoch": 0.920375865479723, "grad_norm": 0.8304273323171134, "learning_rate": 4.7278998384704215e-06, "loss": 0.3886, "step": 3722 }, { "epoch": 0.9206231454005934, "grad_norm": 0.7793749974809633, "learning_rate": 4.727752357825424e-06, "loss": 0.3991, "step": 3723 }, { "epoch": 0.9208704253214639, "grad_norm": 0.7655198670497095, "learning_rate": 4.7276048395248755e-06, "loss": 0.4016, "step": 3724 }, { "epoch": 0.9211177052423343, "grad_norm": 0.7785444053497739, "learning_rate": 4.7274572835712706e-06, "loss": 0.3837, "step": 3725 }, { "epoch": 0.9213649851632048, "grad_norm": 0.794178493200149, "learning_rate": 4.727309689967103e-06, "loss": 0.4141, "step": 3726 }, { "epoch": 0.9216122650840751, "grad_norm": 0.7616756186660109, "learning_rate": 4.727162058714867e-06, "loss": 0.4016, "step": 3727 }, { "epoch": 0.9218595450049456, "grad_norm": 0.827382828719098, "learning_rate": 4.7270143898170575e-06, "loss": 0.3829, "step": 3728 }, { "epoch": 0.922106824925816, "grad_norm": 0.7964010549622547, "learning_rate": 4.7268666832761725e-06, "loss": 0.3913, "step": 3729 }, { "epoch": 0.9223541048466865, "grad_norm": 0.8188786377099656, "learning_rate": 4.726718939094706e-06, "loss": 0.4117, "step": 3730 }, { "epoch": 0.9226013847675568, "grad_norm": 0.7849413309879384, "learning_rate": 4.726571157275157e-06, "loss": 0.4084, "step": 3731 }, { "epoch": 0.9228486646884273, "grad_norm": 0.8072618169447672, "learning_rate": 4.726423337820023e-06, "loss": 0.4058, "step": 3732 }, { "epoch": 0.9230959446092978, "grad_norm": 0.807105005048506, "learning_rate": 4.726275480731803e-06, "loss": 0.3882, "step": 3733 }, { "epoch": 0.9233432245301681, "grad_norm": 0.760185353289157, "learning_rate": 4.726127586012996e-06, "loss": 0.4132, "step": 3734 }, { "epoch": 0.9235905044510386, "grad_norm": 0.7754454391486514, "learning_rate": 4.7259796536661016e-06, "loss": 0.426, "step": 3735 }, { "epoch": 0.923837784371909, "grad_norm": 0.8142785355549925, "learning_rate": 4.725831683693621e-06, "loss": 0.398, "step": 3736 }, { "epoch": 0.9240850642927795, "grad_norm": 0.7681071514435952, "learning_rate": 4.725683676098054e-06, "loss": 0.4131, "step": 3737 }, { "epoch": 0.9243323442136498, "grad_norm": 0.7718039403892808, "learning_rate": 4.725535630881904e-06, "loss": 0.428, "step": 3738 }, { "epoch": 0.9245796241345203, "grad_norm": 0.7862011212454346, "learning_rate": 4.725387548047672e-06, "loss": 0.3932, "step": 3739 }, { "epoch": 0.9248269040553907, "grad_norm": 0.7927179649242823, "learning_rate": 4.725239427597862e-06, "loss": 0.4271, "step": 3740 }, { "epoch": 0.9250741839762612, "grad_norm": 0.7860521643345544, "learning_rate": 4.725091269534976e-06, "loss": 0.3935, "step": 3741 }, { "epoch": 0.9253214638971315, "grad_norm": 0.796238835215501, "learning_rate": 4.724943073861521e-06, "loss": 0.3948, "step": 3742 }, { "epoch": 0.925568743818002, "grad_norm": 0.8406638282243356, "learning_rate": 4.724794840580001e-06, "loss": 0.4006, "step": 3743 }, { "epoch": 0.9258160237388724, "grad_norm": 0.7855740670031731, "learning_rate": 4.724646569692919e-06, "loss": 0.3985, "step": 3744 }, { "epoch": 0.9260633036597429, "grad_norm": 0.7799292341163916, "learning_rate": 4.7244982612027845e-06, "loss": 0.3917, "step": 3745 }, { "epoch": 0.9263105835806132, "grad_norm": 0.7735208824748577, "learning_rate": 4.724349915112103e-06, "loss": 0.4268, "step": 3746 }, { "epoch": 0.9265578635014837, "grad_norm": 0.7861442577149689, "learning_rate": 4.724201531423383e-06, "loss": 0.4117, "step": 3747 }, { "epoch": 0.9268051434223541, "grad_norm": 0.7621689766137228, "learning_rate": 4.72405311013913e-06, "loss": 0.4052, "step": 3748 }, { "epoch": 0.9270524233432246, "grad_norm": 0.7738986753047935, "learning_rate": 4.723904651261855e-06, "loss": 0.4308, "step": 3749 }, { "epoch": 0.9272997032640949, "grad_norm": 0.7464914050650943, "learning_rate": 4.723756154794068e-06, "loss": 0.4125, "step": 3750 }, { "epoch": 0.9275469831849654, "grad_norm": 0.7875071720493665, "learning_rate": 4.7236076207382765e-06, "loss": 0.4171, "step": 3751 }, { "epoch": 0.9277942631058358, "grad_norm": 0.7695819617858256, "learning_rate": 4.7234590490969935e-06, "loss": 0.4251, "step": 3752 }, { "epoch": 0.9280415430267063, "grad_norm": 0.7860254683196816, "learning_rate": 4.723310439872729e-06, "loss": 0.4174, "step": 3753 }, { "epoch": 0.9282888229475766, "grad_norm": 0.799371539348264, "learning_rate": 4.723161793067995e-06, "loss": 0.4048, "step": 3754 }, { "epoch": 0.9285361028684471, "grad_norm": 0.766005328064519, "learning_rate": 4.723013108685306e-06, "loss": 0.3992, "step": 3755 }, { "epoch": 0.9287833827893175, "grad_norm": 0.8184769776996572, "learning_rate": 4.722864386727171e-06, "loss": 0.4098, "step": 3756 }, { "epoch": 0.929030662710188, "grad_norm": 0.767670477728639, "learning_rate": 4.722715627196109e-06, "loss": 0.376, "step": 3757 }, { "epoch": 0.9292779426310583, "grad_norm": 0.7691881581821964, "learning_rate": 4.72256683009463e-06, "loss": 0.4209, "step": 3758 }, { "epoch": 0.9295252225519288, "grad_norm": 0.779597647151623, "learning_rate": 4.722417995425252e-06, "loss": 0.3974, "step": 3759 }, { "epoch": 0.9297725024727992, "grad_norm": 0.7958084407546698, "learning_rate": 4.72226912319049e-06, "loss": 0.3913, "step": 3760 }, { "epoch": 0.9300197823936697, "grad_norm": 0.7578575157603611, "learning_rate": 4.722120213392859e-06, "loss": 0.4177, "step": 3761 }, { "epoch": 0.93026706231454, "grad_norm": 0.7738447839180078, "learning_rate": 4.721971266034878e-06, "loss": 0.4515, "step": 3762 }, { "epoch": 0.9305143422354105, "grad_norm": 0.7907234951401526, "learning_rate": 4.721822281119064e-06, "loss": 0.4325, "step": 3763 }, { "epoch": 0.9307616221562809, "grad_norm": 0.7756955998159536, "learning_rate": 4.721673258647934e-06, "loss": 0.4332, "step": 3764 }, { "epoch": 0.9310089020771514, "grad_norm": 0.782257930528881, "learning_rate": 4.721524198624009e-06, "loss": 0.4034, "step": 3765 }, { "epoch": 0.9312561819980217, "grad_norm": 0.7801610811208322, "learning_rate": 4.721375101049807e-06, "loss": 0.3815, "step": 3766 }, { "epoch": 0.9315034619188922, "grad_norm": 0.8097011083585558, "learning_rate": 4.721225965927848e-06, "loss": 0.4462, "step": 3767 }, { "epoch": 0.9317507418397626, "grad_norm": 0.7898701624455864, "learning_rate": 4.721076793260655e-06, "loss": 0.4028, "step": 3768 }, { "epoch": 0.9319980217606331, "grad_norm": 0.7551579097684481, "learning_rate": 4.720927583050747e-06, "loss": 0.4243, "step": 3769 }, { "epoch": 0.9322453016815034, "grad_norm": 0.7764822151923534, "learning_rate": 4.720778335300647e-06, "loss": 0.4082, "step": 3770 }, { "epoch": 0.9324925816023739, "grad_norm": 0.7862920635864419, "learning_rate": 4.720629050012879e-06, "loss": 0.3879, "step": 3771 }, { "epoch": 0.9327398615232443, "grad_norm": 0.8223007795021665, "learning_rate": 4.720479727189964e-06, "loss": 0.4085, "step": 3772 }, { "epoch": 0.9329871414441148, "grad_norm": 0.7707108563497688, "learning_rate": 4.720330366834427e-06, "loss": 0.4027, "step": 3773 }, { "epoch": 0.9332344213649851, "grad_norm": 0.8109320443845112, "learning_rate": 4.7201809689487935e-06, "loss": 0.3899, "step": 3774 }, { "epoch": 0.9334817012858556, "grad_norm": 0.7964599002381397, "learning_rate": 4.720031533535589e-06, "loss": 0.4535, "step": 3775 }, { "epoch": 0.933728981206726, "grad_norm": 0.7915463325437578, "learning_rate": 4.719882060597336e-06, "loss": 0.4162, "step": 3776 }, { "epoch": 0.9339762611275965, "grad_norm": 0.7698383537920458, "learning_rate": 4.719732550136565e-06, "loss": 0.4373, "step": 3777 }, { "epoch": 0.9342235410484668, "grad_norm": 0.7705957905234673, "learning_rate": 4.719583002155801e-06, "loss": 0.4066, "step": 3778 }, { "epoch": 0.9344708209693373, "grad_norm": 0.7787562181312311, "learning_rate": 4.719433416657573e-06, "loss": 0.4164, "step": 3779 }, { "epoch": 0.9347181008902077, "grad_norm": 0.7859748027749637, "learning_rate": 4.719283793644409e-06, "loss": 0.4083, "step": 3780 }, { "epoch": 0.9349653808110782, "grad_norm": 0.8513132306964396, "learning_rate": 4.719134133118838e-06, "loss": 0.3746, "step": 3781 }, { "epoch": 0.9352126607319485, "grad_norm": 0.8106609584332799, "learning_rate": 4.718984435083389e-06, "loss": 0.411, "step": 3782 }, { "epoch": 0.935459940652819, "grad_norm": 0.7840411155887811, "learning_rate": 4.718834699540593e-06, "loss": 0.3892, "step": 3783 }, { "epoch": 0.9357072205736894, "grad_norm": 0.7987354153840486, "learning_rate": 4.718684926492982e-06, "loss": 0.4125, "step": 3784 }, { "epoch": 0.9359545004945599, "grad_norm": 0.7769870805002546, "learning_rate": 4.718535115943085e-06, "loss": 0.4186, "step": 3785 }, { "epoch": 0.9362017804154302, "grad_norm": 0.7953525655481078, "learning_rate": 4.718385267893437e-06, "loss": 0.4571, "step": 3786 }, { "epoch": 0.9364490603363007, "grad_norm": 0.8257447321469814, "learning_rate": 4.718235382346569e-06, "loss": 0.4258, "step": 3787 }, { "epoch": 0.9366963402571711, "grad_norm": 0.7979742103452806, "learning_rate": 4.718085459305015e-06, "loss": 0.4325, "step": 3788 }, { "epoch": 0.9369436201780416, "grad_norm": 0.7755001210635785, "learning_rate": 4.717935498771311e-06, "loss": 0.3987, "step": 3789 }, { "epoch": 0.9371909000989119, "grad_norm": 0.7855397941016596, "learning_rate": 4.717785500747988e-06, "loss": 0.4179, "step": 3790 }, { "epoch": 0.9374381800197824, "grad_norm": 0.7659770262453232, "learning_rate": 4.717635465237584e-06, "loss": 0.3917, "step": 3791 }, { "epoch": 0.9376854599406528, "grad_norm": 0.7815948203349444, "learning_rate": 4.717485392242636e-06, "loss": 0.4193, "step": 3792 }, { "epoch": 0.9379327398615233, "grad_norm": 0.7930513949333287, "learning_rate": 4.717335281765677e-06, "loss": 0.3904, "step": 3793 }, { "epoch": 0.9381800197823936, "grad_norm": 0.8129070197480318, "learning_rate": 4.717185133809248e-06, "loss": 0.4164, "step": 3794 }, { "epoch": 0.9384272997032641, "grad_norm": 0.7668091089072321, "learning_rate": 4.7170349483758845e-06, "loss": 0.4051, "step": 3795 }, { "epoch": 0.9386745796241345, "grad_norm": 0.7910228910052575, "learning_rate": 4.716884725468127e-06, "loss": 0.4083, "step": 3796 }, { "epoch": 0.938921859545005, "grad_norm": 0.7732700837398885, "learning_rate": 4.716734465088513e-06, "loss": 0.4034, "step": 3797 }, { "epoch": 0.9391691394658753, "grad_norm": 0.774749652475244, "learning_rate": 4.716584167239584e-06, "loss": 0.4185, "step": 3798 }, { "epoch": 0.9394164193867458, "grad_norm": 0.8554633807185527, "learning_rate": 4.716433831923879e-06, "loss": 0.391, "step": 3799 }, { "epoch": 0.9396636993076162, "grad_norm": 0.8449891799779463, "learning_rate": 4.716283459143939e-06, "loss": 0.3926, "step": 3800 }, { "epoch": 0.9399109792284867, "grad_norm": 0.8158798360289666, "learning_rate": 4.716133048902307e-06, "loss": 0.4356, "step": 3801 }, { "epoch": 0.940158259149357, "grad_norm": 0.7615640183067676, "learning_rate": 4.715982601201525e-06, "loss": 0.4172, "step": 3802 }, { "epoch": 0.9404055390702275, "grad_norm": 0.7871218583450545, "learning_rate": 4.715832116044135e-06, "loss": 0.3677, "step": 3803 }, { "epoch": 0.9406528189910979, "grad_norm": 0.7582932974852993, "learning_rate": 4.715681593432683e-06, "loss": 0.4602, "step": 3804 }, { "epoch": 0.9409000989119684, "grad_norm": 0.7746130525975381, "learning_rate": 4.71553103336971e-06, "loss": 0.3856, "step": 3805 }, { "epoch": 0.9411473788328387, "grad_norm": 0.7636577412053083, "learning_rate": 4.715380435857763e-06, "loss": 0.4131, "step": 3806 }, { "epoch": 0.9413946587537092, "grad_norm": 0.7811739159391169, "learning_rate": 4.715229800899388e-06, "loss": 0.3884, "step": 3807 }, { "epoch": 0.9416419386745796, "grad_norm": 0.8188404252385233, "learning_rate": 4.715079128497129e-06, "loss": 0.3946, "step": 3808 }, { "epoch": 0.94188921859545, "grad_norm": 0.7875551448948243, "learning_rate": 4.714928418653535e-06, "loss": 0.3932, "step": 3809 }, { "epoch": 0.9421364985163204, "grad_norm": 0.8046139238238394, "learning_rate": 4.714777671371152e-06, "loss": 0.4221, "step": 3810 }, { "epoch": 0.9423837784371909, "grad_norm": 0.8110204164182576, "learning_rate": 4.71462688665253e-06, "loss": 0.4177, "step": 3811 }, { "epoch": 0.9426310583580614, "grad_norm": 0.7878491388246204, "learning_rate": 4.714476064500215e-06, "loss": 0.39, "step": 3812 }, { "epoch": 0.9428783382789317, "grad_norm": 0.7495863308390222, "learning_rate": 4.714325204916758e-06, "loss": 0.421, "step": 3813 }, { "epoch": 0.9431256181998022, "grad_norm": 0.77849672486737, "learning_rate": 4.714174307904709e-06, "loss": 0.4247, "step": 3814 }, { "epoch": 0.9433728981206726, "grad_norm": 0.802560196408335, "learning_rate": 4.714023373466618e-06, "loss": 0.408, "step": 3815 }, { "epoch": 0.9436201780415431, "grad_norm": 0.7786351448112381, "learning_rate": 4.713872401605036e-06, "loss": 0.3885, "step": 3816 }, { "epoch": 0.9438674579624134, "grad_norm": 0.8164733266440941, "learning_rate": 4.713721392322515e-06, "loss": 0.4153, "step": 3817 }, { "epoch": 0.9441147378832839, "grad_norm": 0.7955726912893689, "learning_rate": 4.713570345621609e-06, "loss": 0.4145, "step": 3818 }, { "epoch": 0.9443620178041543, "grad_norm": 0.7689706793710702, "learning_rate": 4.71341926150487e-06, "loss": 0.3869, "step": 3819 }, { "epoch": 0.9446092977250248, "grad_norm": 0.804572440311293, "learning_rate": 4.713268139974851e-06, "loss": 0.4042, "step": 3820 }, { "epoch": 0.9448565776458951, "grad_norm": 0.8035662350608253, "learning_rate": 4.713116981034107e-06, "loss": 0.4172, "step": 3821 }, { "epoch": 0.9451038575667656, "grad_norm": 0.7746997086586535, "learning_rate": 4.712965784685194e-06, "loss": 0.3954, "step": 3822 }, { "epoch": 0.945351137487636, "grad_norm": 0.7831748839896366, "learning_rate": 4.712814550930667e-06, "loss": 0.4129, "step": 3823 }, { "epoch": 0.9455984174085065, "grad_norm": 0.7809792520188084, "learning_rate": 4.712663279773081e-06, "loss": 0.4347, "step": 3824 }, { "epoch": 0.9458456973293768, "grad_norm": 0.7673768265462751, "learning_rate": 4.7125119712149944e-06, "loss": 0.4052, "step": 3825 }, { "epoch": 0.9460929772502473, "grad_norm": 0.7798452737328526, "learning_rate": 4.712360625258965e-06, "loss": 0.4024, "step": 3826 }, { "epoch": 0.9463402571711177, "grad_norm": 0.7778031688861111, "learning_rate": 4.7122092419075496e-06, "loss": 0.3775, "step": 3827 }, { "epoch": 0.9465875370919882, "grad_norm": 0.7806188818311592, "learning_rate": 4.712057821163308e-06, "loss": 0.4155, "step": 3828 }, { "epoch": 0.9468348170128585, "grad_norm": 0.7931029006594754, "learning_rate": 4.7119063630288e-06, "loss": 0.4178, "step": 3829 }, { "epoch": 0.947082096933729, "grad_norm": 0.7694503362483404, "learning_rate": 4.711754867506585e-06, "loss": 0.4008, "step": 3830 }, { "epoch": 0.9473293768545994, "grad_norm": 0.8129310646415457, "learning_rate": 4.711603334599224e-06, "loss": 0.3733, "step": 3831 }, { "epoch": 0.9475766567754699, "grad_norm": 0.7761005503274951, "learning_rate": 4.711451764309278e-06, "loss": 0.4384, "step": 3832 }, { "epoch": 0.9478239366963402, "grad_norm": 0.7912406175514075, "learning_rate": 4.711300156639309e-06, "loss": 0.403, "step": 3833 }, { "epoch": 0.9480712166172107, "grad_norm": 0.7841375432382586, "learning_rate": 4.7111485115918795e-06, "loss": 0.4205, "step": 3834 }, { "epoch": 0.9483184965380811, "grad_norm": 0.7876792036708451, "learning_rate": 4.710996829169554e-06, "loss": 0.3802, "step": 3835 }, { "epoch": 0.9485657764589516, "grad_norm": 0.7883706920212202, "learning_rate": 4.710845109374895e-06, "loss": 0.3764, "step": 3836 }, { "epoch": 0.9488130563798219, "grad_norm": 0.7713104412410063, "learning_rate": 4.710693352210468e-06, "loss": 0.4153, "step": 3837 }, { "epoch": 0.9490603363006924, "grad_norm": 0.774838074480863, "learning_rate": 4.7105415576788375e-06, "loss": 0.4099, "step": 3838 }, { "epoch": 0.9493076162215628, "grad_norm": 0.7482537027491292, "learning_rate": 4.710389725782568e-06, "loss": 0.4218, "step": 3839 }, { "epoch": 0.9495548961424333, "grad_norm": 0.7904541367962616, "learning_rate": 4.710237856524229e-06, "loss": 0.4037, "step": 3840 }, { "epoch": 0.9498021760633036, "grad_norm": 0.7702306085739898, "learning_rate": 4.710085949906385e-06, "loss": 0.4299, "step": 3841 }, { "epoch": 0.9500494559841741, "grad_norm": 0.7976548908474161, "learning_rate": 4.709934005931605e-06, "loss": 0.3735, "step": 3842 }, { "epoch": 0.9502967359050445, "grad_norm": 0.8362496119917581, "learning_rate": 4.709782024602456e-06, "loss": 0.4022, "step": 3843 }, { "epoch": 0.950544015825915, "grad_norm": 0.7829890013758223, "learning_rate": 4.709630005921508e-06, "loss": 0.4207, "step": 3844 }, { "epoch": 0.9507912957467853, "grad_norm": 0.7769273804916441, "learning_rate": 4.709477949891331e-06, "loss": 0.4209, "step": 3845 }, { "epoch": 0.9510385756676558, "grad_norm": 0.7589464323885547, "learning_rate": 4.709325856514494e-06, "loss": 0.4101, "step": 3846 }, { "epoch": 0.9512858555885262, "grad_norm": 0.8020651971148175, "learning_rate": 4.709173725793567e-06, "loss": 0.4404, "step": 3847 }, { "epoch": 0.9515331355093967, "grad_norm": 0.7785190052402172, "learning_rate": 4.709021557731125e-06, "loss": 0.3814, "step": 3848 }, { "epoch": 0.951780415430267, "grad_norm": 0.7755037590030682, "learning_rate": 4.708869352329736e-06, "loss": 0.3985, "step": 3849 }, { "epoch": 0.9520276953511375, "grad_norm": 0.7900519608011191, "learning_rate": 4.708717109591976e-06, "loss": 0.4362, "step": 3850 }, { "epoch": 0.9522749752720079, "grad_norm": 0.8073740775342424, "learning_rate": 4.708564829520416e-06, "loss": 0.4269, "step": 3851 }, { "epoch": 0.9525222551928784, "grad_norm": 0.8233059438001666, "learning_rate": 4.708412512117631e-06, "loss": 0.4044, "step": 3852 }, { "epoch": 0.9527695351137487, "grad_norm": 0.8118457866395735, "learning_rate": 4.708260157386196e-06, "loss": 0.4115, "step": 3853 }, { "epoch": 0.9530168150346192, "grad_norm": 0.7583537392743612, "learning_rate": 4.708107765328685e-06, "loss": 0.416, "step": 3854 }, { "epoch": 0.9532640949554896, "grad_norm": 0.8297248524334135, "learning_rate": 4.707955335947675e-06, "loss": 0.3898, "step": 3855 }, { "epoch": 0.9535113748763601, "grad_norm": 0.7870425199589566, "learning_rate": 4.707802869245742e-06, "loss": 0.4179, "step": 3856 }, { "epoch": 0.9537586547972304, "grad_norm": 0.8316754519208652, "learning_rate": 4.707650365225463e-06, "loss": 0.4201, "step": 3857 }, { "epoch": 0.9540059347181009, "grad_norm": 0.7792225618981867, "learning_rate": 4.7074978238894164e-06, "loss": 0.41, "step": 3858 }, { "epoch": 0.9542532146389713, "grad_norm": 0.7799396685116958, "learning_rate": 4.70734524524018e-06, "loss": 0.4066, "step": 3859 }, { "epoch": 0.9545004945598418, "grad_norm": 0.8053702110699514, "learning_rate": 4.707192629280334e-06, "loss": 0.3941, "step": 3860 }, { "epoch": 0.9547477744807121, "grad_norm": 0.7618664972873861, "learning_rate": 4.707039976012457e-06, "loss": 0.4125, "step": 3861 }, { "epoch": 0.9549950544015826, "grad_norm": 0.7931884558229111, "learning_rate": 4.706887285439128e-06, "loss": 0.3761, "step": 3862 }, { "epoch": 0.955242334322453, "grad_norm": 0.7728504886433268, "learning_rate": 4.70673455756293e-06, "loss": 0.3919, "step": 3863 }, { "epoch": 0.9554896142433235, "grad_norm": 0.7955438947171283, "learning_rate": 4.7065817923864435e-06, "loss": 0.3957, "step": 3864 }, { "epoch": 0.9557368941641938, "grad_norm": 0.7809682843371737, "learning_rate": 4.7064289899122515e-06, "loss": 0.3659, "step": 3865 }, { "epoch": 0.9559841740850643, "grad_norm": 0.801135077053223, "learning_rate": 4.706276150142936e-06, "loss": 0.4134, "step": 3866 }, { "epoch": 0.9562314540059347, "grad_norm": 0.769903315991128, "learning_rate": 4.706123273081081e-06, "loss": 0.4145, "step": 3867 }, { "epoch": 0.9564787339268052, "grad_norm": 0.7714718747779309, "learning_rate": 4.7059703587292706e-06, "loss": 0.4187, "step": 3868 }, { "epoch": 0.9567260138476755, "grad_norm": 0.7771131713870824, "learning_rate": 4.705817407090089e-06, "loss": 0.4276, "step": 3869 }, { "epoch": 0.956973293768546, "grad_norm": 0.7812494248399661, "learning_rate": 4.705664418166122e-06, "loss": 0.4195, "step": 3870 }, { "epoch": 0.9572205736894164, "grad_norm": 0.7870184851971324, "learning_rate": 4.705511391959955e-06, "loss": 0.3779, "step": 3871 }, { "epoch": 0.9574678536102869, "grad_norm": 0.7744463770512644, "learning_rate": 4.7053583284741745e-06, "loss": 0.4138, "step": 3872 }, { "epoch": 0.9577151335311572, "grad_norm": 0.7958653995952897, "learning_rate": 4.7052052277113695e-06, "loss": 0.3956, "step": 3873 }, { "epoch": 0.9579624134520277, "grad_norm": 0.7816603192193171, "learning_rate": 4.705052089674125e-06, "loss": 0.4006, "step": 3874 }, { "epoch": 0.9582096933728981, "grad_norm": 0.7918038182911267, "learning_rate": 4.704898914365032e-06, "loss": 0.4128, "step": 3875 }, { "epoch": 0.9584569732937686, "grad_norm": 0.8150385320639588, "learning_rate": 4.704745701786678e-06, "loss": 0.4069, "step": 3876 }, { "epoch": 0.9587042532146389, "grad_norm": 0.7920833803794489, "learning_rate": 4.704592451941654e-06, "loss": 0.4187, "step": 3877 }, { "epoch": 0.9589515331355094, "grad_norm": 0.7923085557647959, "learning_rate": 4.704439164832549e-06, "loss": 0.4362, "step": 3878 }, { "epoch": 0.9591988130563798, "grad_norm": 0.7660991000196078, "learning_rate": 4.704285840461955e-06, "loss": 0.4174, "step": 3879 }, { "epoch": 0.9594460929772503, "grad_norm": 0.8128783497936076, "learning_rate": 4.704132478832464e-06, "loss": 0.3888, "step": 3880 }, { "epoch": 0.9596933728981206, "grad_norm": 0.7698482384519877, "learning_rate": 4.703979079946667e-06, "loss": 0.4169, "step": 3881 }, { "epoch": 0.9599406528189911, "grad_norm": 0.7892348762474982, "learning_rate": 4.703825643807157e-06, "loss": 0.411, "step": 3882 }, { "epoch": 0.9601879327398615, "grad_norm": 0.7861064933964294, "learning_rate": 4.703672170416529e-06, "loss": 0.4015, "step": 3883 }, { "epoch": 0.960435212660732, "grad_norm": 0.8174752026252636, "learning_rate": 4.703518659777376e-06, "loss": 0.3917, "step": 3884 }, { "epoch": 0.9606824925816023, "grad_norm": 0.7732844681830707, "learning_rate": 4.703365111892293e-06, "loss": 0.3775, "step": 3885 }, { "epoch": 0.9609297725024728, "grad_norm": 0.805848268421217, "learning_rate": 4.703211526763875e-06, "loss": 0.3776, "step": 3886 }, { "epoch": 0.9611770524233432, "grad_norm": 0.7655914125589361, "learning_rate": 4.703057904394719e-06, "loss": 0.3853, "step": 3887 }, { "epoch": 0.9614243323442137, "grad_norm": 0.7744602016571512, "learning_rate": 4.7029042447874205e-06, "loss": 0.4206, "step": 3888 }, { "epoch": 0.9616716122650841, "grad_norm": 0.7876431280098052, "learning_rate": 4.702750547944577e-06, "loss": 0.434, "step": 3889 }, { "epoch": 0.9619188921859545, "grad_norm": 0.7701420784704784, "learning_rate": 4.702596813868787e-06, "loss": 0.4142, "step": 3890 }, { "epoch": 0.962166172106825, "grad_norm": 0.7876056794108254, "learning_rate": 4.70244304256265e-06, "loss": 0.392, "step": 3891 }, { "epoch": 0.9624134520276953, "grad_norm": 0.7944473649588513, "learning_rate": 4.702289234028763e-06, "loss": 0.411, "step": 3892 }, { "epoch": 0.9626607319485658, "grad_norm": 0.7746154516350491, "learning_rate": 4.702135388269727e-06, "loss": 0.3754, "step": 3893 }, { "epoch": 0.9629080118694362, "grad_norm": 0.7747445417607648, "learning_rate": 4.701981505288142e-06, "loss": 0.4035, "step": 3894 }, { "epoch": 0.9631552917903067, "grad_norm": 0.7628514933888112, "learning_rate": 4.70182758508661e-06, "loss": 0.4276, "step": 3895 }, { "epoch": 0.963402571711177, "grad_norm": 0.7697009449996881, "learning_rate": 4.701673627667732e-06, "loss": 0.4199, "step": 3896 }, { "epoch": 0.9636498516320475, "grad_norm": 0.8210442405395625, "learning_rate": 4.70151963303411e-06, "loss": 0.429, "step": 3897 }, { "epoch": 0.9638971315529179, "grad_norm": 0.7867985724002428, "learning_rate": 4.7013656011883476e-06, "loss": 0.433, "step": 3898 }, { "epoch": 0.9641444114737884, "grad_norm": 0.7943173211816381, "learning_rate": 4.7012115321330484e-06, "loss": 0.4057, "step": 3899 }, { "epoch": 0.9643916913946587, "grad_norm": 0.8182944927299638, "learning_rate": 4.701057425870816e-06, "loss": 0.3819, "step": 3900 }, { "epoch": 0.9646389713155292, "grad_norm": 0.7479471012595492, "learning_rate": 4.700903282404256e-06, "loss": 0.4035, "step": 3901 }, { "epoch": 0.9648862512363996, "grad_norm": 0.7812074899937719, "learning_rate": 4.700749101735973e-06, "loss": 0.4126, "step": 3902 }, { "epoch": 0.9651335311572701, "grad_norm": 0.8278036061767997, "learning_rate": 4.7005948838685735e-06, "loss": 0.3914, "step": 3903 }, { "epoch": 0.9653808110781404, "grad_norm": 0.8387896149443221, "learning_rate": 4.700440628804665e-06, "loss": 0.3896, "step": 3904 }, { "epoch": 0.9656280909990109, "grad_norm": 0.7947700311503184, "learning_rate": 4.700286336546854e-06, "loss": 0.4004, "step": 3905 }, { "epoch": 0.9658753709198813, "grad_norm": 0.7794144464386743, "learning_rate": 4.700132007097748e-06, "loss": 0.3994, "step": 3906 }, { "epoch": 0.9661226508407518, "grad_norm": 0.8099820963076695, "learning_rate": 4.699977640459958e-06, "loss": 0.4252, "step": 3907 }, { "epoch": 0.9663699307616221, "grad_norm": 0.7791668503349546, "learning_rate": 4.699823236636091e-06, "loss": 0.4112, "step": 3908 }, { "epoch": 0.9666172106824926, "grad_norm": 0.7799461943658222, "learning_rate": 4.6996687956287564e-06, "loss": 0.3938, "step": 3909 }, { "epoch": 0.966864490603363, "grad_norm": 0.7954451636878649, "learning_rate": 4.699514317440567e-06, "loss": 0.4116, "step": 3910 }, { "epoch": 0.9671117705242335, "grad_norm": 0.7551128256490328, "learning_rate": 4.699359802074131e-06, "loss": 0.4095, "step": 3911 }, { "epoch": 0.9673590504451038, "grad_norm": 0.7816752155120708, "learning_rate": 4.6992052495320635e-06, "loss": 0.3919, "step": 3912 }, { "epoch": 0.9676063303659743, "grad_norm": 0.7748996073968227, "learning_rate": 4.699050659816975e-06, "loss": 0.4034, "step": 3913 }, { "epoch": 0.9678536102868447, "grad_norm": 0.7713815417049966, "learning_rate": 4.698896032931478e-06, "loss": 0.3945, "step": 3914 }, { "epoch": 0.9681008902077152, "grad_norm": 0.7706764167647603, "learning_rate": 4.698741368878187e-06, "loss": 0.4321, "step": 3915 }, { "epoch": 0.9683481701285855, "grad_norm": 0.8023547706545407, "learning_rate": 4.698586667659717e-06, "loss": 0.4382, "step": 3916 }, { "epoch": 0.968595450049456, "grad_norm": 0.8233471376751998, "learning_rate": 4.698431929278681e-06, "loss": 0.397, "step": 3917 }, { "epoch": 0.9688427299703264, "grad_norm": 0.7778633351955079, "learning_rate": 4.698277153737697e-06, "loss": 0.4199, "step": 3918 }, { "epoch": 0.9690900098911969, "grad_norm": 0.7703112244491745, "learning_rate": 4.698122341039379e-06, "loss": 0.4158, "step": 3919 }, { "epoch": 0.9693372898120672, "grad_norm": 0.7727244260981386, "learning_rate": 4.697967491186345e-06, "loss": 0.4101, "step": 3920 }, { "epoch": 0.9695845697329377, "grad_norm": 0.8320384841071571, "learning_rate": 4.697812604181211e-06, "loss": 0.395, "step": 3921 }, { "epoch": 0.9698318496538081, "grad_norm": 0.7941589609812207, "learning_rate": 4.697657680026597e-06, "loss": 0.3636, "step": 3922 }, { "epoch": 0.9700791295746786, "grad_norm": 0.8215449942248417, "learning_rate": 4.69750271872512e-06, "loss": 0.383, "step": 3923 }, { "epoch": 0.9703264094955489, "grad_norm": 0.7842158778990416, "learning_rate": 4.697347720279401e-06, "loss": 0.3928, "step": 3924 }, { "epoch": 0.9705736894164194, "grad_norm": 0.7975302553095485, "learning_rate": 4.697192684692058e-06, "loss": 0.4068, "step": 3925 }, { "epoch": 0.9708209693372898, "grad_norm": 0.7778323064622887, "learning_rate": 4.697037611965713e-06, "loss": 0.4463, "step": 3926 }, { "epoch": 0.9710682492581603, "grad_norm": 0.7775846948755031, "learning_rate": 4.696882502102987e-06, "loss": 0.4284, "step": 3927 }, { "epoch": 0.9713155291790306, "grad_norm": 0.7746054597343937, "learning_rate": 4.6967273551065005e-06, "loss": 0.395, "step": 3928 }, { "epoch": 0.9715628090999011, "grad_norm": 0.8127997502657887, "learning_rate": 4.696572170978877e-06, "loss": 0.3914, "step": 3929 }, { "epoch": 0.9718100890207715, "grad_norm": 0.777611998632522, "learning_rate": 4.69641694972274e-06, "loss": 0.4205, "step": 3930 }, { "epoch": 0.972057368941642, "grad_norm": 0.7780004765147993, "learning_rate": 4.6962616913407125e-06, "loss": 0.433, "step": 3931 }, { "epoch": 0.9723046488625123, "grad_norm": 0.8241222014341494, "learning_rate": 4.6961063958354195e-06, "loss": 0.3979, "step": 3932 }, { "epoch": 0.9725519287833828, "grad_norm": 0.8089490443001208, "learning_rate": 4.695951063209485e-06, "loss": 0.3662, "step": 3933 }, { "epoch": 0.9727992087042532, "grad_norm": 0.7923141861732071, "learning_rate": 4.695795693465536e-06, "loss": 0.3904, "step": 3934 }, { "epoch": 0.9730464886251237, "grad_norm": 0.829573517979138, "learning_rate": 4.695640286606196e-06, "loss": 0.413, "step": 3935 }, { "epoch": 0.973293768545994, "grad_norm": 0.839797270153677, "learning_rate": 4.695484842634094e-06, "loss": 0.3953, "step": 3936 }, { "epoch": 0.9735410484668645, "grad_norm": 0.7857025307651592, "learning_rate": 4.695329361551858e-06, "loss": 0.3986, "step": 3937 }, { "epoch": 0.9737883283877349, "grad_norm": 0.7638491096993291, "learning_rate": 4.695173843362115e-06, "loss": 0.3932, "step": 3938 }, { "epoch": 0.9740356083086054, "grad_norm": 0.7812683265166083, "learning_rate": 4.6950182880674935e-06, "loss": 0.4217, "step": 3939 }, { "epoch": 0.9742828882294757, "grad_norm": 0.7603154077113041, "learning_rate": 4.694862695670623e-06, "loss": 0.4107, "step": 3940 }, { "epoch": 0.9745301681503462, "grad_norm": 0.7539347980917882, "learning_rate": 4.694707066174133e-06, "loss": 0.4051, "step": 3941 }, { "epoch": 0.9747774480712166, "grad_norm": 0.7863706446058237, "learning_rate": 4.694551399580656e-06, "loss": 0.4218, "step": 3942 }, { "epoch": 0.9750247279920871, "grad_norm": 0.7665569429715551, "learning_rate": 4.6943956958928215e-06, "loss": 0.4019, "step": 3943 }, { "epoch": 0.9752720079129574, "grad_norm": 0.799832166832461, "learning_rate": 4.694239955113262e-06, "loss": 0.3906, "step": 3944 }, { "epoch": 0.9755192878338279, "grad_norm": 0.8274704820268617, "learning_rate": 4.69408417724461e-06, "loss": 0.3743, "step": 3945 }, { "epoch": 0.9757665677546983, "grad_norm": 0.7752699483338866, "learning_rate": 4.6939283622894975e-06, "loss": 0.4137, "step": 3946 }, { "epoch": 0.9760138476755688, "grad_norm": 0.7891688093341738, "learning_rate": 4.693772510250559e-06, "loss": 0.4211, "step": 3947 }, { "epoch": 0.9762611275964391, "grad_norm": 0.7871416681868997, "learning_rate": 4.69361662113043e-06, "loss": 0.4214, "step": 3948 }, { "epoch": 0.9765084075173096, "grad_norm": 0.7961084106239589, "learning_rate": 4.693460694931744e-06, "loss": 0.3997, "step": 3949 }, { "epoch": 0.97675568743818, "grad_norm": 0.7892962345788587, "learning_rate": 4.693304731657138e-06, "loss": 0.405, "step": 3950 }, { "epoch": 0.9770029673590505, "grad_norm": 0.7894503245971667, "learning_rate": 4.6931487313092465e-06, "loss": 0.4026, "step": 3951 }, { "epoch": 0.9772502472799208, "grad_norm": 0.7703856475017746, "learning_rate": 4.692992693890706e-06, "loss": 0.4249, "step": 3952 }, { "epoch": 0.9774975272007913, "grad_norm": 0.8133799239732469, "learning_rate": 4.692836619404156e-06, "loss": 0.4114, "step": 3953 }, { "epoch": 0.9777448071216617, "grad_norm": 0.8152538089456904, "learning_rate": 4.692680507852235e-06, "loss": 0.3853, "step": 3954 }, { "epoch": 0.9779920870425322, "grad_norm": 0.8184149190064318, "learning_rate": 4.692524359237579e-06, "loss": 0.3976, "step": 3955 }, { "epoch": 0.9782393669634025, "grad_norm": 0.8254887145013703, "learning_rate": 4.69236817356283e-06, "loss": 0.3947, "step": 3956 }, { "epoch": 0.978486646884273, "grad_norm": 0.8164624279733338, "learning_rate": 4.692211950830626e-06, "loss": 0.3985, "step": 3957 }, { "epoch": 0.9787339268051434, "grad_norm": 0.7978082335627064, "learning_rate": 4.6920556910436085e-06, "loss": 0.4057, "step": 3958 }, { "epoch": 0.9789812067260139, "grad_norm": 0.7763542013199648, "learning_rate": 4.69189939420442e-06, "loss": 0.4137, "step": 3959 }, { "epoch": 0.9792284866468842, "grad_norm": 0.7984236860463884, "learning_rate": 4.6917430603157e-06, "loss": 0.41, "step": 3960 }, { "epoch": 0.9794757665677547, "grad_norm": 0.7842647035839468, "learning_rate": 4.691586689380092e-06, "loss": 0.3884, "step": 3961 }, { "epoch": 0.9797230464886251, "grad_norm": 0.7907516321710724, "learning_rate": 4.69143028140024e-06, "loss": 0.3949, "step": 3962 }, { "epoch": 0.9799703264094956, "grad_norm": 0.7992643147336413, "learning_rate": 4.691273836378787e-06, "loss": 0.4191, "step": 3963 }, { "epoch": 0.9802176063303659, "grad_norm": 0.796090473738687, "learning_rate": 4.691117354318377e-06, "loss": 0.3897, "step": 3964 }, { "epoch": 0.9804648862512364, "grad_norm": 0.7815628862657795, "learning_rate": 4.690960835221655e-06, "loss": 0.3789, "step": 3965 }, { "epoch": 0.9807121661721068, "grad_norm": 0.7507676442911566, "learning_rate": 4.690804279091268e-06, "loss": 0.3793, "step": 3966 }, { "epoch": 0.9809594460929772, "grad_norm": 0.7732006075013503, "learning_rate": 4.690647685929861e-06, "loss": 0.4298, "step": 3967 }, { "epoch": 0.9812067260138477, "grad_norm": 0.7922497219388176, "learning_rate": 4.69049105574008e-06, "loss": 0.4311, "step": 3968 }, { "epoch": 0.9814540059347181, "grad_norm": 0.762528197693143, "learning_rate": 4.690334388524576e-06, "loss": 0.4116, "step": 3969 }, { "epoch": 0.9817012858555886, "grad_norm": 0.7784231738630654, "learning_rate": 4.6901776842859926e-06, "loss": 0.4128, "step": 3970 }, { "epoch": 0.981948565776459, "grad_norm": 0.8111640731341906, "learning_rate": 4.690020943026982e-06, "loss": 0.3863, "step": 3971 }, { "epoch": 0.9821958456973294, "grad_norm": 0.783142213384446, "learning_rate": 4.689864164750192e-06, "loss": 0.3814, "step": 3972 }, { "epoch": 0.9824431256181998, "grad_norm": 0.7916818596859191, "learning_rate": 4.689707349458273e-06, "loss": 0.4082, "step": 3973 }, { "epoch": 0.9826904055390703, "grad_norm": 0.7979637978189713, "learning_rate": 4.689550497153876e-06, "loss": 0.4196, "step": 3974 }, { "epoch": 0.9829376854599406, "grad_norm": 0.7940260223007418, "learning_rate": 4.689393607839652e-06, "loss": 0.4083, "step": 3975 }, { "epoch": 0.9831849653808111, "grad_norm": 0.7639072225409764, "learning_rate": 4.6892366815182515e-06, "loss": 0.3994, "step": 3976 }, { "epoch": 0.9834322453016815, "grad_norm": 0.7817053106289357, "learning_rate": 4.689079718192329e-06, "loss": 0.3889, "step": 3977 }, { "epoch": 0.983679525222552, "grad_norm": 0.7598430403880667, "learning_rate": 4.688922717864537e-06, "loss": 0.4018, "step": 3978 }, { "epoch": 0.9839268051434223, "grad_norm": 0.8086789487803981, "learning_rate": 4.6887656805375296e-06, "loss": 0.3999, "step": 3979 }, { "epoch": 0.9841740850642928, "grad_norm": 0.7725826354053251, "learning_rate": 4.68860860621396e-06, "loss": 0.3951, "step": 3980 }, { "epoch": 0.9844213649851632, "grad_norm": 0.7875872055007198, "learning_rate": 4.688451494896485e-06, "loss": 0.4003, "step": 3981 }, { "epoch": 0.9846686449060337, "grad_norm": 0.7822097745681463, "learning_rate": 4.688294346587759e-06, "loss": 0.417, "step": 3982 }, { "epoch": 0.984915924826904, "grad_norm": 0.7809298685569199, "learning_rate": 4.688137161290438e-06, "loss": 0.3968, "step": 3983 }, { "epoch": 0.9851632047477745, "grad_norm": 0.7923832949438695, "learning_rate": 4.687979939007179e-06, "loss": 0.4056, "step": 3984 }, { "epoch": 0.9854104846686449, "grad_norm": 0.7708418159185525, "learning_rate": 4.687822679740641e-06, "loss": 0.4172, "step": 3985 }, { "epoch": 0.9856577645895154, "grad_norm": 0.7589092584911677, "learning_rate": 4.68766538349348e-06, "loss": 0.398, "step": 3986 }, { "epoch": 0.9859050445103857, "grad_norm": 0.7732339144056797, "learning_rate": 4.687508050268357e-06, "loss": 0.4171, "step": 3987 }, { "epoch": 0.9861523244312562, "grad_norm": 0.7643439359678004, "learning_rate": 4.6873506800679295e-06, "loss": 0.4489, "step": 3988 }, { "epoch": 0.9863996043521266, "grad_norm": 0.755783549108914, "learning_rate": 4.687193272894859e-06, "loss": 0.4125, "step": 3989 }, { "epoch": 0.9866468842729971, "grad_norm": 0.7987812317499954, "learning_rate": 4.6870358287518046e-06, "loss": 0.3959, "step": 3990 }, { "epoch": 0.9868941641938674, "grad_norm": 0.7920689363171113, "learning_rate": 4.686878347641428e-06, "loss": 0.4421, "step": 3991 }, { "epoch": 0.9871414441147379, "grad_norm": 0.7521273189464834, "learning_rate": 4.686720829566393e-06, "loss": 0.419, "step": 3992 }, { "epoch": 0.9873887240356083, "grad_norm": 0.7824768932828919, "learning_rate": 4.686563274529359e-06, "loss": 0.4081, "step": 3993 }, { "epoch": 0.9876360039564788, "grad_norm": 0.8262243329490202, "learning_rate": 4.686405682532992e-06, "loss": 0.3794, "step": 3994 }, { "epoch": 0.9878832838773491, "grad_norm": 0.7889887455036464, "learning_rate": 4.686248053579953e-06, "loss": 0.3703, "step": 3995 }, { "epoch": 0.9881305637982196, "grad_norm": 0.7825185955719633, "learning_rate": 4.686090387672909e-06, "loss": 0.4105, "step": 3996 }, { "epoch": 0.98837784371909, "grad_norm": 0.7783848883203941, "learning_rate": 4.685932684814524e-06, "loss": 0.4152, "step": 3997 }, { "epoch": 0.9886251236399605, "grad_norm": 0.791414411907457, "learning_rate": 4.6857749450074625e-06, "loss": 0.409, "step": 3998 }, { "epoch": 0.9888724035608308, "grad_norm": 0.8043289643137662, "learning_rate": 4.685617168254393e-06, "loss": 0.4222, "step": 3999 }, { "epoch": 0.9891196834817013, "grad_norm": 0.7714257806897765, "learning_rate": 4.68545935455798e-06, "loss": 0.4047, "step": 4000 }, { "epoch": 0.9893669634025717, "grad_norm": 0.7612816309042126, "learning_rate": 4.6853015039208924e-06, "loss": 0.3969, "step": 4001 }, { "epoch": 0.9896142433234422, "grad_norm": 0.8149288705742704, "learning_rate": 4.685143616345799e-06, "loss": 0.4114, "step": 4002 }, { "epoch": 0.9898615232443125, "grad_norm": 0.8201740720242746, "learning_rate": 4.684985691835367e-06, "loss": 0.3888, "step": 4003 }, { "epoch": 0.990108803165183, "grad_norm": 0.801323731002106, "learning_rate": 4.684827730392267e-06, "loss": 0.3877, "step": 4004 }, { "epoch": 0.9903560830860534, "grad_norm": 0.8193208923254229, "learning_rate": 4.6846697320191685e-06, "loss": 0.3644, "step": 4005 }, { "epoch": 0.9906033630069239, "grad_norm": 0.795362255642567, "learning_rate": 4.684511696718741e-06, "loss": 0.3919, "step": 4006 }, { "epoch": 0.9908506429277942, "grad_norm": 0.7751441931444772, "learning_rate": 4.684353624493658e-06, "loss": 0.4054, "step": 4007 }, { "epoch": 0.9910979228486647, "grad_norm": 0.8464714446984203, "learning_rate": 4.68419551534659e-06, "loss": 0.409, "step": 4008 }, { "epoch": 0.9913452027695351, "grad_norm": 0.7602446475018649, "learning_rate": 4.68403736928021e-06, "loss": 0.4328, "step": 4009 }, { "epoch": 0.9915924826904056, "grad_norm": 0.7966158870763892, "learning_rate": 4.683879186297191e-06, "loss": 0.3946, "step": 4010 }, { "epoch": 0.9918397626112759, "grad_norm": 0.7517907281222944, "learning_rate": 4.683720966400206e-06, "loss": 0.3914, "step": 4011 }, { "epoch": 0.9920870425321464, "grad_norm": 0.7889222304421734, "learning_rate": 4.683562709591931e-06, "loss": 0.3978, "step": 4012 }, { "epoch": 0.9923343224530168, "grad_norm": 0.7811980760364818, "learning_rate": 4.683404415875039e-06, "loss": 0.4237, "step": 4013 }, { "epoch": 0.9925816023738873, "grad_norm": 0.8277250098361327, "learning_rate": 4.683246085252207e-06, "loss": 0.3695, "step": 4014 }, { "epoch": 0.9928288822947576, "grad_norm": 0.7931595016445199, "learning_rate": 4.683087717726112e-06, "loss": 0.4025, "step": 4015 }, { "epoch": 0.9930761622156281, "grad_norm": 0.7703771521508883, "learning_rate": 4.682929313299428e-06, "loss": 0.3843, "step": 4016 }, { "epoch": 0.9933234421364985, "grad_norm": 0.7864352072212776, "learning_rate": 4.682770871974835e-06, "loss": 0.3914, "step": 4017 }, { "epoch": 0.993570722057369, "grad_norm": 0.8004356756819567, "learning_rate": 4.6826123937550115e-06, "loss": 0.4228, "step": 4018 }, { "epoch": 0.9938180019782393, "grad_norm": 0.767390308350493, "learning_rate": 4.682453878642634e-06, "loss": 0.4035, "step": 4019 }, { "epoch": 0.9940652818991098, "grad_norm": 0.7933226964116108, "learning_rate": 4.682295326640383e-06, "loss": 0.4025, "step": 4020 }, { "epoch": 0.9943125618199802, "grad_norm": 0.7925071712238786, "learning_rate": 4.68213673775094e-06, "loss": 0.3889, "step": 4021 }, { "epoch": 0.9945598417408507, "grad_norm": 0.7849972882849776, "learning_rate": 4.681978111976983e-06, "loss": 0.4121, "step": 4022 }, { "epoch": 0.994807121661721, "grad_norm": 0.7912155124220194, "learning_rate": 4.681819449321194e-06, "loss": 0.3771, "step": 4023 }, { "epoch": 0.9950544015825915, "grad_norm": 0.7829532894668891, "learning_rate": 4.681660749786257e-06, "loss": 0.3937, "step": 4024 }, { "epoch": 0.9953016815034619, "grad_norm": 0.8047332493651175, "learning_rate": 4.6815020133748514e-06, "loss": 0.392, "step": 4025 }, { "epoch": 0.9955489614243324, "grad_norm": 0.796903690277019, "learning_rate": 4.6813432400896615e-06, "loss": 0.4058, "step": 4026 }, { "epoch": 0.9957962413452027, "grad_norm": 0.7651108232160881, "learning_rate": 4.681184429933372e-06, "loss": 0.4037, "step": 4027 }, { "epoch": 0.9960435212660732, "grad_norm": 0.8032283012798399, "learning_rate": 4.681025582908666e-06, "loss": 0.3986, "step": 4028 }, { "epoch": 0.9962908011869436, "grad_norm": 0.7857611793933696, "learning_rate": 4.68086669901823e-06, "loss": 0.4026, "step": 4029 }, { "epoch": 0.996538081107814, "grad_norm": 0.7659698488027594, "learning_rate": 4.680707778264747e-06, "loss": 0.4015, "step": 4030 }, { "epoch": 0.9967853610286844, "grad_norm": 0.8012692642045438, "learning_rate": 4.680548820650905e-06, "loss": 0.3898, "step": 4031 }, { "epoch": 0.9970326409495549, "grad_norm": 0.7579225666375018, "learning_rate": 4.680389826179391e-06, "loss": 0.4337, "step": 4032 }, { "epoch": 0.9972799208704253, "grad_norm": 0.8076161741050162, "learning_rate": 4.680230794852892e-06, "loss": 0.406, "step": 4033 }, { "epoch": 0.9975272007912958, "grad_norm": 0.7625082478334948, "learning_rate": 4.680071726674097e-06, "loss": 0.4151, "step": 4034 }, { "epoch": 0.9977744807121661, "grad_norm": 0.7806980832371719, "learning_rate": 4.679912621645693e-06, "loss": 0.4038, "step": 4035 }, { "epoch": 0.9980217606330366, "grad_norm": 0.8086404913136939, "learning_rate": 4.6797534797703705e-06, "loss": 0.3958, "step": 4036 }, { "epoch": 0.998269040553907, "grad_norm": 0.759896651238656, "learning_rate": 4.679594301050819e-06, "loss": 0.4097, "step": 4037 }, { "epoch": 0.9985163204747775, "grad_norm": 0.7791843543913921, "learning_rate": 4.67943508548973e-06, "loss": 0.37, "step": 4038 }, { "epoch": 0.9987636003956478, "grad_norm": 0.7930431997735501, "learning_rate": 4.679275833089793e-06, "loss": 0.4134, "step": 4039 }, { "epoch": 0.9990108803165183, "grad_norm": 0.7597904031408639, "learning_rate": 4.679116543853702e-06, "loss": 0.4118, "step": 4040 }, { "epoch": 0.9992581602373887, "grad_norm": 0.7896276040674204, "learning_rate": 4.678957217784147e-06, "loss": 0.3747, "step": 4041 }, { "epoch": 0.9995054401582592, "grad_norm": 0.7923093015887974, "learning_rate": 4.678797854883823e-06, "loss": 0.4067, "step": 4042 }, { "epoch": 0.9997527200791295, "grad_norm": 0.769422575838774, "learning_rate": 4.678638455155424e-06, "loss": 0.4326, "step": 4043 }, { "epoch": 1.0, "grad_norm": 0.7984512022584634, "learning_rate": 4.6784790186016425e-06, "loss": 0.3915, "step": 4044 }, { "epoch": 1.0002472799208704, "grad_norm": 0.7970864291796281, "learning_rate": 4.678319545225174e-06, "loss": 0.4245, "step": 4045 }, { "epoch": 1.000494559841741, "grad_norm": 0.756692452161391, "learning_rate": 4.678160035028715e-06, "loss": 0.3739, "step": 4046 }, { "epoch": 1.0007418397626113, "grad_norm": 0.7962425785044392, "learning_rate": 4.678000488014961e-06, "loss": 0.3877, "step": 4047 }, { "epoch": 1.0009891196834817, "grad_norm": 0.8321386599550697, "learning_rate": 4.6778409041866095e-06, "loss": 0.3791, "step": 4048 }, { "epoch": 1.001236399604352, "grad_norm": 0.7734906756009456, "learning_rate": 4.677681283546356e-06, "loss": 0.4051, "step": 4049 }, { "epoch": 1.0014836795252227, "grad_norm": 0.802870927671154, "learning_rate": 4.6775216260969006e-06, "loss": 0.4375, "step": 4050 }, { "epoch": 1.001730959446093, "grad_norm": 0.8293040115584968, "learning_rate": 4.6773619318409415e-06, "loss": 0.3501, "step": 4051 }, { "epoch": 1.0019782393669634, "grad_norm": 0.7621891610048048, "learning_rate": 4.677202200781178e-06, "loss": 0.3945, "step": 4052 }, { "epoch": 1.0022255192878338, "grad_norm": 0.772462094941228, "learning_rate": 4.67704243292031e-06, "loss": 0.4216, "step": 4053 }, { "epoch": 1.0024727992087044, "grad_norm": 0.7791758272461141, "learning_rate": 4.676882628261037e-06, "loss": 0.3848, "step": 4054 }, { "epoch": 1.0027200791295747, "grad_norm": 0.7893908241739953, "learning_rate": 4.676722786806062e-06, "loss": 0.3514, "step": 4055 }, { "epoch": 1.002967359050445, "grad_norm": 0.7916228288978282, "learning_rate": 4.6765629085580846e-06, "loss": 0.3828, "step": 4056 }, { "epoch": 1.0032146389713155, "grad_norm": 0.7521295792197796, "learning_rate": 4.676402993519809e-06, "loss": 0.405, "step": 4057 }, { "epoch": 1.003461918892186, "grad_norm": 0.752665999139312, "learning_rate": 4.676243041693938e-06, "loss": 0.3839, "step": 4058 }, { "epoch": 1.0037091988130564, "grad_norm": 0.7922192226581196, "learning_rate": 4.676083053083175e-06, "loss": 0.3843, "step": 4059 }, { "epoch": 1.0039564787339268, "grad_norm": 0.8016017819671889, "learning_rate": 4.675923027690224e-06, "loss": 0.3761, "step": 4060 }, { "epoch": 1.0042037586547972, "grad_norm": 0.8093204291676045, "learning_rate": 4.675762965517789e-06, "loss": 0.3762, "step": 4061 }, { "epoch": 1.0044510385756678, "grad_norm": 0.7803337445463355, "learning_rate": 4.675602866568578e-06, "loss": 0.3947, "step": 4062 }, { "epoch": 1.0046983184965381, "grad_norm": 0.7735252551905951, "learning_rate": 4.675442730845296e-06, "loss": 0.3819, "step": 4063 }, { "epoch": 1.0049455984174085, "grad_norm": 0.8062171503246767, "learning_rate": 4.675282558350648e-06, "loss": 0.4062, "step": 4064 }, { "epoch": 1.0051928783382789, "grad_norm": 0.8218852923879694, "learning_rate": 4.675122349087344e-06, "loss": 0.3707, "step": 4065 }, { "epoch": 1.0054401582591495, "grad_norm": 0.7795814745419494, "learning_rate": 4.674962103058091e-06, "loss": 0.3863, "step": 4066 }, { "epoch": 1.0056874381800198, "grad_norm": 0.8169837589075865, "learning_rate": 4.674801820265597e-06, "loss": 0.3983, "step": 4067 }, { "epoch": 1.0059347181008902, "grad_norm": 0.7972698414960018, "learning_rate": 4.674641500712573e-06, "loss": 0.3748, "step": 4068 }, { "epoch": 1.0061819980217606, "grad_norm": 0.7884834296010512, "learning_rate": 4.674481144401726e-06, "loss": 0.377, "step": 4069 }, { "epoch": 1.0064292779426312, "grad_norm": 0.8014252638605766, "learning_rate": 4.674320751335769e-06, "loss": 0.4062, "step": 4070 }, { "epoch": 1.0066765578635015, "grad_norm": 0.8006655698663971, "learning_rate": 4.674160321517412e-06, "loss": 0.4025, "step": 4071 }, { "epoch": 1.006923837784372, "grad_norm": 0.7956670449136097, "learning_rate": 4.673999854949367e-06, "loss": 0.3797, "step": 4072 }, { "epoch": 1.0071711177052423, "grad_norm": 0.8321795553325289, "learning_rate": 4.6738393516343465e-06, "loss": 0.4011, "step": 4073 }, { "epoch": 1.0074183976261128, "grad_norm": 0.7545805089392921, "learning_rate": 4.6736788115750635e-06, "loss": 0.3867, "step": 4074 }, { "epoch": 1.0076656775469832, "grad_norm": 0.7909802938893084, "learning_rate": 4.673518234774231e-06, "loss": 0.4022, "step": 4075 }, { "epoch": 1.0079129574678536, "grad_norm": 0.8057561665214222, "learning_rate": 4.673357621234564e-06, "loss": 0.4026, "step": 4076 }, { "epoch": 1.008160237388724, "grad_norm": 0.7634473409948251, "learning_rate": 4.673196970958777e-06, "loss": 0.4078, "step": 4077 }, { "epoch": 1.0084075173095945, "grad_norm": 0.8009993459594132, "learning_rate": 4.673036283949585e-06, "loss": 0.3991, "step": 4078 }, { "epoch": 1.008654797230465, "grad_norm": 0.8026859577711525, "learning_rate": 4.672875560209705e-06, "loss": 0.3947, "step": 4079 }, { "epoch": 1.0089020771513353, "grad_norm": 0.7859980884466496, "learning_rate": 4.672714799741854e-06, "loss": 0.4161, "step": 4080 }, { "epoch": 1.0091493570722057, "grad_norm": 0.8187412615067825, "learning_rate": 4.6725540025487475e-06, "loss": 0.3802, "step": 4081 }, { "epoch": 1.0093966369930762, "grad_norm": 0.8509406583494764, "learning_rate": 4.672393168633105e-06, "loss": 0.3811, "step": 4082 }, { "epoch": 1.0096439169139466, "grad_norm": 0.8101520292570353, "learning_rate": 4.672232297997644e-06, "loss": 0.3789, "step": 4083 }, { "epoch": 1.009891196834817, "grad_norm": 0.8168285467822295, "learning_rate": 4.6720713906450845e-06, "loss": 0.3782, "step": 4084 }, { "epoch": 1.0101384767556874, "grad_norm": 0.8375691371715392, "learning_rate": 4.671910446578146e-06, "loss": 0.3848, "step": 4085 }, { "epoch": 1.010385756676558, "grad_norm": 0.7626502378331581, "learning_rate": 4.67174946579955e-06, "loss": 0.4239, "step": 4086 }, { "epoch": 1.0106330365974283, "grad_norm": 0.7974655065730021, "learning_rate": 4.6715884483120155e-06, "loss": 0.381, "step": 4087 }, { "epoch": 1.0108803165182987, "grad_norm": 0.8063593711692676, "learning_rate": 4.671427394118265e-06, "loss": 0.3901, "step": 4088 }, { "epoch": 1.011127596439169, "grad_norm": 0.7868680848287519, "learning_rate": 4.671266303221021e-06, "loss": 0.3735, "step": 4089 }, { "epoch": 1.0113748763600396, "grad_norm": 0.7845754631056757, "learning_rate": 4.671105175623008e-06, "loss": 0.3609, "step": 4090 }, { "epoch": 1.01162215628091, "grad_norm": 0.8409734635681012, "learning_rate": 4.670944011326946e-06, "loss": 0.3894, "step": 4091 }, { "epoch": 1.0118694362017804, "grad_norm": 0.8122040832293331, "learning_rate": 4.670782810335563e-06, "loss": 0.4174, "step": 4092 }, { "epoch": 1.0121167161226508, "grad_norm": 0.7758553072110376, "learning_rate": 4.670621572651581e-06, "loss": 0.4145, "step": 4093 }, { "epoch": 1.0123639960435213, "grad_norm": 0.7806610470520114, "learning_rate": 4.6704602982777245e-06, "loss": 0.3745, "step": 4094 }, { "epoch": 1.0126112759643917, "grad_norm": 0.7654427159669956, "learning_rate": 4.670298987216723e-06, "loss": 0.3925, "step": 4095 }, { "epoch": 1.012858555885262, "grad_norm": 0.7618483501676195, "learning_rate": 4.670137639471301e-06, "loss": 0.3771, "step": 4096 }, { "epoch": 1.0131058358061324, "grad_norm": 0.8181200977146363, "learning_rate": 4.6699762550441864e-06, "loss": 0.3566, "step": 4097 }, { "epoch": 1.013353115727003, "grad_norm": 0.8057354125370859, "learning_rate": 4.6698148339381064e-06, "loss": 0.3783, "step": 4098 }, { "epoch": 1.0136003956478734, "grad_norm": 0.8077648631684345, "learning_rate": 4.66965337615579e-06, "loss": 0.3822, "step": 4099 }, { "epoch": 1.0138476755687438, "grad_norm": 0.7858161573498595, "learning_rate": 4.6694918816999664e-06, "loss": 0.3663, "step": 4100 }, { "epoch": 1.0140949554896141, "grad_norm": 0.7983608536372774, "learning_rate": 4.669330350573366e-06, "loss": 0.4036, "step": 4101 }, { "epoch": 1.0143422354104847, "grad_norm": 0.7584905874628877, "learning_rate": 4.669168782778717e-06, "loss": 0.3682, "step": 4102 }, { "epoch": 1.014589515331355, "grad_norm": 0.8437779067741491, "learning_rate": 4.669007178318752e-06, "loss": 0.3987, "step": 4103 }, { "epoch": 1.0148367952522255, "grad_norm": 0.7953403317965364, "learning_rate": 4.668845537196201e-06, "loss": 0.4034, "step": 4104 }, { "epoch": 1.0150840751730958, "grad_norm": 0.7756184563388581, "learning_rate": 4.668683859413799e-06, "loss": 0.403, "step": 4105 }, { "epoch": 1.0153313550939664, "grad_norm": 0.7884271147798568, "learning_rate": 4.668522144974278e-06, "loss": 0.3968, "step": 4106 }, { "epoch": 1.0155786350148368, "grad_norm": 0.7661640302136515, "learning_rate": 4.668360393880369e-06, "loss": 0.4098, "step": 4107 }, { "epoch": 1.0158259149357072, "grad_norm": 0.7875994762206087, "learning_rate": 4.6681986061348085e-06, "loss": 0.364, "step": 4108 }, { "epoch": 1.0160731948565775, "grad_norm": 0.7960260372720682, "learning_rate": 4.668036781740331e-06, "loss": 0.3562, "step": 4109 }, { "epoch": 1.0163204747774481, "grad_norm": 0.8307302519167433, "learning_rate": 4.667874920699671e-06, "loss": 0.3638, "step": 4110 }, { "epoch": 1.0165677546983185, "grad_norm": 0.8208891443626891, "learning_rate": 4.667713023015564e-06, "loss": 0.3765, "step": 4111 }, { "epoch": 1.0168150346191889, "grad_norm": 0.7797732823346349, "learning_rate": 4.667551088690748e-06, "loss": 0.3922, "step": 4112 }, { "epoch": 1.0170623145400592, "grad_norm": 0.7873135528690548, "learning_rate": 4.667389117727959e-06, "loss": 0.3966, "step": 4113 }, { "epoch": 1.0173095944609298, "grad_norm": 0.7735255511043266, "learning_rate": 4.667227110129935e-06, "loss": 0.4041, "step": 4114 }, { "epoch": 1.0175568743818002, "grad_norm": 0.7728022270203908, "learning_rate": 4.6670650658994156e-06, "loss": 0.3969, "step": 4115 }, { "epoch": 1.0178041543026706, "grad_norm": 0.782751409630276, "learning_rate": 4.6669029850391375e-06, "loss": 0.4055, "step": 4116 }, { "epoch": 1.018051434223541, "grad_norm": 0.7849930918165627, "learning_rate": 4.6667408675518435e-06, "loss": 0.38, "step": 4117 }, { "epoch": 1.0182987141444115, "grad_norm": 0.8191914200546412, "learning_rate": 4.6665787134402705e-06, "loss": 0.3782, "step": 4118 }, { "epoch": 1.018545994065282, "grad_norm": 0.7833949430952412, "learning_rate": 4.666416522707162e-06, "loss": 0.3895, "step": 4119 }, { "epoch": 1.0187932739861523, "grad_norm": 0.7607709138666664, "learning_rate": 4.666254295355258e-06, "loss": 0.383, "step": 4120 }, { "epoch": 1.0190405539070229, "grad_norm": 0.8038297232266377, "learning_rate": 4.6660920313873e-06, "loss": 0.3999, "step": 4121 }, { "epoch": 1.0192878338278932, "grad_norm": 0.8028656622551785, "learning_rate": 4.665929730806034e-06, "loss": 0.3797, "step": 4122 }, { "epoch": 1.0195351137487636, "grad_norm": 0.8246190456666256, "learning_rate": 4.665767393614199e-06, "loss": 0.3823, "step": 4123 }, { "epoch": 1.019782393669634, "grad_norm": 0.8374731259371345, "learning_rate": 4.665605019814542e-06, "loss": 0.376, "step": 4124 }, { "epoch": 1.0200296735905046, "grad_norm": 0.7715911001710387, "learning_rate": 4.6654426094098065e-06, "loss": 0.3934, "step": 4125 }, { "epoch": 1.020276953511375, "grad_norm": 0.8264804992274714, "learning_rate": 4.665280162402738e-06, "loss": 0.3784, "step": 4126 }, { "epoch": 1.0205242334322453, "grad_norm": 0.8238792003924764, "learning_rate": 4.665117678796083e-06, "loss": 0.3655, "step": 4127 }, { "epoch": 1.0207715133531157, "grad_norm": 0.7664401577473372, "learning_rate": 4.6649551585925855e-06, "loss": 0.3714, "step": 4128 }, { "epoch": 1.0210187932739863, "grad_norm": 0.7915268626812306, "learning_rate": 4.664792601794996e-06, "loss": 0.3896, "step": 4129 }, { "epoch": 1.0212660731948566, "grad_norm": 0.7724243965526881, "learning_rate": 4.66463000840606e-06, "loss": 0.3922, "step": 4130 }, { "epoch": 1.021513353115727, "grad_norm": 0.7903438021158881, "learning_rate": 4.664467378428526e-06, "loss": 0.4121, "step": 4131 }, { "epoch": 1.0217606330365974, "grad_norm": 0.8337792021490061, "learning_rate": 4.664304711865143e-06, "loss": 0.4111, "step": 4132 }, { "epoch": 1.022007912957468, "grad_norm": 0.7473157140988479, "learning_rate": 4.664142008718661e-06, "loss": 0.4076, "step": 4133 }, { "epoch": 1.0222551928783383, "grad_norm": 0.7963130196438288, "learning_rate": 4.663979268991829e-06, "loss": 0.4056, "step": 4134 }, { "epoch": 1.0225024727992087, "grad_norm": 0.7646197508993408, "learning_rate": 4.6638164926874e-06, "loss": 0.3953, "step": 4135 }, { "epoch": 1.022749752720079, "grad_norm": 0.77977631254344, "learning_rate": 4.663653679808123e-06, "loss": 0.4102, "step": 4136 }, { "epoch": 1.0229970326409497, "grad_norm": 0.8031597147851005, "learning_rate": 4.663490830356752e-06, "loss": 0.4062, "step": 4137 }, { "epoch": 1.02324431256182, "grad_norm": 0.778844295583755, "learning_rate": 4.663327944336038e-06, "loss": 0.397, "step": 4138 }, { "epoch": 1.0234915924826904, "grad_norm": 0.8058685023015318, "learning_rate": 4.663165021748735e-06, "loss": 0.4072, "step": 4139 }, { "epoch": 1.0237388724035608, "grad_norm": 0.7985909831286393, "learning_rate": 4.663002062597598e-06, "loss": 0.4017, "step": 4140 }, { "epoch": 1.0239861523244314, "grad_norm": 0.8103317328141605, "learning_rate": 4.662839066885379e-06, "loss": 0.4107, "step": 4141 }, { "epoch": 1.0242334322453017, "grad_norm": 0.812865232811803, "learning_rate": 4.6626760346148345e-06, "loss": 0.409, "step": 4142 }, { "epoch": 1.024480712166172, "grad_norm": 0.7972973484270046, "learning_rate": 4.662512965788721e-06, "loss": 0.4106, "step": 4143 }, { "epoch": 1.0247279920870425, "grad_norm": 0.7988129329985753, "learning_rate": 4.662349860409793e-06, "loss": 0.3874, "step": 4144 }, { "epoch": 1.024975272007913, "grad_norm": 0.7364198779897235, "learning_rate": 4.662186718480809e-06, "loss": 0.3838, "step": 4145 }, { "epoch": 1.0252225519287834, "grad_norm": 0.7759132157875801, "learning_rate": 4.662023540004526e-06, "loss": 0.386, "step": 4146 }, { "epoch": 1.0254698318496538, "grad_norm": 0.8053454196029963, "learning_rate": 4.661860324983702e-06, "loss": 0.3912, "step": 4147 }, { "epoch": 1.0257171117705242, "grad_norm": 0.7899041695992118, "learning_rate": 4.661697073421096e-06, "loss": 0.3618, "step": 4148 }, { "epoch": 1.0259643916913948, "grad_norm": 0.7573350947616337, "learning_rate": 4.661533785319468e-06, "loss": 0.3873, "step": 4149 }, { "epoch": 1.0262116716122651, "grad_norm": 0.776646479969273, "learning_rate": 4.661370460681578e-06, "loss": 0.4134, "step": 4150 }, { "epoch": 1.0264589515331355, "grad_norm": 0.7879482245079387, "learning_rate": 4.661207099510184e-06, "loss": 0.3894, "step": 4151 }, { "epoch": 1.0267062314540059, "grad_norm": 0.7996014893710166, "learning_rate": 4.661043701808052e-06, "loss": 0.4027, "step": 4152 }, { "epoch": 1.0269535113748764, "grad_norm": 0.79221369405161, "learning_rate": 4.660880267577939e-06, "loss": 0.3796, "step": 4153 }, { "epoch": 1.0272007912957468, "grad_norm": 0.768153548089279, "learning_rate": 4.660716796822612e-06, "loss": 0.412, "step": 4154 }, { "epoch": 1.0274480712166172, "grad_norm": 0.806022756652035, "learning_rate": 4.66055328954483e-06, "loss": 0.3866, "step": 4155 }, { "epoch": 1.0276953511374876, "grad_norm": 0.7938169914206649, "learning_rate": 4.66038974574736e-06, "loss": 0.3807, "step": 4156 }, { "epoch": 1.0279426310583581, "grad_norm": 0.7742218767289185, "learning_rate": 4.660226165432966e-06, "loss": 0.4098, "step": 4157 }, { "epoch": 1.0281899109792285, "grad_norm": 0.7856391773744521, "learning_rate": 4.66006254860441e-06, "loss": 0.3657, "step": 4158 }, { "epoch": 1.0284371909000989, "grad_norm": 0.7812346499332757, "learning_rate": 4.6598988952644614e-06, "loss": 0.4097, "step": 4159 }, { "epoch": 1.0286844708209693, "grad_norm": 0.7920213678406632, "learning_rate": 4.659735205415884e-06, "loss": 0.4118, "step": 4160 }, { "epoch": 1.0289317507418398, "grad_norm": 0.7823230745193774, "learning_rate": 4.659571479061445e-06, "loss": 0.4118, "step": 4161 }, { "epoch": 1.0291790306627102, "grad_norm": 0.7865689701084525, "learning_rate": 4.659407716203913e-06, "loss": 0.3811, "step": 4162 }, { "epoch": 1.0294263105835806, "grad_norm": 0.8082111597123086, "learning_rate": 4.659243916846055e-06, "loss": 0.3758, "step": 4163 }, { "epoch": 1.029673590504451, "grad_norm": 0.8125517219965859, "learning_rate": 4.659080080990641e-06, "loss": 0.3781, "step": 4164 }, { "epoch": 1.0299208704253215, "grad_norm": 0.7672187133690398, "learning_rate": 4.658916208640438e-06, "loss": 0.393, "step": 4165 }, { "epoch": 1.030168150346192, "grad_norm": 0.8012478232481274, "learning_rate": 4.6587522997982185e-06, "loss": 0.3986, "step": 4166 }, { "epoch": 1.0304154302670623, "grad_norm": 0.8108923282341577, "learning_rate": 4.658588354466751e-06, "loss": 0.406, "step": 4167 }, { "epoch": 1.0306627101879327, "grad_norm": 0.7979022897161229, "learning_rate": 4.658424372648807e-06, "loss": 0.3947, "step": 4168 }, { "epoch": 1.0309099901088032, "grad_norm": 0.7697685963832798, "learning_rate": 4.658260354347159e-06, "loss": 0.3906, "step": 4169 }, { "epoch": 1.0311572700296736, "grad_norm": 0.8020140173491006, "learning_rate": 4.658096299564579e-06, "loss": 0.3663, "step": 4170 }, { "epoch": 1.031404549950544, "grad_norm": 0.8084317421236424, "learning_rate": 4.65793220830384e-06, "loss": 0.3836, "step": 4171 }, { "epoch": 1.0316518298714143, "grad_norm": 0.8242337678183874, "learning_rate": 4.657768080567716e-06, "loss": 0.4002, "step": 4172 }, { "epoch": 1.031899109792285, "grad_norm": 0.8334198165848743, "learning_rate": 4.657603916358981e-06, "loss": 0.3716, "step": 4173 }, { "epoch": 1.0321463897131553, "grad_norm": 0.7840899623616464, "learning_rate": 4.65743971568041e-06, "loss": 0.4087, "step": 4174 }, { "epoch": 1.0323936696340257, "grad_norm": 0.8204985830706817, "learning_rate": 4.657275478534778e-06, "loss": 0.3998, "step": 4175 }, { "epoch": 1.032640949554896, "grad_norm": 0.8233900531733466, "learning_rate": 4.657111204924862e-06, "loss": 0.3911, "step": 4176 }, { "epoch": 1.0328882294757666, "grad_norm": 0.8087641387464256, "learning_rate": 4.656946894853438e-06, "loss": 0.384, "step": 4177 }, { "epoch": 1.033135509396637, "grad_norm": 0.7637142016125394, "learning_rate": 4.6567825483232835e-06, "loss": 0.4047, "step": 4178 }, { "epoch": 1.0333827893175074, "grad_norm": 0.7489974874694156, "learning_rate": 4.656618165337175e-06, "loss": 0.3929, "step": 4179 }, { "epoch": 1.0336300692383777, "grad_norm": 0.7744600154411531, "learning_rate": 4.6564537458978935e-06, "loss": 0.3999, "step": 4180 }, { "epoch": 1.0338773491592483, "grad_norm": 0.8133642969421636, "learning_rate": 4.656289290008217e-06, "loss": 0.3701, "step": 4181 }, { "epoch": 1.0341246290801187, "grad_norm": 0.7871375960226067, "learning_rate": 4.656124797670925e-06, "loss": 0.3907, "step": 4182 }, { "epoch": 1.034371909000989, "grad_norm": 0.7721832592728892, "learning_rate": 4.655960268888799e-06, "loss": 0.3869, "step": 4183 }, { "epoch": 1.0346191889218594, "grad_norm": 0.774784534125747, "learning_rate": 4.655795703664618e-06, "loss": 0.3763, "step": 4184 }, { "epoch": 1.03486646884273, "grad_norm": 0.7407471869855703, "learning_rate": 4.655631102001166e-06, "loss": 0.3793, "step": 4185 }, { "epoch": 1.0351137487636004, "grad_norm": 0.8001169719046306, "learning_rate": 4.655466463901223e-06, "loss": 0.4055, "step": 4186 }, { "epoch": 1.0353610286844708, "grad_norm": 0.8059242896682198, "learning_rate": 4.655301789367575e-06, "loss": 0.3819, "step": 4187 }, { "epoch": 1.0356083086053411, "grad_norm": 0.7882767260677556, "learning_rate": 4.655137078403003e-06, "loss": 0.4093, "step": 4188 }, { "epoch": 1.0358555885262117, "grad_norm": 0.7754975557395789, "learning_rate": 4.654972331010291e-06, "loss": 0.3897, "step": 4189 }, { "epoch": 1.036102868447082, "grad_norm": 0.7997831506016271, "learning_rate": 4.654807547192224e-06, "loss": 0.3966, "step": 4190 }, { "epoch": 1.0363501483679525, "grad_norm": 0.7838184339647526, "learning_rate": 4.654642726951588e-06, "loss": 0.3934, "step": 4191 }, { "epoch": 1.0365974282888228, "grad_norm": 0.7545172195796929, "learning_rate": 4.654477870291169e-06, "loss": 0.4026, "step": 4192 }, { "epoch": 1.0368447082096934, "grad_norm": 0.8089589143892814, "learning_rate": 4.654312977213753e-06, "loss": 0.3628, "step": 4193 }, { "epoch": 1.0370919881305638, "grad_norm": 0.8307687692460426, "learning_rate": 4.6541480477221265e-06, "loss": 0.3904, "step": 4194 }, { "epoch": 1.0373392680514342, "grad_norm": 0.7913977975299474, "learning_rate": 4.65398308181908e-06, "loss": 0.3816, "step": 4195 }, { "epoch": 1.0375865479723045, "grad_norm": 0.8127831484783226, "learning_rate": 4.653818079507398e-06, "loss": 0.4037, "step": 4196 }, { "epoch": 1.0378338278931751, "grad_norm": 0.7895910187503419, "learning_rate": 4.6536530407898725e-06, "loss": 0.4007, "step": 4197 }, { "epoch": 1.0380811078140455, "grad_norm": 0.8254549249073165, "learning_rate": 4.653487965669293e-06, "loss": 0.3863, "step": 4198 }, { "epoch": 1.0383283877349159, "grad_norm": 0.8033944951131544, "learning_rate": 4.6533228541484485e-06, "loss": 0.3871, "step": 4199 }, { "epoch": 1.0385756676557865, "grad_norm": 0.7505585019590592, "learning_rate": 4.65315770623013e-06, "loss": 0.4048, "step": 4200 }, { "epoch": 1.0388229475766568, "grad_norm": 0.8061843026175791, "learning_rate": 4.652992521917129e-06, "loss": 0.3688, "step": 4201 }, { "epoch": 1.0390702274975272, "grad_norm": 0.7829745213340227, "learning_rate": 4.652827301212238e-06, "loss": 0.3854, "step": 4202 }, { "epoch": 1.0393175074183976, "grad_norm": 0.7717400973577578, "learning_rate": 4.65266204411825e-06, "loss": 0.3966, "step": 4203 }, { "epoch": 1.039564787339268, "grad_norm": 0.7946280678594378, "learning_rate": 4.652496750637958e-06, "loss": 0.3709, "step": 4204 }, { "epoch": 1.0398120672601385, "grad_norm": 0.7936192671577446, "learning_rate": 4.652331420774156e-06, "loss": 0.3804, "step": 4205 }, { "epoch": 1.040059347181009, "grad_norm": 0.8086932665276823, "learning_rate": 4.652166054529639e-06, "loss": 0.3904, "step": 4206 }, { "epoch": 1.0403066271018793, "grad_norm": 0.8059849107776803, "learning_rate": 4.652000651907201e-06, "loss": 0.3818, "step": 4207 }, { "epoch": 1.0405539070227499, "grad_norm": 0.7613794236761812, "learning_rate": 4.6518352129096375e-06, "loss": 0.4139, "step": 4208 }, { "epoch": 1.0408011869436202, "grad_norm": 0.8282540055529727, "learning_rate": 4.651669737539747e-06, "loss": 0.3734, "step": 4209 }, { "epoch": 1.0410484668644906, "grad_norm": 0.8020366343621053, "learning_rate": 4.651504225800325e-06, "loss": 0.3646, "step": 4210 }, { "epoch": 1.041295746785361, "grad_norm": 0.7703491895017631, "learning_rate": 4.65133867769417e-06, "loss": 0.3685, "step": 4211 }, { "epoch": 1.0415430267062316, "grad_norm": 0.8143804553304974, "learning_rate": 4.651173093224079e-06, "loss": 0.3909, "step": 4212 }, { "epoch": 1.041790306627102, "grad_norm": 0.7448579294583147, "learning_rate": 4.651007472392852e-06, "loss": 0.3705, "step": 4213 }, { "epoch": 1.0420375865479723, "grad_norm": 0.8055656935012121, "learning_rate": 4.650841815203288e-06, "loss": 0.3958, "step": 4214 }, { "epoch": 1.0422848664688427, "grad_norm": 0.7692113862172067, "learning_rate": 4.650676121658187e-06, "loss": 0.3925, "step": 4215 }, { "epoch": 1.0425321463897133, "grad_norm": 0.7704811100006247, "learning_rate": 4.65051039176035e-06, "loss": 0.3742, "step": 4216 }, { "epoch": 1.0427794263105836, "grad_norm": 0.8037844750917374, "learning_rate": 4.650344625512578e-06, "loss": 0.4149, "step": 4217 }, { "epoch": 1.043026706231454, "grad_norm": 0.7945151991654863, "learning_rate": 4.650178822917674e-06, "loss": 0.3994, "step": 4218 }, { "epoch": 1.0432739861523244, "grad_norm": 0.7757459763804186, "learning_rate": 4.650012983978439e-06, "loss": 0.4282, "step": 4219 }, { "epoch": 1.043521266073195, "grad_norm": 0.7899656886000685, "learning_rate": 4.6498471086976775e-06, "loss": 0.3954, "step": 4220 }, { "epoch": 1.0437685459940653, "grad_norm": 0.804854975157165, "learning_rate": 4.649681197078192e-06, "loss": 0.382, "step": 4221 }, { "epoch": 1.0440158259149357, "grad_norm": 0.7703312574677142, "learning_rate": 4.649515249122787e-06, "loss": 0.3764, "step": 4222 }, { "epoch": 1.044263105835806, "grad_norm": 0.7930522666781, "learning_rate": 4.649349264834269e-06, "loss": 0.4162, "step": 4223 }, { "epoch": 1.0445103857566767, "grad_norm": 0.7918031038145316, "learning_rate": 4.649183244215442e-06, "loss": 0.4545, "step": 4224 }, { "epoch": 1.044757665677547, "grad_norm": 0.7579044769246266, "learning_rate": 4.649017187269114e-06, "loss": 0.3898, "step": 4225 }, { "epoch": 1.0450049455984174, "grad_norm": 0.7818201778135752, "learning_rate": 4.6488510939980894e-06, "loss": 0.3779, "step": 4226 }, { "epoch": 1.0452522255192878, "grad_norm": 0.803813832768149, "learning_rate": 4.6486849644051775e-06, "loss": 0.3668, "step": 4227 }, { "epoch": 1.0454995054401583, "grad_norm": 0.8047257615281099, "learning_rate": 4.648518798493186e-06, "loss": 0.3826, "step": 4228 }, { "epoch": 1.0457467853610287, "grad_norm": 0.8080320554338043, "learning_rate": 4.648352596264924e-06, "loss": 0.3729, "step": 4229 }, { "epoch": 1.045994065281899, "grad_norm": 0.7960122578991025, "learning_rate": 4.648186357723199e-06, "loss": 0.403, "step": 4230 }, { "epoch": 1.0462413452027695, "grad_norm": 0.7703755022007169, "learning_rate": 4.648020082870823e-06, "loss": 0.3767, "step": 4231 }, { "epoch": 1.04648862512364, "grad_norm": 0.7776091095371102, "learning_rate": 4.647853771710606e-06, "loss": 0.3595, "step": 4232 }, { "epoch": 1.0467359050445104, "grad_norm": 0.8112551697318591, "learning_rate": 4.647687424245358e-06, "loss": 0.3937, "step": 4233 }, { "epoch": 1.0469831849653808, "grad_norm": 0.7810016961207867, "learning_rate": 4.647521040477893e-06, "loss": 0.3592, "step": 4234 }, { "epoch": 1.0472304648862512, "grad_norm": 0.8188860327809285, "learning_rate": 4.64735462041102e-06, "loss": 0.4117, "step": 4235 }, { "epoch": 1.0474777448071217, "grad_norm": 0.8172159999260146, "learning_rate": 4.647188164047555e-06, "loss": 0.3848, "step": 4236 }, { "epoch": 1.0477250247279921, "grad_norm": 0.8134348696735962, "learning_rate": 4.647021671390311e-06, "loss": 0.3749, "step": 4237 }, { "epoch": 1.0479723046488625, "grad_norm": 0.7832081621623097, "learning_rate": 4.646855142442102e-06, "loss": 0.3655, "step": 4238 }, { "epoch": 1.0482195845697329, "grad_norm": 0.7576261094638913, "learning_rate": 4.646688577205742e-06, "loss": 0.4515, "step": 4239 }, { "epoch": 1.0484668644906034, "grad_norm": 0.825080964198382, "learning_rate": 4.646521975684048e-06, "loss": 0.3769, "step": 4240 }, { "epoch": 1.0487141444114738, "grad_norm": 0.815041103320893, "learning_rate": 4.646355337879833e-06, "loss": 0.3614, "step": 4241 }, { "epoch": 1.0489614243323442, "grad_norm": 0.7721780725940247, "learning_rate": 4.646188663795918e-06, "loss": 0.425, "step": 4242 }, { "epoch": 1.0492087042532146, "grad_norm": 0.8160036870990871, "learning_rate": 4.646021953435117e-06, "loss": 0.3838, "step": 4243 }, { "epoch": 1.0494559841740851, "grad_norm": 0.7858404045001812, "learning_rate": 4.645855206800249e-06, "loss": 0.3882, "step": 4244 }, { "epoch": 1.0497032640949555, "grad_norm": 0.8325540576350815, "learning_rate": 4.645688423894133e-06, "loss": 0.3909, "step": 4245 }, { "epoch": 1.0499505440158259, "grad_norm": 0.7889856575386257, "learning_rate": 4.645521604719587e-06, "loss": 0.3744, "step": 4246 }, { "epoch": 1.0501978239366963, "grad_norm": 0.7971110357541104, "learning_rate": 4.645354749279432e-06, "loss": 0.3748, "step": 4247 }, { "epoch": 1.0504451038575668, "grad_norm": 0.7887045099299254, "learning_rate": 4.6451878575764876e-06, "loss": 0.3908, "step": 4248 }, { "epoch": 1.0506923837784372, "grad_norm": 0.7705544648941555, "learning_rate": 4.645020929613574e-06, "loss": 0.4071, "step": 4249 }, { "epoch": 1.0509396636993076, "grad_norm": 0.7995895038029855, "learning_rate": 4.6448539653935145e-06, "loss": 0.3984, "step": 4250 }, { "epoch": 1.051186943620178, "grad_norm": 0.7463819189343223, "learning_rate": 4.644686964919131e-06, "loss": 0.4195, "step": 4251 }, { "epoch": 1.0514342235410485, "grad_norm": 0.8109557851914531, "learning_rate": 4.644519928193244e-06, "loss": 0.3829, "step": 4252 }, { "epoch": 1.051681503461919, "grad_norm": 0.8014713612447628, "learning_rate": 4.64435285521868e-06, "loss": 0.3911, "step": 4253 }, { "epoch": 1.0519287833827893, "grad_norm": 0.7581480414365138, "learning_rate": 4.644185745998261e-06, "loss": 0.3935, "step": 4254 }, { "epoch": 1.0521760633036596, "grad_norm": 0.8034803290662654, "learning_rate": 4.644018600534812e-06, "loss": 0.405, "step": 4255 }, { "epoch": 1.0524233432245302, "grad_norm": 0.7989800930152032, "learning_rate": 4.643851418831159e-06, "loss": 0.3582, "step": 4256 }, { "epoch": 1.0526706231454006, "grad_norm": 0.7847832588453156, "learning_rate": 4.643684200890127e-06, "loss": 0.3764, "step": 4257 }, { "epoch": 1.052917903066271, "grad_norm": 0.7932982586519345, "learning_rate": 4.643516946714543e-06, "loss": 0.4008, "step": 4258 }, { "epoch": 1.0531651829871413, "grad_norm": 0.7941695568052275, "learning_rate": 4.643349656307235e-06, "loss": 0.3806, "step": 4259 }, { "epoch": 1.053412462908012, "grad_norm": 0.7576839705652183, "learning_rate": 4.643182329671028e-06, "loss": 0.3786, "step": 4260 }, { "epoch": 1.0536597428288823, "grad_norm": 0.7480784059742078, "learning_rate": 4.6430149668087535e-06, "loss": 0.4021, "step": 4261 }, { "epoch": 1.0539070227497527, "grad_norm": 0.7874562690760987, "learning_rate": 4.6428475677232375e-06, "loss": 0.395, "step": 4262 }, { "epoch": 1.054154302670623, "grad_norm": 0.794073075158186, "learning_rate": 4.642680132417312e-06, "loss": 0.3883, "step": 4263 }, { "epoch": 1.0544015825914936, "grad_norm": 0.8161706625447627, "learning_rate": 4.642512660893805e-06, "loss": 0.4004, "step": 4264 }, { "epoch": 1.054648862512364, "grad_norm": 0.8136072755859499, "learning_rate": 4.6423451531555485e-06, "loss": 0.3948, "step": 4265 }, { "epoch": 1.0548961424332344, "grad_norm": 0.7874852260241233, "learning_rate": 4.642177609205375e-06, "loss": 0.4259, "step": 4266 }, { "epoch": 1.0551434223541047, "grad_norm": 0.7983118223504635, "learning_rate": 4.642010029046114e-06, "loss": 0.412, "step": 4267 }, { "epoch": 1.0553907022749753, "grad_norm": 0.7873974040941972, "learning_rate": 4.641842412680599e-06, "loss": 0.4129, "step": 4268 }, { "epoch": 1.0556379821958457, "grad_norm": 0.736454924062351, "learning_rate": 4.641674760111664e-06, "loss": 0.3937, "step": 4269 }, { "epoch": 1.055885262116716, "grad_norm": 0.7757021490893236, "learning_rate": 4.641507071342142e-06, "loss": 0.409, "step": 4270 }, { "epoch": 1.0561325420375864, "grad_norm": 0.7960882523429013, "learning_rate": 4.641339346374868e-06, "loss": 0.3871, "step": 4271 }, { "epoch": 1.056379821958457, "grad_norm": 0.8146194733280869, "learning_rate": 4.641171585212677e-06, "loss": 0.4017, "step": 4272 }, { "epoch": 1.0566271018793274, "grad_norm": 0.8417103180639266, "learning_rate": 4.641003787858404e-06, "loss": 0.3976, "step": 4273 }, { "epoch": 1.0568743818001978, "grad_norm": 0.7689279569644257, "learning_rate": 4.6408359543148865e-06, "loss": 0.4043, "step": 4274 }, { "epoch": 1.0571216617210681, "grad_norm": 0.804639020481011, "learning_rate": 4.640668084584959e-06, "loss": 0.3813, "step": 4275 }, { "epoch": 1.0573689416419387, "grad_norm": 0.8083164458032077, "learning_rate": 4.640500178671462e-06, "loss": 0.3745, "step": 4276 }, { "epoch": 1.057616221562809, "grad_norm": 0.7949676227732114, "learning_rate": 4.640332236577232e-06, "loss": 0.3897, "step": 4277 }, { "epoch": 1.0578635014836795, "grad_norm": 0.7807393818349563, "learning_rate": 4.640164258305108e-06, "loss": 0.406, "step": 4278 }, { "epoch": 1.05811078140455, "grad_norm": 0.757256217668179, "learning_rate": 4.639996243857928e-06, "loss": 0.4096, "step": 4279 }, { "epoch": 1.0583580613254204, "grad_norm": 0.7660605587394923, "learning_rate": 4.639828193238534e-06, "loss": 0.407, "step": 4280 }, { "epoch": 1.0586053412462908, "grad_norm": 0.82360930926668, "learning_rate": 4.639660106449766e-06, "loss": 0.36, "step": 4281 }, { "epoch": 1.0588526211671612, "grad_norm": 0.7662496551168183, "learning_rate": 4.639491983494464e-06, "loss": 0.3979, "step": 4282 }, { "epoch": 1.0590999010880318, "grad_norm": 0.7908942996532721, "learning_rate": 4.639323824375471e-06, "loss": 0.3886, "step": 4283 }, { "epoch": 1.0593471810089021, "grad_norm": 0.7902252066662773, "learning_rate": 4.6391556290956295e-06, "loss": 0.3785, "step": 4284 }, { "epoch": 1.0595944609297725, "grad_norm": 0.7933006770454022, "learning_rate": 4.638987397657782e-06, "loss": 0.3734, "step": 4285 }, { "epoch": 1.0598417408506429, "grad_norm": 0.8198753766255277, "learning_rate": 4.638819130064772e-06, "loss": 0.3657, "step": 4286 }, { "epoch": 1.0600890207715135, "grad_norm": 0.811320630647155, "learning_rate": 4.638650826319443e-06, "loss": 0.4098, "step": 4287 }, { "epoch": 1.0603363006923838, "grad_norm": 0.7832330285186926, "learning_rate": 4.638482486424641e-06, "loss": 0.3818, "step": 4288 }, { "epoch": 1.0605835806132542, "grad_norm": 0.7839894377091607, "learning_rate": 4.638314110383212e-06, "loss": 0.3964, "step": 4289 }, { "epoch": 1.0608308605341246, "grad_norm": 0.7967382095357566, "learning_rate": 4.638145698198001e-06, "loss": 0.3924, "step": 4290 }, { "epoch": 1.0610781404549952, "grad_norm": 0.8132461312551288, "learning_rate": 4.637977249871853e-06, "loss": 0.4104, "step": 4291 }, { "epoch": 1.0613254203758655, "grad_norm": 0.809624561872919, "learning_rate": 4.637808765407619e-06, "loss": 0.3753, "step": 4292 }, { "epoch": 1.061572700296736, "grad_norm": 0.8332462069831172, "learning_rate": 4.637640244808145e-06, "loss": 0.3946, "step": 4293 }, { "epoch": 1.0618199802176063, "grad_norm": 0.8405991626418421, "learning_rate": 4.6374716880762786e-06, "loss": 0.386, "step": 4294 }, { "epoch": 1.0620672601384769, "grad_norm": 0.7919761749656209, "learning_rate": 4.63730309521487e-06, "loss": 0.3844, "step": 4295 }, { "epoch": 1.0623145400593472, "grad_norm": 0.8179300696924955, "learning_rate": 4.637134466226768e-06, "loss": 0.3879, "step": 4296 }, { "epoch": 1.0625618199802176, "grad_norm": 0.7924248198420661, "learning_rate": 4.636965801114824e-06, "loss": 0.3761, "step": 4297 }, { "epoch": 1.062809099901088, "grad_norm": 0.775130261415437, "learning_rate": 4.636797099881889e-06, "loss": 0.3817, "step": 4298 }, { "epoch": 1.0630563798219586, "grad_norm": 0.7772149482407928, "learning_rate": 4.636628362530814e-06, "loss": 0.3647, "step": 4299 }, { "epoch": 1.063303659742829, "grad_norm": 0.7773637784639948, "learning_rate": 4.636459589064451e-06, "loss": 0.3978, "step": 4300 }, { "epoch": 1.0635509396636993, "grad_norm": 0.8090321897590025, "learning_rate": 4.636290779485653e-06, "loss": 0.3607, "step": 4301 }, { "epoch": 1.0637982195845697, "grad_norm": 0.8047204844615943, "learning_rate": 4.6361219337972725e-06, "loss": 0.4076, "step": 4302 }, { "epoch": 1.0640454995054403, "grad_norm": 0.8017345314190111, "learning_rate": 4.6359530520021656e-06, "loss": 0.3602, "step": 4303 }, { "epoch": 1.0642927794263106, "grad_norm": 0.759186510701692, "learning_rate": 4.635784134103185e-06, "loss": 0.4062, "step": 4304 }, { "epoch": 1.064540059347181, "grad_norm": 0.8166130738443433, "learning_rate": 4.635615180103187e-06, "loss": 0.3877, "step": 4305 }, { "epoch": 1.0647873392680514, "grad_norm": 0.7947774540402034, "learning_rate": 4.635446190005028e-06, "loss": 0.3852, "step": 4306 }, { "epoch": 1.065034619188922, "grad_norm": 0.7567635532580184, "learning_rate": 4.6352771638115615e-06, "loss": 0.3934, "step": 4307 }, { "epoch": 1.0652818991097923, "grad_norm": 0.7989181252034733, "learning_rate": 4.635108101525647e-06, "loss": 0.3736, "step": 4308 }, { "epoch": 1.0655291790306627, "grad_norm": 0.7634549259575483, "learning_rate": 4.634939003150142e-06, "loss": 0.3934, "step": 4309 }, { "epoch": 1.065776458951533, "grad_norm": 0.7633449999304338, "learning_rate": 4.634769868687904e-06, "loss": 0.383, "step": 4310 }, { "epoch": 1.0660237388724036, "grad_norm": 0.7803946560226314, "learning_rate": 4.634600698141793e-06, "loss": 0.4067, "step": 4311 }, { "epoch": 1.066271018793274, "grad_norm": 0.7882088191827373, "learning_rate": 4.634431491514666e-06, "loss": 0.3917, "step": 4312 }, { "epoch": 1.0665182987141444, "grad_norm": 0.7806572656308762, "learning_rate": 4.634262248809386e-06, "loss": 0.3631, "step": 4313 }, { "epoch": 1.0667655786350148, "grad_norm": 0.7889807579763315, "learning_rate": 4.634092970028812e-06, "loss": 0.3875, "step": 4314 }, { "epoch": 1.0670128585558853, "grad_norm": 0.7795926766365951, "learning_rate": 4.633923655175806e-06, "loss": 0.3959, "step": 4315 }, { "epoch": 1.0672601384767557, "grad_norm": 0.800587818639521, "learning_rate": 4.6337543042532305e-06, "loss": 0.4055, "step": 4316 }, { "epoch": 1.067507418397626, "grad_norm": 0.8085854154225866, "learning_rate": 4.633584917263946e-06, "loss": 0.4087, "step": 4317 }, { "epoch": 1.0677546983184965, "grad_norm": 0.8119905250188897, "learning_rate": 4.633415494210818e-06, "loss": 0.3779, "step": 4318 }, { "epoch": 1.068001978239367, "grad_norm": 0.7716700019333239, "learning_rate": 4.633246035096708e-06, "loss": 0.3955, "step": 4319 }, { "epoch": 1.0682492581602374, "grad_norm": 0.8721152721882687, "learning_rate": 4.6330765399244825e-06, "loss": 0.3611, "step": 4320 }, { "epoch": 1.0684965380811078, "grad_norm": 0.7993584428615499, "learning_rate": 4.632907008697005e-06, "loss": 0.3774, "step": 4321 }, { "epoch": 1.0687438180019782, "grad_norm": 0.7903393080038614, "learning_rate": 4.632737441417141e-06, "loss": 0.3993, "step": 4322 }, { "epoch": 1.0689910979228487, "grad_norm": 0.7917998367197775, "learning_rate": 4.6325678380877585e-06, "loss": 0.3864, "step": 4323 }, { "epoch": 1.0692383778437191, "grad_norm": 0.8204126184105696, "learning_rate": 4.6323981987117215e-06, "loss": 0.3885, "step": 4324 }, { "epoch": 1.0694856577645895, "grad_norm": 0.8445058892500932, "learning_rate": 4.6322285232919e-06, "loss": 0.3541, "step": 4325 }, { "epoch": 1.0697329376854599, "grad_norm": 0.7921870851196744, "learning_rate": 4.63205881183116e-06, "loss": 0.3693, "step": 4326 }, { "epoch": 1.0699802176063304, "grad_norm": 0.7846713053583934, "learning_rate": 4.631889064332372e-06, "loss": 0.3705, "step": 4327 }, { "epoch": 1.0702274975272008, "grad_norm": 0.7781826242183534, "learning_rate": 4.631719280798404e-06, "loss": 0.3993, "step": 4328 }, { "epoch": 1.0704747774480712, "grad_norm": 0.8225439280520509, "learning_rate": 4.631549461232126e-06, "loss": 0.3724, "step": 4329 }, { "epoch": 1.0707220573689415, "grad_norm": 0.7837799994707015, "learning_rate": 4.631379605636408e-06, "loss": 0.3575, "step": 4330 }, { "epoch": 1.0709693372898121, "grad_norm": 0.7746713644854968, "learning_rate": 4.631209714014122e-06, "loss": 0.394, "step": 4331 }, { "epoch": 1.0712166172106825, "grad_norm": 0.817647858623306, "learning_rate": 4.63103978636814e-06, "loss": 0.392, "step": 4332 }, { "epoch": 1.0714638971315529, "grad_norm": 0.8121486426510394, "learning_rate": 4.630869822701332e-06, "loss": 0.3734, "step": 4333 }, { "epoch": 1.0717111770524232, "grad_norm": 0.8006890103877174, "learning_rate": 4.630699823016574e-06, "loss": 0.3854, "step": 4334 }, { "epoch": 1.0719584569732938, "grad_norm": 0.7970895669489745, "learning_rate": 4.6305297873167375e-06, "loss": 0.3895, "step": 4335 }, { "epoch": 1.0722057368941642, "grad_norm": 0.7596795935140328, "learning_rate": 4.630359715604697e-06, "loss": 0.4028, "step": 4336 }, { "epoch": 1.0724530168150346, "grad_norm": 0.7968436348401924, "learning_rate": 4.630189607883328e-06, "loss": 0.3762, "step": 4337 }, { "epoch": 1.072700296735905, "grad_norm": 0.7761492763660368, "learning_rate": 4.6300194641555026e-06, "loss": 0.3788, "step": 4338 }, { "epoch": 1.0729475766567755, "grad_norm": 0.8052930230665576, "learning_rate": 4.6298492844241015e-06, "loss": 0.3784, "step": 4339 }, { "epoch": 1.073194856577646, "grad_norm": 0.7942217048128182, "learning_rate": 4.629679068691999e-06, "loss": 0.4197, "step": 4340 }, { "epoch": 1.0734421364985163, "grad_norm": 0.7914453771559034, "learning_rate": 4.62950881696207e-06, "loss": 0.3768, "step": 4341 }, { "epoch": 1.0736894164193866, "grad_norm": 0.7860614963000404, "learning_rate": 4.6293385292371965e-06, "loss": 0.4089, "step": 4342 }, { "epoch": 1.0739366963402572, "grad_norm": 0.788374108272834, "learning_rate": 4.629168205520254e-06, "loss": 0.412, "step": 4343 }, { "epoch": 1.0741839762611276, "grad_norm": 0.8018061348303731, "learning_rate": 4.628997845814123e-06, "loss": 0.4037, "step": 4344 }, { "epoch": 1.074431256181998, "grad_norm": 0.799922795712132, "learning_rate": 4.628827450121681e-06, "loss": 0.3544, "step": 4345 }, { "epoch": 1.0746785361028683, "grad_norm": 0.7699595851905867, "learning_rate": 4.62865701844581e-06, "loss": 0.3897, "step": 4346 }, { "epoch": 1.074925816023739, "grad_norm": 0.7780021743592083, "learning_rate": 4.628486550789391e-06, "loss": 0.3934, "step": 4347 }, { "epoch": 1.0751730959446093, "grad_norm": 0.8067121509570305, "learning_rate": 4.628316047155305e-06, "loss": 0.3794, "step": 4348 }, { "epoch": 1.0754203758654797, "grad_norm": 0.7849712091670001, "learning_rate": 4.628145507546433e-06, "loss": 0.3892, "step": 4349 }, { "epoch": 1.0756676557863503, "grad_norm": 0.804632316257273, "learning_rate": 4.6279749319656584e-06, "loss": 0.3705, "step": 4350 }, { "epoch": 1.0759149357072206, "grad_norm": 0.7722962143560209, "learning_rate": 4.627804320415864e-06, "loss": 0.4038, "step": 4351 }, { "epoch": 1.076162215628091, "grad_norm": 0.8124951312848541, "learning_rate": 4.627633672899934e-06, "loss": 0.3932, "step": 4352 }, { "epoch": 1.0764094955489614, "grad_norm": 0.8129527593791547, "learning_rate": 4.627462989420753e-06, "loss": 0.3944, "step": 4353 }, { "epoch": 1.0766567754698317, "grad_norm": 0.7773915079947215, "learning_rate": 4.6272922699812076e-06, "loss": 0.4158, "step": 4354 }, { "epoch": 1.0769040553907023, "grad_norm": 0.8301360026785094, "learning_rate": 4.62712151458418e-06, "loss": 0.3794, "step": 4355 }, { "epoch": 1.0771513353115727, "grad_norm": 0.772666046582316, "learning_rate": 4.626950723232558e-06, "loss": 0.3848, "step": 4356 }, { "epoch": 1.077398615232443, "grad_norm": 0.7761555638126303, "learning_rate": 4.62677989592923e-06, "loss": 0.406, "step": 4357 }, { "epoch": 1.0776458951533137, "grad_norm": 0.8022549165269791, "learning_rate": 4.626609032677082e-06, "loss": 0.3889, "step": 4358 }, { "epoch": 1.077893175074184, "grad_norm": 0.8071200531196575, "learning_rate": 4.626438133479002e-06, "loss": 0.3788, "step": 4359 }, { "epoch": 1.0781404549950544, "grad_norm": 0.8190775870483752, "learning_rate": 4.626267198337878e-06, "loss": 0.3762, "step": 4360 }, { "epoch": 1.0783877349159248, "grad_norm": 0.8007731820214046, "learning_rate": 4.6260962272566015e-06, "loss": 0.3785, "step": 4361 }, { "epoch": 1.0786350148367951, "grad_norm": 0.7769443936928495, "learning_rate": 4.625925220238061e-06, "loss": 0.395, "step": 4362 }, { "epoch": 1.0788822947576657, "grad_norm": 0.8071203728969769, "learning_rate": 4.625754177285147e-06, "loss": 0.3923, "step": 4363 }, { "epoch": 1.079129574678536, "grad_norm": 0.7950792582562991, "learning_rate": 4.625583098400751e-06, "loss": 0.4157, "step": 4364 }, { "epoch": 1.0793768545994065, "grad_norm": 0.8025265140191271, "learning_rate": 4.625411983587764e-06, "loss": 0.3686, "step": 4365 }, { "epoch": 1.079624134520277, "grad_norm": 0.8371001492583094, "learning_rate": 4.62524083284908e-06, "loss": 0.3665, "step": 4366 }, { "epoch": 1.0798714144411474, "grad_norm": 0.7864689768249224, "learning_rate": 4.62506964618759e-06, "loss": 0.3845, "step": 4367 }, { "epoch": 1.0801186943620178, "grad_norm": 0.7959047227946812, "learning_rate": 4.6248984236061896e-06, "loss": 0.39, "step": 4368 }, { "epoch": 1.0803659742828882, "grad_norm": 0.8069266750505678, "learning_rate": 4.624727165107771e-06, "loss": 0.3852, "step": 4369 }, { "epoch": 1.0806132542037588, "grad_norm": 0.7811813295093905, "learning_rate": 4.62455587069523e-06, "loss": 0.3918, "step": 4370 }, { "epoch": 1.0808605341246291, "grad_norm": 0.8124377339021684, "learning_rate": 4.624384540371461e-06, "loss": 0.3636, "step": 4371 }, { "epoch": 1.0811078140454995, "grad_norm": 0.8574201016591452, "learning_rate": 4.624213174139362e-06, "loss": 0.4069, "step": 4372 }, { "epoch": 1.0813550939663699, "grad_norm": 0.8190156168079833, "learning_rate": 4.6240417720018275e-06, "loss": 0.3846, "step": 4373 }, { "epoch": 1.0816023738872405, "grad_norm": 0.8049853540276851, "learning_rate": 4.623870333961755e-06, "loss": 0.3787, "step": 4374 }, { "epoch": 1.0818496538081108, "grad_norm": 0.8063224774627666, "learning_rate": 4.623698860022043e-06, "loss": 0.3769, "step": 4375 }, { "epoch": 1.0820969337289812, "grad_norm": 0.7627082438105972, "learning_rate": 4.623527350185591e-06, "loss": 0.3946, "step": 4376 }, { "epoch": 1.0823442136498516, "grad_norm": 0.8150713506611518, "learning_rate": 4.623355804455296e-06, "loss": 0.382, "step": 4377 }, { "epoch": 1.0825914935707222, "grad_norm": 0.8287101957828621, "learning_rate": 4.623184222834059e-06, "loss": 0.3828, "step": 4378 }, { "epoch": 1.0828387734915925, "grad_norm": 0.7713507709323122, "learning_rate": 4.623012605324778e-06, "loss": 0.3735, "step": 4379 }, { "epoch": 1.083086053412463, "grad_norm": 0.8057709211217413, "learning_rate": 4.6228409519303564e-06, "loss": 0.3804, "step": 4380 }, { "epoch": 1.0833333333333333, "grad_norm": 0.8073418347377654, "learning_rate": 4.6226692626536955e-06, "loss": 0.3845, "step": 4381 }, { "epoch": 1.0835806132542039, "grad_norm": 0.7749949974019674, "learning_rate": 4.6224975374976954e-06, "loss": 0.4137, "step": 4382 }, { "epoch": 1.0838278931750742, "grad_norm": 0.8035297034215543, "learning_rate": 4.622325776465261e-06, "loss": 0.3794, "step": 4383 }, { "epoch": 1.0840751730959446, "grad_norm": 0.8322855471722911, "learning_rate": 4.622153979559294e-06, "loss": 0.3673, "step": 4384 }, { "epoch": 1.084322453016815, "grad_norm": 0.7676598132115356, "learning_rate": 4.6219821467826985e-06, "loss": 0.3914, "step": 4385 }, { "epoch": 1.0845697329376855, "grad_norm": 0.7738743573271007, "learning_rate": 4.6218102781383795e-06, "loss": 0.3625, "step": 4386 }, { "epoch": 1.084817012858556, "grad_norm": 0.8349926411809825, "learning_rate": 4.621638373629241e-06, "loss": 0.4147, "step": 4387 }, { "epoch": 1.0850642927794263, "grad_norm": 0.8151740791367348, "learning_rate": 4.621466433258191e-06, "loss": 0.3755, "step": 4388 }, { "epoch": 1.0853115727002967, "grad_norm": 0.8300503084955049, "learning_rate": 4.621294457028134e-06, "loss": 0.3639, "step": 4389 }, { "epoch": 1.0855588526211672, "grad_norm": 0.7999334059688192, "learning_rate": 4.621122444941976e-06, "loss": 0.3732, "step": 4390 }, { "epoch": 1.0858061325420376, "grad_norm": 0.7853816787497419, "learning_rate": 4.6209503970026265e-06, "loss": 0.3912, "step": 4391 }, { "epoch": 1.086053412462908, "grad_norm": 0.8009264369566632, "learning_rate": 4.620778313212993e-06, "loss": 0.3938, "step": 4392 }, { "epoch": 1.0863006923837784, "grad_norm": 0.7780030951232997, "learning_rate": 4.6206061935759834e-06, "loss": 0.3912, "step": 4393 }, { "epoch": 1.086547972304649, "grad_norm": 0.7601313321873178, "learning_rate": 4.620434038094509e-06, "loss": 0.395, "step": 4394 }, { "epoch": 1.0867952522255193, "grad_norm": 0.812219189005838, "learning_rate": 4.620261846771478e-06, "loss": 0.3559, "step": 4395 }, { "epoch": 1.0870425321463897, "grad_norm": 0.7991637445092445, "learning_rate": 4.620089619609801e-06, "loss": 0.3823, "step": 4396 }, { "epoch": 1.08728981206726, "grad_norm": 0.7808831189681588, "learning_rate": 4.619917356612389e-06, "loss": 0.4113, "step": 4397 }, { "epoch": 1.0875370919881306, "grad_norm": 0.8034915607456375, "learning_rate": 4.619745057782155e-06, "loss": 0.3902, "step": 4398 }, { "epoch": 1.087784371909001, "grad_norm": 0.8110529809805359, "learning_rate": 4.61957272312201e-06, "loss": 0.3774, "step": 4399 }, { "epoch": 1.0880316518298714, "grad_norm": 0.8265377230997106, "learning_rate": 4.619400352634867e-06, "loss": 0.3907, "step": 4400 }, { "epoch": 1.0882789317507418, "grad_norm": 0.8342617529181773, "learning_rate": 4.619227946323642e-06, "loss": 0.3887, "step": 4401 }, { "epoch": 1.0885262116716123, "grad_norm": 0.7647714893712949, "learning_rate": 4.619055504191246e-06, "loss": 0.4145, "step": 4402 }, { "epoch": 1.0887734915924827, "grad_norm": 0.7893757911649837, "learning_rate": 4.618883026240596e-06, "loss": 0.3447, "step": 4403 }, { "epoch": 1.089020771513353, "grad_norm": 0.7762698172276777, "learning_rate": 4.618710512474606e-06, "loss": 0.3818, "step": 4404 }, { "epoch": 1.0892680514342234, "grad_norm": 0.8484630690056408, "learning_rate": 4.6185379628961915e-06, "loss": 0.3597, "step": 4405 }, { "epoch": 1.089515331355094, "grad_norm": 0.790182553884926, "learning_rate": 4.618365377508271e-06, "loss": 0.4006, "step": 4406 }, { "epoch": 1.0897626112759644, "grad_norm": 0.7840597249493056, "learning_rate": 4.618192756313761e-06, "loss": 0.3736, "step": 4407 }, { "epoch": 1.0900098911968348, "grad_norm": 0.8491674662200214, "learning_rate": 4.618020099315579e-06, "loss": 0.4084, "step": 4408 }, { "epoch": 1.0902571711177051, "grad_norm": 0.8683972050546668, "learning_rate": 4.617847406516642e-06, "loss": 0.3776, "step": 4409 }, { "epoch": 1.0905044510385757, "grad_norm": 0.7727260787982819, "learning_rate": 4.617674677919872e-06, "loss": 0.3576, "step": 4410 }, { "epoch": 1.090751730959446, "grad_norm": 0.7911443543840935, "learning_rate": 4.617501913528188e-06, "loss": 0.3685, "step": 4411 }, { "epoch": 1.0909990108803165, "grad_norm": 0.7988198916702408, "learning_rate": 4.617329113344508e-06, "loss": 0.4053, "step": 4412 }, { "epoch": 1.0912462908011868, "grad_norm": 0.8165388910396402, "learning_rate": 4.6171562773717536e-06, "loss": 0.4071, "step": 4413 }, { "epoch": 1.0914935707220574, "grad_norm": 0.7648956680759246, "learning_rate": 4.616983405612847e-06, "loss": 0.3972, "step": 4414 }, { "epoch": 1.0917408506429278, "grad_norm": 0.776424364031788, "learning_rate": 4.6168104980707105e-06, "loss": 0.3667, "step": 4415 }, { "epoch": 1.0919881305637982, "grad_norm": 0.7833192524550328, "learning_rate": 4.616637554748267e-06, "loss": 0.3959, "step": 4416 }, { "epoch": 1.0922354104846685, "grad_norm": 0.8607695016130422, "learning_rate": 4.616464575648438e-06, "loss": 0.4053, "step": 4417 }, { "epoch": 1.0924826904055391, "grad_norm": 0.8137931866688325, "learning_rate": 4.616291560774148e-06, "loss": 0.3737, "step": 4418 }, { "epoch": 1.0927299703264095, "grad_norm": 0.7729096909966967, "learning_rate": 4.616118510128323e-06, "loss": 0.3994, "step": 4419 }, { "epoch": 1.0929772502472799, "grad_norm": 0.8111755092998029, "learning_rate": 4.615945423713886e-06, "loss": 0.383, "step": 4420 }, { "epoch": 1.0932245301681502, "grad_norm": 0.8840213139453559, "learning_rate": 4.615772301533764e-06, "loss": 0.3929, "step": 4421 }, { "epoch": 1.0934718100890208, "grad_norm": 0.8960104400194246, "learning_rate": 4.615599143590883e-06, "loss": 0.4195, "step": 4422 }, { "epoch": 1.0937190900098912, "grad_norm": 0.832775407834559, "learning_rate": 4.61542594988817e-06, "loss": 0.3746, "step": 4423 }, { "epoch": 1.0939663699307616, "grad_norm": 0.760895524892044, "learning_rate": 4.615252720428551e-06, "loss": 0.3853, "step": 4424 }, { "epoch": 1.094213649851632, "grad_norm": 0.822897221419313, "learning_rate": 4.615079455214956e-06, "loss": 0.3841, "step": 4425 }, { "epoch": 1.0944609297725025, "grad_norm": 0.7622020023243429, "learning_rate": 4.614906154250314e-06, "loss": 0.4146, "step": 4426 }, { "epoch": 1.094708209693373, "grad_norm": 0.755426479073628, "learning_rate": 4.614732817537552e-06, "loss": 0.3882, "step": 4427 }, { "epoch": 1.0949554896142433, "grad_norm": 0.7935295519823844, "learning_rate": 4.614559445079601e-06, "loss": 0.4139, "step": 4428 }, { "epoch": 1.0952027695351139, "grad_norm": 0.8045006572044189, "learning_rate": 4.614386036879392e-06, "loss": 0.3579, "step": 4429 }, { "epoch": 1.0954500494559842, "grad_norm": 0.8133785487188834, "learning_rate": 4.6142125929398565e-06, "loss": 0.3944, "step": 4430 }, { "epoch": 1.0956973293768546, "grad_norm": 0.7699291745551514, "learning_rate": 4.6140391132639246e-06, "loss": 0.3809, "step": 4431 }, { "epoch": 1.095944609297725, "grad_norm": 0.8043121058809233, "learning_rate": 4.61386559785453e-06, "loss": 0.3734, "step": 4432 }, { "epoch": 1.0961918892185953, "grad_norm": 0.7642484264493106, "learning_rate": 4.613692046714605e-06, "loss": 0.3899, "step": 4433 }, { "epoch": 1.096439169139466, "grad_norm": 0.7732650307671076, "learning_rate": 4.613518459847083e-06, "loss": 0.4078, "step": 4434 }, { "epoch": 1.0966864490603363, "grad_norm": 0.8222697768446865, "learning_rate": 4.613344837254899e-06, "loss": 0.385, "step": 4435 }, { "epoch": 1.0969337289812067, "grad_norm": 0.7955080084095342, "learning_rate": 4.613171178940986e-06, "loss": 0.3821, "step": 4436 }, { "epoch": 1.0971810089020773, "grad_norm": 0.7942760233415241, "learning_rate": 4.612997484908281e-06, "loss": 0.3675, "step": 4437 }, { "epoch": 1.0974282888229476, "grad_norm": 0.8096638170598957, "learning_rate": 4.61282375515972e-06, "loss": 0.3738, "step": 4438 }, { "epoch": 1.097675568743818, "grad_norm": 0.8498820513681228, "learning_rate": 4.612649989698238e-06, "loss": 0.3872, "step": 4439 }, { "epoch": 1.0979228486646884, "grad_norm": 0.8097768742615912, "learning_rate": 4.612476188526773e-06, "loss": 0.4016, "step": 4440 }, { "epoch": 1.0981701285855587, "grad_norm": 0.7815873330106851, "learning_rate": 4.612302351648264e-06, "loss": 0.3599, "step": 4441 }, { "epoch": 1.0984174085064293, "grad_norm": 0.7657362130537584, "learning_rate": 4.6121284790656475e-06, "loss": 0.4117, "step": 4442 }, { "epoch": 1.0986646884272997, "grad_norm": 0.7790925533813563, "learning_rate": 4.611954570781863e-06, "loss": 0.3937, "step": 4443 }, { "epoch": 1.09891196834817, "grad_norm": 0.7695495331270616, "learning_rate": 4.61178062679985e-06, "loss": 0.3945, "step": 4444 }, { "epoch": 1.0991592482690407, "grad_norm": 0.7852274355800463, "learning_rate": 4.61160664712255e-06, "loss": 0.3939, "step": 4445 }, { "epoch": 1.099406528189911, "grad_norm": 0.7923267811092497, "learning_rate": 4.611432631752901e-06, "loss": 0.4085, "step": 4446 }, { "epoch": 1.0996538081107814, "grad_norm": 0.8040856275395187, "learning_rate": 4.611258580693847e-06, "loss": 0.3793, "step": 4447 }, { "epoch": 1.0999010880316518, "grad_norm": 0.796986583269477, "learning_rate": 4.611084493948329e-06, "loss": 0.3859, "step": 4448 }, { "epoch": 1.1001483679525224, "grad_norm": 0.7634550093917282, "learning_rate": 4.6109103715192894e-06, "loss": 0.3887, "step": 4449 }, { "epoch": 1.1003956478733927, "grad_norm": 0.8085012818010203, "learning_rate": 4.6107362134096714e-06, "loss": 0.3652, "step": 4450 }, { "epoch": 1.100642927794263, "grad_norm": 0.768276181942338, "learning_rate": 4.610562019622418e-06, "loss": 0.3929, "step": 4451 }, { "epoch": 1.1008902077151335, "grad_norm": 0.8042338672163366, "learning_rate": 4.610387790160476e-06, "loss": 0.3576, "step": 4452 }, { "epoch": 1.101137487636004, "grad_norm": 0.796259263637112, "learning_rate": 4.6102135250267884e-06, "loss": 0.3713, "step": 4453 }, { "epoch": 1.1013847675568744, "grad_norm": 0.8427966198751222, "learning_rate": 4.610039224224302e-06, "loss": 0.379, "step": 4454 }, { "epoch": 1.1016320474777448, "grad_norm": 0.7935477078640766, "learning_rate": 4.609864887755961e-06, "loss": 0.3789, "step": 4455 }, { "epoch": 1.1018793273986152, "grad_norm": 0.779204501017459, "learning_rate": 4.6096905156247144e-06, "loss": 0.4235, "step": 4456 }, { "epoch": 1.1021266073194858, "grad_norm": 0.7542713028827567, "learning_rate": 4.609516107833508e-06, "loss": 0.4068, "step": 4457 }, { "epoch": 1.1023738872403561, "grad_norm": 0.8457511732807452, "learning_rate": 4.60934166438529e-06, "loss": 0.3935, "step": 4458 }, { "epoch": 1.1026211671612265, "grad_norm": 0.7992405206648767, "learning_rate": 4.609167185283011e-06, "loss": 0.3917, "step": 4459 }, { "epoch": 1.1028684470820969, "grad_norm": 0.782464048233461, "learning_rate": 4.608992670529618e-06, "loss": 0.3771, "step": 4460 }, { "epoch": 1.1031157270029674, "grad_norm": 0.8160181138621695, "learning_rate": 4.6088181201280615e-06, "loss": 0.397, "step": 4461 }, { "epoch": 1.1033630069238378, "grad_norm": 0.7974693087882351, "learning_rate": 4.608643534081291e-06, "loss": 0.379, "step": 4462 }, { "epoch": 1.1036102868447082, "grad_norm": 0.7801440643464834, "learning_rate": 4.608468912392259e-06, "loss": 0.3872, "step": 4463 }, { "epoch": 1.1038575667655786, "grad_norm": 0.7818539539804097, "learning_rate": 4.608294255063917e-06, "loss": 0.38, "step": 4464 }, { "epoch": 1.1041048466864491, "grad_norm": 0.8083153315446957, "learning_rate": 4.608119562099217e-06, "loss": 0.3712, "step": 4465 }, { "epoch": 1.1043521266073195, "grad_norm": 0.813246685282877, "learning_rate": 4.6079448335011105e-06, "loss": 0.3685, "step": 4466 }, { "epoch": 1.1045994065281899, "grad_norm": 0.8233103586749435, "learning_rate": 4.607770069272552e-06, "loss": 0.3944, "step": 4467 }, { "epoch": 1.1048466864490603, "grad_norm": 0.8021378079771996, "learning_rate": 4.607595269416497e-06, "loss": 0.3782, "step": 4468 }, { "epoch": 1.1050939663699308, "grad_norm": 0.7312354290441203, "learning_rate": 4.607420433935898e-06, "loss": 0.377, "step": 4469 }, { "epoch": 1.1053412462908012, "grad_norm": 0.7756752720914921, "learning_rate": 4.60724556283371e-06, "loss": 0.3794, "step": 4470 }, { "epoch": 1.1055885262116716, "grad_norm": 0.8103041025207485, "learning_rate": 4.607070656112891e-06, "loss": 0.3776, "step": 4471 }, { "epoch": 1.105835806132542, "grad_norm": 0.7950143379679689, "learning_rate": 4.606895713776396e-06, "loss": 0.3722, "step": 4472 }, { "epoch": 1.1060830860534125, "grad_norm": 0.7784789884021242, "learning_rate": 4.606720735827182e-06, "loss": 0.3704, "step": 4473 }, { "epoch": 1.106330365974283, "grad_norm": 0.8095866105829985, "learning_rate": 4.6065457222682065e-06, "loss": 0.4029, "step": 4474 }, { "epoch": 1.1065776458951533, "grad_norm": 0.7686732865881911, "learning_rate": 4.606370673102429e-06, "loss": 0.3698, "step": 4475 }, { "epoch": 1.1068249258160237, "grad_norm": 0.7923186074182061, "learning_rate": 4.606195588332806e-06, "loss": 0.382, "step": 4476 }, { "epoch": 1.1070722057368942, "grad_norm": 0.7884754077352629, "learning_rate": 4.606020467962299e-06, "loss": 0.4192, "step": 4477 }, { "epoch": 1.1073194856577646, "grad_norm": 0.7744685782694107, "learning_rate": 4.6058453119938675e-06, "loss": 0.4185, "step": 4478 }, { "epoch": 1.107566765578635, "grad_norm": 0.7481094934736112, "learning_rate": 4.605670120430472e-06, "loss": 0.4009, "step": 4479 }, { "epoch": 1.1078140454995054, "grad_norm": 0.8230351258613748, "learning_rate": 4.605494893275074e-06, "loss": 0.3743, "step": 4480 }, { "epoch": 1.108061325420376, "grad_norm": 0.7826343124603149, "learning_rate": 4.605319630530636e-06, "loss": 0.3658, "step": 4481 }, { "epoch": 1.1083086053412463, "grad_norm": 0.8100436343843435, "learning_rate": 4.605144332200119e-06, "loss": 0.4036, "step": 4482 }, { "epoch": 1.1085558852621167, "grad_norm": 0.7833288205534297, "learning_rate": 4.604968998286486e-06, "loss": 0.3916, "step": 4483 }, { "epoch": 1.108803165182987, "grad_norm": 0.8202144652257336, "learning_rate": 4.604793628792702e-06, "loss": 0.3855, "step": 4484 }, { "epoch": 1.1090504451038576, "grad_norm": 0.7742338691723732, "learning_rate": 4.604618223721731e-06, "loss": 0.4039, "step": 4485 }, { "epoch": 1.109297725024728, "grad_norm": 0.8187936442437574, "learning_rate": 4.604442783076536e-06, "loss": 0.3998, "step": 4486 }, { "epoch": 1.1095450049455984, "grad_norm": 0.8187375336836844, "learning_rate": 4.604267306860086e-06, "loss": 0.3697, "step": 4487 }, { "epoch": 1.1097922848664687, "grad_norm": 0.782078091436047, "learning_rate": 4.604091795075344e-06, "loss": 0.4049, "step": 4488 }, { "epoch": 1.1100395647873393, "grad_norm": 0.775509142489941, "learning_rate": 4.603916247725277e-06, "loss": 0.3928, "step": 4489 }, { "epoch": 1.1102868447082097, "grad_norm": 0.8059342351291733, "learning_rate": 4.603740664812854e-06, "loss": 0.3837, "step": 4490 }, { "epoch": 1.11053412462908, "grad_norm": 0.77843570773598, "learning_rate": 4.603565046341042e-06, "loss": 0.385, "step": 4491 }, { "epoch": 1.1107814045499504, "grad_norm": 0.7929739528235876, "learning_rate": 4.603389392312808e-06, "loss": 0.3784, "step": 4492 }, { "epoch": 1.111028684470821, "grad_norm": 0.8091348743825967, "learning_rate": 4.603213702731123e-06, "loss": 0.352, "step": 4493 }, { "epoch": 1.1112759643916914, "grad_norm": 0.8185131519768273, "learning_rate": 4.603037977598956e-06, "loss": 0.3967, "step": 4494 }, { "epoch": 1.1115232443125618, "grad_norm": 0.8173818465264234, "learning_rate": 4.602862216919277e-06, "loss": 0.3861, "step": 4495 }, { "epoch": 1.1117705242334321, "grad_norm": 0.7970984599450708, "learning_rate": 4.602686420695059e-06, "loss": 0.3592, "step": 4496 }, { "epoch": 1.1120178041543027, "grad_norm": 0.8243281877318096, "learning_rate": 4.602510588929269e-06, "loss": 0.3824, "step": 4497 }, { "epoch": 1.112265084075173, "grad_norm": 0.7996722916811361, "learning_rate": 4.6023347216248825e-06, "loss": 0.3747, "step": 4498 }, { "epoch": 1.1125123639960435, "grad_norm": 0.7608245960931904, "learning_rate": 4.602158818784872e-06, "loss": 0.3803, "step": 4499 }, { "epoch": 1.1127596439169138, "grad_norm": 0.7513703886342513, "learning_rate": 4.60198288041221e-06, "loss": 0.3988, "step": 4500 }, { "epoch": 1.1130069238377844, "grad_norm": 0.7722998938360013, "learning_rate": 4.601806906509871e-06, "loss": 0.4056, "step": 4501 }, { "epoch": 1.1132542037586548, "grad_norm": 0.8035614959494193, "learning_rate": 4.601630897080828e-06, "loss": 0.3945, "step": 4502 }, { "epoch": 1.1135014836795252, "grad_norm": 0.8037843080102386, "learning_rate": 4.601454852128057e-06, "loss": 0.3746, "step": 4503 }, { "epoch": 1.1137487636003955, "grad_norm": 0.8041212300862741, "learning_rate": 4.601278771654533e-06, "loss": 0.3715, "step": 4504 }, { "epoch": 1.1139960435212661, "grad_norm": 0.7436967799863807, "learning_rate": 4.601102655663235e-06, "loss": 0.3785, "step": 4505 }, { "epoch": 1.1142433234421365, "grad_norm": 0.7924099078341875, "learning_rate": 4.600926504157137e-06, "loss": 0.3983, "step": 4506 }, { "epoch": 1.1144906033630069, "grad_norm": 0.7655426666641213, "learning_rate": 4.600750317139218e-06, "loss": 0.3877, "step": 4507 }, { "epoch": 1.1147378832838775, "grad_norm": 0.7836062068524732, "learning_rate": 4.600574094612455e-06, "loss": 0.3863, "step": 4508 }, { "epoch": 1.1149851632047478, "grad_norm": 0.7508219648819856, "learning_rate": 4.6003978365798265e-06, "loss": 0.393, "step": 4509 }, { "epoch": 1.1152324431256182, "grad_norm": 0.7795564053411989, "learning_rate": 4.600221543044313e-06, "loss": 0.385, "step": 4510 }, { "epoch": 1.1154797230464886, "grad_norm": 0.7909427019617117, "learning_rate": 4.600045214008894e-06, "loss": 0.3822, "step": 4511 }, { "epoch": 1.115727002967359, "grad_norm": 0.8015091874763165, "learning_rate": 4.599868849476549e-06, "loss": 0.3904, "step": 4512 }, { "epoch": 1.1159742828882295, "grad_norm": 0.7728380473484753, "learning_rate": 4.59969244945026e-06, "loss": 0.3972, "step": 4513 }, { "epoch": 1.1162215628091, "grad_norm": 0.800411764882676, "learning_rate": 4.599516013933009e-06, "loss": 0.4011, "step": 4514 }, { "epoch": 1.1164688427299703, "grad_norm": 0.7511202087479435, "learning_rate": 4.599339542927778e-06, "loss": 0.4183, "step": 4515 }, { "epoch": 1.1167161226508409, "grad_norm": 0.7828553962686378, "learning_rate": 4.5991630364375485e-06, "loss": 0.3819, "step": 4516 }, { "epoch": 1.1169634025717112, "grad_norm": 0.7792110736818442, "learning_rate": 4.598986494465306e-06, "loss": 0.3991, "step": 4517 }, { "epoch": 1.1172106824925816, "grad_norm": 0.7878829097739602, "learning_rate": 4.598809917014034e-06, "loss": 0.3753, "step": 4518 }, { "epoch": 1.117457962413452, "grad_norm": 0.7921615881916844, "learning_rate": 4.598633304086716e-06, "loss": 0.3464, "step": 4519 }, { "epoch": 1.1177052423343223, "grad_norm": 0.789091900326134, "learning_rate": 4.598456655686338e-06, "loss": 0.3941, "step": 4520 }, { "epoch": 1.117952522255193, "grad_norm": 0.7912069703947289, "learning_rate": 4.5982799718158875e-06, "loss": 0.3878, "step": 4521 }, { "epoch": 1.1181998021760633, "grad_norm": 0.7959195227847573, "learning_rate": 4.59810325247835e-06, "loss": 0.3925, "step": 4522 }, { "epoch": 1.1184470820969337, "grad_norm": 0.8336553403346311, "learning_rate": 4.59792649767671e-06, "loss": 0.3518, "step": 4523 }, { "epoch": 1.1186943620178043, "grad_norm": 0.8329762874423223, "learning_rate": 4.597749707413959e-06, "loss": 0.4101, "step": 4524 }, { "epoch": 1.1189416419386746, "grad_norm": 0.7872354822627062, "learning_rate": 4.597572881693082e-06, "loss": 0.4126, "step": 4525 }, { "epoch": 1.119188921859545, "grad_norm": 0.8117514083859518, "learning_rate": 4.59739602051707e-06, "loss": 0.3789, "step": 4526 }, { "epoch": 1.1194362017804154, "grad_norm": 0.7758380518508998, "learning_rate": 4.597219123888913e-06, "loss": 0.3839, "step": 4527 }, { "epoch": 1.119683481701286, "grad_norm": 0.78816284321847, "learning_rate": 4.597042191811598e-06, "loss": 0.372, "step": 4528 }, { "epoch": 1.1199307616221563, "grad_norm": 0.7745572687374715, "learning_rate": 4.596865224288119e-06, "loss": 0.3877, "step": 4529 }, { "epoch": 1.1201780415430267, "grad_norm": 0.7762804483742473, "learning_rate": 4.596688221321466e-06, "loss": 0.3952, "step": 4530 }, { "epoch": 1.120425321463897, "grad_norm": 0.7737242963056743, "learning_rate": 4.5965111829146305e-06, "loss": 0.402, "step": 4531 }, { "epoch": 1.1206726013847677, "grad_norm": 0.8127483201012078, "learning_rate": 4.596334109070605e-06, "loss": 0.4109, "step": 4532 }, { "epoch": 1.120919881305638, "grad_norm": 0.7993385251971618, "learning_rate": 4.596156999792383e-06, "loss": 0.3819, "step": 4533 }, { "epoch": 1.1211671612265084, "grad_norm": 0.8105033514016623, "learning_rate": 4.595979855082958e-06, "loss": 0.3972, "step": 4534 }, { "epoch": 1.1214144411473788, "grad_norm": 0.7829412377710302, "learning_rate": 4.595802674945325e-06, "loss": 0.3976, "step": 4535 }, { "epoch": 1.1216617210682494, "grad_norm": 0.8036569268538875, "learning_rate": 4.595625459382477e-06, "loss": 0.4028, "step": 4536 }, { "epoch": 1.1219090009891197, "grad_norm": 0.7901778433962706, "learning_rate": 4.5954482083974105e-06, "loss": 0.3635, "step": 4537 }, { "epoch": 1.12215628090999, "grad_norm": 0.7884932266325752, "learning_rate": 4.595270921993122e-06, "loss": 0.3807, "step": 4538 }, { "epoch": 1.1224035608308605, "grad_norm": 0.7805254322826995, "learning_rate": 4.595093600172608e-06, "loss": 0.3704, "step": 4539 }, { "epoch": 1.122650840751731, "grad_norm": 0.792711925709775, "learning_rate": 4.594916242938866e-06, "loss": 0.3741, "step": 4540 }, { "epoch": 1.1228981206726014, "grad_norm": 0.801491123778611, "learning_rate": 4.594738850294892e-06, "loss": 0.3898, "step": 4541 }, { "epoch": 1.1231454005934718, "grad_norm": 0.777023291707036, "learning_rate": 4.594561422243687e-06, "loss": 0.4094, "step": 4542 }, { "epoch": 1.1233926805143422, "grad_norm": 0.7577168906398215, "learning_rate": 4.594383958788249e-06, "loss": 0.4161, "step": 4543 }, { "epoch": 1.1236399604352127, "grad_norm": 0.7939905799761432, "learning_rate": 4.594206459931577e-06, "loss": 0.3856, "step": 4544 }, { "epoch": 1.1238872403560831, "grad_norm": 0.771565211811871, "learning_rate": 4.594028925676672e-06, "loss": 0.4218, "step": 4545 }, { "epoch": 1.1241345202769535, "grad_norm": 0.7687028275477652, "learning_rate": 4.593851356026535e-06, "loss": 0.4139, "step": 4546 }, { "epoch": 1.1243818001978239, "grad_norm": 0.7692403277213862, "learning_rate": 4.593673750984167e-06, "loss": 0.3902, "step": 4547 }, { "epoch": 1.1246290801186944, "grad_norm": 0.8120784950643422, "learning_rate": 4.59349611055257e-06, "loss": 0.3932, "step": 4548 }, { "epoch": 1.1248763600395648, "grad_norm": 0.7748112909255299, "learning_rate": 4.593318434734747e-06, "loss": 0.3933, "step": 4549 }, { "epoch": 1.1251236399604352, "grad_norm": 0.7624998872722952, "learning_rate": 4.593140723533701e-06, "loss": 0.4023, "step": 4550 }, { "epoch": 1.1253709198813056, "grad_norm": 0.7529509453009254, "learning_rate": 4.592962976952436e-06, "loss": 0.3949, "step": 4551 }, { "epoch": 1.1256181998021761, "grad_norm": 0.8289751548173756, "learning_rate": 4.5927851949939565e-06, "loss": 0.4074, "step": 4552 }, { "epoch": 1.1258654797230465, "grad_norm": 0.8006648320503932, "learning_rate": 4.5926073776612675e-06, "loss": 0.3741, "step": 4553 }, { "epoch": 1.1261127596439169, "grad_norm": 0.7940478964148794, "learning_rate": 4.5924295249573745e-06, "loss": 0.3741, "step": 4554 }, { "epoch": 1.1263600395647873, "grad_norm": 0.7663685854397381, "learning_rate": 4.592251636885283e-06, "loss": 0.4001, "step": 4555 }, { "epoch": 1.1266073194856578, "grad_norm": 0.8053390624087104, "learning_rate": 4.592073713448001e-06, "loss": 0.3624, "step": 4556 }, { "epoch": 1.1268545994065282, "grad_norm": 0.7780827766118591, "learning_rate": 4.5918957546485355e-06, "loss": 0.3963, "step": 4557 }, { "epoch": 1.1271018793273986, "grad_norm": 0.8125225596265134, "learning_rate": 4.591717760489895e-06, "loss": 0.4091, "step": 4558 }, { "epoch": 1.127349159248269, "grad_norm": 0.7952729411539091, "learning_rate": 4.5915397309750865e-06, "loss": 0.3978, "step": 4559 }, { "epoch": 1.1275964391691395, "grad_norm": 0.8010766726810049, "learning_rate": 4.591361666107121e-06, "loss": 0.3702, "step": 4560 }, { "epoch": 1.12784371909001, "grad_norm": 0.7993190695567921, "learning_rate": 4.591183565889008e-06, "loss": 0.3956, "step": 4561 }, { "epoch": 1.1280909990108803, "grad_norm": 0.8399041681541941, "learning_rate": 4.591005430323757e-06, "loss": 0.3697, "step": 4562 }, { "epoch": 1.1283382789317506, "grad_norm": 0.7760107463612735, "learning_rate": 4.590827259414381e-06, "loss": 0.4149, "step": 4563 }, { "epoch": 1.1285855588526212, "grad_norm": 0.7677787929675716, "learning_rate": 4.5906490531638895e-06, "loss": 0.3849, "step": 4564 }, { "epoch": 1.1288328387734916, "grad_norm": 0.8002026760744555, "learning_rate": 4.590470811575295e-06, "loss": 0.3976, "step": 4565 }, { "epoch": 1.129080118694362, "grad_norm": 0.7727468688179817, "learning_rate": 4.59029253465161e-06, "loss": 0.4113, "step": 4566 }, { "epoch": 1.1293273986152323, "grad_norm": 0.7976609116659373, "learning_rate": 4.59011422239585e-06, "loss": 0.3636, "step": 4567 }, { "epoch": 1.129574678536103, "grad_norm": 0.7916932108553905, "learning_rate": 4.5899358748110275e-06, "loss": 0.3628, "step": 4568 }, { "epoch": 1.1298219584569733, "grad_norm": 0.7776124737117467, "learning_rate": 4.589757491900157e-06, "loss": 0.4083, "step": 4569 }, { "epoch": 1.1300692383778437, "grad_norm": 0.7927658386944244, "learning_rate": 4.589579073666254e-06, "loss": 0.3803, "step": 4570 }, { "epoch": 1.1303165182987143, "grad_norm": 0.7500595565676875, "learning_rate": 4.589400620112334e-06, "loss": 0.3926, "step": 4571 }, { "epoch": 1.1305637982195846, "grad_norm": 0.8298525706495525, "learning_rate": 4.5892221312414145e-06, "loss": 0.374, "step": 4572 }, { "epoch": 1.130811078140455, "grad_norm": 0.7543556432844126, "learning_rate": 4.5890436070565106e-06, "loss": 0.4154, "step": 4573 }, { "epoch": 1.1310583580613254, "grad_norm": 0.7793115072582973, "learning_rate": 4.588865047560641e-06, "loss": 0.3767, "step": 4574 }, { "epoch": 1.1313056379821957, "grad_norm": 0.7851193216844471, "learning_rate": 4.588686452756824e-06, "loss": 0.3662, "step": 4575 }, { "epoch": 1.1315529179030663, "grad_norm": 0.7978096593051596, "learning_rate": 4.5885078226480784e-06, "loss": 0.4038, "step": 4576 }, { "epoch": 1.1318001978239367, "grad_norm": 0.8095964626358035, "learning_rate": 4.588329157237424e-06, "loss": 0.4057, "step": 4577 }, { "epoch": 1.132047477744807, "grad_norm": 0.8114227350093459, "learning_rate": 4.588150456527879e-06, "loss": 0.359, "step": 4578 }, { "epoch": 1.1322947576656777, "grad_norm": 0.7889821178202774, "learning_rate": 4.587971720522465e-06, "loss": 0.3792, "step": 4579 }, { "epoch": 1.132542037586548, "grad_norm": 0.8050682422764122, "learning_rate": 4.587792949224204e-06, "loss": 0.3829, "step": 4580 }, { "epoch": 1.1327893175074184, "grad_norm": 0.799353007116911, "learning_rate": 4.5876141426361156e-06, "loss": 0.377, "step": 4581 }, { "epoch": 1.1330365974282888, "grad_norm": 0.7758720665315219, "learning_rate": 4.587435300761225e-06, "loss": 0.3845, "step": 4582 }, { "epoch": 1.1332838773491591, "grad_norm": 0.8371979502772351, "learning_rate": 4.587256423602553e-06, "loss": 0.3528, "step": 4583 }, { "epoch": 1.1335311572700297, "grad_norm": 0.8253328975959264, "learning_rate": 4.587077511163124e-06, "loss": 0.3718, "step": 4584 }, { "epoch": 1.1337784371909, "grad_norm": 0.7819739370162615, "learning_rate": 4.586898563445962e-06, "loss": 0.3985, "step": 4585 }, { "epoch": 1.1340257171117705, "grad_norm": 0.8487973511717668, "learning_rate": 4.586719580454091e-06, "loss": 0.3814, "step": 4586 }, { "epoch": 1.134272997032641, "grad_norm": 0.805941377775664, "learning_rate": 4.586540562190538e-06, "loss": 0.405, "step": 4587 }, { "epoch": 1.1345202769535114, "grad_norm": 0.7907980801629296, "learning_rate": 4.586361508658328e-06, "loss": 0.3716, "step": 4588 }, { "epoch": 1.1347675568743818, "grad_norm": 0.777644837317748, "learning_rate": 4.586182419860487e-06, "loss": 0.3778, "step": 4589 }, { "epoch": 1.1350148367952522, "grad_norm": 0.8250453821382582, "learning_rate": 4.586003295800043e-06, "loss": 0.3798, "step": 4590 }, { "epoch": 1.1352621167161225, "grad_norm": 0.7740401020752534, "learning_rate": 4.585824136480023e-06, "loss": 0.4009, "step": 4591 }, { "epoch": 1.1355093966369931, "grad_norm": 0.7984494766131044, "learning_rate": 4.585644941903456e-06, "loss": 0.4031, "step": 4592 }, { "epoch": 1.1357566765578635, "grad_norm": 0.7808158506158405, "learning_rate": 4.5854657120733705e-06, "loss": 0.3938, "step": 4593 }, { "epoch": 1.1360039564787339, "grad_norm": 0.7843658595885038, "learning_rate": 4.5852864469927964e-06, "loss": 0.3855, "step": 4594 }, { "epoch": 1.1362512363996045, "grad_norm": 0.7791774108790656, "learning_rate": 4.585107146664763e-06, "loss": 0.3704, "step": 4595 }, { "epoch": 1.1364985163204748, "grad_norm": 0.7503894956521162, "learning_rate": 4.584927811092302e-06, "loss": 0.3767, "step": 4596 }, { "epoch": 1.1367457962413452, "grad_norm": 0.7803856320163748, "learning_rate": 4.584748440278445e-06, "loss": 0.3799, "step": 4597 }, { "epoch": 1.1369930761622156, "grad_norm": 0.757225237620275, "learning_rate": 4.5845690342262216e-06, "loss": 0.3881, "step": 4598 }, { "epoch": 1.137240356083086, "grad_norm": 0.7316851520707883, "learning_rate": 4.584389592938666e-06, "loss": 0.3933, "step": 4599 }, { "epoch": 1.1374876360039565, "grad_norm": 0.7950020823544652, "learning_rate": 4.584210116418812e-06, "loss": 0.3592, "step": 4600 }, { "epoch": 1.137734915924827, "grad_norm": 0.8134980437085719, "learning_rate": 4.584030604669692e-06, "loss": 0.3611, "step": 4601 }, { "epoch": 1.1379821958456973, "grad_norm": 0.7671478792150012, "learning_rate": 4.583851057694341e-06, "loss": 0.3857, "step": 4602 }, { "epoch": 1.1382294757665679, "grad_norm": 0.7792986746004074, "learning_rate": 4.583671475495792e-06, "loss": 0.3593, "step": 4603 }, { "epoch": 1.1384767556874382, "grad_norm": 0.7915015636434526, "learning_rate": 4.5834918580770835e-06, "loss": 0.3619, "step": 4604 }, { "epoch": 1.1387240356083086, "grad_norm": 0.7750618605630601, "learning_rate": 4.583312205441249e-06, "loss": 0.3741, "step": 4605 }, { "epoch": 1.138971315529179, "grad_norm": 0.7851086589884047, "learning_rate": 4.583132517591327e-06, "loss": 0.3779, "step": 4606 }, { "epoch": 1.1392185954500493, "grad_norm": 0.7797039915661187, "learning_rate": 4.582952794530354e-06, "loss": 0.3829, "step": 4607 }, { "epoch": 1.13946587537092, "grad_norm": 0.7790363283466922, "learning_rate": 4.582773036261368e-06, "loss": 0.3905, "step": 4608 }, { "epoch": 1.1397131552917903, "grad_norm": 0.8237385093520468, "learning_rate": 4.582593242787407e-06, "loss": 0.3574, "step": 4609 }, { "epoch": 1.1399604352126607, "grad_norm": 0.7680384771031868, "learning_rate": 4.5824134141115095e-06, "loss": 0.425, "step": 4610 }, { "epoch": 1.1402077151335313, "grad_norm": 0.8402529081856012, "learning_rate": 4.582233550236717e-06, "loss": 0.385, "step": 4611 }, { "epoch": 1.1404549950544016, "grad_norm": 0.7635616532996753, "learning_rate": 4.582053651166067e-06, "loss": 0.3672, "step": 4612 }, { "epoch": 1.140702274975272, "grad_norm": 0.7721370201577236, "learning_rate": 4.581873716902603e-06, "loss": 0.3882, "step": 4613 }, { "epoch": 1.1409495548961424, "grad_norm": 0.7543745155333027, "learning_rate": 4.581693747449365e-06, "loss": 0.3813, "step": 4614 }, { "epoch": 1.141196834817013, "grad_norm": 0.7608592408123686, "learning_rate": 4.581513742809396e-06, "loss": 0.3935, "step": 4615 }, { "epoch": 1.1414441147378833, "grad_norm": 0.7946361080580133, "learning_rate": 4.581333702985738e-06, "loss": 0.3602, "step": 4616 }, { "epoch": 1.1416913946587537, "grad_norm": 0.7708766903177554, "learning_rate": 4.581153627981434e-06, "loss": 0.403, "step": 4617 }, { "epoch": 1.141938674579624, "grad_norm": 0.7626165514992228, "learning_rate": 4.580973517799528e-06, "loss": 0.4099, "step": 4618 }, { "epoch": 1.1421859545004946, "grad_norm": 0.7389018235165796, "learning_rate": 4.580793372443064e-06, "loss": 0.3794, "step": 4619 }, { "epoch": 1.142433234421365, "grad_norm": 0.7659358346433859, "learning_rate": 4.580613191915088e-06, "loss": 0.3768, "step": 4620 }, { "epoch": 1.1426805143422354, "grad_norm": 0.7794503121662429, "learning_rate": 4.580432976218645e-06, "loss": 0.3603, "step": 4621 }, { "epoch": 1.1429277942631058, "grad_norm": 0.8033400877204863, "learning_rate": 4.580252725356781e-06, "loss": 0.3841, "step": 4622 }, { "epoch": 1.1431750741839763, "grad_norm": 0.8006337235881029, "learning_rate": 4.580072439332543e-06, "loss": 0.3864, "step": 4623 }, { "epoch": 1.1434223541048467, "grad_norm": 0.7860324390611284, "learning_rate": 4.5798921181489765e-06, "loss": 0.3823, "step": 4624 }, { "epoch": 1.143669634025717, "grad_norm": 0.7644110476649205, "learning_rate": 4.5797117618091335e-06, "loss": 0.3981, "step": 4625 }, { "epoch": 1.1439169139465875, "grad_norm": 0.7538137168409144, "learning_rate": 4.579531370316059e-06, "loss": 0.3822, "step": 4626 }, { "epoch": 1.144164193867458, "grad_norm": 0.7591245338140643, "learning_rate": 4.579350943672804e-06, "loss": 0.3764, "step": 4627 }, { "epoch": 1.1444114737883284, "grad_norm": 0.7991892235717789, "learning_rate": 4.579170481882417e-06, "loss": 0.4107, "step": 4628 }, { "epoch": 1.1446587537091988, "grad_norm": 0.7672094323265668, "learning_rate": 4.57898998494795e-06, "loss": 0.3911, "step": 4629 }, { "epoch": 1.1449060336300692, "grad_norm": 0.804176571957068, "learning_rate": 4.5788094528724525e-06, "loss": 0.3941, "step": 4630 }, { "epoch": 1.1451533135509397, "grad_norm": 0.7952063754666076, "learning_rate": 4.578628885658976e-06, "loss": 0.3454, "step": 4631 }, { "epoch": 1.1454005934718101, "grad_norm": 0.7608424655344372, "learning_rate": 4.578448283310573e-06, "loss": 0.3759, "step": 4632 }, { "epoch": 1.1456478733926805, "grad_norm": 0.7566227502398408, "learning_rate": 4.5782676458302965e-06, "loss": 0.3881, "step": 4633 }, { "epoch": 1.1458951533135509, "grad_norm": 0.7895831456268633, "learning_rate": 4.5780869732212e-06, "loss": 0.376, "step": 4634 }, { "epoch": 1.1461424332344214, "grad_norm": 0.8169120912036512, "learning_rate": 4.577906265486336e-06, "loss": 0.3784, "step": 4635 }, { "epoch": 1.1463897131552918, "grad_norm": 0.8064461216787852, "learning_rate": 4.577725522628761e-06, "loss": 0.3841, "step": 4636 }, { "epoch": 1.1466369930761622, "grad_norm": 0.7542068804907286, "learning_rate": 4.577544744651529e-06, "loss": 0.364, "step": 4637 }, { "epoch": 1.1468842729970326, "grad_norm": 0.7908614174019868, "learning_rate": 4.577363931557694e-06, "loss": 0.3914, "step": 4638 }, { "epoch": 1.1471315529179031, "grad_norm": 0.814241329215681, "learning_rate": 4.5771830833503164e-06, "loss": 0.3868, "step": 4639 }, { "epoch": 1.1473788328387735, "grad_norm": 0.7806920789729583, "learning_rate": 4.577002200032449e-06, "loss": 0.3627, "step": 4640 }, { "epoch": 1.1476261127596439, "grad_norm": 0.8117568870913549, "learning_rate": 4.576821281607151e-06, "loss": 0.4115, "step": 4641 }, { "epoch": 1.1478733926805142, "grad_norm": 0.787558210188228, "learning_rate": 4.576640328077481e-06, "loss": 0.3834, "step": 4642 }, { "epoch": 1.1481206726013848, "grad_norm": 0.7755054550114571, "learning_rate": 4.576459339446497e-06, "loss": 0.3748, "step": 4643 }, { "epoch": 1.1483679525222552, "grad_norm": 0.7604361858415432, "learning_rate": 4.576278315717257e-06, "loss": 0.3847, "step": 4644 }, { "epoch": 1.1486152324431256, "grad_norm": 0.7982027243950638, "learning_rate": 4.576097256892824e-06, "loss": 0.3639, "step": 4645 }, { "epoch": 1.148862512363996, "grad_norm": 0.7956263339668528, "learning_rate": 4.575916162976255e-06, "loss": 0.3614, "step": 4646 }, { "epoch": 1.1491097922848665, "grad_norm": 0.8451049275793069, "learning_rate": 4.575735033970613e-06, "loss": 0.3545, "step": 4647 }, { "epoch": 1.149357072205737, "grad_norm": 0.7771177370034342, "learning_rate": 4.5755538698789594e-06, "loss": 0.3835, "step": 4648 }, { "epoch": 1.1496043521266073, "grad_norm": 0.8132942653015522, "learning_rate": 4.575372670704356e-06, "loss": 0.3995, "step": 4649 }, { "epoch": 1.1498516320474779, "grad_norm": 0.7932703890305074, "learning_rate": 4.575191436449865e-06, "loss": 0.3707, "step": 4650 }, { "epoch": 1.1500989119683482, "grad_norm": 0.7925904779904588, "learning_rate": 4.575010167118551e-06, "loss": 0.3665, "step": 4651 }, { "epoch": 1.1503461918892186, "grad_norm": 0.7920232254388541, "learning_rate": 4.5748288627134776e-06, "loss": 0.3861, "step": 4652 }, { "epoch": 1.150593471810089, "grad_norm": 0.7911101877964717, "learning_rate": 4.5746475232377095e-06, "loss": 0.3899, "step": 4653 }, { "epoch": 1.1508407517309593, "grad_norm": 0.7786917690835121, "learning_rate": 4.574466148694312e-06, "loss": 0.3674, "step": 4654 }, { "epoch": 1.15108803165183, "grad_norm": 0.8262897230978141, "learning_rate": 4.57428473908635e-06, "loss": 0.3744, "step": 4655 }, { "epoch": 1.1513353115727003, "grad_norm": 0.7926414025355558, "learning_rate": 4.57410329441689e-06, "loss": 0.3822, "step": 4656 }, { "epoch": 1.1515825914935707, "grad_norm": 0.791427787814445, "learning_rate": 4.5739218146889996e-06, "loss": 0.3774, "step": 4657 }, { "epoch": 1.1518298714144413, "grad_norm": 0.8199505478423322, "learning_rate": 4.573740299905746e-06, "loss": 0.3955, "step": 4658 }, { "epoch": 1.1520771513353116, "grad_norm": 0.8120135974983161, "learning_rate": 4.573558750070198e-06, "loss": 0.3866, "step": 4659 }, { "epoch": 1.152324431256182, "grad_norm": 0.7737374998225437, "learning_rate": 4.573377165185423e-06, "loss": 0.3888, "step": 4660 }, { "epoch": 1.1525717111770524, "grad_norm": 0.772680307238391, "learning_rate": 4.573195545254491e-06, "loss": 0.3848, "step": 4661 }, { "epoch": 1.1528189910979227, "grad_norm": 0.8085462815506799, "learning_rate": 4.573013890280472e-06, "loss": 0.3812, "step": 4662 }, { "epoch": 1.1530662710187933, "grad_norm": 0.8254361615486903, "learning_rate": 4.572832200266437e-06, "loss": 0.3713, "step": 4663 }, { "epoch": 1.1533135509396637, "grad_norm": 0.7989801705088656, "learning_rate": 4.572650475215456e-06, "loss": 0.3831, "step": 4664 }, { "epoch": 1.153560830860534, "grad_norm": 0.794324747458301, "learning_rate": 4.5724687151306014e-06, "loss": 0.3641, "step": 4665 }, { "epoch": 1.1538081107814047, "grad_norm": 0.8023035921482412, "learning_rate": 4.572286920014945e-06, "loss": 0.3723, "step": 4666 }, { "epoch": 1.154055390702275, "grad_norm": 0.7844415514674841, "learning_rate": 4.5721050898715604e-06, "loss": 0.3719, "step": 4667 }, { "epoch": 1.1543026706231454, "grad_norm": 0.7811819514893367, "learning_rate": 4.571923224703521e-06, "loss": 0.3822, "step": 4668 }, { "epoch": 1.1545499505440158, "grad_norm": 0.8128952270514049, "learning_rate": 4.5717413245139e-06, "loss": 0.39, "step": 4669 }, { "epoch": 1.1547972304648861, "grad_norm": 0.7816396485857584, "learning_rate": 4.571559389305772e-06, "loss": 0.3795, "step": 4670 }, { "epoch": 1.1550445103857567, "grad_norm": 0.7866990504933012, "learning_rate": 4.571377419082213e-06, "loss": 0.3889, "step": 4671 }, { "epoch": 1.155291790306627, "grad_norm": 0.8350614775006485, "learning_rate": 4.571195413846299e-06, "loss": 0.3602, "step": 4672 }, { "epoch": 1.1555390702274975, "grad_norm": 0.7848958925814405, "learning_rate": 4.5710133736011055e-06, "loss": 0.414, "step": 4673 }, { "epoch": 1.155786350148368, "grad_norm": 0.7430764173429023, "learning_rate": 4.5708312983497104e-06, "loss": 0.3981, "step": 4674 }, { "epoch": 1.1560336300692384, "grad_norm": 0.76963573303256, "learning_rate": 4.570649188095191e-06, "loss": 0.3891, "step": 4675 }, { "epoch": 1.1562809099901088, "grad_norm": 0.8267871838279859, "learning_rate": 4.570467042840626e-06, "loss": 0.3685, "step": 4676 }, { "epoch": 1.1565281899109792, "grad_norm": 0.7832968820706605, "learning_rate": 4.570284862589092e-06, "loss": 0.3827, "step": 4677 }, { "epoch": 1.1567754698318495, "grad_norm": 0.8007773205511487, "learning_rate": 4.570102647343671e-06, "loss": 0.3861, "step": 4678 }, { "epoch": 1.1570227497527201, "grad_norm": 0.7838987923748685, "learning_rate": 4.569920397107443e-06, "loss": 0.4017, "step": 4679 }, { "epoch": 1.1572700296735905, "grad_norm": 0.7673897696257389, "learning_rate": 4.569738111883486e-06, "loss": 0.4068, "step": 4680 }, { "epoch": 1.1575173095944609, "grad_norm": 0.7720028326759558, "learning_rate": 4.569555791674883e-06, "loss": 0.3696, "step": 4681 }, { "epoch": 1.1577645895153315, "grad_norm": 0.7919439125652956, "learning_rate": 4.569373436484717e-06, "loss": 0.3841, "step": 4682 }, { "epoch": 1.1580118694362018, "grad_norm": 0.7951362572492998, "learning_rate": 4.569191046316067e-06, "loss": 0.3701, "step": 4683 }, { "epoch": 1.1582591493570722, "grad_norm": 0.787592059811671, "learning_rate": 4.5690086211720185e-06, "loss": 0.3908, "step": 4684 }, { "epoch": 1.1585064292779426, "grad_norm": 0.8258580251102673, "learning_rate": 4.568826161055654e-06, "loss": 0.3669, "step": 4685 }, { "epoch": 1.1587537091988132, "grad_norm": 0.8003924970906701, "learning_rate": 4.568643665970057e-06, "loss": 0.3788, "step": 4686 }, { "epoch": 1.1590009891196835, "grad_norm": 0.8280700169287192, "learning_rate": 4.568461135918314e-06, "loss": 0.3842, "step": 4687 }, { "epoch": 1.159248269040554, "grad_norm": 0.814246443309837, "learning_rate": 4.56827857090351e-06, "loss": 0.3809, "step": 4688 }, { "epoch": 1.1594955489614243, "grad_norm": 0.752996388512243, "learning_rate": 4.568095970928728e-06, "loss": 0.3993, "step": 4689 }, { "epoch": 1.1597428288822949, "grad_norm": 0.776351218744103, "learning_rate": 4.56791333599706e-06, "loss": 0.3899, "step": 4690 }, { "epoch": 1.1599901088031652, "grad_norm": 0.796179357856943, "learning_rate": 4.567730666111587e-06, "loss": 0.389, "step": 4691 }, { "epoch": 1.1602373887240356, "grad_norm": 0.7879189997086108, "learning_rate": 4.567547961275401e-06, "loss": 0.3685, "step": 4692 }, { "epoch": 1.160484668644906, "grad_norm": 0.751618807767443, "learning_rate": 4.567365221491588e-06, "loss": 0.3902, "step": 4693 }, { "epoch": 1.1607319485657766, "grad_norm": 0.8214182456876371, "learning_rate": 4.567182446763237e-06, "loss": 0.3631, "step": 4694 }, { "epoch": 1.160979228486647, "grad_norm": 0.8124697055949546, "learning_rate": 4.566999637093439e-06, "loss": 0.3771, "step": 4695 }, { "epoch": 1.1612265084075173, "grad_norm": 0.8009528579670885, "learning_rate": 4.566816792485282e-06, "loss": 0.3924, "step": 4696 }, { "epoch": 1.1614737883283877, "grad_norm": 0.8266159351155069, "learning_rate": 4.566633912941858e-06, "loss": 0.3863, "step": 4697 }, { "epoch": 1.1617210682492582, "grad_norm": 0.7774456853300822, "learning_rate": 4.5664509984662575e-06, "loss": 0.3576, "step": 4698 }, { "epoch": 1.1619683481701286, "grad_norm": 0.7697797234330931, "learning_rate": 4.566268049061573e-06, "loss": 0.3974, "step": 4699 }, { "epoch": 1.162215628090999, "grad_norm": 0.7918103334462823, "learning_rate": 4.5660850647308965e-06, "loss": 0.4164, "step": 4700 }, { "epoch": 1.1624629080118694, "grad_norm": 0.7843297163473777, "learning_rate": 4.56590204547732e-06, "loss": 0.3642, "step": 4701 }, { "epoch": 1.16271018793274, "grad_norm": 0.7771316799392175, "learning_rate": 4.565718991303939e-06, "loss": 0.4055, "step": 4702 }, { "epoch": 1.1629574678536103, "grad_norm": 0.7624264647721521, "learning_rate": 4.565535902213846e-06, "loss": 0.3955, "step": 4703 }, { "epoch": 1.1632047477744807, "grad_norm": 0.8284807381861017, "learning_rate": 4.565352778210137e-06, "loss": 0.3848, "step": 4704 }, { "epoch": 1.163452027695351, "grad_norm": 0.7592032856293534, "learning_rate": 4.565169619295907e-06, "loss": 0.3675, "step": 4705 }, { "epoch": 1.1636993076162216, "grad_norm": 0.7684019987796836, "learning_rate": 4.56498642547425e-06, "loss": 0.3852, "step": 4706 }, { "epoch": 1.163946587537092, "grad_norm": 0.7970612945852326, "learning_rate": 4.5648031967482656e-06, "loss": 0.3832, "step": 4707 }, { "epoch": 1.1641938674579624, "grad_norm": 0.784278158419437, "learning_rate": 4.564619933121049e-06, "loss": 0.3773, "step": 4708 }, { "epoch": 1.1644411473788328, "grad_norm": 0.7524522565225593, "learning_rate": 4.564436634595698e-06, "loss": 0.3807, "step": 4709 }, { "epoch": 1.1646884272997033, "grad_norm": 0.8173393195585756, "learning_rate": 4.564253301175312e-06, "loss": 0.3822, "step": 4710 }, { "epoch": 1.1649357072205737, "grad_norm": 0.8112009258038171, "learning_rate": 4.564069932862989e-06, "loss": 0.366, "step": 4711 }, { "epoch": 1.165182987141444, "grad_norm": 0.8245054527495849, "learning_rate": 4.5638865296618285e-06, "loss": 0.35, "step": 4712 }, { "epoch": 1.1654302670623145, "grad_norm": 0.8026034261225578, "learning_rate": 4.56370309157493e-06, "loss": 0.3679, "step": 4713 }, { "epoch": 1.165677546983185, "grad_norm": 0.8051780623629402, "learning_rate": 4.563519618605395e-06, "loss": 0.3624, "step": 4714 }, { "epoch": 1.1659248269040554, "grad_norm": 0.7631288500896913, "learning_rate": 4.563336110756325e-06, "loss": 0.3756, "step": 4715 }, { "epoch": 1.1661721068249258, "grad_norm": 0.7950174637775945, "learning_rate": 4.563152568030821e-06, "loss": 0.3963, "step": 4716 }, { "epoch": 1.1664193867457961, "grad_norm": 0.7759614424838869, "learning_rate": 4.562968990431985e-06, "loss": 0.3901, "step": 4717 }, { "epoch": 1.1666666666666667, "grad_norm": 0.7651596894620959, "learning_rate": 4.562785377962922e-06, "loss": 0.4113, "step": 4718 }, { "epoch": 1.166913946587537, "grad_norm": 0.7991106810470432, "learning_rate": 4.5626017306267335e-06, "loss": 0.3883, "step": 4719 }, { "epoch": 1.1671612265084075, "grad_norm": 0.8143439934689929, "learning_rate": 4.562418048426524e-06, "loss": 0.3751, "step": 4720 }, { "epoch": 1.1674085064292778, "grad_norm": 0.766279736015322, "learning_rate": 4.562234331365399e-06, "loss": 0.3808, "step": 4721 }, { "epoch": 1.1676557863501484, "grad_norm": 0.7818433180724461, "learning_rate": 4.562050579446465e-06, "loss": 0.3688, "step": 4722 }, { "epoch": 1.1679030662710188, "grad_norm": 0.7959921426354326, "learning_rate": 4.561866792672825e-06, "loss": 0.3741, "step": 4723 }, { "epoch": 1.1681503461918892, "grad_norm": 0.7994990410257138, "learning_rate": 4.561682971047587e-06, "loss": 0.3696, "step": 4724 }, { "epoch": 1.1683976261127595, "grad_norm": 0.7865706638612546, "learning_rate": 4.56149911457386e-06, "loss": 0.353, "step": 4725 }, { "epoch": 1.1686449060336301, "grad_norm": 0.8363451804778941, "learning_rate": 4.561315223254748e-06, "loss": 0.3849, "step": 4726 }, { "epoch": 1.1688921859545005, "grad_norm": 0.8563578802580468, "learning_rate": 4.561131297093362e-06, "loss": 0.3801, "step": 4727 }, { "epoch": 1.1691394658753709, "grad_norm": 0.8084524422183542, "learning_rate": 4.560947336092811e-06, "loss": 0.4091, "step": 4728 }, { "epoch": 1.1693867457962415, "grad_norm": 0.789783786703324, "learning_rate": 4.560763340256202e-06, "loss": 0.4254, "step": 4729 }, { "epoch": 1.1696340257171118, "grad_norm": 0.7725245383983903, "learning_rate": 4.560579309586648e-06, "loss": 0.4186, "step": 4730 }, { "epoch": 1.1698813056379822, "grad_norm": 0.8160071386928399, "learning_rate": 4.560395244087258e-06, "loss": 0.3702, "step": 4731 }, { "epoch": 1.1701285855588526, "grad_norm": 0.8304575152861501, "learning_rate": 4.560211143761143e-06, "loss": 0.3645, "step": 4732 }, { "epoch": 1.170375865479723, "grad_norm": 0.7763883895632128, "learning_rate": 4.560027008611415e-06, "loss": 0.3932, "step": 4733 }, { "epoch": 1.1706231454005935, "grad_norm": 0.7989366650865165, "learning_rate": 4.559842838641188e-06, "loss": 0.3978, "step": 4734 }, { "epoch": 1.170870425321464, "grad_norm": 0.7882427579221699, "learning_rate": 4.559658633853573e-06, "loss": 0.3839, "step": 4735 }, { "epoch": 1.1711177052423343, "grad_norm": 0.8143448707699285, "learning_rate": 4.559474394251685e-06, "loss": 0.3863, "step": 4736 }, { "epoch": 1.1713649851632049, "grad_norm": 0.8358064584607359, "learning_rate": 4.559290119838637e-06, "loss": 0.3945, "step": 4737 }, { "epoch": 1.1716122650840752, "grad_norm": 0.8698426166338199, "learning_rate": 4.559105810617545e-06, "loss": 0.3639, "step": 4738 }, { "epoch": 1.1718595450049456, "grad_norm": 0.8081878033877292, "learning_rate": 4.558921466591524e-06, "loss": 0.3825, "step": 4739 }, { "epoch": 1.172106824925816, "grad_norm": 0.794573020028849, "learning_rate": 4.55873708776369e-06, "loss": 0.3596, "step": 4740 }, { "epoch": 1.1723541048466863, "grad_norm": 0.748693596235417, "learning_rate": 4.558552674137159e-06, "loss": 0.4032, "step": 4741 }, { "epoch": 1.172601384767557, "grad_norm": 0.7995771369890413, "learning_rate": 4.558368225715047e-06, "loss": 0.3667, "step": 4742 }, { "epoch": 1.1728486646884273, "grad_norm": 0.7823353269501937, "learning_rate": 4.558183742500475e-06, "loss": 0.3798, "step": 4743 }, { "epoch": 1.1730959446092977, "grad_norm": 0.7891954085089259, "learning_rate": 4.557999224496559e-06, "loss": 0.4006, "step": 4744 }, { "epoch": 1.1733432245301683, "grad_norm": 0.7692882846793758, "learning_rate": 4.557814671706418e-06, "loss": 0.3841, "step": 4745 }, { "epoch": 1.1735905044510386, "grad_norm": 0.8277928708513691, "learning_rate": 4.557630084133172e-06, "loss": 0.3654, "step": 4746 }, { "epoch": 1.173837784371909, "grad_norm": 0.8008742559322195, "learning_rate": 4.5574454617799406e-06, "loss": 0.386, "step": 4747 }, { "epoch": 1.1740850642927794, "grad_norm": 0.8108864888464732, "learning_rate": 4.5572608046498455e-06, "loss": 0.3776, "step": 4748 }, { "epoch": 1.1743323442136497, "grad_norm": 0.811032608554751, "learning_rate": 4.557076112746006e-06, "loss": 0.3903, "step": 4749 }, { "epoch": 1.1745796241345203, "grad_norm": 0.8133016604185507, "learning_rate": 4.556891386071546e-06, "loss": 0.391, "step": 4750 }, { "epoch": 1.1748269040553907, "grad_norm": 0.7512581158973377, "learning_rate": 4.556706624629586e-06, "loss": 0.368, "step": 4751 }, { "epoch": 1.175074183976261, "grad_norm": 0.7563447289782117, "learning_rate": 4.556521828423252e-06, "loss": 0.392, "step": 4752 }, { "epoch": 1.1753214638971317, "grad_norm": 0.7657506804215433, "learning_rate": 4.556336997455665e-06, "loss": 0.3924, "step": 4753 }, { "epoch": 1.175568743818002, "grad_norm": 0.7910006920809217, "learning_rate": 4.556152131729949e-06, "loss": 0.3704, "step": 4754 }, { "epoch": 1.1758160237388724, "grad_norm": 0.7714976357944043, "learning_rate": 4.55596723124923e-06, "loss": 0.3904, "step": 4755 }, { "epoch": 1.1760633036597428, "grad_norm": 0.8746654622104577, "learning_rate": 4.555782296016633e-06, "loss": 0.392, "step": 4756 }, { "epoch": 1.1763105835806131, "grad_norm": 0.7640791527124708, "learning_rate": 4.555597326035284e-06, "loss": 0.3648, "step": 4757 }, { "epoch": 1.1765578635014837, "grad_norm": 0.8119622485060637, "learning_rate": 4.5554123213083096e-06, "loss": 0.3417, "step": 4758 }, { "epoch": 1.176805143422354, "grad_norm": 0.7666728641436488, "learning_rate": 4.555227281838836e-06, "loss": 0.3696, "step": 4759 }, { "epoch": 1.1770524233432245, "grad_norm": 0.8142922659155002, "learning_rate": 4.5550422076299925e-06, "loss": 0.3748, "step": 4760 }, { "epoch": 1.177299703264095, "grad_norm": 0.8273719578557225, "learning_rate": 4.554857098684907e-06, "loss": 0.3773, "step": 4761 }, { "epoch": 1.1775469831849654, "grad_norm": 0.7654202010357201, "learning_rate": 4.5546719550067074e-06, "loss": 0.3992, "step": 4762 }, { "epoch": 1.1777942631058358, "grad_norm": 0.7492891230504218, "learning_rate": 4.554486776598524e-06, "loss": 0.3783, "step": 4763 }, { "epoch": 1.1780415430267062, "grad_norm": 0.8085375764912527, "learning_rate": 4.554301563463487e-06, "loss": 0.3878, "step": 4764 }, { "epoch": 1.1782888229475768, "grad_norm": 0.805403061366855, "learning_rate": 4.554116315604725e-06, "loss": 0.3888, "step": 4765 }, { "epoch": 1.1785361028684471, "grad_norm": 0.7841070330301942, "learning_rate": 4.553931033025373e-06, "loss": 0.3773, "step": 4766 }, { "epoch": 1.1787833827893175, "grad_norm": 0.796084717363312, "learning_rate": 4.553745715728559e-06, "loss": 0.3729, "step": 4767 }, { "epoch": 1.1790306627101879, "grad_norm": 0.7951698807710156, "learning_rate": 4.553560363717418e-06, "loss": 0.4083, "step": 4768 }, { "epoch": 1.1792779426310585, "grad_norm": 0.8191377950140787, "learning_rate": 4.553374976995082e-06, "loss": 0.382, "step": 4769 }, { "epoch": 1.1795252225519288, "grad_norm": 0.7902665393089224, "learning_rate": 4.553189555564684e-06, "loss": 0.3875, "step": 4770 }, { "epoch": 1.1797725024727992, "grad_norm": 0.7956828830224619, "learning_rate": 4.55300409942936e-06, "loss": 0.3654, "step": 4771 }, { "epoch": 1.1800197823936696, "grad_norm": 0.8172152550419222, "learning_rate": 4.552818608592243e-06, "loss": 0.3506, "step": 4772 }, { "epoch": 1.1802670623145401, "grad_norm": 0.8222293619656454, "learning_rate": 4.552633083056469e-06, "loss": 0.3833, "step": 4773 }, { "epoch": 1.1805143422354105, "grad_norm": 0.7814674317425616, "learning_rate": 4.5524475228251745e-06, "loss": 0.3977, "step": 4774 }, { "epoch": 1.1807616221562809, "grad_norm": 0.7807895995483717, "learning_rate": 4.552261927901495e-06, "loss": 0.3848, "step": 4775 }, { "epoch": 1.1810089020771513, "grad_norm": 0.8035379365771035, "learning_rate": 4.5520762982885665e-06, "loss": 0.3642, "step": 4776 }, { "epoch": 1.1812561819980218, "grad_norm": 0.7907117036020671, "learning_rate": 4.55189063398953e-06, "loss": 0.4015, "step": 4777 }, { "epoch": 1.1815034619188922, "grad_norm": 0.7914095920333492, "learning_rate": 4.5517049350075214e-06, "loss": 0.4088, "step": 4778 }, { "epoch": 1.1817507418397626, "grad_norm": 0.812123796008198, "learning_rate": 4.55151920134568e-06, "loss": 0.3887, "step": 4779 }, { "epoch": 1.181998021760633, "grad_norm": 0.8117464144379515, "learning_rate": 4.551333433007146e-06, "loss": 0.3941, "step": 4780 }, { "epoch": 1.1822453016815035, "grad_norm": 0.7807000204110764, "learning_rate": 4.551147629995057e-06, "loss": 0.344, "step": 4781 }, { "epoch": 1.182492581602374, "grad_norm": 0.7855321721373756, "learning_rate": 4.550961792312557e-06, "loss": 0.4385, "step": 4782 }, { "epoch": 1.1827398615232443, "grad_norm": 0.8286599403396413, "learning_rate": 4.550775919962785e-06, "loss": 0.3447, "step": 4783 }, { "epoch": 1.1829871414441147, "grad_norm": 0.7349636058261712, "learning_rate": 4.550590012948884e-06, "loss": 0.3847, "step": 4784 }, { "epoch": 1.1832344213649852, "grad_norm": 0.781196149492525, "learning_rate": 4.550404071273995e-06, "loss": 0.3841, "step": 4785 }, { "epoch": 1.1834817012858556, "grad_norm": 0.7827369830894291, "learning_rate": 4.550218094941262e-06, "loss": 0.4039, "step": 4786 }, { "epoch": 1.183728981206726, "grad_norm": 0.7944726993923948, "learning_rate": 4.550032083953828e-06, "loss": 0.3752, "step": 4787 }, { "epoch": 1.1839762611275964, "grad_norm": 0.7404930840075693, "learning_rate": 4.549846038314838e-06, "loss": 0.4059, "step": 4788 }, { "epoch": 1.184223541048467, "grad_norm": 0.7900458195325206, "learning_rate": 4.549659958027436e-06, "loss": 0.3881, "step": 4789 }, { "epoch": 1.1844708209693373, "grad_norm": 0.7923193157855568, "learning_rate": 4.549473843094767e-06, "loss": 0.3872, "step": 4790 }, { "epoch": 1.1847181008902077, "grad_norm": 0.7720168373886991, "learning_rate": 4.549287693519977e-06, "loss": 0.3656, "step": 4791 }, { "epoch": 1.184965380811078, "grad_norm": 0.8067849087581662, "learning_rate": 4.549101509306214e-06, "loss": 0.3671, "step": 4792 }, { "epoch": 1.1852126607319486, "grad_norm": 0.8402401009890054, "learning_rate": 4.548915290456623e-06, "loss": 0.3818, "step": 4793 }, { "epoch": 1.185459940652819, "grad_norm": 0.7803701215399996, "learning_rate": 4.548729036974352e-06, "loss": 0.3759, "step": 4794 }, { "epoch": 1.1857072205736894, "grad_norm": 0.7669699770310334, "learning_rate": 4.548542748862551e-06, "loss": 0.3891, "step": 4795 }, { "epoch": 1.1859545004945597, "grad_norm": 0.7993156692573541, "learning_rate": 4.548356426124366e-06, "loss": 0.3992, "step": 4796 }, { "epoch": 1.1862017804154303, "grad_norm": 0.7977780184390316, "learning_rate": 4.548170068762949e-06, "loss": 0.3993, "step": 4797 }, { "epoch": 1.1864490603363007, "grad_norm": 0.7492001185245302, "learning_rate": 4.547983676781449e-06, "loss": 0.3813, "step": 4798 }, { "epoch": 1.186696340257171, "grad_norm": 0.7589208275184536, "learning_rate": 4.547797250183016e-06, "loss": 0.4055, "step": 4799 }, { "epoch": 1.1869436201780414, "grad_norm": 0.7653912786952211, "learning_rate": 4.547610788970802e-06, "loss": 0.4105, "step": 4800 }, { "epoch": 1.187190900098912, "grad_norm": 0.7611868027313463, "learning_rate": 4.547424293147958e-06, "loss": 0.394, "step": 4801 }, { "epoch": 1.1874381800197824, "grad_norm": 0.7690030070958218, "learning_rate": 4.547237762717637e-06, "loss": 0.356, "step": 4802 }, { "epoch": 1.1876854599406528, "grad_norm": 0.7918674733953116, "learning_rate": 4.547051197682991e-06, "loss": 0.3849, "step": 4803 }, { "epoch": 1.1879327398615231, "grad_norm": 0.7840603076392111, "learning_rate": 4.546864598047175e-06, "loss": 0.4029, "step": 4804 }, { "epoch": 1.1881800197823937, "grad_norm": 0.7887156659453942, "learning_rate": 4.546677963813342e-06, "loss": 0.3553, "step": 4805 }, { "epoch": 1.188427299703264, "grad_norm": 0.7556014512253404, "learning_rate": 4.5464912949846466e-06, "loss": 0.4096, "step": 4806 }, { "epoch": 1.1886745796241345, "grad_norm": 0.7774130469051223, "learning_rate": 4.546304591564244e-06, "loss": 0.3986, "step": 4807 }, { "epoch": 1.188921859545005, "grad_norm": 0.7538209635676737, "learning_rate": 4.546117853555291e-06, "loss": 0.3987, "step": 4808 }, { "epoch": 1.1891691394658754, "grad_norm": 0.8037929849955351, "learning_rate": 4.545931080960943e-06, "loss": 0.3715, "step": 4809 }, { "epoch": 1.1894164193867458, "grad_norm": 0.8102763679345087, "learning_rate": 4.545744273784357e-06, "loss": 0.351, "step": 4810 }, { "epoch": 1.1896636993076162, "grad_norm": 0.8316964948593969, "learning_rate": 4.5455574320286914e-06, "loss": 0.3483, "step": 4811 }, { "epoch": 1.1899109792284865, "grad_norm": 0.8168927907299055, "learning_rate": 4.545370555697104e-06, "loss": 0.3779, "step": 4812 }, { "epoch": 1.1901582591493571, "grad_norm": 0.7877436785971258, "learning_rate": 4.545183644792753e-06, "loss": 0.403, "step": 4813 }, { "epoch": 1.1904055390702275, "grad_norm": 0.8233042501154227, "learning_rate": 4.544996699318799e-06, "loss": 0.3665, "step": 4814 }, { "epoch": 1.1906528189910979, "grad_norm": 0.7989827785191973, "learning_rate": 4.5448097192784005e-06, "loss": 0.3792, "step": 4815 }, { "epoch": 1.1909000989119685, "grad_norm": 0.7944707823628144, "learning_rate": 4.544622704674719e-06, "loss": 0.3673, "step": 4816 }, { "epoch": 1.1911473788328388, "grad_norm": 0.8114883800949806, "learning_rate": 4.5444356555109145e-06, "loss": 0.373, "step": 4817 }, { "epoch": 1.1913946587537092, "grad_norm": 0.8094261467233816, "learning_rate": 4.544248571790151e-06, "loss": 0.3896, "step": 4818 }, { "epoch": 1.1916419386745796, "grad_norm": 0.7701480482168308, "learning_rate": 4.544061453515588e-06, "loss": 0.3806, "step": 4819 }, { "epoch": 1.19188921859545, "grad_norm": 0.8356792506691365, "learning_rate": 4.5438743006903905e-06, "loss": 0.3605, "step": 4820 }, { "epoch": 1.1921364985163205, "grad_norm": 0.8139405686429024, "learning_rate": 4.54368711331772e-06, "loss": 0.3206, "step": 4821 }, { "epoch": 1.192383778437191, "grad_norm": 0.7990857471344929, "learning_rate": 4.543499891400742e-06, "loss": 0.3837, "step": 4822 }, { "epoch": 1.1926310583580613, "grad_norm": 0.8075582093090053, "learning_rate": 4.543312634942621e-06, "loss": 0.4135, "step": 4823 }, { "epoch": 1.1928783382789319, "grad_norm": 0.7605239009729878, "learning_rate": 4.54312534394652e-06, "loss": 0.3842, "step": 4824 }, { "epoch": 1.1931256181998022, "grad_norm": 0.8369452695541183, "learning_rate": 4.542938018415608e-06, "loss": 0.4151, "step": 4825 }, { "epoch": 1.1933728981206726, "grad_norm": 0.8013547924176346, "learning_rate": 4.54275065835305e-06, "loss": 0.4021, "step": 4826 }, { "epoch": 1.193620178041543, "grad_norm": 0.8393332432664736, "learning_rate": 4.5425632637620135e-06, "loss": 0.3753, "step": 4827 }, { "epoch": 1.1938674579624133, "grad_norm": 0.8152259047425122, "learning_rate": 4.5423758346456645e-06, "loss": 0.3882, "step": 4828 }, { "epoch": 1.194114737883284, "grad_norm": 0.7803487302194408, "learning_rate": 4.5421883710071716e-06, "loss": 0.3809, "step": 4829 }, { "epoch": 1.1943620178041543, "grad_norm": 0.7813252819030082, "learning_rate": 4.542000872849704e-06, "loss": 0.3688, "step": 4830 }, { "epoch": 1.1946092977250247, "grad_norm": 0.7461741179236847, "learning_rate": 4.541813340176431e-06, "loss": 0.3771, "step": 4831 }, { "epoch": 1.1948565776458953, "grad_norm": 0.7734747218663485, "learning_rate": 4.541625772990523e-06, "loss": 0.3728, "step": 4832 }, { "epoch": 1.1951038575667656, "grad_norm": 0.7843620061121264, "learning_rate": 4.5414381712951485e-06, "loss": 0.3967, "step": 4833 }, { "epoch": 1.195351137487636, "grad_norm": 0.8085524891303967, "learning_rate": 4.54125053509348e-06, "loss": 0.3827, "step": 4834 }, { "epoch": 1.1955984174085064, "grad_norm": 0.7668878416771151, "learning_rate": 4.54106286438869e-06, "loss": 0.387, "step": 4835 }, { "epoch": 1.1958456973293767, "grad_norm": 0.768014022250441, "learning_rate": 4.540875159183949e-06, "loss": 0.4075, "step": 4836 }, { "epoch": 1.1960929772502473, "grad_norm": 0.7595556100915025, "learning_rate": 4.540687419482429e-06, "loss": 0.3966, "step": 4837 }, { "epoch": 1.1963402571711177, "grad_norm": 0.7915570012192699, "learning_rate": 4.540499645287305e-06, "loss": 0.3905, "step": 4838 }, { "epoch": 1.196587537091988, "grad_norm": 0.7667150216379646, "learning_rate": 4.540311836601751e-06, "loss": 0.3704, "step": 4839 }, { "epoch": 1.1968348170128587, "grad_norm": 0.7656823019596728, "learning_rate": 4.540123993428942e-06, "loss": 0.3765, "step": 4840 }, { "epoch": 1.197082096933729, "grad_norm": 0.7475671581305283, "learning_rate": 4.539936115772051e-06, "loss": 0.3624, "step": 4841 }, { "epoch": 1.1973293768545994, "grad_norm": 0.7891713560615394, "learning_rate": 4.539748203634255e-06, "loss": 0.3567, "step": 4842 }, { "epoch": 1.1975766567754698, "grad_norm": 0.8184853209668596, "learning_rate": 4.539560257018731e-06, "loss": 0.3537, "step": 4843 }, { "epoch": 1.1978239366963404, "grad_norm": 0.8035815459609491, "learning_rate": 4.539372275928654e-06, "loss": 0.3833, "step": 4844 }, { "epoch": 1.1980712166172107, "grad_norm": 0.7735407986191559, "learning_rate": 4.539184260367203e-06, "loss": 0.3861, "step": 4845 }, { "epoch": 1.198318496538081, "grad_norm": 0.7744133614353745, "learning_rate": 4.538996210337555e-06, "loss": 0.3873, "step": 4846 }, { "epoch": 1.1985657764589515, "grad_norm": 0.7835604666563304, "learning_rate": 4.5388081258428895e-06, "loss": 0.3805, "step": 4847 }, { "epoch": 1.198813056379822, "grad_norm": 0.7682095250747694, "learning_rate": 4.538620006886385e-06, "loss": 0.4067, "step": 4848 }, { "epoch": 1.1990603363006924, "grad_norm": 0.7670897744053592, "learning_rate": 4.538431853471221e-06, "loss": 0.395, "step": 4849 }, { "epoch": 1.1993076162215628, "grad_norm": 0.796246041407166, "learning_rate": 4.538243665600579e-06, "loss": 0.3653, "step": 4850 }, { "epoch": 1.1995548961424332, "grad_norm": 0.7781342200376644, "learning_rate": 4.538055443277639e-06, "loss": 0.3938, "step": 4851 }, { "epoch": 1.1998021760633037, "grad_norm": 0.7880311623299936, "learning_rate": 4.5378671865055825e-06, "loss": 0.4205, "step": 4852 }, { "epoch": 1.2000494559841741, "grad_norm": 0.8138606120282215, "learning_rate": 4.537678895287592e-06, "loss": 0.3911, "step": 4853 }, { "epoch": 1.2002967359050445, "grad_norm": 0.8118086620167203, "learning_rate": 4.53749056962685e-06, "loss": 0.3999, "step": 4854 }, { "epoch": 1.2005440158259149, "grad_norm": 0.7712578119370603, "learning_rate": 4.537302209526541e-06, "loss": 0.3781, "step": 4855 }, { "epoch": 1.2007912957467854, "grad_norm": 0.7670812389286972, "learning_rate": 4.537113814989846e-06, "loss": 0.4146, "step": 4856 }, { "epoch": 1.2010385756676558, "grad_norm": 0.7460393778908895, "learning_rate": 4.536925386019951e-06, "loss": 0.4351, "step": 4857 }, { "epoch": 1.2012858555885262, "grad_norm": 0.7900575080505278, "learning_rate": 4.536736922620042e-06, "loss": 0.3862, "step": 4858 }, { "epoch": 1.2015331355093966, "grad_norm": 0.7800131552462165, "learning_rate": 4.536548424793303e-06, "loss": 0.4064, "step": 4859 }, { "epoch": 1.2017804154302671, "grad_norm": 0.7958641365704422, "learning_rate": 4.536359892542921e-06, "loss": 0.3932, "step": 4860 }, { "epoch": 1.2020276953511375, "grad_norm": 0.7771317247528405, "learning_rate": 4.536171325872082e-06, "loss": 0.3817, "step": 4861 }, { "epoch": 1.2022749752720079, "grad_norm": 0.7836792807025146, "learning_rate": 4.535982724783975e-06, "loss": 0.3815, "step": 4862 }, { "epoch": 1.2025222551928783, "grad_norm": 0.793485344908246, "learning_rate": 4.535794089281786e-06, "loss": 0.3689, "step": 4863 }, { "epoch": 1.2027695351137488, "grad_norm": 0.8056402709447272, "learning_rate": 4.535605419368705e-06, "loss": 0.3801, "step": 4864 }, { "epoch": 1.2030168150346192, "grad_norm": 0.776010219139765, "learning_rate": 4.535416715047919e-06, "loss": 0.3988, "step": 4865 }, { "epoch": 1.2032640949554896, "grad_norm": 0.7973763790182075, "learning_rate": 4.5352279763226205e-06, "loss": 0.3919, "step": 4866 }, { "epoch": 1.20351137487636, "grad_norm": 0.7720945546809527, "learning_rate": 4.5350392031959964e-06, "loss": 0.3893, "step": 4867 }, { "epoch": 1.2037586547972305, "grad_norm": 0.7949555861065664, "learning_rate": 4.534850395671241e-06, "loss": 0.3466, "step": 4868 }, { "epoch": 1.204005934718101, "grad_norm": 0.7814835774895829, "learning_rate": 4.534661553751544e-06, "loss": 0.3604, "step": 4869 }, { "epoch": 1.2042532146389713, "grad_norm": 0.8119125835513517, "learning_rate": 4.5344726774400966e-06, "loss": 0.3918, "step": 4870 }, { "epoch": 1.2045004945598417, "grad_norm": 0.7792738261167718, "learning_rate": 4.534283766740092e-06, "loss": 0.4001, "step": 4871 }, { "epoch": 1.2047477744807122, "grad_norm": 0.772768344512989, "learning_rate": 4.534094821654724e-06, "loss": 0.3755, "step": 4872 }, { "epoch": 1.2049950544015826, "grad_norm": 0.810022564548045, "learning_rate": 4.533905842187186e-06, "loss": 0.3945, "step": 4873 }, { "epoch": 1.205242334322453, "grad_norm": 0.8215504718865734, "learning_rate": 4.533716828340673e-06, "loss": 0.3639, "step": 4874 }, { "epoch": 1.2054896142433233, "grad_norm": 0.7819631201762376, "learning_rate": 4.533527780118378e-06, "loss": 0.3747, "step": 4875 }, { "epoch": 1.205736894164194, "grad_norm": 0.7899138251021981, "learning_rate": 4.533338697523498e-06, "loss": 0.3777, "step": 4876 }, { "epoch": 1.2059841740850643, "grad_norm": 0.7719124316058078, "learning_rate": 4.533149580559229e-06, "loss": 0.3952, "step": 4877 }, { "epoch": 1.2062314540059347, "grad_norm": 0.8358779114617527, "learning_rate": 4.532960429228766e-06, "loss": 0.3877, "step": 4878 }, { "epoch": 1.206478733926805, "grad_norm": 0.7741437344787291, "learning_rate": 4.532771243535308e-06, "loss": 0.3526, "step": 4879 }, { "epoch": 1.2067260138476756, "grad_norm": 0.7754416202437382, "learning_rate": 4.5325820234820525e-06, "loss": 0.3984, "step": 4880 }, { "epoch": 1.206973293768546, "grad_norm": 0.788703442717869, "learning_rate": 4.532392769072197e-06, "loss": 0.4067, "step": 4881 }, { "epoch": 1.2072205736894164, "grad_norm": 0.797185459745468, "learning_rate": 4.532203480308942e-06, "loss": 0.3648, "step": 4882 }, { "epoch": 1.2074678536102867, "grad_norm": 0.796794730835319, "learning_rate": 4.5320141571954854e-06, "loss": 0.396, "step": 4883 }, { "epoch": 1.2077151335311573, "grad_norm": 0.766933670490487, "learning_rate": 4.531824799735028e-06, "loss": 0.3747, "step": 4884 }, { "epoch": 1.2079624134520277, "grad_norm": 0.7768911100943373, "learning_rate": 4.531635407930771e-06, "loss": 0.3785, "step": 4885 }, { "epoch": 1.208209693372898, "grad_norm": 0.7900592596257592, "learning_rate": 4.531445981785915e-06, "loss": 0.3721, "step": 4886 }, { "epoch": 1.2084569732937687, "grad_norm": 0.7916646766651483, "learning_rate": 4.531256521303662e-06, "loss": 0.4152, "step": 4887 }, { "epoch": 1.208704253214639, "grad_norm": 0.8197307188244259, "learning_rate": 4.531067026487214e-06, "loss": 0.3824, "step": 4888 }, { "epoch": 1.2089515331355094, "grad_norm": 0.7982685065791939, "learning_rate": 4.5308774973397755e-06, "loss": 0.3624, "step": 4889 }, { "epoch": 1.2091988130563798, "grad_norm": 0.8037277674890725, "learning_rate": 4.5306879338645486e-06, "loss": 0.3756, "step": 4890 }, { "epoch": 1.2094460929772501, "grad_norm": 0.8345704227015408, "learning_rate": 4.530498336064737e-06, "loss": 0.3788, "step": 4891 }, { "epoch": 1.2096933728981207, "grad_norm": 0.7860038487713605, "learning_rate": 4.530308703943548e-06, "loss": 0.3787, "step": 4892 }, { "epoch": 1.209940652818991, "grad_norm": 0.7599622867312993, "learning_rate": 4.530119037504185e-06, "loss": 0.424, "step": 4893 }, { "epoch": 1.2101879327398615, "grad_norm": 0.8466589636715154, "learning_rate": 4.529929336749853e-06, "loss": 0.3748, "step": 4894 }, { "epoch": 1.210435212660732, "grad_norm": 0.7635539141588643, "learning_rate": 4.529739601683761e-06, "loss": 0.3994, "step": 4895 }, { "epoch": 1.2106824925816024, "grad_norm": 0.8080628214042201, "learning_rate": 4.5295498323091144e-06, "loss": 0.3763, "step": 4896 }, { "epoch": 1.2109297725024728, "grad_norm": 0.8046385582099821, "learning_rate": 4.5293600286291215e-06, "loss": 0.3589, "step": 4897 }, { "epoch": 1.2111770524233432, "grad_norm": 0.7605226347635777, "learning_rate": 4.529170190646991e-06, "loss": 0.3553, "step": 4898 }, { "epoch": 1.2114243323442135, "grad_norm": 0.7878254424707832, "learning_rate": 4.528980318365931e-06, "loss": 0.3925, "step": 4899 }, { "epoch": 1.2116716122650841, "grad_norm": 0.8032244412539615, "learning_rate": 4.52879041178915e-06, "loss": 0.3878, "step": 4900 }, { "epoch": 1.2119188921859545, "grad_norm": 0.7856359792152349, "learning_rate": 4.5286004709198595e-06, "loss": 0.3704, "step": 4901 }, { "epoch": 1.2121661721068249, "grad_norm": 0.7563786685107527, "learning_rate": 4.528410495761269e-06, "loss": 0.3978, "step": 4902 }, { "epoch": 1.2124134520276955, "grad_norm": 0.7619075768120056, "learning_rate": 4.528220486316591e-06, "loss": 0.4343, "step": 4903 }, { "epoch": 1.2126607319485658, "grad_norm": 0.7862802633305559, "learning_rate": 4.528030442589037e-06, "loss": 0.3694, "step": 4904 }, { "epoch": 1.2129080118694362, "grad_norm": 0.7853872645398764, "learning_rate": 4.527840364581817e-06, "loss": 0.4015, "step": 4905 }, { "epoch": 1.2131552917903066, "grad_norm": 0.7881036508926134, "learning_rate": 4.5276502522981465e-06, "loss": 0.3772, "step": 4906 }, { "epoch": 1.213402571711177, "grad_norm": 0.7716064783230894, "learning_rate": 4.5274601057412376e-06, "loss": 0.3711, "step": 4907 }, { "epoch": 1.2136498516320475, "grad_norm": 0.7787846092722479, "learning_rate": 4.527269924914305e-06, "loss": 0.3679, "step": 4908 }, { "epoch": 1.213897131552918, "grad_norm": 0.8015523434168799, "learning_rate": 4.527079709820563e-06, "loss": 0.3734, "step": 4909 }, { "epoch": 1.2141444114737883, "grad_norm": 0.8138988509537958, "learning_rate": 4.526889460463227e-06, "loss": 0.3542, "step": 4910 }, { "epoch": 1.2143916913946589, "grad_norm": 0.7992030308979681, "learning_rate": 4.526699176845512e-06, "loss": 0.3809, "step": 4911 }, { "epoch": 1.2146389713155292, "grad_norm": 0.7955732802238391, "learning_rate": 4.526508858970637e-06, "loss": 0.3936, "step": 4912 }, { "epoch": 1.2148862512363996, "grad_norm": 0.7745385903930552, "learning_rate": 4.5263185068418146e-06, "loss": 0.3858, "step": 4913 }, { "epoch": 1.21513353115727, "grad_norm": 0.8132367646388334, "learning_rate": 4.526128120462265e-06, "loss": 0.3889, "step": 4914 }, { "epoch": 1.2153808110781403, "grad_norm": 0.7973553571971875, "learning_rate": 4.5259376998352065e-06, "loss": 0.3852, "step": 4915 }, { "epoch": 1.215628090999011, "grad_norm": 0.7599402952721872, "learning_rate": 4.525747244963857e-06, "loss": 0.3881, "step": 4916 }, { "epoch": 1.2158753709198813, "grad_norm": 0.7822814763339597, "learning_rate": 4.525556755851436e-06, "loss": 0.3876, "step": 4917 }, { "epoch": 1.2161226508407517, "grad_norm": 0.7966757756407691, "learning_rate": 4.5253662325011625e-06, "loss": 0.3897, "step": 4918 }, { "epoch": 1.2163699307616223, "grad_norm": 0.7808636988408262, "learning_rate": 4.525175674916259e-06, "loss": 0.3555, "step": 4919 }, { "epoch": 1.2166172106824926, "grad_norm": 0.787548260778102, "learning_rate": 4.524985083099944e-06, "loss": 0.3886, "step": 4920 }, { "epoch": 1.216864490603363, "grad_norm": 0.8137498243948261, "learning_rate": 4.524794457055441e-06, "loss": 0.4034, "step": 4921 }, { "epoch": 1.2171117705242334, "grad_norm": 0.7731162070687602, "learning_rate": 4.524603796785971e-06, "loss": 0.3888, "step": 4922 }, { "epoch": 1.217359050445104, "grad_norm": 0.7686888698388606, "learning_rate": 4.524413102294757e-06, "loss": 0.3898, "step": 4923 }, { "epoch": 1.2176063303659743, "grad_norm": 0.7613597044262604, "learning_rate": 4.524222373585022e-06, "loss": 0.3951, "step": 4924 }, { "epoch": 1.2178536102868447, "grad_norm": 0.7705880851646494, "learning_rate": 4.524031610659991e-06, "loss": 0.3808, "step": 4925 }, { "epoch": 1.218100890207715, "grad_norm": 0.7702198483116763, "learning_rate": 4.5238408135228865e-06, "loss": 0.4005, "step": 4926 }, { "epoch": 1.2183481701285857, "grad_norm": 0.7906849256830784, "learning_rate": 4.5236499821769355e-06, "loss": 0.3677, "step": 4927 }, { "epoch": 1.218595450049456, "grad_norm": 0.7865395147846755, "learning_rate": 4.523459116625363e-06, "loss": 0.4009, "step": 4928 }, { "epoch": 1.2188427299703264, "grad_norm": 0.7955639743811127, "learning_rate": 4.523268216871394e-06, "loss": 0.4069, "step": 4929 }, { "epoch": 1.2190900098911968, "grad_norm": 0.7971264718073302, "learning_rate": 4.523077282918257e-06, "loss": 0.4012, "step": 4930 }, { "epoch": 1.2193372898120673, "grad_norm": 0.7795122580275117, "learning_rate": 4.522886314769178e-06, "loss": 0.3914, "step": 4931 }, { "epoch": 1.2195845697329377, "grad_norm": 0.7740472382412046, "learning_rate": 4.522695312427386e-06, "loss": 0.3979, "step": 4932 }, { "epoch": 1.219831849653808, "grad_norm": 0.7910678994921423, "learning_rate": 4.52250427589611e-06, "loss": 0.3824, "step": 4933 }, { "epoch": 1.2200791295746785, "grad_norm": 0.8184279438270351, "learning_rate": 4.522313205178577e-06, "loss": 0.345, "step": 4934 }, { "epoch": 1.220326409495549, "grad_norm": 0.7901133815079288, "learning_rate": 4.522122100278018e-06, "loss": 0.3692, "step": 4935 }, { "epoch": 1.2205736894164194, "grad_norm": 0.7796426390268087, "learning_rate": 4.521930961197663e-06, "loss": 0.3886, "step": 4936 }, { "epoch": 1.2208209693372898, "grad_norm": 0.8266952336330282, "learning_rate": 4.521739787940743e-06, "loss": 0.3508, "step": 4937 }, { "epoch": 1.2210682492581602, "grad_norm": 0.7958709967083475, "learning_rate": 4.521548580510488e-06, "loss": 0.3842, "step": 4938 }, { "epoch": 1.2213155291790307, "grad_norm": 0.8188146611052858, "learning_rate": 4.521357338910133e-06, "loss": 0.3929, "step": 4939 }, { "epoch": 1.2215628090999011, "grad_norm": 0.8400991011770317, "learning_rate": 4.521166063142907e-06, "loss": 0.3664, "step": 4940 }, { "epoch": 1.2218100890207715, "grad_norm": 0.8154925647273198, "learning_rate": 4.520974753212046e-06, "loss": 0.3794, "step": 4941 }, { "epoch": 1.2220573689416419, "grad_norm": 0.7984903992281173, "learning_rate": 4.520783409120783e-06, "loss": 0.409, "step": 4942 }, { "epoch": 1.2223046488625124, "grad_norm": 0.7746766996482216, "learning_rate": 4.52059203087235e-06, "loss": 0.377, "step": 4943 }, { "epoch": 1.2225519287833828, "grad_norm": 0.7559207995215051, "learning_rate": 4.520400618469985e-06, "loss": 0.3929, "step": 4944 }, { "epoch": 1.2227992087042532, "grad_norm": 0.8023597895461501, "learning_rate": 4.5202091719169215e-06, "loss": 0.3975, "step": 4945 }, { "epoch": 1.2230464886251236, "grad_norm": 0.7847533816377383, "learning_rate": 4.520017691216396e-06, "loss": 0.3917, "step": 4946 }, { "epoch": 1.2232937685459941, "grad_norm": 0.8043919462802736, "learning_rate": 4.519826176371646e-06, "loss": 0.3833, "step": 4947 }, { "epoch": 1.2235410484668645, "grad_norm": 0.7999947202023772, "learning_rate": 4.519634627385907e-06, "loss": 0.3752, "step": 4948 }, { "epoch": 1.2237883283877349, "grad_norm": 0.750805459388481, "learning_rate": 4.519443044262419e-06, "loss": 0.4009, "step": 4949 }, { "epoch": 1.2240356083086052, "grad_norm": 0.8025601014197002, "learning_rate": 4.5192514270044174e-06, "loss": 0.3975, "step": 4950 }, { "epoch": 1.2242828882294758, "grad_norm": 0.8081373125417918, "learning_rate": 4.519059775615143e-06, "loss": 0.3769, "step": 4951 }, { "epoch": 1.2245301681503462, "grad_norm": 0.7627598174657156, "learning_rate": 4.518868090097835e-06, "loss": 0.4177, "step": 4952 }, { "epoch": 1.2247774480712166, "grad_norm": 0.7593072762751363, "learning_rate": 4.5186763704557335e-06, "loss": 0.3947, "step": 4953 }, { "epoch": 1.225024727992087, "grad_norm": 0.7765002531689607, "learning_rate": 4.5184846166920786e-06, "loss": 0.3431, "step": 4954 }, { "epoch": 1.2252720079129575, "grad_norm": 0.7856276739248065, "learning_rate": 4.518292828810111e-06, "loss": 0.3711, "step": 4955 }, { "epoch": 1.225519287833828, "grad_norm": 0.7811463182919565, "learning_rate": 4.518101006813075e-06, "loss": 0.382, "step": 4956 }, { "epoch": 1.2257665677546983, "grad_norm": 0.8060365382807589, "learning_rate": 4.51790915070421e-06, "loss": 0.3748, "step": 4957 }, { "epoch": 1.2260138476755689, "grad_norm": 0.8168663832824753, "learning_rate": 4.517717260486761e-06, "loss": 0.3669, "step": 4958 }, { "epoch": 1.2262611275964392, "grad_norm": 0.7961845838360583, "learning_rate": 4.51752533616397e-06, "loss": 0.3749, "step": 4959 }, { "epoch": 1.2265084075173096, "grad_norm": 0.8039381255932287, "learning_rate": 4.517333377739083e-06, "loss": 0.3965, "step": 4960 }, { "epoch": 1.22675568743818, "grad_norm": 0.7707736851202597, "learning_rate": 4.517141385215342e-06, "loss": 0.3806, "step": 4961 }, { "epoch": 1.2270029673590503, "grad_norm": 0.7684377945139835, "learning_rate": 4.516949358595995e-06, "loss": 0.3944, "step": 4962 }, { "epoch": 1.227250247279921, "grad_norm": 0.7884269793662793, "learning_rate": 4.5167572978842856e-06, "loss": 0.3797, "step": 4963 }, { "epoch": 1.2274975272007913, "grad_norm": 0.7581016645743803, "learning_rate": 4.516565203083462e-06, "loss": 0.3886, "step": 4964 }, { "epoch": 1.2277448071216617, "grad_norm": 0.7576980697043955, "learning_rate": 4.516373074196769e-06, "loss": 0.3944, "step": 4965 }, { "epoch": 1.2279920870425323, "grad_norm": 0.8114021653103737, "learning_rate": 4.516180911227455e-06, "loss": 0.3889, "step": 4966 }, { "epoch": 1.2282393669634026, "grad_norm": 0.7748025230259129, "learning_rate": 4.5159887141787705e-06, "loss": 0.3787, "step": 4967 }, { "epoch": 1.228486646884273, "grad_norm": 0.8401689749531943, "learning_rate": 4.515796483053961e-06, "loss": 0.3444, "step": 4968 }, { "epoch": 1.2287339268051434, "grad_norm": 0.7855914329787703, "learning_rate": 4.5156042178562774e-06, "loss": 0.3835, "step": 4969 }, { "epoch": 1.2289812067260137, "grad_norm": 0.7843852474748455, "learning_rate": 4.5154119185889685e-06, "loss": 0.3752, "step": 4970 }, { "epoch": 1.2292284866468843, "grad_norm": 0.8054563658704187, "learning_rate": 4.515219585255286e-06, "loss": 0.4131, "step": 4971 }, { "epoch": 1.2294757665677547, "grad_norm": 0.7854456442810994, "learning_rate": 4.5150272178584805e-06, "loss": 0.3652, "step": 4972 }, { "epoch": 1.229723046488625, "grad_norm": 0.7867457591651689, "learning_rate": 4.514834816401803e-06, "loss": 0.3525, "step": 4973 }, { "epoch": 1.2299703264094957, "grad_norm": 0.7832989172475077, "learning_rate": 4.514642380888505e-06, "loss": 0.3845, "step": 4974 }, { "epoch": 1.230217606330366, "grad_norm": 0.7682990690722795, "learning_rate": 4.514449911321842e-06, "loss": 0.3811, "step": 4975 }, { "epoch": 1.2304648862512364, "grad_norm": 0.8149375158528219, "learning_rate": 4.514257407705065e-06, "loss": 0.4153, "step": 4976 }, { "epoch": 1.2307121661721068, "grad_norm": 0.7534336701983072, "learning_rate": 4.514064870041427e-06, "loss": 0.3934, "step": 4977 }, { "epoch": 1.2309594460929771, "grad_norm": 0.7574297095820887, "learning_rate": 4.513872298334185e-06, "loss": 0.3645, "step": 4978 }, { "epoch": 1.2312067260138477, "grad_norm": 0.8168503503705136, "learning_rate": 4.5136796925865924e-06, "loss": 0.3536, "step": 4979 }, { "epoch": 1.231454005934718, "grad_norm": 0.7513285812568407, "learning_rate": 4.513487052801906e-06, "loss": 0.3679, "step": 4980 }, { "epoch": 1.2317012858555885, "grad_norm": 0.7884085520274648, "learning_rate": 4.513294378983381e-06, "loss": 0.3761, "step": 4981 }, { "epoch": 1.231948565776459, "grad_norm": 0.7973082172406644, "learning_rate": 4.513101671134273e-06, "loss": 0.3623, "step": 4982 }, { "epoch": 1.2321958456973294, "grad_norm": 0.7738284123450081, "learning_rate": 4.512908929257843e-06, "loss": 0.3613, "step": 4983 }, { "epoch": 1.2324431256181998, "grad_norm": 0.7699407377060754, "learning_rate": 4.512716153357345e-06, "loss": 0.387, "step": 4984 }, { "epoch": 1.2326904055390702, "grad_norm": 0.811479145164763, "learning_rate": 4.512523343436039e-06, "loss": 0.3631, "step": 4985 }, { "epoch": 1.2329376854599405, "grad_norm": 0.748296344317464, "learning_rate": 4.512330499497185e-06, "loss": 0.3997, "step": 4986 }, { "epoch": 1.2331849653808111, "grad_norm": 0.7869542882520558, "learning_rate": 4.512137621544041e-06, "loss": 0.3856, "step": 4987 }, { "epoch": 1.2334322453016815, "grad_norm": 0.8199960255044006, "learning_rate": 4.511944709579869e-06, "loss": 0.3907, "step": 4988 }, { "epoch": 1.2336795252225519, "grad_norm": 0.7822872938107134, "learning_rate": 4.5117517636079284e-06, "loss": 0.3626, "step": 4989 }, { "epoch": 1.2339268051434225, "grad_norm": 0.8035508453755497, "learning_rate": 4.51155878363148e-06, "loss": 0.3829, "step": 4990 }, { "epoch": 1.2341740850642928, "grad_norm": 0.7850544118865945, "learning_rate": 4.511365769653788e-06, "loss": 0.3792, "step": 4991 }, { "epoch": 1.2344213649851632, "grad_norm": 0.7870457320172803, "learning_rate": 4.511172721678112e-06, "loss": 0.3768, "step": 4992 }, { "epoch": 1.2346686449060336, "grad_norm": 0.7663795199751592, "learning_rate": 4.510979639707718e-06, "loss": 0.3974, "step": 4993 }, { "epoch": 1.234915924826904, "grad_norm": 0.8385573029754841, "learning_rate": 4.510786523745868e-06, "loss": 0.3843, "step": 4994 }, { "epoch": 1.2351632047477745, "grad_norm": 0.8720713575985365, "learning_rate": 4.5105933737958265e-06, "loss": 0.4023, "step": 4995 }, { "epoch": 1.235410484668645, "grad_norm": 0.8102168557893267, "learning_rate": 4.510400189860857e-06, "loss": 0.3763, "step": 4996 }, { "epoch": 1.2356577645895153, "grad_norm": 0.7481674801352836, "learning_rate": 4.510206971944228e-06, "loss": 0.3763, "step": 4997 }, { "epoch": 1.2359050445103859, "grad_norm": 0.7976408250716166, "learning_rate": 4.510013720049203e-06, "loss": 0.4003, "step": 4998 }, { "epoch": 1.2361523244312562, "grad_norm": 0.8178428192101422, "learning_rate": 4.509820434179048e-06, "loss": 0.3865, "step": 4999 }, { "epoch": 1.2363996043521266, "grad_norm": 0.8016643113041549, "learning_rate": 4.509627114337033e-06, "loss": 0.4044, "step": 5000 }, { "epoch": 1.236646884272997, "grad_norm": 0.7787008348383881, "learning_rate": 4.509433760526423e-06, "loss": 0.3629, "step": 5001 }, { "epoch": 1.2368941641938676, "grad_norm": 0.7955707523926873, "learning_rate": 4.509240372750488e-06, "loss": 0.371, "step": 5002 }, { "epoch": 1.237141444114738, "grad_norm": 0.7927988461157977, "learning_rate": 4.509046951012495e-06, "loss": 0.3744, "step": 5003 }, { "epoch": 1.2373887240356083, "grad_norm": 0.8017945164848691, "learning_rate": 4.508853495315714e-06, "loss": 0.3695, "step": 5004 }, { "epoch": 1.2376360039564787, "grad_norm": 0.7848947217723, "learning_rate": 4.508660005663417e-06, "loss": 0.3866, "step": 5005 }, { "epoch": 1.2378832838773492, "grad_norm": 0.8501763761222897, "learning_rate": 4.508466482058871e-06, "loss": 0.3818, "step": 5006 }, { "epoch": 1.2381305637982196, "grad_norm": 0.7947908917664634, "learning_rate": 4.50827292450535e-06, "loss": 0.3751, "step": 5007 }, { "epoch": 1.23837784371909, "grad_norm": 0.7980443232701457, "learning_rate": 4.5080793330061244e-06, "loss": 0.3719, "step": 5008 }, { "epoch": 1.2386251236399604, "grad_norm": 0.7863196771826106, "learning_rate": 4.507885707564467e-06, "loss": 0.375, "step": 5009 }, { "epoch": 1.238872403560831, "grad_norm": 0.7961776338017525, "learning_rate": 4.50769204818365e-06, "loss": 0.3839, "step": 5010 }, { "epoch": 1.2391196834817013, "grad_norm": 0.7685572040287985, "learning_rate": 4.507498354866949e-06, "loss": 0.3994, "step": 5011 }, { "epoch": 1.2393669634025717, "grad_norm": 0.777076309336032, "learning_rate": 4.507304627617634e-06, "loss": 0.403, "step": 5012 }, { "epoch": 1.239614243323442, "grad_norm": 0.7988697316141328, "learning_rate": 4.507110866438982e-06, "loss": 0.3764, "step": 5013 }, { "epoch": 1.2398615232443126, "grad_norm": 0.8046296081985712, "learning_rate": 4.5069170713342695e-06, "loss": 0.3862, "step": 5014 }, { "epoch": 1.240108803165183, "grad_norm": 0.7970624875228973, "learning_rate": 4.506723242306769e-06, "loss": 0.3577, "step": 5015 }, { "epoch": 1.2403560830860534, "grad_norm": 0.7952338256430788, "learning_rate": 4.5065293793597585e-06, "loss": 0.3872, "step": 5016 }, { "epoch": 1.2406033630069238, "grad_norm": 0.769618683068731, "learning_rate": 4.506335482496516e-06, "loss": 0.391, "step": 5017 }, { "epoch": 1.2408506429277943, "grad_norm": 0.780190052474291, "learning_rate": 4.506141551720316e-06, "loss": 0.4242, "step": 5018 }, { "epoch": 1.2410979228486647, "grad_norm": 0.7815870359625454, "learning_rate": 4.505947587034439e-06, "loss": 0.3778, "step": 5019 }, { "epoch": 1.241345202769535, "grad_norm": 0.8059845124169922, "learning_rate": 4.505753588442163e-06, "loss": 0.3656, "step": 5020 }, { "epoch": 1.2415924826904055, "grad_norm": 0.7858751948362668, "learning_rate": 4.5055595559467665e-06, "loss": 0.3873, "step": 5021 }, { "epoch": 1.241839762611276, "grad_norm": 0.7521902334241786, "learning_rate": 4.50536548955153e-06, "loss": 0.4005, "step": 5022 }, { "epoch": 1.2420870425321464, "grad_norm": 0.7754399905252279, "learning_rate": 4.5051713892597324e-06, "loss": 0.3862, "step": 5023 }, { "epoch": 1.2423343224530168, "grad_norm": 0.7982360064799265, "learning_rate": 4.504977255074656e-06, "loss": 0.384, "step": 5024 }, { "epoch": 1.2425816023738872, "grad_norm": 0.8087092325306788, "learning_rate": 4.504783086999582e-06, "loss": 0.383, "step": 5025 }, { "epoch": 1.2428288822947577, "grad_norm": 0.7936421873823563, "learning_rate": 4.5045888850377915e-06, "loss": 0.3898, "step": 5026 }, { "epoch": 1.243076162215628, "grad_norm": 0.7762225230173943, "learning_rate": 4.504394649192569e-06, "loss": 0.3902, "step": 5027 }, { "epoch": 1.2433234421364985, "grad_norm": 0.7476544079732805, "learning_rate": 4.504200379467196e-06, "loss": 0.3887, "step": 5028 }, { "epoch": 1.2435707220573688, "grad_norm": 0.7494935653852727, "learning_rate": 4.504006075864956e-06, "loss": 0.4187, "step": 5029 }, { "epoch": 1.2438180019782394, "grad_norm": 0.8176758039566192, "learning_rate": 4.5038117383891346e-06, "loss": 0.3886, "step": 5030 }, { "epoch": 1.2440652818991098, "grad_norm": 0.7928996513809159, "learning_rate": 4.503617367043015e-06, "loss": 0.3754, "step": 5031 }, { "epoch": 1.2443125618199802, "grad_norm": 0.8273135672020202, "learning_rate": 4.503422961829885e-06, "loss": 0.4028, "step": 5032 }, { "epoch": 1.2445598417408505, "grad_norm": 0.8397623327460644, "learning_rate": 4.5032285227530295e-06, "loss": 0.3706, "step": 5033 }, { "epoch": 1.2448071216617211, "grad_norm": 0.7802269556366286, "learning_rate": 4.5030340498157335e-06, "loss": 0.3811, "step": 5034 }, { "epoch": 1.2450544015825915, "grad_norm": 0.7981318218919968, "learning_rate": 4.502839543021287e-06, "loss": 0.3518, "step": 5035 }, { "epoch": 1.2453016815034619, "grad_norm": 0.7581438135699036, "learning_rate": 4.502645002372975e-06, "loss": 0.3833, "step": 5036 }, { "epoch": 1.2455489614243325, "grad_norm": 0.7965843578689855, "learning_rate": 4.5024504278740875e-06, "loss": 0.381, "step": 5037 }, { "epoch": 1.2457962413452028, "grad_norm": 0.8144765817241916, "learning_rate": 4.502255819527913e-06, "loss": 0.385, "step": 5038 }, { "epoch": 1.2460435212660732, "grad_norm": 0.775672593517881, "learning_rate": 4.5020611773377406e-06, "loss": 0.3836, "step": 5039 }, { "epoch": 1.2462908011869436, "grad_norm": 0.7565708727486117, "learning_rate": 4.501866501306861e-06, "loss": 0.398, "step": 5040 }, { "epoch": 1.246538081107814, "grad_norm": 0.7667978372868375, "learning_rate": 4.501671791438565e-06, "loss": 0.3745, "step": 5041 }, { "epoch": 1.2467853610286845, "grad_norm": 0.7659736184144327, "learning_rate": 4.501477047736142e-06, "loss": 0.3932, "step": 5042 }, { "epoch": 1.247032640949555, "grad_norm": 0.7781383598372351, "learning_rate": 4.501282270202886e-06, "loss": 0.3953, "step": 5043 }, { "epoch": 1.2472799208704253, "grad_norm": 0.8384685707609962, "learning_rate": 4.501087458842088e-06, "loss": 0.3789, "step": 5044 }, { "epoch": 1.2475272007912959, "grad_norm": 0.7987364493619875, "learning_rate": 4.5008926136570415e-06, "loss": 0.3918, "step": 5045 }, { "epoch": 1.2477744807121662, "grad_norm": 0.8177683700893424, "learning_rate": 4.500697734651039e-06, "loss": 0.3443, "step": 5046 }, { "epoch": 1.2480217606330366, "grad_norm": 0.7795908349569957, "learning_rate": 4.500502821827375e-06, "loss": 0.3798, "step": 5047 }, { "epoch": 1.248269040553907, "grad_norm": 0.7481966065329204, "learning_rate": 4.500307875189345e-06, "loss": 0.3736, "step": 5048 }, { "epoch": 1.2485163204747773, "grad_norm": 0.7612483182405895, "learning_rate": 4.500112894740243e-06, "loss": 0.3728, "step": 5049 }, { "epoch": 1.248763600395648, "grad_norm": 0.8153427548506353, "learning_rate": 4.499917880483365e-06, "loss": 0.3662, "step": 5050 }, { "epoch": 1.2490108803165183, "grad_norm": 0.7898883444164804, "learning_rate": 4.499722832422008e-06, "loss": 0.4041, "step": 5051 }, { "epoch": 1.2492581602373887, "grad_norm": 0.7428909825727187, "learning_rate": 4.499527750559467e-06, "loss": 0.4081, "step": 5052 }, { "epoch": 1.2495054401582593, "grad_norm": 0.7928788735701104, "learning_rate": 4.4993326348990415e-06, "loss": 0.3781, "step": 5053 }, { "epoch": 1.2497527200791296, "grad_norm": 0.7756124066019087, "learning_rate": 4.499137485444029e-06, "loss": 0.3966, "step": 5054 }, { "epoch": 1.25, "grad_norm": 0.7860330534338861, "learning_rate": 4.4989423021977286e-06, "loss": 0.3775, "step": 5055 }, { "epoch": 1.2502472799208704, "grad_norm": 0.7644659460717258, "learning_rate": 4.498747085163438e-06, "loss": 0.3968, "step": 5056 }, { "epoch": 1.2504945598417407, "grad_norm": 0.7777806895237441, "learning_rate": 4.498551834344458e-06, "loss": 0.3621, "step": 5057 }, { "epoch": 1.2507418397626113, "grad_norm": 0.7633860923683963, "learning_rate": 4.498356549744089e-06, "loss": 0.3885, "step": 5058 }, { "epoch": 1.2509891196834817, "grad_norm": 0.7525146116178814, "learning_rate": 4.4981612313656295e-06, "loss": 0.3895, "step": 5059 }, { "epoch": 1.251236399604352, "grad_norm": 0.7919926974719121, "learning_rate": 4.497965879212385e-06, "loss": 0.3832, "step": 5060 }, { "epoch": 1.2514836795252227, "grad_norm": 0.7975769844706013, "learning_rate": 4.497770493287656e-06, "loss": 0.3737, "step": 5061 }, { "epoch": 1.251730959446093, "grad_norm": 0.7896002049017717, "learning_rate": 4.497575073594743e-06, "loss": 0.3655, "step": 5062 }, { "epoch": 1.2519782393669634, "grad_norm": 0.7725516377310019, "learning_rate": 4.4973796201369505e-06, "loss": 0.3889, "step": 5063 }, { "epoch": 1.2522255192878338, "grad_norm": 0.824617629808187, "learning_rate": 4.4971841329175835e-06, "loss": 0.3648, "step": 5064 }, { "epoch": 1.2524727992087041, "grad_norm": 0.7750394296947322, "learning_rate": 4.4969886119399454e-06, "loss": 0.403, "step": 5065 }, { "epoch": 1.2527200791295747, "grad_norm": 0.7712460172392823, "learning_rate": 4.4967930572073405e-06, "loss": 0.3967, "step": 5066 }, { "epoch": 1.252967359050445, "grad_norm": 0.7859591548213357, "learning_rate": 4.496597468723075e-06, "loss": 0.3717, "step": 5067 }, { "epoch": 1.2532146389713155, "grad_norm": 0.8186300200115335, "learning_rate": 4.496401846490455e-06, "loss": 0.3777, "step": 5068 }, { "epoch": 1.253461918892186, "grad_norm": 0.7798973279949561, "learning_rate": 4.496206190512786e-06, "loss": 0.3732, "step": 5069 }, { "epoch": 1.2537091988130564, "grad_norm": 0.7817816984160825, "learning_rate": 4.496010500793376e-06, "loss": 0.387, "step": 5070 }, { "epoch": 1.2539564787339268, "grad_norm": 0.7987146534874888, "learning_rate": 4.495814777335533e-06, "loss": 0.3427, "step": 5071 }, { "epoch": 1.2542037586547972, "grad_norm": 0.8017756083835961, "learning_rate": 4.4956190201425656e-06, "loss": 0.3594, "step": 5072 }, { "epoch": 1.2544510385756675, "grad_norm": 0.8127195103531558, "learning_rate": 4.495423229217781e-06, "loss": 0.3667, "step": 5073 }, { "epoch": 1.2546983184965381, "grad_norm": 0.8115656795591214, "learning_rate": 4.4952274045644895e-06, "loss": 0.3906, "step": 5074 }, { "epoch": 1.2549455984174085, "grad_norm": 0.7960292534928861, "learning_rate": 4.495031546186002e-06, "loss": 0.4001, "step": 5075 }, { "epoch": 1.2551928783382789, "grad_norm": 0.8349067180038289, "learning_rate": 4.494835654085627e-06, "loss": 0.3721, "step": 5076 }, { "epoch": 1.2554401582591495, "grad_norm": 0.7736835936634592, "learning_rate": 4.494639728266678e-06, "loss": 0.3782, "step": 5077 }, { "epoch": 1.2556874381800198, "grad_norm": 0.8056469289578025, "learning_rate": 4.494443768732466e-06, "loss": 0.3869, "step": 5078 }, { "epoch": 1.2559347181008902, "grad_norm": 0.7814217334138762, "learning_rate": 4.494247775486302e-06, "loss": 0.3913, "step": 5079 }, { "epoch": 1.2561819980217606, "grad_norm": 0.7930422231891733, "learning_rate": 4.494051748531501e-06, "loss": 0.3869, "step": 5080 }, { "epoch": 1.256429277942631, "grad_norm": 0.7845945565524525, "learning_rate": 4.493855687871375e-06, "loss": 0.4038, "step": 5081 }, { "epoch": 1.2566765578635015, "grad_norm": 0.7911805995577631, "learning_rate": 4.493659593509238e-06, "loss": 0.3757, "step": 5082 }, { "epoch": 1.256923837784372, "grad_norm": 0.757617059284575, "learning_rate": 4.493463465448406e-06, "loss": 0.369, "step": 5083 }, { "epoch": 1.2571711177052423, "grad_norm": 0.8475378191576469, "learning_rate": 4.493267303692191e-06, "loss": 0.3952, "step": 5084 }, { "epoch": 1.2574183976261128, "grad_norm": 0.783699374807463, "learning_rate": 4.493071108243912e-06, "loss": 0.3862, "step": 5085 }, { "epoch": 1.2576656775469832, "grad_norm": 0.7864372943148732, "learning_rate": 4.4928748791068835e-06, "loss": 0.3937, "step": 5086 }, { "epoch": 1.2579129574678536, "grad_norm": 0.7965302417508711, "learning_rate": 4.492678616284422e-06, "loss": 0.3563, "step": 5087 }, { "epoch": 1.258160237388724, "grad_norm": 0.7768917802934155, "learning_rate": 4.492482319779848e-06, "loss": 0.4322, "step": 5088 }, { "epoch": 1.2584075173095943, "grad_norm": 0.7776062088329516, "learning_rate": 4.492285989596476e-06, "loss": 0.3914, "step": 5089 }, { "epoch": 1.258654797230465, "grad_norm": 0.7708201908344137, "learning_rate": 4.492089625737626e-06, "loss": 0.403, "step": 5090 }, { "epoch": 1.2589020771513353, "grad_norm": 0.7990654963129649, "learning_rate": 4.4918932282066165e-06, "loss": 0.3622, "step": 5091 }, { "epoch": 1.2591493570722057, "grad_norm": 0.7921978268393997, "learning_rate": 4.491696797006768e-06, "loss": 0.3904, "step": 5092 }, { "epoch": 1.2593966369930762, "grad_norm": 0.8040875431917949, "learning_rate": 4.4915003321413995e-06, "loss": 0.3726, "step": 5093 }, { "epoch": 1.2596439169139466, "grad_norm": 0.8124886036851785, "learning_rate": 4.491303833613834e-06, "loss": 0.3959, "step": 5094 }, { "epoch": 1.259891196834817, "grad_norm": 0.7730580393074661, "learning_rate": 4.491107301427391e-06, "loss": 0.3891, "step": 5095 }, { "epoch": 1.2601384767556874, "grad_norm": 0.8128580573412434, "learning_rate": 4.490910735585393e-06, "loss": 0.3766, "step": 5096 }, { "epoch": 1.260385756676558, "grad_norm": 0.7938914899538558, "learning_rate": 4.490714136091163e-06, "loss": 0.375, "step": 5097 }, { "epoch": 1.2606330365974283, "grad_norm": 0.7922343167483054, "learning_rate": 4.490517502948023e-06, "loss": 0.3781, "step": 5098 }, { "epoch": 1.2608803165182987, "grad_norm": 0.7820643118638017, "learning_rate": 4.490320836159299e-06, "loss": 0.4163, "step": 5099 }, { "epoch": 1.2611275964391693, "grad_norm": 0.7835515035262297, "learning_rate": 4.490124135728312e-06, "loss": 0.3578, "step": 5100 }, { "epoch": 1.2613748763600396, "grad_norm": 0.7878736668464732, "learning_rate": 4.489927401658389e-06, "loss": 0.3551, "step": 5101 }, { "epoch": 1.26162215628091, "grad_norm": 0.8086262794364134, "learning_rate": 4.4897306339528545e-06, "loss": 0.3599, "step": 5102 }, { "epoch": 1.2618694362017804, "grad_norm": 0.7802017211470089, "learning_rate": 4.4895338326150356e-06, "loss": 0.3679, "step": 5103 }, { "epoch": 1.2621167161226508, "grad_norm": 0.7814308905438287, "learning_rate": 4.489336997648258e-06, "loss": 0.3662, "step": 5104 }, { "epoch": 1.2623639960435213, "grad_norm": 0.7757803436338097, "learning_rate": 4.489140129055848e-06, "loss": 0.3562, "step": 5105 }, { "epoch": 1.2626112759643917, "grad_norm": 0.7886803310007393, "learning_rate": 4.4889432268411344e-06, "loss": 0.3649, "step": 5106 }, { "epoch": 1.262858555885262, "grad_norm": 0.8227992158242561, "learning_rate": 4.488746291007446e-06, "loss": 0.3807, "step": 5107 }, { "epoch": 1.2631058358061327, "grad_norm": 0.8483688438377366, "learning_rate": 4.488549321558109e-06, "loss": 0.3741, "step": 5108 }, { "epoch": 1.263353115727003, "grad_norm": 0.7863093394368537, "learning_rate": 4.488352318496456e-06, "loss": 0.3804, "step": 5109 }, { "epoch": 1.2636003956478734, "grad_norm": 0.7618949907000928, "learning_rate": 4.488155281825814e-06, "loss": 0.3949, "step": 5110 }, { "epoch": 1.2638476755687438, "grad_norm": 0.7591044323536802, "learning_rate": 4.487958211549517e-06, "loss": 0.3571, "step": 5111 }, { "epoch": 1.2640949554896141, "grad_norm": 0.8355907666816403, "learning_rate": 4.487761107670892e-06, "loss": 0.3589, "step": 5112 }, { "epoch": 1.2643422354104847, "grad_norm": 0.8102171006496408, "learning_rate": 4.487563970193273e-06, "loss": 0.3787, "step": 5113 }, { "epoch": 1.264589515331355, "grad_norm": 0.8019806635245758, "learning_rate": 4.487366799119992e-06, "loss": 0.3733, "step": 5114 }, { "epoch": 1.2648367952522255, "grad_norm": 0.7592634382395193, "learning_rate": 4.487169594454381e-06, "loss": 0.377, "step": 5115 }, { "epoch": 1.265084075173096, "grad_norm": 0.8053330895890225, "learning_rate": 4.486972356199775e-06, "loss": 0.3599, "step": 5116 }, { "epoch": 1.2653313550939664, "grad_norm": 0.7997585259145267, "learning_rate": 4.486775084359506e-06, "loss": 0.379, "step": 5117 }, { "epoch": 1.2655786350148368, "grad_norm": 0.7864575990916536, "learning_rate": 4.48657777893691e-06, "loss": 0.3852, "step": 5118 }, { "epoch": 1.2658259149357072, "grad_norm": 0.7889460637094079, "learning_rate": 4.486380439935321e-06, "loss": 0.3802, "step": 5119 }, { "epoch": 1.2660731948565775, "grad_norm": 0.7632351707795301, "learning_rate": 4.486183067358074e-06, "loss": 0.3752, "step": 5120 }, { "epoch": 1.2663204747774481, "grad_norm": 0.7781564842683213, "learning_rate": 4.485985661208507e-06, "loss": 0.4009, "step": 5121 }, { "epoch": 1.2665677546983185, "grad_norm": 0.8029650734350023, "learning_rate": 4.485788221489955e-06, "loss": 0.3601, "step": 5122 }, { "epoch": 1.2668150346191889, "grad_norm": 0.7684740997896675, "learning_rate": 4.485590748205757e-06, "loss": 0.3876, "step": 5123 }, { "epoch": 1.2670623145400595, "grad_norm": 0.7630995877168623, "learning_rate": 4.48539324135925e-06, "loss": 0.3767, "step": 5124 }, { "epoch": 1.2673095944609298, "grad_norm": 0.7938018131450808, "learning_rate": 4.4851957009537726e-06, "loss": 0.3736, "step": 5125 }, { "epoch": 1.2675568743818002, "grad_norm": 0.8221106354004134, "learning_rate": 4.484998126992663e-06, "loss": 0.3638, "step": 5126 }, { "epoch": 1.2678041543026706, "grad_norm": 0.7972194235458452, "learning_rate": 4.484800519479262e-06, "loss": 0.4084, "step": 5127 }, { "epoch": 1.268051434223541, "grad_norm": 0.7932911895878441, "learning_rate": 4.484602878416909e-06, "loss": 0.3887, "step": 5128 }, { "epoch": 1.2682987141444115, "grad_norm": 0.7838245994123189, "learning_rate": 4.4844052038089446e-06, "loss": 0.3758, "step": 5129 }, { "epoch": 1.268545994065282, "grad_norm": 0.7996686425265455, "learning_rate": 4.48420749565871e-06, "loss": 0.4115, "step": 5130 }, { "epoch": 1.2687932739861523, "grad_norm": 0.7426810735179171, "learning_rate": 4.4840097539695485e-06, "loss": 0.3841, "step": 5131 }, { "epoch": 1.2690405539070229, "grad_norm": 0.771550535015586, "learning_rate": 4.483811978744801e-06, "loss": 0.357, "step": 5132 }, { "epoch": 1.2692878338278932, "grad_norm": 0.7810286148726288, "learning_rate": 4.483614169987811e-06, "loss": 0.3741, "step": 5133 }, { "epoch": 1.2695351137487636, "grad_norm": 0.8060184944502079, "learning_rate": 4.483416327701922e-06, "loss": 0.3869, "step": 5134 }, { "epoch": 1.269782393669634, "grad_norm": 0.7982941686949031, "learning_rate": 4.483218451890479e-06, "loss": 0.359, "step": 5135 }, { "epoch": 1.2700296735905043, "grad_norm": 0.7752346691318749, "learning_rate": 4.483020542556824e-06, "loss": 0.3895, "step": 5136 }, { "epoch": 1.270276953511375, "grad_norm": 0.7764955839580754, "learning_rate": 4.482822599704305e-06, "loss": 0.3766, "step": 5137 }, { "epoch": 1.2705242334322453, "grad_norm": 0.7612702727176542, "learning_rate": 4.482624623336267e-06, "loss": 0.3868, "step": 5138 }, { "epoch": 1.2707715133531157, "grad_norm": 0.7942568799624892, "learning_rate": 4.4824266134560564e-06, "loss": 0.3776, "step": 5139 }, { "epoch": 1.2710187932739863, "grad_norm": 0.8155040461273152, "learning_rate": 4.48222857006702e-06, "loss": 0.3622, "step": 5140 }, { "epoch": 1.2712660731948566, "grad_norm": 0.7554524051830521, "learning_rate": 4.482030493172504e-06, "loss": 0.3761, "step": 5141 }, { "epoch": 1.271513353115727, "grad_norm": 0.7830346599839882, "learning_rate": 4.481832382775859e-06, "loss": 0.3724, "step": 5142 }, { "epoch": 1.2717606330365974, "grad_norm": 0.7522188131394106, "learning_rate": 4.481634238880433e-06, "loss": 0.3932, "step": 5143 }, { "epoch": 1.2720079129574677, "grad_norm": 0.7984397620436523, "learning_rate": 4.4814360614895734e-06, "loss": 0.4075, "step": 5144 }, { "epoch": 1.2722551928783383, "grad_norm": 0.7525958194825093, "learning_rate": 4.481237850606631e-06, "loss": 0.4098, "step": 5145 }, { "epoch": 1.2725024727992087, "grad_norm": 0.7976520695514134, "learning_rate": 4.481039606234957e-06, "loss": 0.3617, "step": 5146 }, { "epoch": 1.272749752720079, "grad_norm": 0.783087846030496, "learning_rate": 4.4808413283779016e-06, "loss": 0.3763, "step": 5147 }, { "epoch": 1.2729970326409497, "grad_norm": 0.7834196785572395, "learning_rate": 4.480643017038817e-06, "loss": 0.3881, "step": 5148 }, { "epoch": 1.27324431256182, "grad_norm": 0.7762407171155572, "learning_rate": 4.480444672221053e-06, "loss": 0.3771, "step": 5149 }, { "epoch": 1.2734915924826904, "grad_norm": 0.8028499171209291, "learning_rate": 4.480246293927965e-06, "loss": 0.3791, "step": 5150 }, { "epoch": 1.2737388724035608, "grad_norm": 0.7827808282541638, "learning_rate": 4.480047882162905e-06, "loss": 0.3422, "step": 5151 }, { "epoch": 1.2739861523244311, "grad_norm": 0.7614961278056019, "learning_rate": 4.4798494369292265e-06, "loss": 0.3597, "step": 5152 }, { "epoch": 1.2742334322453017, "grad_norm": 0.8134974025435528, "learning_rate": 4.4796509582302835e-06, "loss": 0.3626, "step": 5153 }, { "epoch": 1.274480712166172, "grad_norm": 0.7737910387604808, "learning_rate": 4.479452446069432e-06, "loss": 0.3978, "step": 5154 }, { "epoch": 1.2747279920870425, "grad_norm": 0.7882510349958958, "learning_rate": 4.4792539004500274e-06, "loss": 0.363, "step": 5155 }, { "epoch": 1.274975272007913, "grad_norm": 0.763654188976373, "learning_rate": 4.479055321375424e-06, "loss": 0.3788, "step": 5156 }, { "epoch": 1.2752225519287834, "grad_norm": 0.77858522882188, "learning_rate": 4.478856708848981e-06, "loss": 0.3652, "step": 5157 }, { "epoch": 1.2754698318496538, "grad_norm": 0.77035028725191, "learning_rate": 4.478658062874053e-06, "loss": 0.359, "step": 5158 }, { "epoch": 1.2757171117705242, "grad_norm": 0.7918696682733054, "learning_rate": 4.478459383453998e-06, "loss": 0.3807, "step": 5159 }, { "epoch": 1.2759643916913945, "grad_norm": 0.7905279027458672, "learning_rate": 4.478260670592176e-06, "loss": 0.366, "step": 5160 }, { "epoch": 1.2762116716122651, "grad_norm": 0.7946130506087248, "learning_rate": 4.478061924291944e-06, "loss": 0.3849, "step": 5161 }, { "epoch": 1.2764589515331355, "grad_norm": 0.7527671524351435, "learning_rate": 4.477863144556663e-06, "loss": 0.397, "step": 5162 }, { "epoch": 1.2767062314540059, "grad_norm": 0.8231676104132625, "learning_rate": 4.4776643313896926e-06, "loss": 0.3621, "step": 5163 }, { "epoch": 1.2769535113748764, "grad_norm": 0.8386349713602416, "learning_rate": 4.477465484794392e-06, "loss": 0.3647, "step": 5164 }, { "epoch": 1.2772007912957468, "grad_norm": 0.794204092808583, "learning_rate": 4.477266604774124e-06, "loss": 0.3678, "step": 5165 }, { "epoch": 1.2774480712166172, "grad_norm": 0.7700963976676172, "learning_rate": 4.477067691332248e-06, "loss": 0.3909, "step": 5166 }, { "epoch": 1.2776953511374876, "grad_norm": 0.7680904803672542, "learning_rate": 4.47686874447213e-06, "loss": 0.3754, "step": 5167 }, { "epoch": 1.277942631058358, "grad_norm": 0.7786173733871352, "learning_rate": 4.476669764197129e-06, "loss": 0.3887, "step": 5168 }, { "epoch": 1.2781899109792285, "grad_norm": 0.7600088082023408, "learning_rate": 4.4764707505106095e-06, "loss": 0.3715, "step": 5169 }, { "epoch": 1.2784371909000989, "grad_norm": 0.7487397965843531, "learning_rate": 4.4762717034159366e-06, "loss": 0.3966, "step": 5170 }, { "epoch": 1.2786844708209693, "grad_norm": 0.7964193114317424, "learning_rate": 4.476072622916474e-06, "loss": 0.3914, "step": 5171 }, { "epoch": 1.2789317507418398, "grad_norm": 0.7468440534784923, "learning_rate": 4.4758735090155856e-06, "loss": 0.407, "step": 5172 }, { "epoch": 1.2791790306627102, "grad_norm": 0.7738305487490387, "learning_rate": 4.475674361716639e-06, "loss": 0.3577, "step": 5173 }, { "epoch": 1.2794263105835806, "grad_norm": 0.7703997402246496, "learning_rate": 4.475475181022999e-06, "loss": 0.3842, "step": 5174 }, { "epoch": 1.279673590504451, "grad_norm": 0.7989268498334792, "learning_rate": 4.475275966938033e-06, "loss": 0.3926, "step": 5175 }, { "epoch": 1.2799208704253215, "grad_norm": 0.7867364222501286, "learning_rate": 4.475076719465108e-06, "loss": 0.3816, "step": 5176 }, { "epoch": 1.280168150346192, "grad_norm": 0.7696696133043853, "learning_rate": 4.474877438607592e-06, "loss": 0.3878, "step": 5177 }, { "epoch": 1.2804154302670623, "grad_norm": 0.8059376354999271, "learning_rate": 4.474678124368854e-06, "loss": 0.358, "step": 5178 }, { "epoch": 1.2806627101879329, "grad_norm": 0.7918506851066401, "learning_rate": 4.474478776752261e-06, "loss": 0.3777, "step": 5179 }, { "epoch": 1.2809099901088032, "grad_norm": 0.7633457725518863, "learning_rate": 4.474279395761185e-06, "loss": 0.38, "step": 5180 }, { "epoch": 1.2811572700296736, "grad_norm": 0.7669955549650003, "learning_rate": 4.474079981398994e-06, "loss": 0.3964, "step": 5181 }, { "epoch": 1.281404549950544, "grad_norm": 0.800169310549515, "learning_rate": 4.473880533669061e-06, "loss": 0.376, "step": 5182 }, { "epoch": 1.2816518298714143, "grad_norm": 0.7966511507226834, "learning_rate": 4.473681052574755e-06, "loss": 0.3846, "step": 5183 }, { "epoch": 1.281899109792285, "grad_norm": 0.7908724178728678, "learning_rate": 4.47348153811945e-06, "loss": 0.3876, "step": 5184 }, { "epoch": 1.2821463897131553, "grad_norm": 0.7946412083186123, "learning_rate": 4.473281990306517e-06, "loss": 0.3382, "step": 5185 }, { "epoch": 1.2823936696340257, "grad_norm": 0.8052588673126574, "learning_rate": 4.473082409139328e-06, "loss": 0.3587, "step": 5186 }, { "epoch": 1.2826409495548963, "grad_norm": 0.7789440571842964, "learning_rate": 4.472882794621258e-06, "loss": 0.385, "step": 5187 }, { "epoch": 1.2828882294757666, "grad_norm": 0.7907193911053574, "learning_rate": 4.472683146755682e-06, "loss": 0.3759, "step": 5188 }, { "epoch": 1.283135509396637, "grad_norm": 0.765666764386679, "learning_rate": 4.472483465545972e-06, "loss": 0.3646, "step": 5189 }, { "epoch": 1.2833827893175074, "grad_norm": 0.7566956562066384, "learning_rate": 4.472283750995504e-06, "loss": 0.3924, "step": 5190 }, { "epoch": 1.2836300692383777, "grad_norm": 0.7768318928284962, "learning_rate": 4.472084003107657e-06, "loss": 0.3779, "step": 5191 }, { "epoch": 1.2838773491592483, "grad_norm": 0.8199481809996464, "learning_rate": 4.471884221885802e-06, "loss": 0.3756, "step": 5192 }, { "epoch": 1.2841246290801187, "grad_norm": 0.7842292094803429, "learning_rate": 4.47168440733332e-06, "loss": 0.4124, "step": 5193 }, { "epoch": 1.284371909000989, "grad_norm": 0.7557027754536744, "learning_rate": 4.471484559453586e-06, "loss": 0.3688, "step": 5194 }, { "epoch": 1.2846191889218597, "grad_norm": 0.7989724025386588, "learning_rate": 4.471284678249979e-06, "loss": 0.4092, "step": 5195 }, { "epoch": 1.28486646884273, "grad_norm": 0.7988462350537475, "learning_rate": 4.471084763725878e-06, "loss": 0.3572, "step": 5196 }, { "epoch": 1.2851137487636004, "grad_norm": 0.8213847172821245, "learning_rate": 4.470884815884662e-06, "loss": 0.3864, "step": 5197 }, { "epoch": 1.2853610286844708, "grad_norm": 0.7528809213557032, "learning_rate": 4.4706848347297105e-06, "loss": 0.3891, "step": 5198 }, { "epoch": 1.2856083086053411, "grad_norm": 0.8033554727980045, "learning_rate": 4.470484820264403e-06, "loss": 0.3864, "step": 5199 }, { "epoch": 1.2858555885262117, "grad_norm": 0.7877766152708563, "learning_rate": 4.4702847724921215e-06, "loss": 0.3405, "step": 5200 }, { "epoch": 1.286102868447082, "grad_norm": 0.7966535140497238, "learning_rate": 4.470084691416247e-06, "loss": 0.3765, "step": 5201 }, { "epoch": 1.2863501483679525, "grad_norm": 0.7796564811323057, "learning_rate": 4.469884577040161e-06, "loss": 0.3865, "step": 5202 }, { "epoch": 1.286597428288823, "grad_norm": 0.7559890602987585, "learning_rate": 4.469684429367246e-06, "loss": 0.3871, "step": 5203 }, { "epoch": 1.2868447082096934, "grad_norm": 0.7863436800592355, "learning_rate": 4.469484248400886e-06, "loss": 0.3972, "step": 5204 }, { "epoch": 1.2870919881305638, "grad_norm": 0.8200505345814457, "learning_rate": 4.469284034144464e-06, "loss": 0.3801, "step": 5205 }, { "epoch": 1.2873392680514342, "grad_norm": 0.7817354167848025, "learning_rate": 4.469083786601365e-06, "loss": 0.3629, "step": 5206 }, { "epoch": 1.2875865479723045, "grad_norm": 0.7860727845206454, "learning_rate": 4.468883505774973e-06, "loss": 0.3758, "step": 5207 }, { "epoch": 1.2878338278931751, "grad_norm": 0.7921856274199621, "learning_rate": 4.468683191668672e-06, "loss": 0.3541, "step": 5208 }, { "epoch": 1.2880811078140455, "grad_norm": 0.7872342784776625, "learning_rate": 4.4684828442858504e-06, "loss": 0.3392, "step": 5209 }, { "epoch": 1.2883283877349159, "grad_norm": 0.7867840447380229, "learning_rate": 4.468282463629894e-06, "loss": 0.3954, "step": 5210 }, { "epoch": 1.2885756676557865, "grad_norm": 0.7859170830539963, "learning_rate": 4.4680820497041885e-06, "loss": 0.3673, "step": 5211 }, { "epoch": 1.2888229475766568, "grad_norm": 0.7949272909349651, "learning_rate": 4.467881602512123e-06, "loss": 0.3387, "step": 5212 }, { "epoch": 1.2890702274975272, "grad_norm": 0.7746775242348436, "learning_rate": 4.4676811220570844e-06, "loss": 0.362, "step": 5213 }, { "epoch": 1.2893175074183976, "grad_norm": 0.7626402741132354, "learning_rate": 4.467480608342463e-06, "loss": 0.3688, "step": 5214 }, { "epoch": 1.289564787339268, "grad_norm": 0.8098253573837331, "learning_rate": 4.467280061371647e-06, "loss": 0.3803, "step": 5215 }, { "epoch": 1.2898120672601385, "grad_norm": 0.7715223950040957, "learning_rate": 4.467079481148025e-06, "loss": 0.3805, "step": 5216 }, { "epoch": 1.290059347181009, "grad_norm": 0.7830326613435596, "learning_rate": 4.4668788676749905e-06, "loss": 0.3764, "step": 5217 }, { "epoch": 1.2903066271018793, "grad_norm": 0.777932263183256, "learning_rate": 4.466678220955931e-06, "loss": 0.3747, "step": 5218 }, { "epoch": 1.2905539070227499, "grad_norm": 0.7978219482362807, "learning_rate": 4.466477540994241e-06, "loss": 0.3664, "step": 5219 }, { "epoch": 1.2908011869436202, "grad_norm": 0.8147172325255136, "learning_rate": 4.466276827793311e-06, "loss": 0.3729, "step": 5220 }, { "epoch": 1.2910484668644906, "grad_norm": 0.7704262261586412, "learning_rate": 4.466076081356534e-06, "loss": 0.3674, "step": 5221 }, { "epoch": 1.291295746785361, "grad_norm": 0.7813695030255806, "learning_rate": 4.465875301687303e-06, "loss": 0.3892, "step": 5222 }, { "epoch": 1.2915430267062313, "grad_norm": 0.7629401463727951, "learning_rate": 4.465674488789012e-06, "loss": 0.3772, "step": 5223 }, { "epoch": 1.291790306627102, "grad_norm": 0.784573485013266, "learning_rate": 4.4654736426650544e-06, "loss": 0.3846, "step": 5224 }, { "epoch": 1.2920375865479723, "grad_norm": 0.7748941272067119, "learning_rate": 4.465272763318827e-06, "loss": 0.3861, "step": 5225 }, { "epoch": 1.2922848664688427, "grad_norm": 0.7731376314257934, "learning_rate": 4.465071850753724e-06, "loss": 0.3765, "step": 5226 }, { "epoch": 1.2925321463897133, "grad_norm": 0.7874920367986128, "learning_rate": 4.4648709049731415e-06, "loss": 0.3995, "step": 5227 }, { "epoch": 1.2927794263105836, "grad_norm": 0.7697583446957881, "learning_rate": 4.464669925980476e-06, "loss": 0.4007, "step": 5228 }, { "epoch": 1.293026706231454, "grad_norm": 0.7442737544935962, "learning_rate": 4.464468913779125e-06, "loss": 0.3767, "step": 5229 }, { "epoch": 1.2932739861523244, "grad_norm": 0.7974206450143851, "learning_rate": 4.464267868372486e-06, "loss": 0.3766, "step": 5230 }, { "epoch": 1.2935212660731947, "grad_norm": 0.8007984433384929, "learning_rate": 4.464066789763957e-06, "loss": 0.4066, "step": 5231 }, { "epoch": 1.2937685459940653, "grad_norm": 0.7990420804979205, "learning_rate": 4.463865677956938e-06, "loss": 0.3805, "step": 5232 }, { "epoch": 1.2940158259149357, "grad_norm": 0.7664104671223602, "learning_rate": 4.463664532954825e-06, "loss": 0.3764, "step": 5233 }, { "epoch": 1.294263105835806, "grad_norm": 0.7862871665901164, "learning_rate": 4.463463354761023e-06, "loss": 0.3741, "step": 5234 }, { "epoch": 1.2945103857566767, "grad_norm": 0.8106612238161799, "learning_rate": 4.463262143378928e-06, "loss": 0.3878, "step": 5235 }, { "epoch": 1.294757665677547, "grad_norm": 0.7640545061021522, "learning_rate": 4.4630608988119445e-06, "loss": 0.4055, "step": 5236 }, { "epoch": 1.2950049455984174, "grad_norm": 0.7800659412966927, "learning_rate": 4.4628596210634724e-06, "loss": 0.379, "step": 5237 }, { "epoch": 1.2952522255192878, "grad_norm": 0.7824183682964886, "learning_rate": 4.462658310136914e-06, "loss": 0.3862, "step": 5238 }, { "epoch": 1.2954995054401581, "grad_norm": 0.7761512959635977, "learning_rate": 4.462456966035671e-06, "loss": 0.4309, "step": 5239 }, { "epoch": 1.2957467853610287, "grad_norm": 0.756749786008696, "learning_rate": 4.462255588763148e-06, "loss": 0.3796, "step": 5240 }, { "epoch": 1.295994065281899, "grad_norm": 0.7673644227120926, "learning_rate": 4.46205417832275e-06, "loss": 0.3911, "step": 5241 }, { "epoch": 1.2962413452027695, "grad_norm": 0.7846650059711212, "learning_rate": 4.4618527347178785e-06, "loss": 0.3567, "step": 5242 }, { "epoch": 1.29648862512364, "grad_norm": 0.7663263283183631, "learning_rate": 4.461651257951941e-06, "loss": 0.368, "step": 5243 }, { "epoch": 1.2967359050445104, "grad_norm": 0.7919322811934161, "learning_rate": 4.461449748028342e-06, "loss": 0.4029, "step": 5244 }, { "epoch": 1.2969831849653808, "grad_norm": 0.8210262928034145, "learning_rate": 4.4612482049504875e-06, "loss": 0.36, "step": 5245 }, { "epoch": 1.2972304648862512, "grad_norm": 0.7853707987233157, "learning_rate": 4.461046628721785e-06, "loss": 0.3534, "step": 5246 }, { "epoch": 1.2974777448071215, "grad_norm": 0.7758570543678638, "learning_rate": 4.460845019345641e-06, "loss": 0.3972, "step": 5247 }, { "epoch": 1.2977250247279921, "grad_norm": 0.788686706902972, "learning_rate": 4.460643376825463e-06, "loss": 0.364, "step": 5248 }, { "epoch": 1.2979723046488625, "grad_norm": 0.7735638006773858, "learning_rate": 4.460441701164661e-06, "loss": 0.3914, "step": 5249 }, { "epoch": 1.298219584569733, "grad_norm": 0.7613644894532048, "learning_rate": 4.460239992366641e-06, "loss": 0.3746, "step": 5250 }, { "epoch": 1.2984668644906034, "grad_norm": 0.776637328886695, "learning_rate": 4.460038250434815e-06, "loss": 0.3887, "step": 5251 }, { "epoch": 1.2987141444114738, "grad_norm": 0.7557809738136717, "learning_rate": 4.459836475372592e-06, "loss": 0.3958, "step": 5252 }, { "epoch": 1.2989614243323442, "grad_norm": 0.8103634849371182, "learning_rate": 4.459634667183384e-06, "loss": 0.3678, "step": 5253 }, { "epoch": 1.2992087042532146, "grad_norm": 0.7546966629836871, "learning_rate": 4.459432825870599e-06, "loss": 0.3512, "step": 5254 }, { "epoch": 1.2994559841740851, "grad_norm": 0.8381329677771521, "learning_rate": 4.459230951437652e-06, "loss": 0.3754, "step": 5255 }, { "epoch": 1.2997032640949555, "grad_norm": 0.7879815783295794, "learning_rate": 4.4590290438879535e-06, "loss": 0.3888, "step": 5256 }, { "epoch": 1.2999505440158259, "grad_norm": 0.7518062402892711, "learning_rate": 4.458827103224916e-06, "loss": 0.3724, "step": 5257 }, { "epoch": 1.3001978239366965, "grad_norm": 0.7944404907942474, "learning_rate": 4.458625129451955e-06, "loss": 0.3627, "step": 5258 }, { "epoch": 1.3004451038575668, "grad_norm": 0.7801843496671288, "learning_rate": 4.458423122572482e-06, "loss": 0.3752, "step": 5259 }, { "epoch": 1.3006923837784372, "grad_norm": 0.7725515669664675, "learning_rate": 4.458221082589913e-06, "loss": 0.375, "step": 5260 }, { "epoch": 1.3009396636993076, "grad_norm": 0.7954144039597497, "learning_rate": 4.4580190095076634e-06, "loss": 0.3884, "step": 5261 }, { "epoch": 1.301186943620178, "grad_norm": 0.8040865357549296, "learning_rate": 4.457816903329147e-06, "loss": 0.3689, "step": 5262 }, { "epoch": 1.3014342235410485, "grad_norm": 0.7672360803512956, "learning_rate": 4.457614764057781e-06, "loss": 0.384, "step": 5263 }, { "epoch": 1.301681503461919, "grad_norm": 0.7850612288540293, "learning_rate": 4.457412591696983e-06, "loss": 0.3843, "step": 5264 }, { "epoch": 1.3019287833827893, "grad_norm": 0.7913626471967832, "learning_rate": 4.457210386250169e-06, "loss": 0.3789, "step": 5265 }, { "epoch": 1.3021760633036599, "grad_norm": 0.7855808187726485, "learning_rate": 4.457008147720758e-06, "loss": 0.3671, "step": 5266 }, { "epoch": 1.3024233432245302, "grad_norm": 0.8043253546787054, "learning_rate": 4.4568058761121675e-06, "loss": 0.3876, "step": 5267 }, { "epoch": 1.3026706231454006, "grad_norm": 0.7999210511859174, "learning_rate": 4.456603571427817e-06, "loss": 0.391, "step": 5268 }, { "epoch": 1.302917903066271, "grad_norm": 0.7988338640484216, "learning_rate": 4.456401233671125e-06, "loss": 0.3824, "step": 5269 }, { "epoch": 1.3031651829871413, "grad_norm": 0.7860138608319007, "learning_rate": 4.4561988628455135e-06, "loss": 0.3846, "step": 5270 }, { "epoch": 1.303412462908012, "grad_norm": 0.7999991444677728, "learning_rate": 4.455996458954401e-06, "loss": 0.3985, "step": 5271 }, { "epoch": 1.3036597428288823, "grad_norm": 0.7836139091513834, "learning_rate": 4.455794022001211e-06, "loss": 0.4099, "step": 5272 }, { "epoch": 1.3039070227497527, "grad_norm": 0.7882050761005842, "learning_rate": 4.455591551989363e-06, "loss": 0.3963, "step": 5273 }, { "epoch": 1.3041543026706233, "grad_norm": 0.7830071166359722, "learning_rate": 4.4553890489222815e-06, "loss": 0.3841, "step": 5274 }, { "epoch": 1.3044015825914936, "grad_norm": 0.800743564279978, "learning_rate": 4.455186512803387e-06, "loss": 0.3655, "step": 5275 }, { "epoch": 1.304648862512364, "grad_norm": 0.8547655836261313, "learning_rate": 4.454983943636106e-06, "loss": 0.3845, "step": 5276 }, { "epoch": 1.3048961424332344, "grad_norm": 0.7912989803375123, "learning_rate": 4.4547813414238604e-06, "loss": 0.3788, "step": 5277 }, { "epoch": 1.3051434223541047, "grad_norm": 0.8256865141480082, "learning_rate": 4.454578706170075e-06, "loss": 0.3693, "step": 5278 }, { "epoch": 1.3053907022749753, "grad_norm": 0.7995230793593499, "learning_rate": 4.454376037878175e-06, "loss": 0.4012, "step": 5279 }, { "epoch": 1.3056379821958457, "grad_norm": 0.7987394901775831, "learning_rate": 4.454173336551586e-06, "loss": 0.375, "step": 5280 }, { "epoch": 1.305885262116716, "grad_norm": 0.8380944488049058, "learning_rate": 4.453970602193734e-06, "loss": 0.3699, "step": 5281 }, { "epoch": 1.3061325420375867, "grad_norm": 0.8140949134143729, "learning_rate": 4.453767834808048e-06, "loss": 0.3543, "step": 5282 }, { "epoch": 1.306379821958457, "grad_norm": 0.7665818576141765, "learning_rate": 4.453565034397952e-06, "loss": 0.4072, "step": 5283 }, { "epoch": 1.3066271018793274, "grad_norm": 0.7897552403122408, "learning_rate": 4.453362200966876e-06, "loss": 0.3558, "step": 5284 }, { "epoch": 1.3068743818001978, "grad_norm": 0.7760462597218717, "learning_rate": 4.453159334518248e-06, "loss": 0.377, "step": 5285 }, { "epoch": 1.3071216617210681, "grad_norm": 0.78363336153644, "learning_rate": 4.452956435055497e-06, "loss": 0.3832, "step": 5286 }, { "epoch": 1.3073689416419387, "grad_norm": 0.8294228792261029, "learning_rate": 4.452753502582053e-06, "loss": 0.3706, "step": 5287 }, { "epoch": 1.307616221562809, "grad_norm": 0.8240576465301988, "learning_rate": 4.452550537101346e-06, "loss": 0.3644, "step": 5288 }, { "epoch": 1.3078635014836795, "grad_norm": 0.8052496475214912, "learning_rate": 4.452347538616806e-06, "loss": 0.3665, "step": 5289 }, { "epoch": 1.30811078140455, "grad_norm": 0.808726411510644, "learning_rate": 4.452144507131865e-06, "loss": 0.3859, "step": 5290 }, { "epoch": 1.3083580613254204, "grad_norm": 0.7662268708011103, "learning_rate": 4.4519414426499545e-06, "loss": 0.3966, "step": 5291 }, { "epoch": 1.3086053412462908, "grad_norm": 0.8317769016379025, "learning_rate": 4.451738345174506e-06, "loss": 0.3729, "step": 5292 }, { "epoch": 1.3088526211671612, "grad_norm": 0.7671871448741324, "learning_rate": 4.451535214708955e-06, "loss": 0.4073, "step": 5293 }, { "epoch": 1.3090999010880315, "grad_norm": 0.7878604406695072, "learning_rate": 4.451332051256733e-06, "loss": 0.3596, "step": 5294 }, { "epoch": 1.3093471810089021, "grad_norm": 0.7728926723094797, "learning_rate": 4.451128854821274e-06, "loss": 0.3844, "step": 5295 }, { "epoch": 1.3095944609297725, "grad_norm": 0.779842645414283, "learning_rate": 4.450925625406014e-06, "loss": 0.3645, "step": 5296 }, { "epoch": 1.3098417408506429, "grad_norm": 0.7991838934998918, "learning_rate": 4.450722363014387e-06, "loss": 0.3746, "step": 5297 }, { "epoch": 1.3100890207715135, "grad_norm": 0.7701398441035895, "learning_rate": 4.450519067649829e-06, "loss": 0.3602, "step": 5298 }, { "epoch": 1.3103363006923838, "grad_norm": 0.76661480080521, "learning_rate": 4.450315739315776e-06, "loss": 0.4071, "step": 5299 }, { "epoch": 1.3105835806132542, "grad_norm": 0.7701231561402392, "learning_rate": 4.450112378015665e-06, "loss": 0.3681, "step": 5300 }, { "epoch": 1.3108308605341246, "grad_norm": 0.7417379064925949, "learning_rate": 4.449908983752934e-06, "loss": 0.4037, "step": 5301 }, { "epoch": 1.311078140454995, "grad_norm": 0.7800789909430765, "learning_rate": 4.44970555653102e-06, "loss": 0.3857, "step": 5302 }, { "epoch": 1.3113254203758655, "grad_norm": 0.7435321307868962, "learning_rate": 4.449502096353363e-06, "loss": 0.3732, "step": 5303 }, { "epoch": 1.311572700296736, "grad_norm": 0.7707693538578343, "learning_rate": 4.449298603223401e-06, "loss": 0.3658, "step": 5304 }, { "epoch": 1.3118199802176063, "grad_norm": 0.7999798298729143, "learning_rate": 4.4490950771445726e-06, "loss": 0.3775, "step": 5305 }, { "epoch": 1.3120672601384769, "grad_norm": 0.7894688284394069, "learning_rate": 4.44889151812032e-06, "loss": 0.3954, "step": 5306 }, { "epoch": 1.3123145400593472, "grad_norm": 0.7699173543146396, "learning_rate": 4.448687926154084e-06, "loss": 0.3571, "step": 5307 }, { "epoch": 1.3125618199802176, "grad_norm": 0.802039074987846, "learning_rate": 4.4484843012493025e-06, "loss": 0.3617, "step": 5308 }, { "epoch": 1.312809099901088, "grad_norm": 0.767882555565531, "learning_rate": 4.4482806434094214e-06, "loss": 0.3865, "step": 5309 }, { "epoch": 1.3130563798219583, "grad_norm": 0.7793933669197856, "learning_rate": 4.448076952637882e-06, "loss": 0.3629, "step": 5310 }, { "epoch": 1.313303659742829, "grad_norm": 0.7740022582642647, "learning_rate": 4.447873228938126e-06, "loss": 0.3801, "step": 5311 }, { "epoch": 1.3135509396636993, "grad_norm": 0.8044680647455889, "learning_rate": 4.447669472313598e-06, "loss": 0.3562, "step": 5312 }, { "epoch": 1.3137982195845697, "grad_norm": 0.7759387966984879, "learning_rate": 4.447465682767742e-06, "loss": 0.3786, "step": 5313 }, { "epoch": 1.3140454995054403, "grad_norm": 0.7611721982027827, "learning_rate": 4.447261860304002e-06, "loss": 0.375, "step": 5314 }, { "epoch": 1.3142927794263106, "grad_norm": 0.79032017487805, "learning_rate": 4.447058004925824e-06, "loss": 0.393, "step": 5315 }, { "epoch": 1.314540059347181, "grad_norm": 0.8051473930233694, "learning_rate": 4.446854116636653e-06, "loss": 0.3623, "step": 5316 }, { "epoch": 1.3147873392680514, "grad_norm": 0.7757379322958485, "learning_rate": 4.446650195439936e-06, "loss": 0.3766, "step": 5317 }, { "epoch": 1.3150346191889217, "grad_norm": 0.7968607094028276, "learning_rate": 4.4464462413391205e-06, "loss": 0.3716, "step": 5318 }, { "epoch": 1.3152818991097923, "grad_norm": 0.7930615268418927, "learning_rate": 4.446242254337653e-06, "loss": 0.4014, "step": 5319 }, { "epoch": 1.3155291790306627, "grad_norm": 0.8175192101450349, "learning_rate": 4.446038234438981e-06, "loss": 0.3576, "step": 5320 }, { "epoch": 1.315776458951533, "grad_norm": 0.7926925828043911, "learning_rate": 4.445834181646553e-06, "loss": 0.3833, "step": 5321 }, { "epoch": 1.3160237388724036, "grad_norm": 0.7755320455398058, "learning_rate": 4.445630095963819e-06, "loss": 0.3819, "step": 5322 }, { "epoch": 1.316271018793274, "grad_norm": 0.778604363818648, "learning_rate": 4.445425977394227e-06, "loss": 0.3821, "step": 5323 }, { "epoch": 1.3165182987141444, "grad_norm": 0.7686612851656363, "learning_rate": 4.445221825941231e-06, "loss": 0.4171, "step": 5324 }, { "epoch": 1.3167655786350148, "grad_norm": 0.7867751457845202, "learning_rate": 4.445017641608278e-06, "loss": 0.3766, "step": 5325 }, { "epoch": 1.3170128585558851, "grad_norm": 0.7739638872737151, "learning_rate": 4.444813424398821e-06, "loss": 0.3864, "step": 5326 }, { "epoch": 1.3172601384767557, "grad_norm": 0.7688588137389474, "learning_rate": 4.44460917431631e-06, "loss": 0.3804, "step": 5327 }, { "epoch": 1.317507418397626, "grad_norm": 0.7906909557155023, "learning_rate": 4.4444048913642e-06, "loss": 0.3728, "step": 5328 }, { "epoch": 1.3177546983184967, "grad_norm": 0.8024955243945009, "learning_rate": 4.444200575545943e-06, "loss": 0.3824, "step": 5329 }, { "epoch": 1.318001978239367, "grad_norm": 0.7878850043718235, "learning_rate": 4.4439962268649915e-06, "loss": 0.4063, "step": 5330 }, { "epoch": 1.3182492581602374, "grad_norm": 0.8017907077632028, "learning_rate": 4.443791845324801e-06, "loss": 0.3547, "step": 5331 }, { "epoch": 1.3184965380811078, "grad_norm": 0.7388479065776778, "learning_rate": 4.443587430928826e-06, "loss": 0.3726, "step": 5332 }, { "epoch": 1.3187438180019782, "grad_norm": 0.8136579580795593, "learning_rate": 4.443382983680521e-06, "loss": 0.3533, "step": 5333 }, { "epoch": 1.3189910979228487, "grad_norm": 0.7717735836654415, "learning_rate": 4.443178503583342e-06, "loss": 0.3738, "step": 5334 }, { "epoch": 1.3192383778437191, "grad_norm": 0.7468384654791693, "learning_rate": 4.442973990640745e-06, "loss": 0.3809, "step": 5335 }, { "epoch": 1.3194856577645895, "grad_norm": 0.775822057887279, "learning_rate": 4.4427694448561874e-06, "loss": 0.4279, "step": 5336 }, { "epoch": 1.31973293768546, "grad_norm": 0.7705660840581189, "learning_rate": 4.442564866233127e-06, "loss": 0.3979, "step": 5337 }, { "epoch": 1.3199802176063304, "grad_norm": 0.7892320125427905, "learning_rate": 4.442360254775021e-06, "loss": 0.3944, "step": 5338 }, { "epoch": 1.3202274975272008, "grad_norm": 0.8091688595270494, "learning_rate": 4.442155610485328e-06, "loss": 0.3623, "step": 5339 }, { "epoch": 1.3204747774480712, "grad_norm": 0.8036770502800618, "learning_rate": 4.441950933367508e-06, "loss": 0.3918, "step": 5340 }, { "epoch": 1.3207220573689415, "grad_norm": 0.7679862421638024, "learning_rate": 4.441746223425019e-06, "loss": 0.3922, "step": 5341 }, { "epoch": 1.3209693372898121, "grad_norm": 0.7660664767779827, "learning_rate": 4.441541480661322e-06, "loss": 0.4117, "step": 5342 }, { "epoch": 1.3212166172106825, "grad_norm": 0.8276474520920432, "learning_rate": 4.441336705079879e-06, "loss": 0.3606, "step": 5343 }, { "epoch": 1.3214638971315529, "grad_norm": 0.795632677791019, "learning_rate": 4.44113189668415e-06, "loss": 0.4012, "step": 5344 }, { "epoch": 1.3217111770524235, "grad_norm": 0.7413734022045944, "learning_rate": 4.440927055477597e-06, "loss": 0.3998, "step": 5345 }, { "epoch": 1.3219584569732938, "grad_norm": 0.7810986292002615, "learning_rate": 4.4407221814636815e-06, "loss": 0.3844, "step": 5346 }, { "epoch": 1.3222057368941642, "grad_norm": 0.751024175485607, "learning_rate": 4.440517274645868e-06, "loss": 0.3831, "step": 5347 }, { "epoch": 1.3224530168150346, "grad_norm": 0.7778142133400296, "learning_rate": 4.440312335027619e-06, "loss": 0.3577, "step": 5348 }, { "epoch": 1.322700296735905, "grad_norm": 0.8290696293139205, "learning_rate": 4.4401073626124e-06, "loss": 0.4032, "step": 5349 }, { "epoch": 1.3229475766567755, "grad_norm": 0.8016651561980968, "learning_rate": 4.4399023574036735e-06, "loss": 0.4042, "step": 5350 }, { "epoch": 1.323194856577646, "grad_norm": 0.7780816334060897, "learning_rate": 4.4396973194049065e-06, "loss": 0.3859, "step": 5351 }, { "epoch": 1.3234421364985163, "grad_norm": 0.7818976834185003, "learning_rate": 4.439492248619564e-06, "loss": 0.3639, "step": 5352 }, { "epoch": 1.3236894164193869, "grad_norm": 0.7671425266474645, "learning_rate": 4.439287145051114e-06, "loss": 0.3872, "step": 5353 }, { "epoch": 1.3239366963402572, "grad_norm": 0.7874831093050584, "learning_rate": 4.43908200870302e-06, "loss": 0.3578, "step": 5354 }, { "epoch": 1.3241839762611276, "grad_norm": 0.7689859581255031, "learning_rate": 4.438876839578751e-06, "loss": 0.4222, "step": 5355 }, { "epoch": 1.324431256181998, "grad_norm": 0.7925050873195252, "learning_rate": 4.438671637681775e-06, "loss": 0.3583, "step": 5356 }, { "epoch": 1.3246785361028683, "grad_norm": 0.7685936608514004, "learning_rate": 4.438466403015562e-06, "loss": 0.3802, "step": 5357 }, { "epoch": 1.324925816023739, "grad_norm": 0.7965689787920817, "learning_rate": 4.438261135583578e-06, "loss": 0.3931, "step": 5358 }, { "epoch": 1.3251730959446093, "grad_norm": 0.8111648001046629, "learning_rate": 4.438055835389295e-06, "loss": 0.3743, "step": 5359 }, { "epoch": 1.3254203758654797, "grad_norm": 0.7828485239567465, "learning_rate": 4.437850502436184e-06, "loss": 0.4007, "step": 5360 }, { "epoch": 1.3256676557863503, "grad_norm": 0.7834661239339817, "learning_rate": 4.437645136727713e-06, "loss": 0.3708, "step": 5361 }, { "epoch": 1.3259149357072206, "grad_norm": 0.8420405737922302, "learning_rate": 4.437439738267354e-06, "loss": 0.3338, "step": 5362 }, { "epoch": 1.326162215628091, "grad_norm": 0.803957195677449, "learning_rate": 4.43723430705858e-06, "loss": 0.4077, "step": 5363 }, { "epoch": 1.3264094955489614, "grad_norm": 0.8044398277012044, "learning_rate": 4.437028843104863e-06, "loss": 0.3974, "step": 5364 }, { "epoch": 1.3266567754698317, "grad_norm": 0.7945996381566278, "learning_rate": 4.436823346409675e-06, "loss": 0.3759, "step": 5365 }, { "epoch": 1.3269040553907023, "grad_norm": 0.770391888151092, "learning_rate": 4.43661781697649e-06, "loss": 0.3818, "step": 5366 }, { "epoch": 1.3271513353115727, "grad_norm": 0.7615838868032776, "learning_rate": 4.436412254808784e-06, "loss": 0.3748, "step": 5367 }, { "epoch": 1.327398615232443, "grad_norm": 0.7928921822428737, "learning_rate": 4.436206659910028e-06, "loss": 0.3756, "step": 5368 }, { "epoch": 1.3276458951533137, "grad_norm": 0.8125538720244616, "learning_rate": 4.4360010322837e-06, "loss": 0.3632, "step": 5369 }, { "epoch": 1.327893175074184, "grad_norm": 0.7770932654034518, "learning_rate": 4.435795371933274e-06, "loss": 0.3791, "step": 5370 }, { "epoch": 1.3281404549950544, "grad_norm": 0.7641759390368759, "learning_rate": 4.435589678862227e-06, "loss": 0.3905, "step": 5371 }, { "epoch": 1.3283877349159248, "grad_norm": 0.8018283313317861, "learning_rate": 4.435383953074036e-06, "loss": 0.3603, "step": 5372 }, { "epoch": 1.3286350148367951, "grad_norm": 0.7807417504175191, "learning_rate": 4.435178194572178e-06, "loss": 0.3676, "step": 5373 }, { "epoch": 1.3288822947576657, "grad_norm": 0.8076773013765459, "learning_rate": 4.434972403360132e-06, "loss": 0.3472, "step": 5374 }, { "epoch": 1.329129574678536, "grad_norm": 0.7852686489676102, "learning_rate": 4.434766579441375e-06, "loss": 0.3473, "step": 5375 }, { "epoch": 1.3293768545994065, "grad_norm": 0.7571102798421531, "learning_rate": 4.434560722819387e-06, "loss": 0.3809, "step": 5376 }, { "epoch": 1.329624134520277, "grad_norm": 0.8019533034873996, "learning_rate": 4.4343548334976465e-06, "loss": 0.3783, "step": 5377 }, { "epoch": 1.3298714144411474, "grad_norm": 0.7879172580253602, "learning_rate": 4.434148911479634e-06, "loss": 0.3762, "step": 5378 }, { "epoch": 1.3301186943620178, "grad_norm": 0.7474258622857598, "learning_rate": 4.433942956768831e-06, "loss": 0.391, "step": 5379 }, { "epoch": 1.3303659742828882, "grad_norm": 0.7838426323523046, "learning_rate": 4.433736969368718e-06, "loss": 0.3715, "step": 5380 }, { "epoch": 1.3306132542037585, "grad_norm": 0.7705991086652756, "learning_rate": 4.433530949282776e-06, "loss": 0.3759, "step": 5381 }, { "epoch": 1.3308605341246291, "grad_norm": 0.7799285561387074, "learning_rate": 4.4333248965144904e-06, "loss": 0.3694, "step": 5382 }, { "epoch": 1.3311078140454995, "grad_norm": 0.7612735070129444, "learning_rate": 4.4331188110673405e-06, "loss": 0.3978, "step": 5383 }, { "epoch": 1.3313550939663699, "grad_norm": 0.747196875225221, "learning_rate": 4.432912692944812e-06, "loss": 0.4025, "step": 5384 }, { "epoch": 1.3316023738872405, "grad_norm": 0.7816321369461575, "learning_rate": 4.432706542150388e-06, "loss": 0.3754, "step": 5385 }, { "epoch": 1.3318496538081108, "grad_norm": 0.792342952247383, "learning_rate": 4.432500358687554e-06, "loss": 0.3815, "step": 5386 }, { "epoch": 1.3320969337289812, "grad_norm": 0.8129260451831943, "learning_rate": 4.432294142559794e-06, "loss": 0.3855, "step": 5387 }, { "epoch": 1.3323442136498516, "grad_norm": 0.7724204533693804, "learning_rate": 4.432087893770594e-06, "loss": 0.3742, "step": 5388 }, { "epoch": 1.332591493570722, "grad_norm": 0.7793515945114506, "learning_rate": 4.43188161232344e-06, "loss": 0.3887, "step": 5389 }, { "epoch": 1.3328387734915925, "grad_norm": 0.7837468131118116, "learning_rate": 4.431675298221818e-06, "loss": 0.3916, "step": 5390 }, { "epoch": 1.333086053412463, "grad_norm": 0.7659881109293293, "learning_rate": 4.431468951469218e-06, "loss": 0.3858, "step": 5391 }, { "epoch": 1.3333333333333333, "grad_norm": 0.8334594980791707, "learning_rate": 4.431262572069125e-06, "loss": 0.3879, "step": 5392 }, { "epoch": 1.3335806132542039, "grad_norm": 0.7757706358805326, "learning_rate": 4.431056160025029e-06, "loss": 0.3723, "step": 5393 }, { "epoch": 1.3338278931750742, "grad_norm": 0.7479587070843311, "learning_rate": 4.430849715340419e-06, "loss": 0.3924, "step": 5394 }, { "epoch": 1.3340751730959446, "grad_norm": 0.7647183147509077, "learning_rate": 4.430643238018784e-06, "loss": 0.3619, "step": 5395 }, { "epoch": 1.334322453016815, "grad_norm": 0.7913775081222889, "learning_rate": 4.430436728063613e-06, "loss": 0.3869, "step": 5396 }, { "epoch": 1.3345697329376853, "grad_norm": 0.7870114612677891, "learning_rate": 4.430230185478399e-06, "loss": 0.3976, "step": 5397 }, { "epoch": 1.334817012858556, "grad_norm": 0.7813362445020718, "learning_rate": 4.430023610266631e-06, "loss": 0.3806, "step": 5398 }, { "epoch": 1.3350642927794263, "grad_norm": 0.7737272096057366, "learning_rate": 4.429817002431803e-06, "loss": 0.3898, "step": 5399 }, { "epoch": 1.3353115727002967, "grad_norm": 0.8039583925538684, "learning_rate": 4.4296103619774046e-06, "loss": 0.3561, "step": 5400 }, { "epoch": 1.3355588526211672, "grad_norm": 0.7973358188481623, "learning_rate": 4.429403688906931e-06, "loss": 0.403, "step": 5401 }, { "epoch": 1.3358061325420376, "grad_norm": 0.7768183713110066, "learning_rate": 4.429196983223874e-06, "loss": 0.3971, "step": 5402 }, { "epoch": 1.336053412462908, "grad_norm": 0.8107106050676828, "learning_rate": 4.428990244931728e-06, "loss": 0.3548, "step": 5403 }, { "epoch": 1.3363006923837784, "grad_norm": 0.8139704553544898, "learning_rate": 4.428783474033988e-06, "loss": 0.3915, "step": 5404 }, { "epoch": 1.336547972304649, "grad_norm": 0.7781862408803109, "learning_rate": 4.428576670534148e-06, "loss": 0.3724, "step": 5405 }, { "epoch": 1.3367952522255193, "grad_norm": 0.770782725431163, "learning_rate": 4.428369834435705e-06, "loss": 0.3611, "step": 5406 }, { "epoch": 1.3370425321463897, "grad_norm": 0.7552919169910439, "learning_rate": 4.428162965742154e-06, "loss": 0.3876, "step": 5407 }, { "epoch": 1.3372898120672603, "grad_norm": 0.8271241951000634, "learning_rate": 4.427956064456992e-06, "loss": 0.396, "step": 5408 }, { "epoch": 1.3375370919881306, "grad_norm": 0.8075775002656523, "learning_rate": 4.427749130583716e-06, "loss": 0.3694, "step": 5409 }, { "epoch": 1.337784371909001, "grad_norm": 0.8093878224237593, "learning_rate": 4.427542164125825e-06, "loss": 0.364, "step": 5410 }, { "epoch": 1.3380316518298714, "grad_norm": 0.8042609674036927, "learning_rate": 4.427335165086815e-06, "loss": 0.3747, "step": 5411 }, { "epoch": 1.3382789317507418, "grad_norm": 0.8091565391747276, "learning_rate": 4.427128133470187e-06, "loss": 0.3855, "step": 5412 }, { "epoch": 1.3385262116716123, "grad_norm": 0.7852724398317525, "learning_rate": 4.4269210692794396e-06, "loss": 0.3707, "step": 5413 }, { "epoch": 1.3387734915924827, "grad_norm": 0.7888140736106173, "learning_rate": 4.426713972518073e-06, "loss": 0.373, "step": 5414 }, { "epoch": 1.339020771513353, "grad_norm": 0.7418255221024851, "learning_rate": 4.426506843189588e-06, "loss": 0.3912, "step": 5415 }, { "epoch": 1.3392680514342237, "grad_norm": 0.7751159972765319, "learning_rate": 4.426299681297485e-06, "loss": 0.3587, "step": 5416 }, { "epoch": 1.339515331355094, "grad_norm": 0.7979620764574475, "learning_rate": 4.426092486845266e-06, "loss": 0.3667, "step": 5417 }, { "epoch": 1.3397626112759644, "grad_norm": 0.7838846504484972, "learning_rate": 4.4258852598364334e-06, "loss": 0.3665, "step": 5418 }, { "epoch": 1.3400098911968348, "grad_norm": 0.7861524371908795, "learning_rate": 4.425678000274489e-06, "loss": 0.36, "step": 5419 }, { "epoch": 1.3402571711177051, "grad_norm": 0.8280350689300539, "learning_rate": 4.425470708162938e-06, "loss": 0.3705, "step": 5420 }, { "epoch": 1.3405044510385757, "grad_norm": 0.7755103096348223, "learning_rate": 4.425263383505283e-06, "loss": 0.3587, "step": 5421 }, { "epoch": 1.340751730959446, "grad_norm": 0.775359457045456, "learning_rate": 4.425056026305028e-06, "loss": 0.4106, "step": 5422 }, { "epoch": 1.3409990108803165, "grad_norm": 0.7633936612323781, "learning_rate": 4.424848636565678e-06, "loss": 0.41, "step": 5423 }, { "epoch": 1.341246290801187, "grad_norm": 0.7826298135002148, "learning_rate": 4.424641214290739e-06, "loss": 0.3539, "step": 5424 }, { "epoch": 1.3414935707220574, "grad_norm": 0.7794009043215739, "learning_rate": 4.424433759483717e-06, "loss": 0.3796, "step": 5425 }, { "epoch": 1.3417408506429278, "grad_norm": 0.7861208707342839, "learning_rate": 4.424226272148119e-06, "loss": 0.3969, "step": 5426 }, { "epoch": 1.3419881305637982, "grad_norm": 0.8018109204877015, "learning_rate": 4.424018752287452e-06, "loss": 0.3477, "step": 5427 }, { "epoch": 1.3422354104846685, "grad_norm": 0.7895813799211288, "learning_rate": 4.423811199905223e-06, "loss": 0.4194, "step": 5428 }, { "epoch": 1.3424826904055391, "grad_norm": 0.7989995659626887, "learning_rate": 4.42360361500494e-06, "loss": 0.3608, "step": 5429 }, { "epoch": 1.3427299703264095, "grad_norm": 0.7943914545713879, "learning_rate": 4.423395997590113e-06, "loss": 0.3913, "step": 5430 }, { "epoch": 1.3429772502472799, "grad_norm": 0.7978377941107173, "learning_rate": 4.423188347664251e-06, "loss": 0.4037, "step": 5431 }, { "epoch": 1.3432245301681505, "grad_norm": 0.7831232004480012, "learning_rate": 4.422980665230863e-06, "loss": 0.3958, "step": 5432 }, { "epoch": 1.3434718100890208, "grad_norm": 0.7735872865111836, "learning_rate": 4.422772950293461e-06, "loss": 0.3876, "step": 5433 }, { "epoch": 1.3437190900098912, "grad_norm": 0.7907747280205418, "learning_rate": 4.4225652028555545e-06, "loss": 0.3713, "step": 5434 }, { "epoch": 1.3439663699307616, "grad_norm": 0.8048698724139385, "learning_rate": 4.422357422920656e-06, "loss": 0.3744, "step": 5435 }, { "epoch": 1.344213649851632, "grad_norm": 0.7746306325509, "learning_rate": 4.422149610492277e-06, "loss": 0.387, "step": 5436 }, { "epoch": 1.3444609297725025, "grad_norm": 0.7987895345635784, "learning_rate": 4.421941765573931e-06, "loss": 0.3874, "step": 5437 }, { "epoch": 1.344708209693373, "grad_norm": 0.7759097187714444, "learning_rate": 4.421733888169129e-06, "loss": 0.3826, "step": 5438 }, { "epoch": 1.3449554896142433, "grad_norm": 0.7759893029354509, "learning_rate": 4.421525978281388e-06, "loss": 0.3895, "step": 5439 }, { "epoch": 1.3452027695351139, "grad_norm": 0.8388578346811251, "learning_rate": 4.421318035914219e-06, "loss": 0.3892, "step": 5440 }, { "epoch": 1.3454500494559842, "grad_norm": 0.7774702573650007, "learning_rate": 4.42111006107114e-06, "loss": 0.3688, "step": 5441 }, { "epoch": 1.3456973293768546, "grad_norm": 0.8007852465954002, "learning_rate": 4.420902053755664e-06, "loss": 0.3879, "step": 5442 }, { "epoch": 1.345944609297725, "grad_norm": 0.7469471932039948, "learning_rate": 4.420694013971308e-06, "loss": 0.3894, "step": 5443 }, { "epoch": 1.3461918892185953, "grad_norm": 0.8012831145899852, "learning_rate": 4.420485941721588e-06, "loss": 0.359, "step": 5444 }, { "epoch": 1.346439169139466, "grad_norm": 0.7856976598230031, "learning_rate": 4.420277837010021e-06, "loss": 0.3628, "step": 5445 }, { "epoch": 1.3466864490603363, "grad_norm": 0.7887415160468156, "learning_rate": 4.420069699840125e-06, "loss": 0.3704, "step": 5446 }, { "epoch": 1.3469337289812067, "grad_norm": 0.7648550608976985, "learning_rate": 4.419861530215418e-06, "loss": 0.3843, "step": 5447 }, { "epoch": 1.3471810089020773, "grad_norm": 0.7864348795761739, "learning_rate": 4.4196533281394186e-06, "loss": 0.3675, "step": 5448 }, { "epoch": 1.3474282888229476, "grad_norm": 0.7977137116110643, "learning_rate": 4.419445093615646e-06, "loss": 0.356, "step": 5449 }, { "epoch": 1.347675568743818, "grad_norm": 0.7620038983073131, "learning_rate": 4.419236826647621e-06, "loss": 0.3814, "step": 5450 }, { "epoch": 1.3479228486646884, "grad_norm": 0.7699798732119217, "learning_rate": 4.419028527238862e-06, "loss": 0.3495, "step": 5451 }, { "epoch": 1.3481701285855587, "grad_norm": 0.7904031593369844, "learning_rate": 4.418820195392891e-06, "loss": 0.3719, "step": 5452 }, { "epoch": 1.3484174085064293, "grad_norm": 0.7847402433873509, "learning_rate": 4.418611831113229e-06, "loss": 0.3606, "step": 5453 }, { "epoch": 1.3486646884272997, "grad_norm": 0.8398178564205545, "learning_rate": 4.418403434403399e-06, "loss": 0.3844, "step": 5454 }, { "epoch": 1.34891196834817, "grad_norm": 0.7788333081537959, "learning_rate": 4.418195005266922e-06, "loss": 0.3695, "step": 5455 }, { "epoch": 1.3491592482690407, "grad_norm": 0.7983987662442377, "learning_rate": 4.417986543707322e-06, "loss": 0.3835, "step": 5456 }, { "epoch": 1.349406528189911, "grad_norm": 0.7583735312653431, "learning_rate": 4.417778049728123e-06, "loss": 0.3928, "step": 5457 }, { "epoch": 1.3496538081107814, "grad_norm": 0.8113080790565267, "learning_rate": 4.417569523332848e-06, "loss": 0.3391, "step": 5458 }, { "epoch": 1.3499010880316518, "grad_norm": 0.780077957796761, "learning_rate": 4.417360964525023e-06, "loss": 0.3679, "step": 5459 }, { "epoch": 1.3501483679525221, "grad_norm": 0.7785746279203721, "learning_rate": 4.4171523733081715e-06, "loss": 0.3787, "step": 5460 }, { "epoch": 1.3503956478733927, "grad_norm": 0.793804135099321, "learning_rate": 4.41694374968582e-06, "loss": 0.3877, "step": 5461 }, { "epoch": 1.350642927794263, "grad_norm": 0.80028699489228, "learning_rate": 4.416735093661496e-06, "loss": 0.3856, "step": 5462 }, { "epoch": 1.3508902077151335, "grad_norm": 0.7980641735093809, "learning_rate": 4.416526405238726e-06, "loss": 0.3584, "step": 5463 }, { "epoch": 1.351137487636004, "grad_norm": 0.7850387096551562, "learning_rate": 4.416317684421036e-06, "loss": 0.3504, "step": 5464 }, { "epoch": 1.3513847675568744, "grad_norm": 0.7605638998979523, "learning_rate": 4.416108931211956e-06, "loss": 0.3556, "step": 5465 }, { "epoch": 1.3516320474777448, "grad_norm": 0.7425943676214893, "learning_rate": 4.415900145615013e-06, "loss": 0.3816, "step": 5466 }, { "epoch": 1.3518793273986152, "grad_norm": 0.7704561635482716, "learning_rate": 4.415691327633737e-06, "loss": 0.3712, "step": 5467 }, { "epoch": 1.3521266073194855, "grad_norm": 0.7898534941953664, "learning_rate": 4.415482477271657e-06, "loss": 0.4074, "step": 5468 }, { "epoch": 1.3523738872403561, "grad_norm": 0.770852917276948, "learning_rate": 4.415273594532303e-06, "loss": 0.4025, "step": 5469 }, { "epoch": 1.3526211671612265, "grad_norm": 0.743513153019515, "learning_rate": 4.415064679419207e-06, "loss": 0.3777, "step": 5470 }, { "epoch": 1.3528684470820969, "grad_norm": 0.7580443005307244, "learning_rate": 4.4148557319359e-06, "loss": 0.3854, "step": 5471 }, { "epoch": 1.3531157270029674, "grad_norm": 0.77977639943339, "learning_rate": 4.414646752085912e-06, "loss": 0.3787, "step": 5472 }, { "epoch": 1.3533630069238378, "grad_norm": 0.7735726280234481, "learning_rate": 4.414437739872778e-06, "loss": 0.4096, "step": 5473 }, { "epoch": 1.3536102868447082, "grad_norm": 0.7866483529906524, "learning_rate": 4.414228695300029e-06, "loss": 0.3987, "step": 5474 }, { "epoch": 1.3538575667655786, "grad_norm": 0.7649389446317472, "learning_rate": 4.414019618371199e-06, "loss": 0.3715, "step": 5475 }, { "epoch": 1.354104846686449, "grad_norm": 0.7932060420134758, "learning_rate": 4.413810509089822e-06, "loss": 0.3594, "step": 5476 }, { "epoch": 1.3543521266073195, "grad_norm": 0.78921738180382, "learning_rate": 4.413601367459432e-06, "loss": 0.3925, "step": 5477 }, { "epoch": 1.3545994065281899, "grad_norm": 0.7945781688051834, "learning_rate": 4.413392193483567e-06, "loss": 0.3986, "step": 5478 }, { "epoch": 1.3548466864490603, "grad_norm": 0.765576707042195, "learning_rate": 4.413182987165758e-06, "loss": 0.3604, "step": 5479 }, { "epoch": 1.3550939663699308, "grad_norm": 0.8042174975238197, "learning_rate": 4.4129737485095445e-06, "loss": 0.374, "step": 5480 }, { "epoch": 1.3553412462908012, "grad_norm": 0.7712307461838376, "learning_rate": 4.412764477518462e-06, "loss": 0.4028, "step": 5481 }, { "epoch": 1.3555885262116716, "grad_norm": 0.7839229638739086, "learning_rate": 4.412555174196049e-06, "loss": 0.3588, "step": 5482 }, { "epoch": 1.355835806132542, "grad_norm": 0.7855815041334804, "learning_rate": 4.412345838545842e-06, "loss": 0.3777, "step": 5483 }, { "epoch": 1.3560830860534125, "grad_norm": 0.7625933429583475, "learning_rate": 4.41213647057138e-06, "loss": 0.3623, "step": 5484 }, { "epoch": 1.356330365974283, "grad_norm": 0.7879629146203526, "learning_rate": 4.4119270702762016e-06, "loss": 0.3622, "step": 5485 }, { "epoch": 1.3565776458951533, "grad_norm": 0.80350147756139, "learning_rate": 4.411717637663847e-06, "loss": 0.3831, "step": 5486 }, { "epoch": 1.3568249258160239, "grad_norm": 0.8019617944955559, "learning_rate": 4.411508172737855e-06, "loss": 0.4078, "step": 5487 }, { "epoch": 1.3570722057368942, "grad_norm": 0.7999484166581802, "learning_rate": 4.411298675501767e-06, "loss": 0.3498, "step": 5488 }, { "epoch": 1.3573194856577646, "grad_norm": 0.7907360914473291, "learning_rate": 4.411089145959125e-06, "loss": 0.3575, "step": 5489 }, { "epoch": 1.357566765578635, "grad_norm": 0.7804468940050673, "learning_rate": 4.4108795841134686e-06, "loss": 0.3946, "step": 5490 }, { "epoch": 1.3578140454995054, "grad_norm": 0.7774972978652573, "learning_rate": 4.410669989968341e-06, "loss": 0.3701, "step": 5491 }, { "epoch": 1.358061325420376, "grad_norm": 0.8031147855445966, "learning_rate": 4.4104603635272855e-06, "loss": 0.3862, "step": 5492 }, { "epoch": 1.3583086053412463, "grad_norm": 0.779699740350415, "learning_rate": 4.410250704793845e-06, "loss": 0.3816, "step": 5493 }, { "epoch": 1.3585558852621167, "grad_norm": 0.7637285791504776, "learning_rate": 4.410041013771563e-06, "loss": 0.3971, "step": 5494 }, { "epoch": 1.3588031651829873, "grad_norm": 0.7960949268567825, "learning_rate": 4.409831290463985e-06, "loss": 0.3506, "step": 5495 }, { "epoch": 1.3590504451038576, "grad_norm": 0.8052103817753233, "learning_rate": 4.409621534874654e-06, "loss": 0.3798, "step": 5496 }, { "epoch": 1.359297725024728, "grad_norm": 0.7563532622366238, "learning_rate": 4.409411747007118e-06, "loss": 0.3798, "step": 5497 }, { "epoch": 1.3595450049455984, "grad_norm": 0.7397898220678248, "learning_rate": 4.40920192686492e-06, "loss": 0.4241, "step": 5498 }, { "epoch": 1.3597922848664687, "grad_norm": 0.7990944990185733, "learning_rate": 4.408992074451609e-06, "loss": 0.4026, "step": 5499 }, { "epoch": 1.3600395647873393, "grad_norm": 0.7685725717300861, "learning_rate": 4.408782189770732e-06, "loss": 0.4003, "step": 5500 }, { "epoch": 1.3602868447082097, "grad_norm": 0.7500114657256781, "learning_rate": 4.408572272825835e-06, "loss": 0.3626, "step": 5501 }, { "epoch": 1.36053412462908, "grad_norm": 0.7500782265077065, "learning_rate": 4.408362323620467e-06, "loss": 0.4083, "step": 5502 }, { "epoch": 1.3607814045499507, "grad_norm": 0.8050237294068932, "learning_rate": 4.408152342158177e-06, "loss": 0.3414, "step": 5503 }, { "epoch": 1.361028684470821, "grad_norm": 0.7625852810043874, "learning_rate": 4.407942328442515e-06, "loss": 0.3732, "step": 5504 }, { "epoch": 1.3612759643916914, "grad_norm": 0.7945774165692356, "learning_rate": 4.4077322824770305e-06, "loss": 0.3845, "step": 5505 }, { "epoch": 1.3615232443125618, "grad_norm": 0.7825091637517715, "learning_rate": 4.407522204265272e-06, "loss": 0.3847, "step": 5506 }, { "epoch": 1.3617705242334321, "grad_norm": 0.8405063949553303, "learning_rate": 4.407312093810794e-06, "loss": 0.3461, "step": 5507 }, { "epoch": 1.3620178041543027, "grad_norm": 0.7733563245686609, "learning_rate": 4.407101951117144e-06, "loss": 0.3843, "step": 5508 }, { "epoch": 1.362265084075173, "grad_norm": 0.7402335326071758, "learning_rate": 4.406891776187877e-06, "loss": 0.3632, "step": 5509 }, { "epoch": 1.3625123639960435, "grad_norm": 0.7794573550103879, "learning_rate": 4.406681569026544e-06, "loss": 0.3661, "step": 5510 }, { "epoch": 1.362759643916914, "grad_norm": 0.7950362179237567, "learning_rate": 4.406471329636699e-06, "loss": 0.3761, "step": 5511 }, { "epoch": 1.3630069238377844, "grad_norm": 0.7568067583954027, "learning_rate": 4.406261058021895e-06, "loss": 0.3923, "step": 5512 }, { "epoch": 1.3632542037586548, "grad_norm": 0.7836486092689748, "learning_rate": 4.4060507541856865e-06, "loss": 0.3702, "step": 5513 }, { "epoch": 1.3635014836795252, "grad_norm": 0.755248286656055, "learning_rate": 4.405840418131629e-06, "loss": 0.3882, "step": 5514 }, { "epoch": 1.3637487636003955, "grad_norm": 0.7666641352392564, "learning_rate": 4.405630049863276e-06, "loss": 0.3869, "step": 5515 }, { "epoch": 1.3639960435212661, "grad_norm": 0.7931288371216416, "learning_rate": 4.405419649384186e-06, "loss": 0.3855, "step": 5516 }, { "epoch": 1.3642433234421365, "grad_norm": 0.7760859850218942, "learning_rate": 4.405209216697912e-06, "loss": 0.378, "step": 5517 }, { "epoch": 1.3644906033630069, "grad_norm": 0.7903573216200368, "learning_rate": 4.404998751808014e-06, "loss": 0.3471, "step": 5518 }, { "epoch": 1.3647378832838775, "grad_norm": 0.7746693831328718, "learning_rate": 4.404788254718047e-06, "loss": 0.3828, "step": 5519 }, { "epoch": 1.3649851632047478, "grad_norm": 0.7471982771193783, "learning_rate": 4.4045777254315715e-06, "loss": 0.3982, "step": 5520 }, { "epoch": 1.3652324431256182, "grad_norm": 0.7534701488780862, "learning_rate": 4.404367163952145e-06, "loss": 0.3852, "step": 5521 }, { "epoch": 1.3654797230464886, "grad_norm": 0.7623595146776082, "learning_rate": 4.404156570283325e-06, "loss": 0.364, "step": 5522 }, { "epoch": 1.365727002967359, "grad_norm": 0.7449290849710298, "learning_rate": 4.403945944428673e-06, "loss": 0.3789, "step": 5523 }, { "epoch": 1.3659742828882295, "grad_norm": 0.7684276764928973, "learning_rate": 4.4037352863917495e-06, "loss": 0.3799, "step": 5524 }, { "epoch": 1.3662215628091, "grad_norm": 0.781744435510269, "learning_rate": 4.4035245961761135e-06, "loss": 0.3715, "step": 5525 }, { "epoch": 1.3664688427299703, "grad_norm": 0.8010540871950249, "learning_rate": 4.403313873785328e-06, "loss": 0.3738, "step": 5526 }, { "epoch": 1.3667161226508409, "grad_norm": 0.769688122480626, "learning_rate": 4.403103119222954e-06, "loss": 0.3654, "step": 5527 }, { "epoch": 1.3669634025717112, "grad_norm": 0.7647084073166552, "learning_rate": 4.402892332492553e-06, "loss": 0.3824, "step": 5528 }, { "epoch": 1.3672106824925816, "grad_norm": 0.7839823018422508, "learning_rate": 4.40268151359769e-06, "loss": 0.3635, "step": 5529 }, { "epoch": 1.367457962413452, "grad_norm": 0.8046034658491099, "learning_rate": 4.402470662541927e-06, "loss": 0.3689, "step": 5530 }, { "epoch": 1.3677052423343223, "grad_norm": 0.7522348530540152, "learning_rate": 4.402259779328828e-06, "loss": 0.3854, "step": 5531 }, { "epoch": 1.367952522255193, "grad_norm": 0.8060727888429818, "learning_rate": 4.402048863961958e-06, "loss": 0.3523, "step": 5532 }, { "epoch": 1.3681998021760633, "grad_norm": 0.8152092853361814, "learning_rate": 4.401837916444882e-06, "loss": 0.3861, "step": 5533 }, { "epoch": 1.3684470820969337, "grad_norm": 0.8008429599581622, "learning_rate": 4.401626936781166e-06, "loss": 0.3784, "step": 5534 }, { "epoch": 1.3686943620178043, "grad_norm": 0.7461347966092977, "learning_rate": 4.401415924974375e-06, "loss": 0.3765, "step": 5535 }, { "epoch": 1.3689416419386746, "grad_norm": 0.7971884595468837, "learning_rate": 4.401204881028076e-06, "loss": 0.3829, "step": 5536 }, { "epoch": 1.369188921859545, "grad_norm": 0.7306733911711513, "learning_rate": 4.400993804945838e-06, "loss": 0.3984, "step": 5537 }, { "epoch": 1.3694362017804154, "grad_norm": 0.8033564707405734, "learning_rate": 4.400782696731228e-06, "loss": 0.3757, "step": 5538 }, { "epoch": 1.3696834817012857, "grad_norm": 0.8026528611514775, "learning_rate": 4.400571556387813e-06, "loss": 0.3642, "step": 5539 }, { "epoch": 1.3699307616221563, "grad_norm": 0.8155696697568415, "learning_rate": 4.400360383919162e-06, "loss": 0.3481, "step": 5540 }, { "epoch": 1.3701780415430267, "grad_norm": 0.7909127278179476, "learning_rate": 4.400149179328846e-06, "loss": 0.3615, "step": 5541 }, { "epoch": 1.370425321463897, "grad_norm": 0.7887400837273165, "learning_rate": 4.3999379426204345e-06, "loss": 0.372, "step": 5542 }, { "epoch": 1.3706726013847677, "grad_norm": 0.7757047014262383, "learning_rate": 4.399726673797498e-06, "loss": 0.3994, "step": 5543 }, { "epoch": 1.370919881305638, "grad_norm": 0.784583648523282, "learning_rate": 4.399515372863606e-06, "loss": 0.4178, "step": 5544 }, { "epoch": 1.3711671612265084, "grad_norm": 0.7915074909008394, "learning_rate": 4.399304039822333e-06, "loss": 0.3642, "step": 5545 }, { "epoch": 1.3714144411473788, "grad_norm": 0.7631471890918473, "learning_rate": 4.399092674677249e-06, "loss": 0.3706, "step": 5546 }, { "epoch": 1.3716617210682491, "grad_norm": 0.7926671602178493, "learning_rate": 4.398881277431927e-06, "loss": 0.3555, "step": 5547 }, { "epoch": 1.3719090009891197, "grad_norm": 0.7489080554980195, "learning_rate": 4.39866984808994e-06, "loss": 0.3975, "step": 5548 }, { "epoch": 1.37215628090999, "grad_norm": 0.7715191337070184, "learning_rate": 4.398458386654862e-06, "loss": 0.39, "step": 5549 }, { "epoch": 1.3724035608308605, "grad_norm": 0.7828804462881461, "learning_rate": 4.398246893130269e-06, "loss": 0.3913, "step": 5550 }, { "epoch": 1.372650840751731, "grad_norm": 0.8021239194577843, "learning_rate": 4.398035367519734e-06, "loss": 0.3405, "step": 5551 }, { "epoch": 1.3728981206726014, "grad_norm": 0.7881486190611002, "learning_rate": 4.397823809826832e-06, "loss": 0.3665, "step": 5552 }, { "epoch": 1.3731454005934718, "grad_norm": 0.7753957374744556, "learning_rate": 4.397612220055141e-06, "loss": 0.3935, "step": 5553 }, { "epoch": 1.3733926805143422, "grad_norm": 0.8060493068138441, "learning_rate": 4.397400598208235e-06, "loss": 0.4001, "step": 5554 }, { "epoch": 1.3736399604352125, "grad_norm": 0.7806567232886567, "learning_rate": 4.3971889442896935e-06, "loss": 0.3743, "step": 5555 }, { "epoch": 1.3738872403560831, "grad_norm": 0.7767277032043856, "learning_rate": 4.396977258303091e-06, "loss": 0.3826, "step": 5556 }, { "epoch": 1.3741345202769535, "grad_norm": 0.7749298543057168, "learning_rate": 4.396765540252009e-06, "loss": 0.3381, "step": 5557 }, { "epoch": 1.3743818001978239, "grad_norm": 0.7884320499990423, "learning_rate": 4.3965537901400246e-06, "loss": 0.3811, "step": 5558 }, { "epoch": 1.3746290801186944, "grad_norm": 0.7546169876224119, "learning_rate": 4.396342007970716e-06, "loss": 0.3699, "step": 5559 }, { "epoch": 1.3748763600395648, "grad_norm": 0.7475468210097842, "learning_rate": 4.396130193747665e-06, "loss": 0.3599, "step": 5560 }, { "epoch": 1.3751236399604352, "grad_norm": 0.7870154020330912, "learning_rate": 4.3959183474744506e-06, "loss": 0.3588, "step": 5561 }, { "epoch": 1.3753709198813056, "grad_norm": 0.821655681869217, "learning_rate": 4.395706469154654e-06, "loss": 0.3658, "step": 5562 }, { "epoch": 1.3756181998021761, "grad_norm": 0.7674718569968305, "learning_rate": 4.395494558791856e-06, "loss": 0.351, "step": 5563 }, { "epoch": 1.3758654797230465, "grad_norm": 0.7990551579783962, "learning_rate": 4.395282616389639e-06, "loss": 0.3405, "step": 5564 }, { "epoch": 1.3761127596439169, "grad_norm": 0.7733074075019286, "learning_rate": 4.395070641951587e-06, "loss": 0.3816, "step": 5565 }, { "epoch": 1.3763600395647875, "grad_norm": 0.7769796923797068, "learning_rate": 4.394858635481279e-06, "loss": 0.3646, "step": 5566 }, { "epoch": 1.3766073194856578, "grad_norm": 0.8133861474740832, "learning_rate": 4.394646596982302e-06, "loss": 0.3798, "step": 5567 }, { "epoch": 1.3768545994065282, "grad_norm": 0.7704260533466177, "learning_rate": 4.394434526458239e-06, "loss": 0.3684, "step": 5568 }, { "epoch": 1.3771018793273986, "grad_norm": 0.7891891319389784, "learning_rate": 4.394222423912674e-06, "loss": 0.3923, "step": 5569 }, { "epoch": 1.377349159248269, "grad_norm": 0.7291559780832642, "learning_rate": 4.394010289349194e-06, "loss": 0.3923, "step": 5570 }, { "epoch": 1.3775964391691395, "grad_norm": 0.7703269431528448, "learning_rate": 4.393798122771382e-06, "loss": 0.3637, "step": 5571 }, { "epoch": 1.37784371909001, "grad_norm": 0.7697035007277055, "learning_rate": 4.393585924182827e-06, "loss": 0.4052, "step": 5572 }, { "epoch": 1.3780909990108803, "grad_norm": 0.7881057433242873, "learning_rate": 4.393373693587114e-06, "loss": 0.3966, "step": 5573 }, { "epoch": 1.3783382789317509, "grad_norm": 0.7741001403704066, "learning_rate": 4.39316143098783e-06, "loss": 0.3864, "step": 5574 }, { "epoch": 1.3785855588526212, "grad_norm": 0.8018051120926664, "learning_rate": 4.392949136388565e-06, "loss": 0.3922, "step": 5575 }, { "epoch": 1.3788328387734916, "grad_norm": 0.784991405923686, "learning_rate": 4.3927368097929045e-06, "loss": 0.3797, "step": 5576 }, { "epoch": 1.379080118694362, "grad_norm": 0.765248759417815, "learning_rate": 4.3925244512044405e-06, "loss": 0.3794, "step": 5577 }, { "epoch": 1.3793273986152323, "grad_norm": 0.8089623962026963, "learning_rate": 4.39231206062676e-06, "loss": 0.3845, "step": 5578 }, { "epoch": 1.379574678536103, "grad_norm": 0.7798189739295874, "learning_rate": 4.392099638063454e-06, "loss": 0.3807, "step": 5579 }, { "epoch": 1.3798219584569733, "grad_norm": 0.7752040215713535, "learning_rate": 4.391887183518114e-06, "loss": 0.3526, "step": 5580 }, { "epoch": 1.3800692383778437, "grad_norm": 0.7633079099108347, "learning_rate": 4.39167469699433e-06, "loss": 0.3793, "step": 5581 }, { "epoch": 1.3803165182987143, "grad_norm": 0.7959811187984761, "learning_rate": 4.391462178495693e-06, "loss": 0.3966, "step": 5582 }, { "epoch": 1.3805637982195846, "grad_norm": 0.7834047512091786, "learning_rate": 4.391249628025797e-06, "loss": 0.3525, "step": 5583 }, { "epoch": 1.380811078140455, "grad_norm": 0.7663588185709258, "learning_rate": 4.391037045588233e-06, "loss": 0.3822, "step": 5584 }, { "epoch": 1.3810583580613254, "grad_norm": 0.7988370767594788, "learning_rate": 4.390824431186594e-06, "loss": 0.3766, "step": 5585 }, { "epoch": 1.3813056379821957, "grad_norm": 0.7661122383652051, "learning_rate": 4.390611784824477e-06, "loss": 0.3865, "step": 5586 }, { "epoch": 1.3815529179030663, "grad_norm": 0.7644874046703505, "learning_rate": 4.390399106505473e-06, "loss": 0.3919, "step": 5587 }, { "epoch": 1.3818001978239367, "grad_norm": 0.7987785722197582, "learning_rate": 4.390186396233178e-06, "loss": 0.3563, "step": 5588 }, { "epoch": 1.382047477744807, "grad_norm": 0.7983374804565774, "learning_rate": 4.3899736540111874e-06, "loss": 0.386, "step": 5589 }, { "epoch": 1.3822947576656777, "grad_norm": 0.7893713340203703, "learning_rate": 4.389760879843098e-06, "loss": 0.3968, "step": 5590 }, { "epoch": 1.382542037586548, "grad_norm": 0.740294214255641, "learning_rate": 4.389548073732505e-06, "loss": 0.362, "step": 5591 }, { "epoch": 1.3827893175074184, "grad_norm": 0.7437645827591944, "learning_rate": 4.3893352356830055e-06, "loss": 0.3954, "step": 5592 }, { "epoch": 1.3830365974282888, "grad_norm": 0.7850045628912667, "learning_rate": 4.389122365698199e-06, "loss": 0.3851, "step": 5593 }, { "epoch": 1.3832838773491591, "grad_norm": 0.7889952359148668, "learning_rate": 4.388909463781681e-06, "loss": 0.3621, "step": 5594 }, { "epoch": 1.3835311572700297, "grad_norm": 0.8064657781951712, "learning_rate": 4.388696529937051e-06, "loss": 0.3932, "step": 5595 }, { "epoch": 1.3837784371909, "grad_norm": 0.7842840023750725, "learning_rate": 4.388483564167909e-06, "loss": 0.3847, "step": 5596 }, { "epoch": 1.3840257171117705, "grad_norm": 0.773523350662329, "learning_rate": 4.388270566477855e-06, "loss": 0.3734, "step": 5597 }, { "epoch": 1.384272997032641, "grad_norm": 0.7676417447078764, "learning_rate": 4.388057536870488e-06, "loss": 0.3963, "step": 5598 }, { "epoch": 1.3845202769535114, "grad_norm": 0.7495568517674056, "learning_rate": 4.38784447534941e-06, "loss": 0.3958, "step": 5599 }, { "epoch": 1.3847675568743818, "grad_norm": 0.77499544415659, "learning_rate": 4.3876313819182205e-06, "loss": 0.3632, "step": 5600 }, { "epoch": 1.3850148367952522, "grad_norm": 0.789466894456486, "learning_rate": 4.387418256580524e-06, "loss": 0.3615, "step": 5601 }, { "epoch": 1.3852621167161225, "grad_norm": 0.7834969378060799, "learning_rate": 4.387205099339921e-06, "loss": 0.3612, "step": 5602 }, { "epoch": 1.3855093966369931, "grad_norm": 0.7816676692219949, "learning_rate": 4.386991910200016e-06, "loss": 0.3468, "step": 5603 }, { "epoch": 1.3857566765578635, "grad_norm": 0.7531541283326211, "learning_rate": 4.38677868916441e-06, "loss": 0.3868, "step": 5604 }, { "epoch": 1.3860039564787339, "grad_norm": 0.7718690196245734, "learning_rate": 4.38656543623671e-06, "loss": 0.3699, "step": 5605 }, { "epoch": 1.3862512363996045, "grad_norm": 0.7814213703935808, "learning_rate": 4.3863521514205195e-06, "loss": 0.3453, "step": 5606 }, { "epoch": 1.3864985163204748, "grad_norm": 0.7734177622347255, "learning_rate": 4.386138834719443e-06, "loss": 0.4128, "step": 5607 }, { "epoch": 1.3867457962413452, "grad_norm": 0.8084138302764106, "learning_rate": 4.3859254861370865e-06, "loss": 0.3643, "step": 5608 }, { "epoch": 1.3869930761622156, "grad_norm": 0.7950128444408052, "learning_rate": 4.385712105677057e-06, "loss": 0.3731, "step": 5609 }, { "epoch": 1.387240356083086, "grad_norm": 0.7723112974177536, "learning_rate": 4.38549869334296e-06, "loss": 0.4057, "step": 5610 }, { "epoch": 1.3874876360039565, "grad_norm": 0.8324957858097111, "learning_rate": 4.385285249138404e-06, "loss": 0.3823, "step": 5611 }, { "epoch": 1.387734915924827, "grad_norm": 0.798721973674469, "learning_rate": 4.385071773066994e-06, "loss": 0.3706, "step": 5612 }, { "epoch": 1.3879821958456973, "grad_norm": 0.7507859096671706, "learning_rate": 4.384858265132344e-06, "loss": 0.3574, "step": 5613 }, { "epoch": 1.3882294757665679, "grad_norm": 0.7845635822926347, "learning_rate": 4.384644725338057e-06, "loss": 0.3741, "step": 5614 }, { "epoch": 1.3884767556874382, "grad_norm": 0.8031090712394219, "learning_rate": 4.384431153687746e-06, "loss": 0.3623, "step": 5615 }, { "epoch": 1.3887240356083086, "grad_norm": 0.7929137116704521, "learning_rate": 4.384217550185019e-06, "loss": 0.3672, "step": 5616 }, { "epoch": 1.388971315529179, "grad_norm": 0.7795427604422787, "learning_rate": 4.384003914833489e-06, "loss": 0.3798, "step": 5617 }, { "epoch": 1.3892185954500493, "grad_norm": 0.8248538649365316, "learning_rate": 4.383790247636765e-06, "loss": 0.3831, "step": 5618 }, { "epoch": 1.38946587537092, "grad_norm": 0.7738813390232476, "learning_rate": 4.383576548598458e-06, "loss": 0.3752, "step": 5619 }, { "epoch": 1.3897131552917903, "grad_norm": 0.7632214142014159, "learning_rate": 4.383362817722183e-06, "loss": 0.3861, "step": 5620 }, { "epoch": 1.3899604352126607, "grad_norm": 0.7736585647961712, "learning_rate": 4.38314905501155e-06, "loss": 0.3935, "step": 5621 }, { "epoch": 1.3902077151335313, "grad_norm": 0.7573982744648153, "learning_rate": 4.382935260470174e-06, "loss": 0.3837, "step": 5622 }, { "epoch": 1.3904549950544016, "grad_norm": 0.7555336807620092, "learning_rate": 4.382721434101667e-06, "loss": 0.3786, "step": 5623 }, { "epoch": 1.390702274975272, "grad_norm": 0.7955479234778873, "learning_rate": 4.382507575909644e-06, "loss": 0.3692, "step": 5624 }, { "epoch": 1.3909495548961424, "grad_norm": 0.7756984093412433, "learning_rate": 4.3822936858977205e-06, "loss": 0.3902, "step": 5625 }, { "epoch": 1.3911968348170127, "grad_norm": 0.8217776941557168, "learning_rate": 4.3820797640695116e-06, "loss": 0.3825, "step": 5626 }, { "epoch": 1.3914441147378833, "grad_norm": 0.7599993513589507, "learning_rate": 4.381865810428632e-06, "loss": 0.3415, "step": 5627 }, { "epoch": 1.3916913946587537, "grad_norm": 0.7565981027555896, "learning_rate": 4.381651824978701e-06, "loss": 0.3849, "step": 5628 }, { "epoch": 1.391938674579624, "grad_norm": 0.7486802476873141, "learning_rate": 4.381437807723334e-06, "loss": 0.3817, "step": 5629 }, { "epoch": 1.3921859545004946, "grad_norm": 0.8400564468643441, "learning_rate": 4.381223758666146e-06, "loss": 0.3853, "step": 5630 }, { "epoch": 1.392433234421365, "grad_norm": 0.7661118295575295, "learning_rate": 4.381009677810759e-06, "loss": 0.3469, "step": 5631 }, { "epoch": 1.3926805143422354, "grad_norm": 0.7948031106847951, "learning_rate": 4.380795565160789e-06, "loss": 0.3922, "step": 5632 }, { "epoch": 1.3929277942631058, "grad_norm": 0.741495193738868, "learning_rate": 4.380581420719857e-06, "loss": 0.3881, "step": 5633 }, { "epoch": 1.3931750741839761, "grad_norm": 0.8079597300195097, "learning_rate": 4.3803672444915804e-06, "loss": 0.3672, "step": 5634 }, { "epoch": 1.3934223541048467, "grad_norm": 0.7906416941995733, "learning_rate": 4.380153036479582e-06, "loss": 0.3543, "step": 5635 }, { "epoch": 1.393669634025717, "grad_norm": 0.782199382023654, "learning_rate": 4.379938796687481e-06, "loss": 0.397, "step": 5636 }, { "epoch": 1.3939169139465875, "grad_norm": 0.7997099665919433, "learning_rate": 4.3797245251188985e-06, "loss": 0.3637, "step": 5637 }, { "epoch": 1.394164193867458, "grad_norm": 0.793344483956962, "learning_rate": 4.3795102217774575e-06, "loss": 0.3763, "step": 5638 }, { "epoch": 1.3944114737883284, "grad_norm": 0.759454366638749, "learning_rate": 4.379295886666779e-06, "loss": 0.4052, "step": 5639 }, { "epoch": 1.3946587537091988, "grad_norm": 0.8001749543402854, "learning_rate": 4.379081519790486e-06, "loss": 0.4369, "step": 5640 }, { "epoch": 1.3949060336300692, "grad_norm": 0.7750209276223661, "learning_rate": 4.3788671211522035e-06, "loss": 0.4023, "step": 5641 }, { "epoch": 1.3951533135509397, "grad_norm": 0.7352640686226565, "learning_rate": 4.3786526907555545e-06, "loss": 0.3622, "step": 5642 }, { "epoch": 1.3954005934718101, "grad_norm": 0.7621587244498571, "learning_rate": 4.378438228604163e-06, "loss": 0.3933, "step": 5643 }, { "epoch": 1.3956478733926805, "grad_norm": 0.8103068910372265, "learning_rate": 4.378223734701655e-06, "loss": 0.3669, "step": 5644 }, { "epoch": 1.395895153313551, "grad_norm": 0.822049742703841, "learning_rate": 4.378009209051655e-06, "loss": 0.367, "step": 5645 }, { "epoch": 1.3961424332344214, "grad_norm": 0.8179310884425444, "learning_rate": 4.37779465165779e-06, "loss": 0.3702, "step": 5646 }, { "epoch": 1.3963897131552918, "grad_norm": 0.7687152605764365, "learning_rate": 4.377580062523686e-06, "loss": 0.3852, "step": 5647 }, { "epoch": 1.3966369930761622, "grad_norm": 0.7872997299453607, "learning_rate": 4.37736544165297e-06, "loss": 0.3814, "step": 5648 }, { "epoch": 1.3968842729970326, "grad_norm": 0.7743213738735131, "learning_rate": 4.377150789049271e-06, "loss": 0.4011, "step": 5649 }, { "epoch": 1.3971315529179031, "grad_norm": 0.7951248257142024, "learning_rate": 4.376936104716216e-06, "loss": 0.387, "step": 5650 }, { "epoch": 1.3973788328387735, "grad_norm": 0.7988431126268566, "learning_rate": 4.376721388657435e-06, "loss": 0.3793, "step": 5651 }, { "epoch": 1.3976261127596439, "grad_norm": 0.746865422754339, "learning_rate": 4.376506640876555e-06, "loss": 0.3789, "step": 5652 }, { "epoch": 1.3978733926805145, "grad_norm": 0.769824086240452, "learning_rate": 4.376291861377209e-06, "loss": 0.3714, "step": 5653 }, { "epoch": 1.3981206726013848, "grad_norm": 0.7807536694771282, "learning_rate": 4.376077050163026e-06, "loss": 0.3582, "step": 5654 }, { "epoch": 1.3983679525222552, "grad_norm": 0.8336814302828442, "learning_rate": 4.3758622072376365e-06, "loss": 0.3732, "step": 5655 }, { "epoch": 1.3986152324431256, "grad_norm": 0.7749556839802121, "learning_rate": 4.375647332604672e-06, "loss": 0.3825, "step": 5656 }, { "epoch": 1.398862512363996, "grad_norm": 0.8071641708594828, "learning_rate": 4.375432426267764e-06, "loss": 0.3769, "step": 5657 }, { "epoch": 1.3991097922848665, "grad_norm": 0.7763290819884654, "learning_rate": 4.375217488230547e-06, "loss": 0.3648, "step": 5658 }, { "epoch": 1.399357072205737, "grad_norm": 0.78354592093784, "learning_rate": 4.375002518496652e-06, "loss": 0.3461, "step": 5659 }, { "epoch": 1.3996043521266073, "grad_norm": 0.8208200582570313, "learning_rate": 4.3747875170697154e-06, "loss": 0.3628, "step": 5660 }, { "epoch": 1.3998516320474779, "grad_norm": 0.7780435158072976, "learning_rate": 4.374572483953368e-06, "loss": 0.3964, "step": 5661 }, { "epoch": 1.4000989119683482, "grad_norm": 0.7780350746913844, "learning_rate": 4.374357419151246e-06, "loss": 0.3947, "step": 5662 }, { "epoch": 1.4003461918892186, "grad_norm": 0.7591518591341986, "learning_rate": 4.374142322666984e-06, "loss": 0.385, "step": 5663 }, { "epoch": 1.400593471810089, "grad_norm": 0.8163090369547303, "learning_rate": 4.37392719450422e-06, "loss": 0.3687, "step": 5664 }, { "epoch": 1.4008407517309593, "grad_norm": 0.7777981797844294, "learning_rate": 4.373712034666588e-06, "loss": 0.3603, "step": 5665 }, { "epoch": 1.40108803165183, "grad_norm": 0.8254472344278817, "learning_rate": 4.373496843157725e-06, "loss": 0.3611, "step": 5666 }, { "epoch": 1.4013353115727003, "grad_norm": 0.7841453540946525, "learning_rate": 4.37328161998127e-06, "loss": 0.3777, "step": 5667 }, { "epoch": 1.4015825914935707, "grad_norm": 0.7660686399651366, "learning_rate": 4.373066365140859e-06, "loss": 0.4109, "step": 5668 }, { "epoch": 1.4018298714144413, "grad_norm": 0.8173494717178563, "learning_rate": 4.3728510786401315e-06, "loss": 0.3723, "step": 5669 }, { "epoch": 1.4020771513353116, "grad_norm": 0.7846652626109926, "learning_rate": 4.372635760482726e-06, "loss": 0.3587, "step": 5670 }, { "epoch": 1.402324431256182, "grad_norm": 0.8051549488577708, "learning_rate": 4.372420410672282e-06, "loss": 0.3623, "step": 5671 }, { "epoch": 1.4025717111770524, "grad_norm": 0.8073141654765561, "learning_rate": 4.37220502921244e-06, "loss": 0.3524, "step": 5672 }, { "epoch": 1.4028189910979227, "grad_norm": 0.7694277273273052, "learning_rate": 4.37198961610684e-06, "loss": 0.4243, "step": 5673 }, { "epoch": 1.4030662710187933, "grad_norm": 0.7990437990638399, "learning_rate": 4.371774171359123e-06, "loss": 0.3825, "step": 5674 }, { "epoch": 1.4033135509396637, "grad_norm": 0.812336654632399, "learning_rate": 4.371558694972933e-06, "loss": 0.3701, "step": 5675 }, { "epoch": 1.403560830860534, "grad_norm": 0.7805150215093682, "learning_rate": 4.371343186951909e-06, "loss": 0.392, "step": 5676 }, { "epoch": 1.4038081107814047, "grad_norm": 0.780749653524115, "learning_rate": 4.371127647299695e-06, "loss": 0.4034, "step": 5677 }, { "epoch": 1.404055390702275, "grad_norm": 0.7792425591356775, "learning_rate": 4.370912076019934e-06, "loss": 0.3599, "step": 5678 }, { "epoch": 1.4043026706231454, "grad_norm": 0.7812131386317559, "learning_rate": 4.37069647311627e-06, "loss": 0.3587, "step": 5679 }, { "epoch": 1.4045499505440158, "grad_norm": 0.7611529283181209, "learning_rate": 4.370480838592348e-06, "loss": 0.3758, "step": 5680 }, { "epoch": 1.4047972304648861, "grad_norm": 0.7933883553803307, "learning_rate": 4.370265172451812e-06, "loss": 0.3588, "step": 5681 }, { "epoch": 1.4050445103857567, "grad_norm": 0.7776174122724785, "learning_rate": 4.370049474698308e-06, "loss": 0.3577, "step": 5682 }, { "epoch": 1.405291790306627, "grad_norm": 0.7879538020626979, "learning_rate": 4.369833745335481e-06, "loss": 0.3633, "step": 5683 }, { "epoch": 1.4055390702274975, "grad_norm": 0.7889804321141901, "learning_rate": 4.3696179843669775e-06, "loss": 0.3558, "step": 5684 }, { "epoch": 1.405786350148368, "grad_norm": 0.7354399441244663, "learning_rate": 4.369402191796446e-06, "loss": 0.3832, "step": 5685 }, { "epoch": 1.4060336300692384, "grad_norm": 0.8191674007568029, "learning_rate": 4.369186367627532e-06, "loss": 0.3402, "step": 5686 }, { "epoch": 1.4062809099901088, "grad_norm": 0.7614443550485815, "learning_rate": 4.368970511863885e-06, "loss": 0.3811, "step": 5687 }, { "epoch": 1.4065281899109792, "grad_norm": 0.7396712995250723, "learning_rate": 4.3687546245091525e-06, "loss": 0.3894, "step": 5688 }, { "epoch": 1.4067754698318495, "grad_norm": 0.741805705165498, "learning_rate": 4.368538705566985e-06, "loss": 0.3767, "step": 5689 }, { "epoch": 1.4070227497527201, "grad_norm": 0.7874057869211852, "learning_rate": 4.368322755041032e-06, "loss": 0.3854, "step": 5690 }, { "epoch": 1.4072700296735905, "grad_norm": 0.7428066549838057, "learning_rate": 4.368106772934942e-06, "loss": 0.4011, "step": 5691 }, { "epoch": 1.4075173095944609, "grad_norm": 0.811197869552433, "learning_rate": 4.367890759252368e-06, "loss": 0.3625, "step": 5692 }, { "epoch": 1.4077645895153315, "grad_norm": 0.7926965338928202, "learning_rate": 4.367674713996959e-06, "loss": 0.3505, "step": 5693 }, { "epoch": 1.4080118694362018, "grad_norm": 0.8124527749477977, "learning_rate": 4.3674586371723686e-06, "loss": 0.3559, "step": 5694 }, { "epoch": 1.4082591493570722, "grad_norm": 0.7808072089288003, "learning_rate": 4.367242528782248e-06, "loss": 0.3924, "step": 5695 }, { "epoch": 1.4085064292779426, "grad_norm": 0.7960444174817984, "learning_rate": 4.367026388830251e-06, "loss": 0.3754, "step": 5696 }, { "epoch": 1.408753709198813, "grad_norm": 0.7897403094840699, "learning_rate": 4.3668102173200315e-06, "loss": 0.3679, "step": 5697 }, { "epoch": 1.4090009891196835, "grad_norm": 0.7861559539898647, "learning_rate": 4.366594014255241e-06, "loss": 0.3982, "step": 5698 }, { "epoch": 1.409248269040554, "grad_norm": 0.7935821437036714, "learning_rate": 4.3663777796395355e-06, "loss": 0.3836, "step": 5699 }, { "epoch": 1.4094955489614243, "grad_norm": 0.8143576903973883, "learning_rate": 4.366161513476571e-06, "loss": 0.3389, "step": 5700 }, { "epoch": 1.4097428288822949, "grad_norm": 0.7575421359849247, "learning_rate": 4.3659452157700014e-06, "loss": 0.3879, "step": 5701 }, { "epoch": 1.4099901088031652, "grad_norm": 0.785200011830782, "learning_rate": 4.365728886523483e-06, "loss": 0.3636, "step": 5702 }, { "epoch": 1.4102373887240356, "grad_norm": 0.8138727647923982, "learning_rate": 4.365512525740673e-06, "loss": 0.3563, "step": 5703 }, { "epoch": 1.410484668644906, "grad_norm": 0.7600669150218518, "learning_rate": 4.365296133425228e-06, "loss": 0.3695, "step": 5704 }, { "epoch": 1.4107319485657763, "grad_norm": 0.7845332221627969, "learning_rate": 4.365079709580806e-06, "loss": 0.3693, "step": 5705 }, { "epoch": 1.410979228486647, "grad_norm": 0.7494062565317303, "learning_rate": 4.364863254211066e-06, "loss": 0.3926, "step": 5706 }, { "epoch": 1.4112265084075173, "grad_norm": 0.7809739494333553, "learning_rate": 4.364646767319665e-06, "loss": 0.35, "step": 5707 }, { "epoch": 1.4114737883283877, "grad_norm": 0.7652153888469585, "learning_rate": 4.364430248910263e-06, "loss": 0.3645, "step": 5708 }, { "epoch": 1.4117210682492582, "grad_norm": 0.780213833418871, "learning_rate": 4.364213698986521e-06, "loss": 0.3959, "step": 5709 }, { "epoch": 1.4119683481701286, "grad_norm": 0.7823385624093865, "learning_rate": 4.363997117552097e-06, "loss": 0.3569, "step": 5710 }, { "epoch": 1.412215628090999, "grad_norm": 0.8078917102147449, "learning_rate": 4.363780504610653e-06, "loss": 0.3642, "step": 5711 }, { "epoch": 1.4124629080118694, "grad_norm": 0.7664084695469019, "learning_rate": 4.3635638601658505e-06, "loss": 0.3909, "step": 5712 }, { "epoch": 1.4127101879327397, "grad_norm": 0.8196216907090232, "learning_rate": 4.363347184221353e-06, "loss": 0.3744, "step": 5713 }, { "epoch": 1.4129574678536103, "grad_norm": 0.7432617060625175, "learning_rate": 4.36313047678082e-06, "loss": 0.3984, "step": 5714 }, { "epoch": 1.4132047477744807, "grad_norm": 0.7535872620385694, "learning_rate": 4.362913737847916e-06, "loss": 0.38, "step": 5715 }, { "epoch": 1.413452027695351, "grad_norm": 0.7864231555447977, "learning_rate": 4.362696967426305e-06, "loss": 0.3953, "step": 5716 }, { "epoch": 1.4136993076162216, "grad_norm": 0.7669454983651496, "learning_rate": 4.36248016551965e-06, "loss": 0.3641, "step": 5717 }, { "epoch": 1.413946587537092, "grad_norm": 0.8142346282733516, "learning_rate": 4.362263332131616e-06, "loss": 0.3506, "step": 5718 }, { "epoch": 1.4141938674579624, "grad_norm": 0.7839602472674757, "learning_rate": 4.362046467265868e-06, "loss": 0.3761, "step": 5719 }, { "epoch": 1.4144411473788328, "grad_norm": 0.806771092320648, "learning_rate": 4.361829570926073e-06, "loss": 0.3648, "step": 5720 }, { "epoch": 1.4146884272997033, "grad_norm": 0.755320282294172, "learning_rate": 4.3616126431158945e-06, "loss": 0.352, "step": 5721 }, { "epoch": 1.4149357072205737, "grad_norm": 0.7726230912472833, "learning_rate": 4.361395683839001e-06, "loss": 0.3625, "step": 5722 }, { "epoch": 1.415182987141444, "grad_norm": 0.7647267768415063, "learning_rate": 4.361178693099061e-06, "loss": 0.3854, "step": 5723 }, { "epoch": 1.4154302670623147, "grad_norm": 0.7720196586670123, "learning_rate": 4.360961670899739e-06, "loss": 0.3979, "step": 5724 }, { "epoch": 1.415677546983185, "grad_norm": 0.7764908181257305, "learning_rate": 4.360744617244706e-06, "loss": 0.3667, "step": 5725 }, { "epoch": 1.4159248269040554, "grad_norm": 0.810489565546959, "learning_rate": 4.36052753213763e-06, "loss": 0.3662, "step": 5726 }, { "epoch": 1.4161721068249258, "grad_norm": 0.8187709142285464, "learning_rate": 4.36031041558218e-06, "loss": 0.3659, "step": 5727 }, { "epoch": 1.4164193867457961, "grad_norm": 0.7937689916022154, "learning_rate": 4.360093267582025e-06, "loss": 0.3661, "step": 5728 }, { "epoch": 1.4166666666666667, "grad_norm": 0.7745185718140511, "learning_rate": 4.359876088140838e-06, "loss": 0.385, "step": 5729 }, { "epoch": 1.416913946587537, "grad_norm": 0.7441045200953259, "learning_rate": 4.359658877262289e-06, "loss": 0.3983, "step": 5730 }, { "epoch": 1.4171612265084075, "grad_norm": 0.7657776134999995, "learning_rate": 4.359441634950048e-06, "loss": 0.3852, "step": 5731 }, { "epoch": 1.417408506429278, "grad_norm": 0.7736705828953736, "learning_rate": 4.359224361207789e-06, "loss": 0.3809, "step": 5732 }, { "epoch": 1.4176557863501484, "grad_norm": 0.7547954424494198, "learning_rate": 4.359007056039183e-06, "loss": 0.3682, "step": 5733 }, { "epoch": 1.4179030662710188, "grad_norm": 0.792497886293794, "learning_rate": 4.3587897194479036e-06, "loss": 0.3758, "step": 5734 }, { "epoch": 1.4181503461918892, "grad_norm": 0.7936440189693845, "learning_rate": 4.358572351437626e-06, "loss": 0.365, "step": 5735 }, { "epoch": 1.4183976261127595, "grad_norm": 0.7386794217664778, "learning_rate": 4.358354952012022e-06, "loss": 0.3935, "step": 5736 }, { "epoch": 1.4186449060336301, "grad_norm": 0.7890750780071629, "learning_rate": 4.3581375211747665e-06, "loss": 0.359, "step": 5737 }, { "epoch": 1.4188921859545005, "grad_norm": 0.7780248844777594, "learning_rate": 4.3579200589295364e-06, "loss": 0.3746, "step": 5738 }, { "epoch": 1.4191394658753709, "grad_norm": 0.780033943497606, "learning_rate": 4.357702565280007e-06, "loss": 0.3773, "step": 5739 }, { "epoch": 1.4193867457962415, "grad_norm": 0.7791120931541085, "learning_rate": 4.357485040229853e-06, "loss": 0.3664, "step": 5740 }, { "epoch": 1.4196340257171118, "grad_norm": 0.7555850321315462, "learning_rate": 4.357267483782753e-06, "loss": 0.3583, "step": 5741 }, { "epoch": 1.4198813056379822, "grad_norm": 0.792382030524132, "learning_rate": 4.357049895942384e-06, "loss": 0.3632, "step": 5742 }, { "epoch": 1.4201285855588526, "grad_norm": 0.7580070101266971, "learning_rate": 4.356832276712423e-06, "loss": 0.3834, "step": 5743 }, { "epoch": 1.420375865479723, "grad_norm": 0.7743060721329673, "learning_rate": 4.3566146260965485e-06, "loss": 0.3655, "step": 5744 }, { "epoch": 1.4206231454005935, "grad_norm": 0.7938681073749769, "learning_rate": 4.35639694409844e-06, "loss": 0.4015, "step": 5745 }, { "epoch": 1.420870425321464, "grad_norm": 0.7619049709565119, "learning_rate": 4.356179230721777e-06, "loss": 0.3848, "step": 5746 }, { "epoch": 1.4211177052423343, "grad_norm": 0.7583024717726556, "learning_rate": 4.35596148597024e-06, "loss": 0.3753, "step": 5747 }, { "epoch": 1.4213649851632049, "grad_norm": 0.7725523302437469, "learning_rate": 4.355743709847509e-06, "loss": 0.3781, "step": 5748 }, { "epoch": 1.4216122650840752, "grad_norm": 0.779405020305919, "learning_rate": 4.3555259023572635e-06, "loss": 0.3872, "step": 5749 }, { "epoch": 1.4218595450049456, "grad_norm": 0.7842682002631386, "learning_rate": 4.355308063503187e-06, "loss": 0.3573, "step": 5750 }, { "epoch": 1.422106824925816, "grad_norm": 0.8218520353621512, "learning_rate": 4.355090193288962e-06, "loss": 0.3773, "step": 5751 }, { "epoch": 1.4223541048466863, "grad_norm": 0.7409775109758512, "learning_rate": 4.3548722917182695e-06, "loss": 0.36, "step": 5752 }, { "epoch": 1.422601384767557, "grad_norm": 0.7839559530856219, "learning_rate": 4.354654358794794e-06, "loss": 0.3732, "step": 5753 }, { "epoch": 1.4228486646884273, "grad_norm": 0.8067266924320017, "learning_rate": 4.354436394522218e-06, "loss": 0.4099, "step": 5754 }, { "epoch": 1.4230959446092977, "grad_norm": 0.7837806540305828, "learning_rate": 4.354218398904226e-06, "loss": 0.3483, "step": 5755 }, { "epoch": 1.4233432245301683, "grad_norm": 0.7817669124743636, "learning_rate": 4.354000371944504e-06, "loss": 0.3641, "step": 5756 }, { "epoch": 1.4235905044510386, "grad_norm": 0.7628403550372875, "learning_rate": 4.353782313646737e-06, "loss": 0.3694, "step": 5757 }, { "epoch": 1.423837784371909, "grad_norm": 0.7878804208706381, "learning_rate": 4.3535642240146096e-06, "loss": 0.3637, "step": 5758 }, { "epoch": 1.4240850642927794, "grad_norm": 0.8293122501110616, "learning_rate": 4.353346103051808e-06, "loss": 0.3768, "step": 5759 }, { "epoch": 1.4243323442136497, "grad_norm": 0.7924025164396673, "learning_rate": 4.353127950762021e-06, "loss": 0.3616, "step": 5760 }, { "epoch": 1.4245796241345203, "grad_norm": 0.814874541828642, "learning_rate": 4.352909767148935e-06, "loss": 0.3538, "step": 5761 }, { "epoch": 1.4248269040553907, "grad_norm": 0.7850680065674066, "learning_rate": 4.352691552216237e-06, "loss": 0.3763, "step": 5762 }, { "epoch": 1.425074183976261, "grad_norm": 0.7878078265984299, "learning_rate": 4.352473305967617e-06, "loss": 0.3802, "step": 5763 }, { "epoch": 1.4253214638971317, "grad_norm": 0.7796920833287806, "learning_rate": 4.352255028406763e-06, "loss": 0.3684, "step": 5764 }, { "epoch": 1.425568743818002, "grad_norm": 0.7618349677018834, "learning_rate": 4.352036719537364e-06, "loss": 0.4036, "step": 5765 }, { "epoch": 1.4258160237388724, "grad_norm": 0.8017242303951579, "learning_rate": 4.351818379363113e-06, "loss": 0.3662, "step": 5766 }, { "epoch": 1.4260633036597428, "grad_norm": 0.7661785880905659, "learning_rate": 4.351600007887697e-06, "loss": 0.3745, "step": 5767 }, { "epoch": 1.4263105835806131, "grad_norm": 0.7540392877800275, "learning_rate": 4.351381605114809e-06, "loss": 0.3567, "step": 5768 }, { "epoch": 1.4265578635014837, "grad_norm": 0.7473394288023749, "learning_rate": 4.3511631710481406e-06, "loss": 0.3769, "step": 5769 }, { "epoch": 1.426805143422354, "grad_norm": 0.7780388813762856, "learning_rate": 4.350944705691383e-06, "loss": 0.3724, "step": 5770 }, { "epoch": 1.4270524233432245, "grad_norm": 0.7840269215694958, "learning_rate": 4.35072620904823e-06, "loss": 0.3768, "step": 5771 }, { "epoch": 1.427299703264095, "grad_norm": 0.8122333151981017, "learning_rate": 4.3505076811223746e-06, "loss": 0.3739, "step": 5772 }, { "epoch": 1.4275469831849654, "grad_norm": 0.774075896987554, "learning_rate": 4.350289121917511e-06, "loss": 0.3698, "step": 5773 }, { "epoch": 1.4277942631058358, "grad_norm": 0.7810791243048004, "learning_rate": 4.350070531437331e-06, "loss": 0.3306, "step": 5774 }, { "epoch": 1.4280415430267062, "grad_norm": 0.7539817798803257, "learning_rate": 4.349851909685532e-06, "loss": 0.3885, "step": 5775 }, { "epoch": 1.4282888229475765, "grad_norm": 0.7865461459729546, "learning_rate": 4.349633256665809e-06, "loss": 0.3704, "step": 5776 }, { "epoch": 1.4285361028684471, "grad_norm": 0.7453694494197035, "learning_rate": 4.349414572381857e-06, "loss": 0.3909, "step": 5777 }, { "epoch": 1.4287833827893175, "grad_norm": 0.8082638318591463, "learning_rate": 4.349195856837373e-06, "loss": 0.3747, "step": 5778 }, { "epoch": 1.4290306627101879, "grad_norm": 0.7613510402338187, "learning_rate": 4.348977110036054e-06, "loss": 0.3983, "step": 5779 }, { "epoch": 1.4292779426310585, "grad_norm": 0.794235173315222, "learning_rate": 4.348758331981599e-06, "loss": 0.4019, "step": 5780 }, { "epoch": 1.4295252225519288, "grad_norm": 0.749197012835429, "learning_rate": 4.348539522677702e-06, "loss": 0.3823, "step": 5781 }, { "epoch": 1.4297725024727992, "grad_norm": 0.7858201185196779, "learning_rate": 4.348320682128064e-06, "loss": 0.3622, "step": 5782 }, { "epoch": 1.4300197823936696, "grad_norm": 0.7353836399843681, "learning_rate": 4.348101810336385e-06, "loss": 0.3962, "step": 5783 }, { "epoch": 1.43026706231454, "grad_norm": 0.763827546431934, "learning_rate": 4.347882907306362e-06, "loss": 0.369, "step": 5784 }, { "epoch": 1.4305143422354105, "grad_norm": 0.7658764295610988, "learning_rate": 4.347663973041698e-06, "loss": 0.3883, "step": 5785 }, { "epoch": 1.4307616221562809, "grad_norm": 0.7782808834887324, "learning_rate": 4.347445007546092e-06, "loss": 0.3685, "step": 5786 }, { "epoch": 1.4310089020771513, "grad_norm": 0.8110624761096779, "learning_rate": 4.347226010823245e-06, "loss": 0.3805, "step": 5787 }, { "epoch": 1.4312561819980218, "grad_norm": 0.8086880269636288, "learning_rate": 4.347006982876858e-06, "loss": 0.3788, "step": 5788 }, { "epoch": 1.4315034619188922, "grad_norm": 0.7884473181171783, "learning_rate": 4.346787923710635e-06, "loss": 0.355, "step": 5789 }, { "epoch": 1.4317507418397626, "grad_norm": 0.7872812747953045, "learning_rate": 4.3465688333282786e-06, "loss": 0.3872, "step": 5790 }, { "epoch": 1.431998021760633, "grad_norm": 0.7766299917431178, "learning_rate": 4.3463497117334905e-06, "loss": 0.344, "step": 5791 }, { "epoch": 1.4322453016815033, "grad_norm": 0.7864572176477667, "learning_rate": 4.346130558929976e-06, "loss": 0.3706, "step": 5792 }, { "epoch": 1.432492581602374, "grad_norm": 0.7794225519477159, "learning_rate": 4.345911374921439e-06, "loss": 0.3844, "step": 5793 }, { "epoch": 1.4327398615232443, "grad_norm": 0.7510546104297806, "learning_rate": 4.3456921597115845e-06, "loss": 0.3748, "step": 5794 }, { "epoch": 1.4329871414441149, "grad_norm": 0.7391756085132625, "learning_rate": 4.345472913304117e-06, "loss": 0.3478, "step": 5795 }, { "epoch": 1.4332344213649852, "grad_norm": 0.775246832071169, "learning_rate": 4.345253635702743e-06, "loss": 0.3824, "step": 5796 }, { "epoch": 1.4334817012858556, "grad_norm": 0.7733281227725143, "learning_rate": 4.345034326911169e-06, "loss": 0.3677, "step": 5797 }, { "epoch": 1.433728981206726, "grad_norm": 0.8027228922623313, "learning_rate": 4.344814986933101e-06, "loss": 0.3596, "step": 5798 }, { "epoch": 1.4339762611275964, "grad_norm": 0.8042930509812242, "learning_rate": 4.344595615772249e-06, "loss": 0.4039, "step": 5799 }, { "epoch": 1.434223541048467, "grad_norm": 0.774729265742482, "learning_rate": 4.344376213432318e-06, "loss": 0.3968, "step": 5800 }, { "epoch": 1.4344708209693373, "grad_norm": 0.7761874559757815, "learning_rate": 4.344156779917019e-06, "loss": 0.3411, "step": 5801 }, { "epoch": 1.4347181008902077, "grad_norm": 0.7490832172627896, "learning_rate": 4.34393731523006e-06, "loss": 0.3553, "step": 5802 }, { "epoch": 1.4349653808110783, "grad_norm": 0.788762121419923, "learning_rate": 4.343717819375151e-06, "loss": 0.3629, "step": 5803 }, { "epoch": 1.4352126607319486, "grad_norm": 0.7948578537354839, "learning_rate": 4.3434982923560005e-06, "loss": 0.3588, "step": 5804 }, { "epoch": 1.435459940652819, "grad_norm": 0.7617814067774752, "learning_rate": 4.3432787341763215e-06, "loss": 0.371, "step": 5805 }, { "epoch": 1.4357072205736894, "grad_norm": 0.7331742214689282, "learning_rate": 4.343059144839823e-06, "loss": 0.3887, "step": 5806 }, { "epoch": 1.4359545004945597, "grad_norm": 0.7265545602264305, "learning_rate": 4.3428395243502184e-06, "loss": 0.3935, "step": 5807 }, { "epoch": 1.4362017804154303, "grad_norm": 0.7488090883789311, "learning_rate": 4.342619872711219e-06, "loss": 0.3933, "step": 5808 }, { "epoch": 1.4364490603363007, "grad_norm": 0.782144506653421, "learning_rate": 4.342400189926539e-06, "loss": 0.376, "step": 5809 }, { "epoch": 1.436696340257171, "grad_norm": 0.7702524293850338, "learning_rate": 4.342180475999889e-06, "loss": 0.3625, "step": 5810 }, { "epoch": 1.4369436201780417, "grad_norm": 0.7987403785391061, "learning_rate": 4.341960730934985e-06, "loss": 0.3765, "step": 5811 }, { "epoch": 1.437190900098912, "grad_norm": 0.7687080074251038, "learning_rate": 4.34174095473554e-06, "loss": 0.4042, "step": 5812 }, { "epoch": 1.4374381800197824, "grad_norm": 0.81564051205711, "learning_rate": 4.341521147405271e-06, "loss": 0.4015, "step": 5813 }, { "epoch": 1.4376854599406528, "grad_norm": 0.7614229175912163, "learning_rate": 4.341301308947891e-06, "loss": 0.3725, "step": 5814 }, { "epoch": 1.4379327398615231, "grad_norm": 0.7491681316250594, "learning_rate": 4.3410814393671176e-06, "loss": 0.3811, "step": 5815 }, { "epoch": 1.4381800197823937, "grad_norm": 0.7644485761389787, "learning_rate": 4.340861538666666e-06, "loss": 0.3687, "step": 5816 }, { "epoch": 1.438427299703264, "grad_norm": 0.7778831108171753, "learning_rate": 4.340641606850253e-06, "loss": 0.366, "step": 5817 }, { "epoch": 1.4386745796241345, "grad_norm": 0.7947731194127882, "learning_rate": 4.340421643921598e-06, "loss": 0.3788, "step": 5818 }, { "epoch": 1.438921859545005, "grad_norm": 0.7576733353093887, "learning_rate": 4.340201649884417e-06, "loss": 0.3894, "step": 5819 }, { "epoch": 1.4391691394658754, "grad_norm": 0.8232274784989781, "learning_rate": 4.33998162474243e-06, "loss": 0.3793, "step": 5820 }, { "epoch": 1.4394164193867458, "grad_norm": 0.7807873373022134, "learning_rate": 4.339761568499355e-06, "loss": 0.3581, "step": 5821 }, { "epoch": 1.4396636993076162, "grad_norm": 0.7611030940314512, "learning_rate": 4.3395414811589116e-06, "loss": 0.3877, "step": 5822 }, { "epoch": 1.4399109792284865, "grad_norm": 0.7715789422542036, "learning_rate": 4.339321362724821e-06, "loss": 0.3731, "step": 5823 }, { "epoch": 1.4401582591493571, "grad_norm": 0.7977320337014504, "learning_rate": 4.3391012132008024e-06, "loss": 0.3719, "step": 5824 }, { "epoch": 1.4404055390702275, "grad_norm": 0.7541406069399212, "learning_rate": 4.3388810325905775e-06, "loss": 0.3715, "step": 5825 }, { "epoch": 1.4406528189910979, "grad_norm": 0.7744273197392303, "learning_rate": 4.3386608208978696e-06, "loss": 0.3605, "step": 5826 }, { "epoch": 1.4409000989119685, "grad_norm": 0.7580278391267768, "learning_rate": 4.338440578126399e-06, "loss": 0.4166, "step": 5827 }, { "epoch": 1.4411473788328388, "grad_norm": 0.7815404674772864, "learning_rate": 4.338220304279889e-06, "loss": 0.39, "step": 5828 }, { "epoch": 1.4413946587537092, "grad_norm": 0.7629189514405762, "learning_rate": 4.337999999362063e-06, "loss": 0.3951, "step": 5829 }, { "epoch": 1.4416419386745796, "grad_norm": 0.7843337684100025, "learning_rate": 4.337779663376643e-06, "loss": 0.3582, "step": 5830 }, { "epoch": 1.44188921859545, "grad_norm": 0.7961699509393938, "learning_rate": 4.337559296327357e-06, "loss": 0.3524, "step": 5831 }, { "epoch": 1.4421364985163205, "grad_norm": 0.8245578476517087, "learning_rate": 4.337338898217927e-06, "loss": 0.3783, "step": 5832 }, { "epoch": 1.442383778437191, "grad_norm": 0.7867397459788374, "learning_rate": 4.337118469052079e-06, "loss": 0.3746, "step": 5833 }, { "epoch": 1.4426310583580613, "grad_norm": 0.7840935370629001, "learning_rate": 4.33689800883354e-06, "loss": 0.3884, "step": 5834 }, { "epoch": 1.4428783382789319, "grad_norm": 0.8103366896878423, "learning_rate": 4.336677517566036e-06, "loss": 0.3734, "step": 5835 }, { "epoch": 1.4431256181998022, "grad_norm": 0.7957205480542808, "learning_rate": 4.336456995253292e-06, "loss": 0.3942, "step": 5836 }, { "epoch": 1.4433728981206726, "grad_norm": 0.7580251486246169, "learning_rate": 4.336236441899038e-06, "loss": 0.3509, "step": 5837 }, { "epoch": 1.443620178041543, "grad_norm": 0.77468152235789, "learning_rate": 4.336015857507001e-06, "loss": 0.4024, "step": 5838 }, { "epoch": 1.4438674579624133, "grad_norm": 0.8119120295838208, "learning_rate": 4.335795242080908e-06, "loss": 0.4041, "step": 5839 }, { "epoch": 1.444114737883284, "grad_norm": 0.7791491821375246, "learning_rate": 4.335574595624491e-06, "loss": 0.361, "step": 5840 }, { "epoch": 1.4443620178041543, "grad_norm": 0.781668805524737, "learning_rate": 4.335353918141478e-06, "loss": 0.3597, "step": 5841 }, { "epoch": 1.4446092977250247, "grad_norm": 0.8087937018348946, "learning_rate": 4.335133209635599e-06, "loss": 0.3379, "step": 5842 }, { "epoch": 1.4448565776458953, "grad_norm": 0.7822476907031191, "learning_rate": 4.334912470110585e-06, "loss": 0.3455, "step": 5843 }, { "epoch": 1.4451038575667656, "grad_norm": 0.7890522955748461, "learning_rate": 4.3346916995701665e-06, "loss": 0.3786, "step": 5844 }, { "epoch": 1.445351137487636, "grad_norm": 0.7864518026506417, "learning_rate": 4.334470898018076e-06, "loss": 0.3771, "step": 5845 }, { "epoch": 1.4455984174085064, "grad_norm": 0.7301216099934943, "learning_rate": 4.3342500654580455e-06, "loss": 0.4067, "step": 5846 }, { "epoch": 1.4458456973293767, "grad_norm": 0.7839499245935488, "learning_rate": 4.334029201893807e-06, "loss": 0.365, "step": 5847 }, { "epoch": 1.4460929772502473, "grad_norm": 0.8015842434159947, "learning_rate": 4.333808307329095e-06, "loss": 0.3998, "step": 5848 }, { "epoch": 1.4463402571711177, "grad_norm": 0.8043421837478707, "learning_rate": 4.333587381767642e-06, "loss": 0.4023, "step": 5849 }, { "epoch": 1.446587537091988, "grad_norm": 0.7551798169374428, "learning_rate": 4.333366425213183e-06, "loss": 0.3958, "step": 5850 }, { "epoch": 1.4468348170128587, "grad_norm": 0.7694617280103803, "learning_rate": 4.333145437669453e-06, "loss": 0.3751, "step": 5851 }, { "epoch": 1.447082096933729, "grad_norm": 0.787016619194183, "learning_rate": 4.332924419140187e-06, "loss": 0.3619, "step": 5852 }, { "epoch": 1.4473293768545994, "grad_norm": 0.8056864810883798, "learning_rate": 4.33270336962912e-06, "loss": 0.3828, "step": 5853 }, { "epoch": 1.4475766567754698, "grad_norm": 0.8309391297806914, "learning_rate": 4.33248228913999e-06, "loss": 0.3861, "step": 5854 }, { "epoch": 1.4478239366963401, "grad_norm": 0.7394679524266062, "learning_rate": 4.332261177676533e-06, "loss": 0.391, "step": 5855 }, { "epoch": 1.4480712166172107, "grad_norm": 0.7725623253652614, "learning_rate": 4.332040035242488e-06, "loss": 0.391, "step": 5856 }, { "epoch": 1.448318496538081, "grad_norm": 0.7632763962771769, "learning_rate": 4.331818861841589e-06, "loss": 0.3621, "step": 5857 }, { "epoch": 1.4485657764589515, "grad_norm": 0.7733355839237703, "learning_rate": 4.331597657477579e-06, "loss": 0.3636, "step": 5858 }, { "epoch": 1.448813056379822, "grad_norm": 0.7776403628498147, "learning_rate": 4.331376422154195e-06, "loss": 0.3778, "step": 5859 }, { "epoch": 1.4490603363006924, "grad_norm": 0.7802980112474821, "learning_rate": 4.331155155875175e-06, "loss": 0.3996, "step": 5860 }, { "epoch": 1.4493076162215628, "grad_norm": 0.7565213155484117, "learning_rate": 4.330933858644261e-06, "loss": 0.3832, "step": 5861 }, { "epoch": 1.4495548961424332, "grad_norm": 0.7907761701281025, "learning_rate": 4.330712530465194e-06, "loss": 0.3532, "step": 5862 }, { "epoch": 1.4498021760633035, "grad_norm": 0.770087052954414, "learning_rate": 4.330491171341714e-06, "loss": 0.3686, "step": 5863 }, { "epoch": 1.4500494559841741, "grad_norm": 0.8336352787453809, "learning_rate": 4.330269781277563e-06, "loss": 0.366, "step": 5864 }, { "epoch": 1.4502967359050445, "grad_norm": 0.7758365860534888, "learning_rate": 4.330048360276483e-06, "loss": 0.3594, "step": 5865 }, { "epoch": 1.4505440158259149, "grad_norm": 0.767480647632896, "learning_rate": 4.329826908342217e-06, "loss": 0.3711, "step": 5866 }, { "epoch": 1.4507912957467854, "grad_norm": 0.7114568659367067, "learning_rate": 4.329605425478507e-06, "loss": 0.3753, "step": 5867 }, { "epoch": 1.4510385756676558, "grad_norm": 0.7421980470890979, "learning_rate": 4.329383911689099e-06, "loss": 0.3925, "step": 5868 }, { "epoch": 1.4512858555885262, "grad_norm": 0.7523791324960577, "learning_rate": 4.329162366977734e-06, "loss": 0.3971, "step": 5869 }, { "epoch": 1.4515331355093966, "grad_norm": 0.8126154186077956, "learning_rate": 4.32894079134816e-06, "loss": 0.3461, "step": 5870 }, { "epoch": 1.451780415430267, "grad_norm": 0.7774788270749899, "learning_rate": 4.3287191848041204e-06, "loss": 0.3847, "step": 5871 }, { "epoch": 1.4520276953511375, "grad_norm": 0.7594164043632619, "learning_rate": 4.328497547349362e-06, "loss": 0.4037, "step": 5872 }, { "epoch": 1.4522749752720079, "grad_norm": 0.784415476207347, "learning_rate": 4.32827587898763e-06, "loss": 0.35, "step": 5873 }, { "epoch": 1.4525222551928785, "grad_norm": 0.7728608649801685, "learning_rate": 4.328054179722672e-06, "loss": 0.3705, "step": 5874 }, { "epoch": 1.4527695351137488, "grad_norm": 0.7784031605774198, "learning_rate": 4.327832449558234e-06, "loss": 0.3935, "step": 5875 }, { "epoch": 1.4530168150346192, "grad_norm": 0.7854963256567995, "learning_rate": 4.327610688498067e-06, "loss": 0.3873, "step": 5876 }, { "epoch": 1.4532640949554896, "grad_norm": 0.7727986520508463, "learning_rate": 4.327388896545917e-06, "loss": 0.3473, "step": 5877 }, { "epoch": 1.45351137487636, "grad_norm": 0.7443970979494283, "learning_rate": 4.327167073705533e-06, "loss": 0.3732, "step": 5878 }, { "epoch": 1.4537586547972305, "grad_norm": 0.78755644582791, "learning_rate": 4.326945219980665e-06, "loss": 0.3627, "step": 5879 }, { "epoch": 1.454005934718101, "grad_norm": 0.8211369382239723, "learning_rate": 4.326723335375063e-06, "loss": 0.3608, "step": 5880 }, { "epoch": 1.4542532146389713, "grad_norm": 0.8101676501181105, "learning_rate": 4.326501419892477e-06, "loss": 0.3816, "step": 5881 }, { "epoch": 1.4545004945598419, "grad_norm": 0.7632735353964222, "learning_rate": 4.32627947353666e-06, "loss": 0.3739, "step": 5882 }, { "epoch": 1.4547477744807122, "grad_norm": 0.7385647489997016, "learning_rate": 4.326057496311361e-06, "loss": 0.3718, "step": 5883 }, { "epoch": 1.4549950544015826, "grad_norm": 0.8411739988218989, "learning_rate": 4.325835488220332e-06, "loss": 0.3502, "step": 5884 }, { "epoch": 1.455242334322453, "grad_norm": 0.8116281518398825, "learning_rate": 4.3256134492673275e-06, "loss": 0.3728, "step": 5885 }, { "epoch": 1.4554896142433233, "grad_norm": 0.774094718201295, "learning_rate": 4.3253913794561e-06, "loss": 0.3554, "step": 5886 }, { "epoch": 1.455736894164194, "grad_norm": 0.7732589467684142, "learning_rate": 4.325169278790402e-06, "loss": 0.3685, "step": 5887 }, { "epoch": 1.4559841740850643, "grad_norm": 0.7542058493351904, "learning_rate": 4.324947147273989e-06, "loss": 0.3875, "step": 5888 }, { "epoch": 1.4562314540059347, "grad_norm": 0.7742788444140601, "learning_rate": 4.324724984910616e-06, "loss": 0.3791, "step": 5889 }, { "epoch": 1.4564787339268053, "grad_norm": 0.7864945569395847, "learning_rate": 4.324502791704036e-06, "loss": 0.4007, "step": 5890 }, { "epoch": 1.4567260138476756, "grad_norm": 0.792085460463634, "learning_rate": 4.324280567658006e-06, "loss": 0.3653, "step": 5891 }, { "epoch": 1.456973293768546, "grad_norm": 0.7975984203389602, "learning_rate": 4.324058312776284e-06, "loss": 0.3428, "step": 5892 }, { "epoch": 1.4572205736894164, "grad_norm": 0.8040895165482024, "learning_rate": 4.323836027062623e-06, "loss": 0.3509, "step": 5893 }, { "epoch": 1.4574678536102867, "grad_norm": 0.7729784313747452, "learning_rate": 4.323613710520783e-06, "loss": 0.3488, "step": 5894 }, { "epoch": 1.4577151335311573, "grad_norm": 0.8077378056157708, "learning_rate": 4.323391363154522e-06, "loss": 0.4162, "step": 5895 }, { "epoch": 1.4579624134520277, "grad_norm": 0.8047489823642981, "learning_rate": 4.323168984967598e-06, "loss": 0.3894, "step": 5896 }, { "epoch": 1.458209693372898, "grad_norm": 0.7836876801426723, "learning_rate": 4.322946575963768e-06, "loss": 0.3839, "step": 5897 }, { "epoch": 1.4584569732937687, "grad_norm": 0.765538682236265, "learning_rate": 4.322724136146793e-06, "loss": 0.384, "step": 5898 }, { "epoch": 1.458704253214639, "grad_norm": 0.7885107116267426, "learning_rate": 4.322501665520432e-06, "loss": 0.3659, "step": 5899 }, { "epoch": 1.4589515331355094, "grad_norm": 0.8091670379266556, "learning_rate": 4.322279164088447e-06, "loss": 0.3341, "step": 5900 }, { "epoch": 1.4591988130563798, "grad_norm": 0.7697803379919581, "learning_rate": 4.322056631854597e-06, "loss": 0.3674, "step": 5901 }, { "epoch": 1.4594460929772501, "grad_norm": 0.796274054859105, "learning_rate": 4.321834068822645e-06, "loss": 0.3824, "step": 5902 }, { "epoch": 1.4596933728981207, "grad_norm": 0.7793123175443895, "learning_rate": 4.321611474996353e-06, "loss": 0.3516, "step": 5903 }, { "epoch": 1.459940652818991, "grad_norm": 0.7865056129887511, "learning_rate": 4.3213888503794824e-06, "loss": 0.3563, "step": 5904 }, { "epoch": 1.4601879327398615, "grad_norm": 0.809417692214669, "learning_rate": 4.321166194975796e-06, "loss": 0.3831, "step": 5905 }, { "epoch": 1.460435212660732, "grad_norm": 0.8115519274982709, "learning_rate": 4.320943508789058e-06, "loss": 0.355, "step": 5906 }, { "epoch": 1.4606824925816024, "grad_norm": 0.8290946556952113, "learning_rate": 4.320720791823033e-06, "loss": 0.3776, "step": 5907 }, { "epoch": 1.4609297725024728, "grad_norm": 0.8110307869300635, "learning_rate": 4.320498044081484e-06, "loss": 0.3795, "step": 5908 }, { "epoch": 1.4611770524233432, "grad_norm": 0.7853604408857817, "learning_rate": 4.320275265568178e-06, "loss": 0.346, "step": 5909 }, { "epoch": 1.4614243323442135, "grad_norm": 0.7694310540328071, "learning_rate": 4.32005245628688e-06, "loss": 0.3641, "step": 5910 }, { "epoch": 1.4616716122650841, "grad_norm": 0.7803868321740091, "learning_rate": 4.319829616241355e-06, "loss": 0.38, "step": 5911 }, { "epoch": 1.4619188921859545, "grad_norm": 0.7762619251792111, "learning_rate": 4.319606745435371e-06, "loss": 0.3619, "step": 5912 }, { "epoch": 1.4621661721068249, "grad_norm": 0.8083529051875504, "learning_rate": 4.3193838438726934e-06, "loss": 0.3483, "step": 5913 }, { "epoch": 1.4624134520276955, "grad_norm": 0.7721651134712738, "learning_rate": 4.319160911557092e-06, "loss": 0.3723, "step": 5914 }, { "epoch": 1.4626607319485658, "grad_norm": 0.8022358549002774, "learning_rate": 4.318937948492335e-06, "loss": 0.3614, "step": 5915 }, { "epoch": 1.4629080118694362, "grad_norm": 0.8131849980235627, "learning_rate": 4.318714954682189e-06, "loss": 0.3473, "step": 5916 }, { "epoch": 1.4631552917903066, "grad_norm": 0.7991021743668315, "learning_rate": 4.318491930130425e-06, "loss": 0.3662, "step": 5917 }, { "epoch": 1.463402571711177, "grad_norm": 0.8047340593213115, "learning_rate": 4.318268874840812e-06, "loss": 0.3668, "step": 5918 }, { "epoch": 1.4636498516320475, "grad_norm": 0.7844463306534564, "learning_rate": 4.318045788817121e-06, "loss": 0.3868, "step": 5919 }, { "epoch": 1.463897131552918, "grad_norm": 0.7955513559719526, "learning_rate": 4.317822672063122e-06, "loss": 0.3925, "step": 5920 }, { "epoch": 1.4641444114737883, "grad_norm": 0.7673110672716023, "learning_rate": 4.317599524582587e-06, "loss": 0.3957, "step": 5921 }, { "epoch": 1.4643916913946589, "grad_norm": 0.8236251365309315, "learning_rate": 4.317376346379287e-06, "loss": 0.3752, "step": 5922 }, { "epoch": 1.4646389713155292, "grad_norm": 0.7950167311725865, "learning_rate": 4.317153137456996e-06, "loss": 0.388, "step": 5923 }, { "epoch": 1.4648862512363996, "grad_norm": 0.7655713231535342, "learning_rate": 4.316929897819485e-06, "loss": 0.3982, "step": 5924 }, { "epoch": 1.46513353115727, "grad_norm": 0.7705307789748667, "learning_rate": 4.316706627470529e-06, "loss": 0.3877, "step": 5925 }, { "epoch": 1.4653808110781403, "grad_norm": 0.8199403668970167, "learning_rate": 4.3164833264139e-06, "loss": 0.364, "step": 5926 }, { "epoch": 1.465628090999011, "grad_norm": 0.8017659844486553, "learning_rate": 4.316259994653374e-06, "loss": 0.3821, "step": 5927 }, { "epoch": 1.4658753709198813, "grad_norm": 0.8319630906281384, "learning_rate": 4.316036632192725e-06, "loss": 0.3601, "step": 5928 }, { "epoch": 1.4661226508407517, "grad_norm": 0.7810121701704562, "learning_rate": 4.31581323903573e-06, "loss": 0.3803, "step": 5929 }, { "epoch": 1.4663699307616223, "grad_norm": 0.7710032221445307, "learning_rate": 4.315589815186164e-06, "loss": 0.3711, "step": 5930 }, { "epoch": 1.4666172106824926, "grad_norm": 0.7873494532898242, "learning_rate": 4.315366360647803e-06, "loss": 0.3699, "step": 5931 }, { "epoch": 1.466864490603363, "grad_norm": 0.7974256648905441, "learning_rate": 4.3151428754244255e-06, "loss": 0.35, "step": 5932 }, { "epoch": 1.4671117705242334, "grad_norm": 0.8221524721882866, "learning_rate": 4.314919359519807e-06, "loss": 0.3848, "step": 5933 }, { "epoch": 1.4673590504451037, "grad_norm": 0.7664946490180511, "learning_rate": 4.314695812937727e-06, "loss": 0.3383, "step": 5934 }, { "epoch": 1.4676063303659743, "grad_norm": 0.7762644169417596, "learning_rate": 4.314472235681963e-06, "loss": 0.3762, "step": 5935 }, { "epoch": 1.4678536102868447, "grad_norm": 0.8039164394234738, "learning_rate": 4.314248627756297e-06, "loss": 0.3679, "step": 5936 }, { "epoch": 1.468100890207715, "grad_norm": 0.7644828670598542, "learning_rate": 4.314024989164504e-06, "loss": 0.3717, "step": 5937 }, { "epoch": 1.4683481701285857, "grad_norm": 0.8193146866499905, "learning_rate": 4.313801319910369e-06, "loss": 0.3556, "step": 5938 }, { "epoch": 1.468595450049456, "grad_norm": 0.7758526406092077, "learning_rate": 4.31357761999767e-06, "loss": 0.3582, "step": 5939 }, { "epoch": 1.4688427299703264, "grad_norm": 0.8036619876781145, "learning_rate": 4.313353889430188e-06, "loss": 0.3491, "step": 5940 }, { "epoch": 1.4690900098911968, "grad_norm": 0.7958546698736949, "learning_rate": 4.313130128211705e-06, "loss": 0.3411, "step": 5941 }, { "epoch": 1.4693372898120671, "grad_norm": 0.774361569148122, "learning_rate": 4.312906336346004e-06, "loss": 0.3736, "step": 5942 }, { "epoch": 1.4695845697329377, "grad_norm": 0.7926896763995549, "learning_rate": 4.312682513836867e-06, "loss": 0.3764, "step": 5943 }, { "epoch": 1.469831849653808, "grad_norm": 0.7877219218493611, "learning_rate": 4.312458660688077e-06, "loss": 0.3585, "step": 5944 }, { "epoch": 1.4700791295746785, "grad_norm": 0.7798335939691436, "learning_rate": 4.312234776903418e-06, "loss": 0.3757, "step": 5945 }, { "epoch": 1.470326409495549, "grad_norm": 0.8221645662934913, "learning_rate": 4.312010862486675e-06, "loss": 0.3823, "step": 5946 }, { "epoch": 1.4705736894164194, "grad_norm": 0.795619700632349, "learning_rate": 4.311786917441633e-06, "loss": 0.3631, "step": 5947 }, { "epoch": 1.4708209693372898, "grad_norm": 0.767339204847162, "learning_rate": 4.311562941772076e-06, "loss": 0.3823, "step": 5948 }, { "epoch": 1.4710682492581602, "grad_norm": 0.8034290853031101, "learning_rate": 4.311338935481791e-06, "loss": 0.3658, "step": 5949 }, { "epoch": 1.4713155291790307, "grad_norm": 0.7545021560773721, "learning_rate": 4.311114898574563e-06, "loss": 0.3748, "step": 5950 }, { "epoch": 1.4715628090999011, "grad_norm": 0.7743029976756036, "learning_rate": 4.3108908310541804e-06, "loss": 0.375, "step": 5951 }, { "epoch": 1.4718100890207715, "grad_norm": 0.772425300886846, "learning_rate": 4.31066673292443e-06, "loss": 0.396, "step": 5952 }, { "epoch": 1.472057368941642, "grad_norm": 0.7414754858746576, "learning_rate": 4.310442604189099e-06, "loss": 0.3743, "step": 5953 }, { "epoch": 1.4723046488625124, "grad_norm": 0.8285407569411976, "learning_rate": 4.310218444851977e-06, "loss": 0.3681, "step": 5954 }, { "epoch": 1.4725519287833828, "grad_norm": 0.7464165452879078, "learning_rate": 4.309994254916853e-06, "loss": 0.3721, "step": 5955 }, { "epoch": 1.4727992087042532, "grad_norm": 0.7488756988157483, "learning_rate": 4.309770034387515e-06, "loss": 0.3853, "step": 5956 }, { "epoch": 1.4730464886251236, "grad_norm": 0.8205382116087586, "learning_rate": 4.3095457832677545e-06, "loss": 0.3701, "step": 5957 }, { "epoch": 1.4732937685459941, "grad_norm": 0.774349236788954, "learning_rate": 4.309321501561361e-06, "loss": 0.384, "step": 5958 }, { "epoch": 1.4735410484668645, "grad_norm": 0.7771362194350268, "learning_rate": 4.309097189272127e-06, "loss": 0.3635, "step": 5959 }, { "epoch": 1.4737883283877349, "grad_norm": 0.7664401979074538, "learning_rate": 4.308872846403842e-06, "loss": 0.4215, "step": 5960 }, { "epoch": 1.4740356083086055, "grad_norm": 0.7549267950395546, "learning_rate": 4.308648472960298e-06, "loss": 0.3647, "step": 5961 }, { "epoch": 1.4742828882294758, "grad_norm": 0.7509448376246158, "learning_rate": 4.30842406894529e-06, "loss": 0.3638, "step": 5962 }, { "epoch": 1.4745301681503462, "grad_norm": 0.7759116195573315, "learning_rate": 4.3081996343626096e-06, "loss": 0.3753, "step": 5963 }, { "epoch": 1.4747774480712166, "grad_norm": 0.7879699945314984, "learning_rate": 4.30797516921605e-06, "loss": 0.3744, "step": 5964 }, { "epoch": 1.475024727992087, "grad_norm": 0.7584289239460251, "learning_rate": 4.307750673509406e-06, "loss": 0.3696, "step": 5965 }, { "epoch": 1.4752720079129575, "grad_norm": 0.7649871736320523, "learning_rate": 4.307526147246472e-06, "loss": 0.3713, "step": 5966 }, { "epoch": 1.475519287833828, "grad_norm": 0.7786360279474849, "learning_rate": 4.3073015904310435e-06, "loss": 0.3336, "step": 5967 }, { "epoch": 1.4757665677546983, "grad_norm": 0.7663713661137577, "learning_rate": 4.307077003066916e-06, "loss": 0.3632, "step": 5968 }, { "epoch": 1.4760138476755689, "grad_norm": 0.7673983939110526, "learning_rate": 4.306852385157885e-06, "loss": 0.3903, "step": 5969 }, { "epoch": 1.4762611275964392, "grad_norm": 0.7871539865230947, "learning_rate": 4.306627736707748e-06, "loss": 0.3785, "step": 5970 }, { "epoch": 1.4765084075173096, "grad_norm": 0.8426549949587084, "learning_rate": 4.306403057720302e-06, "loss": 0.3471, "step": 5971 }, { "epoch": 1.47675568743818, "grad_norm": 0.7936169928986676, "learning_rate": 4.306178348199344e-06, "loss": 0.3655, "step": 5972 }, { "epoch": 1.4770029673590503, "grad_norm": 0.7663971805278014, "learning_rate": 4.305953608148674e-06, "loss": 0.3755, "step": 5973 }, { "epoch": 1.477250247279921, "grad_norm": 0.7942551754824173, "learning_rate": 4.305728837572088e-06, "loss": 0.3877, "step": 5974 }, { "epoch": 1.4774975272007913, "grad_norm": 0.7784011197946462, "learning_rate": 4.305504036473389e-06, "loss": 0.3578, "step": 5975 }, { "epoch": 1.4777448071216617, "grad_norm": 0.8117069984728654, "learning_rate": 4.305279204856374e-06, "loss": 0.3517, "step": 5976 }, { "epoch": 1.4779920870425323, "grad_norm": 0.7791464256632459, "learning_rate": 4.3050543427248445e-06, "loss": 0.3894, "step": 5977 }, { "epoch": 1.4782393669634026, "grad_norm": 0.7618731325638851, "learning_rate": 4.304829450082601e-06, "loss": 0.3865, "step": 5978 }, { "epoch": 1.478486646884273, "grad_norm": 0.7644975850830349, "learning_rate": 4.304604526933444e-06, "loss": 0.3754, "step": 5979 }, { "epoch": 1.4787339268051434, "grad_norm": 0.797936709915137, "learning_rate": 4.304379573281176e-06, "loss": 0.3743, "step": 5980 }, { "epoch": 1.4789812067260137, "grad_norm": 0.7715577287531934, "learning_rate": 4.3041545891296e-06, "loss": 0.3763, "step": 5981 }, { "epoch": 1.4792284866468843, "grad_norm": 0.7420015925926727, "learning_rate": 4.303929574482519e-06, "loss": 0.3816, "step": 5982 }, { "epoch": 1.4794757665677547, "grad_norm": 0.7649712804123437, "learning_rate": 4.303704529343734e-06, "loss": 0.3607, "step": 5983 }, { "epoch": 1.479723046488625, "grad_norm": 0.7524684758115233, "learning_rate": 4.303479453717052e-06, "loss": 0.3868, "step": 5984 }, { "epoch": 1.4799703264094957, "grad_norm": 0.774369323619824, "learning_rate": 4.303254347606276e-06, "loss": 0.375, "step": 5985 }, { "epoch": 1.480217606330366, "grad_norm": 0.7880093254448461, "learning_rate": 4.303029211015211e-06, "loss": 0.369, "step": 5986 }, { "epoch": 1.4804648862512364, "grad_norm": 0.7905082195372578, "learning_rate": 4.302804043947664e-06, "loss": 0.3818, "step": 5987 }, { "epoch": 1.4807121661721068, "grad_norm": 0.7762016181391597, "learning_rate": 4.302578846407438e-06, "loss": 0.3958, "step": 5988 }, { "epoch": 1.4809594460929771, "grad_norm": 0.7709889249971662, "learning_rate": 4.3023536183983416e-06, "loss": 0.396, "step": 5989 }, { "epoch": 1.4812067260138477, "grad_norm": 0.7710661689503965, "learning_rate": 4.302128359924181e-06, "loss": 0.4052, "step": 5990 }, { "epoch": 1.481454005934718, "grad_norm": 0.7741141833977926, "learning_rate": 4.301903070988764e-06, "loss": 0.4015, "step": 5991 }, { "epoch": 1.4817012858555885, "grad_norm": 0.7943459886401544, "learning_rate": 4.301677751595899e-06, "loss": 0.3631, "step": 5992 }, { "epoch": 1.481948565776459, "grad_norm": 0.7359766913630278, "learning_rate": 4.3014524017493944e-06, "loss": 0.389, "step": 5993 }, { "epoch": 1.4821958456973294, "grad_norm": 0.7771492199534231, "learning_rate": 4.3012270214530585e-06, "loss": 0.3651, "step": 5994 }, { "epoch": 1.4824431256181998, "grad_norm": 0.7836646716891125, "learning_rate": 4.301001610710702e-06, "loss": 0.3519, "step": 5995 }, { "epoch": 1.4826904055390702, "grad_norm": 0.8154168192726611, "learning_rate": 4.300776169526134e-06, "loss": 0.3483, "step": 5996 }, { "epoch": 1.4829376854599405, "grad_norm": 0.7848785104864257, "learning_rate": 4.300550697903166e-06, "loss": 0.3572, "step": 5997 }, { "epoch": 1.4831849653808111, "grad_norm": 0.8050284963304133, "learning_rate": 4.300325195845608e-06, "loss": 0.3545, "step": 5998 }, { "epoch": 1.4834322453016815, "grad_norm": 0.7641469513323534, "learning_rate": 4.300099663357274e-06, "loss": 0.3999, "step": 5999 }, { "epoch": 1.4836795252225519, "grad_norm": 0.7824777929909854, "learning_rate": 4.299874100441973e-06, "loss": 0.3587, "step": 6000 }, { "epoch": 1.4839268051434225, "grad_norm": 0.7805335338129622, "learning_rate": 4.2996485071035195e-06, "loss": 0.3661, "step": 6001 }, { "epoch": 1.4841740850642928, "grad_norm": 0.8218328361126277, "learning_rate": 4.2994228833457275e-06, "loss": 0.3388, "step": 6002 }, { "epoch": 1.4844213649851632, "grad_norm": 0.7900183934886207, "learning_rate": 4.299197229172409e-06, "loss": 0.3741, "step": 6003 }, { "epoch": 1.4846686449060336, "grad_norm": 0.7697637834779915, "learning_rate": 4.298971544587378e-06, "loss": 0.3899, "step": 6004 }, { "epoch": 1.484915924826904, "grad_norm": 0.7996644676325034, "learning_rate": 4.298745829594451e-06, "loss": 0.3822, "step": 6005 }, { "epoch": 1.4851632047477745, "grad_norm": 0.7574889671941859, "learning_rate": 4.2985200841974426e-06, "loss": 0.4303, "step": 6006 }, { "epoch": 1.485410484668645, "grad_norm": 0.7900192938943521, "learning_rate": 4.298294308400167e-06, "loss": 0.4102, "step": 6007 }, { "epoch": 1.4856577645895153, "grad_norm": 0.7797834222623896, "learning_rate": 4.298068502206443e-06, "loss": 0.3827, "step": 6008 }, { "epoch": 1.4859050445103859, "grad_norm": 0.7756942566574225, "learning_rate": 4.2978426656200855e-06, "loss": 0.3734, "step": 6009 }, { "epoch": 1.4861523244312562, "grad_norm": 0.7812160791920478, "learning_rate": 4.2976167986449126e-06, "loss": 0.383, "step": 6010 }, { "epoch": 1.4863996043521266, "grad_norm": 0.7988888490523935, "learning_rate": 4.297390901284742e-06, "loss": 0.3502, "step": 6011 }, { "epoch": 1.486646884272997, "grad_norm": 0.769433523716827, "learning_rate": 4.297164973543392e-06, "loss": 0.3831, "step": 6012 }, { "epoch": 1.4868941641938673, "grad_norm": 0.7960086250619877, "learning_rate": 4.296939015424681e-06, "loss": 0.3527, "step": 6013 }, { "epoch": 1.487141444114738, "grad_norm": 0.7686007648273936, "learning_rate": 4.2967130269324294e-06, "loss": 0.376, "step": 6014 }, { "epoch": 1.4873887240356083, "grad_norm": 0.7997110496728094, "learning_rate": 4.296487008070456e-06, "loss": 0.3643, "step": 6015 }, { "epoch": 1.4876360039564787, "grad_norm": 0.7957552985352186, "learning_rate": 4.296260958842582e-06, "loss": 0.3531, "step": 6016 }, { "epoch": 1.4878832838773492, "grad_norm": 0.832625831228677, "learning_rate": 4.296034879252628e-06, "loss": 0.3799, "step": 6017 }, { "epoch": 1.4881305637982196, "grad_norm": 0.7726968750197082, "learning_rate": 4.295808769304414e-06, "loss": 0.3745, "step": 6018 }, { "epoch": 1.48837784371909, "grad_norm": 0.7691733846108981, "learning_rate": 4.295582629001765e-06, "loss": 0.3829, "step": 6019 }, { "epoch": 1.4886251236399604, "grad_norm": 0.7465123096767641, "learning_rate": 4.2953564583485005e-06, "loss": 0.3688, "step": 6020 }, { "epoch": 1.4888724035608307, "grad_norm": 0.8021524170248899, "learning_rate": 4.2951302573484445e-06, "loss": 0.3517, "step": 6021 }, { "epoch": 1.4891196834817013, "grad_norm": 0.7622013460485632, "learning_rate": 4.294904026005421e-06, "loss": 0.404, "step": 6022 }, { "epoch": 1.4893669634025717, "grad_norm": 0.7590934227395325, "learning_rate": 4.294677764323254e-06, "loss": 0.3664, "step": 6023 }, { "epoch": 1.489614243323442, "grad_norm": 0.7841194976988748, "learning_rate": 4.294451472305767e-06, "loss": 0.3761, "step": 6024 }, { "epoch": 1.4898615232443126, "grad_norm": 0.7405615229120289, "learning_rate": 4.294225149956786e-06, "loss": 0.3693, "step": 6025 }, { "epoch": 1.490108803165183, "grad_norm": 0.7563873926011574, "learning_rate": 4.293998797280136e-06, "loss": 0.3981, "step": 6026 }, { "epoch": 1.4903560830860534, "grad_norm": 0.7850324036446026, "learning_rate": 4.293772414279643e-06, "loss": 0.3563, "step": 6027 }, { "epoch": 1.4906033630069238, "grad_norm": 0.7926794879698753, "learning_rate": 4.293546000959133e-06, "loss": 0.3569, "step": 6028 }, { "epoch": 1.4908506429277943, "grad_norm": 0.7384082472140292, "learning_rate": 4.293319557322435e-06, "loss": 0.3606, "step": 6029 }, { "epoch": 1.4910979228486647, "grad_norm": 0.7690962116362583, "learning_rate": 4.293093083373374e-06, "loss": 0.3968, "step": 6030 }, { "epoch": 1.491345202769535, "grad_norm": 0.8169308820298585, "learning_rate": 4.29286657911578e-06, "loss": 0.3785, "step": 6031 }, { "epoch": 1.4915924826904057, "grad_norm": 0.7921041982901503, "learning_rate": 4.29264004455348e-06, "loss": 0.396, "step": 6032 }, { "epoch": 1.491839762611276, "grad_norm": 0.7852695521114116, "learning_rate": 4.292413479690305e-06, "loss": 0.3571, "step": 6033 }, { "epoch": 1.4920870425321464, "grad_norm": 0.7586619314102113, "learning_rate": 4.292186884530084e-06, "loss": 0.3919, "step": 6034 }, { "epoch": 1.4923343224530168, "grad_norm": 0.7782137448836814, "learning_rate": 4.291960259076645e-06, "loss": 0.3776, "step": 6035 }, { "epoch": 1.4925816023738872, "grad_norm": 0.7740515325643257, "learning_rate": 4.2917336033338216e-06, "loss": 0.3734, "step": 6036 }, { "epoch": 1.4928288822947577, "grad_norm": 0.7358123620604828, "learning_rate": 4.291506917305443e-06, "loss": 0.3722, "step": 6037 }, { "epoch": 1.493076162215628, "grad_norm": 0.7773030037943368, "learning_rate": 4.2912802009953426e-06, "loss": 0.3806, "step": 6038 }, { "epoch": 1.4933234421364985, "grad_norm": 0.7951850607638318, "learning_rate": 4.291053454407351e-06, "loss": 0.3876, "step": 6039 }, { "epoch": 1.493570722057369, "grad_norm": 0.7643338076214721, "learning_rate": 4.290826677545301e-06, "loss": 0.3784, "step": 6040 }, { "epoch": 1.4938180019782394, "grad_norm": 0.7721997686703538, "learning_rate": 4.290599870413027e-06, "loss": 0.3679, "step": 6041 }, { "epoch": 1.4940652818991098, "grad_norm": 0.7824213071944561, "learning_rate": 4.290373033014361e-06, "loss": 0.3717, "step": 6042 }, { "epoch": 1.4943125618199802, "grad_norm": 0.7993993603879539, "learning_rate": 4.290146165353139e-06, "loss": 0.3643, "step": 6043 }, { "epoch": 1.4945598417408505, "grad_norm": 0.773545113758887, "learning_rate": 4.2899192674331946e-06, "loss": 0.3814, "step": 6044 }, { "epoch": 1.4948071216617211, "grad_norm": 0.766758869697089, "learning_rate": 4.289692339258364e-06, "loss": 0.3578, "step": 6045 }, { "epoch": 1.4950544015825915, "grad_norm": 0.751800808975268, "learning_rate": 4.289465380832481e-06, "loss": 0.4025, "step": 6046 }, { "epoch": 1.4953016815034619, "grad_norm": 0.8300154602000364, "learning_rate": 4.2892383921593835e-06, "loss": 0.3533, "step": 6047 }, { "epoch": 1.4955489614243325, "grad_norm": 0.7903162115111397, "learning_rate": 4.289011373242908e-06, "loss": 0.3831, "step": 6048 }, { "epoch": 1.4957962413452028, "grad_norm": 0.8053680260267784, "learning_rate": 4.288784324086892e-06, "loss": 0.3751, "step": 6049 }, { "epoch": 1.4960435212660732, "grad_norm": 0.8008057450894418, "learning_rate": 4.288557244695172e-06, "loss": 0.3616, "step": 6050 }, { "epoch": 1.4962908011869436, "grad_norm": 0.7683353214269775, "learning_rate": 4.2883301350715885e-06, "loss": 0.3639, "step": 6051 }, { "epoch": 1.496538081107814, "grad_norm": 0.7373049087786842, "learning_rate": 4.288102995219978e-06, "loss": 0.4051, "step": 6052 }, { "epoch": 1.4967853610286845, "grad_norm": 0.7969580752380845, "learning_rate": 4.287875825144182e-06, "loss": 0.3614, "step": 6053 }, { "epoch": 1.497032640949555, "grad_norm": 0.734440431530923, "learning_rate": 4.287648624848039e-06, "loss": 0.3946, "step": 6054 }, { "epoch": 1.4972799208704253, "grad_norm": 0.7979032129899907, "learning_rate": 4.28742139433539e-06, "loss": 0.3759, "step": 6055 }, { "epoch": 1.4975272007912959, "grad_norm": 0.7710225687783632, "learning_rate": 4.287194133610074e-06, "loss": 0.3639, "step": 6056 }, { "epoch": 1.4977744807121662, "grad_norm": 0.7616108307097096, "learning_rate": 4.286966842675934e-06, "loss": 0.3768, "step": 6057 }, { "epoch": 1.4980217606330366, "grad_norm": 0.7821479538970799, "learning_rate": 4.2867395215368136e-06, "loss": 0.3363, "step": 6058 }, { "epoch": 1.498269040553907, "grad_norm": 0.7740202515989815, "learning_rate": 4.286512170196552e-06, "loss": 0.3847, "step": 6059 }, { "epoch": 1.4985163204747773, "grad_norm": 0.747051363140567, "learning_rate": 4.286284788658994e-06, "loss": 0.3576, "step": 6060 }, { "epoch": 1.498763600395648, "grad_norm": 0.7715871462316806, "learning_rate": 4.286057376927982e-06, "loss": 0.381, "step": 6061 }, { "epoch": 1.4990108803165183, "grad_norm": 0.7950884341451383, "learning_rate": 4.28582993500736e-06, "loss": 0.3747, "step": 6062 }, { "epoch": 1.4992581602373887, "grad_norm": 0.8013480981229845, "learning_rate": 4.285602462900974e-06, "loss": 0.3667, "step": 6063 }, { "epoch": 1.4995054401582593, "grad_norm": 0.7495439126407275, "learning_rate": 4.285374960612667e-06, "loss": 0.3862, "step": 6064 }, { "epoch": 1.4997527200791296, "grad_norm": 0.7620085378201085, "learning_rate": 4.2851474281462855e-06, "loss": 0.3781, "step": 6065 }, { "epoch": 1.5, "grad_norm": 0.7605010951536779, "learning_rate": 4.284919865505676e-06, "loss": 0.3881, "step": 6066 }, { "epoch": 1.5002472799208704, "grad_norm": 0.7622526338433455, "learning_rate": 4.284692272694683e-06, "loss": 0.407, "step": 6067 }, { "epoch": 1.5004945598417407, "grad_norm": 0.7886964981955813, "learning_rate": 4.284464649717154e-06, "loss": 0.3935, "step": 6068 }, { "epoch": 1.5007418397626113, "grad_norm": 0.7944782241962716, "learning_rate": 4.284236996576938e-06, "loss": 0.3683, "step": 6069 }, { "epoch": 1.5009891196834817, "grad_norm": 0.7704323097930601, "learning_rate": 4.284009313277883e-06, "loss": 0.3906, "step": 6070 }, { "epoch": 1.5012363996043523, "grad_norm": 0.8007337675972512, "learning_rate": 4.2837815998238355e-06, "loss": 0.3662, "step": 6071 }, { "epoch": 1.5014836795252227, "grad_norm": 0.775126836436552, "learning_rate": 4.283553856218647e-06, "loss": 0.3748, "step": 6072 }, { "epoch": 1.501730959446093, "grad_norm": 0.8084331502619998, "learning_rate": 4.283326082466163e-06, "loss": 0.3463, "step": 6073 }, { "epoch": 1.5019782393669634, "grad_norm": 0.7456795410160211, "learning_rate": 4.283098278570239e-06, "loss": 0.3765, "step": 6074 }, { "epoch": 1.5022255192878338, "grad_norm": 0.7865805420265817, "learning_rate": 4.282870444534722e-06, "loss": 0.3822, "step": 6075 }, { "epoch": 1.5024727992087041, "grad_norm": 0.8032137645704756, "learning_rate": 4.2826425803634646e-06, "loss": 0.3506, "step": 6076 }, { "epoch": 1.5027200791295747, "grad_norm": 0.8023605490303767, "learning_rate": 4.282414686060316e-06, "loss": 0.378, "step": 6077 }, { "epoch": 1.502967359050445, "grad_norm": 0.7895865423798535, "learning_rate": 4.28218676162913e-06, "loss": 0.3519, "step": 6078 }, { "epoch": 1.5032146389713157, "grad_norm": 0.8153814147240069, "learning_rate": 4.2819588070737605e-06, "loss": 0.3658, "step": 6079 }, { "epoch": 1.503461918892186, "grad_norm": 0.8049207418460308, "learning_rate": 4.281730822398059e-06, "loss": 0.3681, "step": 6080 }, { "epoch": 1.5037091988130564, "grad_norm": 0.7670173756448261, "learning_rate": 4.281502807605879e-06, "loss": 0.362, "step": 6081 }, { "epoch": 1.5039564787339268, "grad_norm": 0.7683207523430311, "learning_rate": 4.281274762701075e-06, "loss": 0.3394, "step": 6082 }, { "epoch": 1.5042037586547972, "grad_norm": 0.7415857959145655, "learning_rate": 4.281046687687502e-06, "loss": 0.3425, "step": 6083 }, { "epoch": 1.5044510385756675, "grad_norm": 0.7647316370287017, "learning_rate": 4.280818582569014e-06, "loss": 0.3553, "step": 6084 }, { "epoch": 1.5046983184965381, "grad_norm": 0.7882189723859808, "learning_rate": 4.280590447349468e-06, "loss": 0.3405, "step": 6085 }, { "epoch": 1.5049455984174085, "grad_norm": 0.7864024910426413, "learning_rate": 4.28036228203272e-06, "loss": 0.3901, "step": 6086 }, { "epoch": 1.505192878338279, "grad_norm": 0.7626131674787484, "learning_rate": 4.280134086622625e-06, "loss": 0.3586, "step": 6087 }, { "epoch": 1.5054401582591495, "grad_norm": 0.7599738711614584, "learning_rate": 4.279905861123041e-06, "loss": 0.3653, "step": 6088 }, { "epoch": 1.5056874381800198, "grad_norm": 0.7755680670091425, "learning_rate": 4.279677605537828e-06, "loss": 0.3529, "step": 6089 }, { "epoch": 1.5059347181008902, "grad_norm": 0.7719046445111032, "learning_rate": 4.279449319870841e-06, "loss": 0.394, "step": 6090 }, { "epoch": 1.5061819980217606, "grad_norm": 0.7810303046604633, "learning_rate": 4.279221004125939e-06, "loss": 0.3643, "step": 6091 }, { "epoch": 1.506429277942631, "grad_norm": 0.7675129198016463, "learning_rate": 4.278992658306984e-06, "loss": 0.3485, "step": 6092 }, { "epoch": 1.5066765578635015, "grad_norm": 0.7666548898955136, "learning_rate": 4.278764282417833e-06, "loss": 0.3662, "step": 6093 }, { "epoch": 1.506923837784372, "grad_norm": 0.7711637605873575, "learning_rate": 4.278535876462348e-06, "loss": 0.3791, "step": 6094 }, { "epoch": 1.5071711177052425, "grad_norm": 0.7615250109394368, "learning_rate": 4.2783074404443874e-06, "loss": 0.3879, "step": 6095 }, { "epoch": 1.5074183976261128, "grad_norm": 0.7921551715005914, "learning_rate": 4.278078974367814e-06, "loss": 0.3903, "step": 6096 }, { "epoch": 1.5076656775469832, "grad_norm": 0.7562054894609398, "learning_rate": 4.27785047823649e-06, "loss": 0.4089, "step": 6097 }, { "epoch": 1.5079129574678536, "grad_norm": 0.7760943637379899, "learning_rate": 4.277621952054277e-06, "loss": 0.3762, "step": 6098 }, { "epoch": 1.508160237388724, "grad_norm": 0.7740672822777018, "learning_rate": 4.277393395825038e-06, "loss": 0.363, "step": 6099 }, { "epoch": 1.5084075173095943, "grad_norm": 0.7875067047516912, "learning_rate": 4.277164809552636e-06, "loss": 0.3859, "step": 6100 }, { "epoch": 1.508654797230465, "grad_norm": 0.7796228996416912, "learning_rate": 4.276936193240935e-06, "loss": 0.3947, "step": 6101 }, { "epoch": 1.5089020771513353, "grad_norm": 0.8207621639639857, "learning_rate": 4.276707546893799e-06, "loss": 0.351, "step": 6102 }, { "epoch": 1.5091493570722059, "grad_norm": 0.8052909911372065, "learning_rate": 4.276478870515092e-06, "loss": 0.3542, "step": 6103 }, { "epoch": 1.5093966369930762, "grad_norm": 0.79392696883247, "learning_rate": 4.276250164108681e-06, "loss": 0.3814, "step": 6104 }, { "epoch": 1.5096439169139466, "grad_norm": 0.7876320078386326, "learning_rate": 4.276021427678431e-06, "loss": 0.3865, "step": 6105 }, { "epoch": 1.509891196834817, "grad_norm": 0.7936948403914527, "learning_rate": 4.275792661228209e-06, "loss": 0.3575, "step": 6106 }, { "epoch": 1.5101384767556874, "grad_norm": 0.7892938369215219, "learning_rate": 4.27556386476188e-06, "loss": 0.3496, "step": 6107 }, { "epoch": 1.5103857566765577, "grad_norm": 0.7858559463294982, "learning_rate": 4.275335038283314e-06, "loss": 0.3663, "step": 6108 }, { "epoch": 1.5106330365974283, "grad_norm": 0.7715681729933014, "learning_rate": 4.275106181796376e-06, "loss": 0.3761, "step": 6109 }, { "epoch": 1.5108803165182987, "grad_norm": 0.7942426609763511, "learning_rate": 4.274877295304937e-06, "loss": 0.379, "step": 6110 }, { "epoch": 1.5111275964391693, "grad_norm": 0.7880311090967522, "learning_rate": 4.274648378812863e-06, "loss": 0.3489, "step": 6111 }, { "epoch": 1.5113748763600396, "grad_norm": 0.7540391778876903, "learning_rate": 4.274419432324026e-06, "loss": 0.3742, "step": 6112 }, { "epoch": 1.51162215628091, "grad_norm": 0.8007362567784394, "learning_rate": 4.274190455842294e-06, "loss": 0.3831, "step": 6113 }, { "epoch": 1.5118694362017804, "grad_norm": 0.8085224992602257, "learning_rate": 4.2739614493715395e-06, "loss": 0.3586, "step": 6114 }, { "epoch": 1.5121167161226508, "grad_norm": 0.7898497542084066, "learning_rate": 4.273732412915631e-06, "loss": 0.3925, "step": 6115 }, { "epoch": 1.5123639960435211, "grad_norm": 0.7583691735264015, "learning_rate": 4.273503346478441e-06, "loss": 0.382, "step": 6116 }, { "epoch": 1.5126112759643917, "grad_norm": 0.831010031019208, "learning_rate": 4.2732742500638415e-06, "loss": 0.3785, "step": 6117 }, { "epoch": 1.512858555885262, "grad_norm": 0.775719286883691, "learning_rate": 4.273045123675705e-06, "loss": 0.3871, "step": 6118 }, { "epoch": 1.5131058358061327, "grad_norm": 0.7810709803982615, "learning_rate": 4.272815967317904e-06, "loss": 0.3497, "step": 6119 }, { "epoch": 1.513353115727003, "grad_norm": 0.7639357058431699, "learning_rate": 4.272586780994312e-06, "loss": 0.3626, "step": 6120 }, { "epoch": 1.5136003956478734, "grad_norm": 0.7727717478438458, "learning_rate": 4.272357564708804e-06, "loss": 0.3378, "step": 6121 }, { "epoch": 1.5138476755687438, "grad_norm": 0.7752255251153589, "learning_rate": 4.272128318465251e-06, "loss": 0.382, "step": 6122 }, { "epoch": 1.5140949554896141, "grad_norm": 0.7453729569225749, "learning_rate": 4.271899042267532e-06, "loss": 0.3581, "step": 6123 }, { "epoch": 1.5143422354104845, "grad_norm": 0.8002693648548048, "learning_rate": 4.27166973611952e-06, "loss": 0.3696, "step": 6124 }, { "epoch": 1.514589515331355, "grad_norm": 0.763531423569293, "learning_rate": 4.271440400025093e-06, "loss": 0.3621, "step": 6125 }, { "epoch": 1.5148367952522255, "grad_norm": 0.8089130434400962, "learning_rate": 4.271211033988125e-06, "loss": 0.3521, "step": 6126 }, { "epoch": 1.515084075173096, "grad_norm": 0.7732815393295857, "learning_rate": 4.2709816380124945e-06, "loss": 0.3795, "step": 6127 }, { "epoch": 1.5153313550939664, "grad_norm": 0.7558898179452991, "learning_rate": 4.270752212102079e-06, "loss": 0.3911, "step": 6128 }, { "epoch": 1.5155786350148368, "grad_norm": 0.7828057072512415, "learning_rate": 4.270522756260756e-06, "loss": 0.3979, "step": 6129 }, { "epoch": 1.5158259149357072, "grad_norm": 0.7800893025617197, "learning_rate": 4.270293270492404e-06, "loss": 0.3542, "step": 6130 }, { "epoch": 1.5160731948565775, "grad_norm": 0.761180823488512, "learning_rate": 4.2700637548009014e-06, "loss": 0.3667, "step": 6131 }, { "epoch": 1.516320474777448, "grad_norm": 0.7718718353633414, "learning_rate": 4.269834209190129e-06, "loss": 0.3882, "step": 6132 }, { "epoch": 1.5165677546983185, "grad_norm": 0.748856295945753, "learning_rate": 4.269604633663965e-06, "loss": 0.3776, "step": 6133 }, { "epoch": 1.5168150346191889, "grad_norm": 0.7642176466769041, "learning_rate": 4.269375028226292e-06, "loss": 0.3842, "step": 6134 }, { "epoch": 1.5170623145400595, "grad_norm": 0.7870161629853357, "learning_rate": 4.26914539288099e-06, "loss": 0.3631, "step": 6135 }, { "epoch": 1.5173095944609298, "grad_norm": 0.800210020292738, "learning_rate": 4.268915727631941e-06, "loss": 0.3387, "step": 6136 }, { "epoch": 1.5175568743818002, "grad_norm": 0.7624219819295027, "learning_rate": 4.268686032483026e-06, "loss": 0.3946, "step": 6137 }, { "epoch": 1.5178041543026706, "grad_norm": 0.8004175273686828, "learning_rate": 4.268456307438128e-06, "loss": 0.3944, "step": 6138 }, { "epoch": 1.518051434223541, "grad_norm": 0.7799753886020464, "learning_rate": 4.26822655250113e-06, "loss": 0.3967, "step": 6139 }, { "epoch": 1.5182987141444113, "grad_norm": 0.7838244001709641, "learning_rate": 4.267996767675916e-06, "loss": 0.361, "step": 6140 }, { "epoch": 1.518545994065282, "grad_norm": 0.782720138032773, "learning_rate": 4.267766952966369e-06, "loss": 0.365, "step": 6141 }, { "epoch": 1.5187932739861523, "grad_norm": 0.8131693467505169, "learning_rate": 4.2675371083763754e-06, "loss": 0.367, "step": 6142 }, { "epoch": 1.5190405539070229, "grad_norm": 0.7590393529717546, "learning_rate": 4.267307233909818e-06, "loss": 0.3577, "step": 6143 }, { "epoch": 1.5192878338278932, "grad_norm": 0.7573041929827176, "learning_rate": 4.267077329570584e-06, "loss": 0.3859, "step": 6144 }, { "epoch": 1.5195351137487636, "grad_norm": 0.796283987005283, "learning_rate": 4.2668473953625585e-06, "loss": 0.3698, "step": 6145 }, { "epoch": 1.519782393669634, "grad_norm": 0.8181103417014607, "learning_rate": 4.266617431289628e-06, "loss": 0.382, "step": 6146 }, { "epoch": 1.5200296735905043, "grad_norm": 0.7839500061858623, "learning_rate": 4.2663874373556806e-06, "loss": 0.3689, "step": 6147 }, { "epoch": 1.520276953511375, "grad_norm": 0.7869798569785892, "learning_rate": 4.266157413564604e-06, "loss": 0.3659, "step": 6148 }, { "epoch": 1.5205242334322453, "grad_norm": 0.835398309391355, "learning_rate": 4.265927359920284e-06, "loss": 0.3792, "step": 6149 }, { "epoch": 1.520771513353116, "grad_norm": 0.7783942003749401, "learning_rate": 4.265697276426612e-06, "loss": 0.3725, "step": 6150 }, { "epoch": 1.5210187932739863, "grad_norm": 0.8031343587562415, "learning_rate": 4.265467163087475e-06, "loss": 0.3536, "step": 6151 }, { "epoch": 1.5212660731948566, "grad_norm": 0.7940942551064276, "learning_rate": 4.265237019906764e-06, "loss": 0.3577, "step": 6152 }, { "epoch": 1.521513353115727, "grad_norm": 0.7718337963632657, "learning_rate": 4.2650068468883685e-06, "loss": 0.3456, "step": 6153 }, { "epoch": 1.5217606330365974, "grad_norm": 0.7742443166942757, "learning_rate": 4.264776644036178e-06, "loss": 0.3614, "step": 6154 }, { "epoch": 1.5220079129574677, "grad_norm": 0.7750379877972666, "learning_rate": 4.2645464113540855e-06, "loss": 0.3617, "step": 6155 }, { "epoch": 1.5222551928783383, "grad_norm": 0.7905651083174965, "learning_rate": 4.264316148845983e-06, "loss": 0.3379, "step": 6156 }, { "epoch": 1.5225024727992087, "grad_norm": 0.7763276122546381, "learning_rate": 4.26408585651576e-06, "loss": 0.3849, "step": 6157 }, { "epoch": 1.5227497527200793, "grad_norm": 0.8034363809656621, "learning_rate": 4.2638555343673106e-06, "loss": 0.386, "step": 6158 }, { "epoch": 1.5229970326409497, "grad_norm": 0.7887072314761925, "learning_rate": 4.263625182404528e-06, "loss": 0.3628, "step": 6159 }, { "epoch": 1.52324431256182, "grad_norm": 0.7629496976338871, "learning_rate": 4.263394800631307e-06, "loss": 0.3841, "step": 6160 }, { "epoch": 1.5234915924826904, "grad_norm": 0.7732946261651045, "learning_rate": 4.263164389051538e-06, "loss": 0.3642, "step": 6161 }, { "epoch": 1.5237388724035608, "grad_norm": 0.7607934907323374, "learning_rate": 4.2629339476691195e-06, "loss": 0.3793, "step": 6162 }, { "epoch": 1.5239861523244311, "grad_norm": 0.7542111205836451, "learning_rate": 4.2627034764879445e-06, "loss": 0.3538, "step": 6163 }, { "epoch": 1.5242334322453017, "grad_norm": 0.7920146310836941, "learning_rate": 4.26247297551191e-06, "loss": 0.3526, "step": 6164 }, { "epoch": 1.524480712166172, "grad_norm": 0.7515790710851793, "learning_rate": 4.262242444744911e-06, "loss": 0.3565, "step": 6165 }, { "epoch": 1.5247279920870427, "grad_norm": 0.7422904134083594, "learning_rate": 4.262011884190845e-06, "loss": 0.4094, "step": 6166 }, { "epoch": 1.524975272007913, "grad_norm": 0.7858951013298122, "learning_rate": 4.261781293853609e-06, "loss": 0.3615, "step": 6167 }, { "epoch": 1.5252225519287834, "grad_norm": 0.7724128251522274, "learning_rate": 4.2615506737371e-06, "loss": 0.3665, "step": 6168 }, { "epoch": 1.5254698318496538, "grad_norm": 0.7642504508864308, "learning_rate": 4.2613200238452166e-06, "loss": 0.3689, "step": 6169 }, { "epoch": 1.5257171117705242, "grad_norm": 0.7532260538576143, "learning_rate": 4.261089344181857e-06, "loss": 0.381, "step": 6170 }, { "epoch": 1.5259643916913945, "grad_norm": 0.7811175855967843, "learning_rate": 4.260858634750922e-06, "loss": 0.3704, "step": 6171 }, { "epoch": 1.5262116716122651, "grad_norm": 0.7974575057990625, "learning_rate": 4.260627895556309e-06, "loss": 0.3758, "step": 6172 }, { "epoch": 1.5264589515331355, "grad_norm": 0.7860238911675871, "learning_rate": 4.260397126601919e-06, "loss": 0.3615, "step": 6173 }, { "epoch": 1.526706231454006, "grad_norm": 0.7896462174699899, "learning_rate": 4.260166327891654e-06, "loss": 0.3687, "step": 6174 }, { "epoch": 1.5269535113748764, "grad_norm": 0.8038191291052161, "learning_rate": 4.2599354994294125e-06, "loss": 0.3588, "step": 6175 }, { "epoch": 1.5272007912957468, "grad_norm": 0.7972288564370071, "learning_rate": 4.259704641219099e-06, "loss": 0.3675, "step": 6176 }, { "epoch": 1.5274480712166172, "grad_norm": 0.7967221673537175, "learning_rate": 4.259473753264614e-06, "loss": 0.3594, "step": 6177 }, { "epoch": 1.5276953511374876, "grad_norm": 0.7805683861460124, "learning_rate": 4.259242835569861e-06, "loss": 0.3722, "step": 6178 }, { "epoch": 1.527942631058358, "grad_norm": 0.7890277061162352, "learning_rate": 4.259011888138743e-06, "loss": 0.3621, "step": 6179 }, { "epoch": 1.5281899109792285, "grad_norm": 0.7267517364056546, "learning_rate": 4.258780910975163e-06, "loss": 0.3895, "step": 6180 }, { "epoch": 1.5284371909000989, "grad_norm": 0.7689083431997062, "learning_rate": 4.258549904083026e-06, "loss": 0.39, "step": 6181 }, { "epoch": 1.5286844708209695, "grad_norm": 0.7885834423254036, "learning_rate": 4.258318867466236e-06, "loss": 0.3948, "step": 6182 }, { "epoch": 1.5289317507418398, "grad_norm": 0.7899422567821546, "learning_rate": 4.258087801128698e-06, "loss": 0.4023, "step": 6183 }, { "epoch": 1.5291790306627102, "grad_norm": 0.7991697232375004, "learning_rate": 4.25785670507432e-06, "loss": 0.3868, "step": 6184 }, { "epoch": 1.5294263105835806, "grad_norm": 0.7809597400016723, "learning_rate": 4.257625579307006e-06, "loss": 0.382, "step": 6185 }, { "epoch": 1.529673590504451, "grad_norm": 0.7777086796317169, "learning_rate": 4.257394423830662e-06, "loss": 0.3935, "step": 6186 }, { "epoch": 1.5299208704253213, "grad_norm": 0.7855397416913252, "learning_rate": 4.257163238649197e-06, "loss": 0.3477, "step": 6187 }, { "epoch": 1.530168150346192, "grad_norm": 0.7862363962940774, "learning_rate": 4.256932023766518e-06, "loss": 0.3984, "step": 6188 }, { "epoch": 1.5304154302670623, "grad_norm": 0.7938848999775562, "learning_rate": 4.256700779186533e-06, "loss": 0.3708, "step": 6189 }, { "epoch": 1.5306627101879329, "grad_norm": 0.8108587705166987, "learning_rate": 4.2564695049131525e-06, "loss": 0.3529, "step": 6190 }, { "epoch": 1.5309099901088032, "grad_norm": 0.7613992804791707, "learning_rate": 4.2562382009502825e-06, "loss": 0.3774, "step": 6191 }, { "epoch": 1.5311572700296736, "grad_norm": 0.7830619693601154, "learning_rate": 4.256006867301835e-06, "loss": 0.3527, "step": 6192 }, { "epoch": 1.531404549950544, "grad_norm": 0.7636220829083064, "learning_rate": 4.255775503971719e-06, "loss": 0.3668, "step": 6193 }, { "epoch": 1.5316518298714143, "grad_norm": 0.7804447870305125, "learning_rate": 4.2555441109638476e-06, "loss": 0.3789, "step": 6194 }, { "epoch": 1.5318991097922847, "grad_norm": 0.7569409006497775, "learning_rate": 4.25531268828213e-06, "loss": 0.3638, "step": 6195 }, { "epoch": 1.5321463897131553, "grad_norm": 0.7826339001985123, "learning_rate": 4.2550812359304775e-06, "loss": 0.3986, "step": 6196 }, { "epoch": 1.5323936696340257, "grad_norm": 0.8130787406930612, "learning_rate": 4.254849753912803e-06, "loss": 0.3733, "step": 6197 }, { "epoch": 1.5326409495548963, "grad_norm": 0.813139404043533, "learning_rate": 4.254618242233019e-06, "loss": 0.3646, "step": 6198 }, { "epoch": 1.5328882294757666, "grad_norm": 0.8324734600754249, "learning_rate": 4.254386700895039e-06, "loss": 0.3244, "step": 6199 }, { "epoch": 1.533135509396637, "grad_norm": 0.7975604048234647, "learning_rate": 4.254155129902777e-06, "loss": 0.3724, "step": 6200 }, { "epoch": 1.5333827893175074, "grad_norm": 0.7865253099133579, "learning_rate": 4.253923529260148e-06, "loss": 0.3563, "step": 6201 }, { "epoch": 1.5336300692383777, "grad_norm": 0.7662768502344606, "learning_rate": 4.2536918989710645e-06, "loss": 0.3948, "step": 6202 }, { "epoch": 1.5338773491592481, "grad_norm": 0.7737491637287415, "learning_rate": 4.253460239039443e-06, "loss": 0.3554, "step": 6203 }, { "epoch": 1.5341246290801187, "grad_norm": 0.7599317651541431, "learning_rate": 4.2532285494692e-06, "loss": 0.4077, "step": 6204 }, { "epoch": 1.534371909000989, "grad_norm": 0.7557488680163431, "learning_rate": 4.25299683026425e-06, "loss": 0.3418, "step": 6205 }, { "epoch": 1.5346191889218597, "grad_norm": 0.7772282689622431, "learning_rate": 4.252765081428511e-06, "loss": 0.3711, "step": 6206 }, { "epoch": 1.53486646884273, "grad_norm": 0.7574955473937013, "learning_rate": 4.2525333029659e-06, "loss": 0.3779, "step": 6207 }, { "epoch": 1.5351137487636004, "grad_norm": 0.7589666959914868, "learning_rate": 4.252301494880334e-06, "loss": 0.3256, "step": 6208 }, { "epoch": 1.5353610286844708, "grad_norm": 0.84809804429515, "learning_rate": 4.252069657175732e-06, "loss": 0.3614, "step": 6209 }, { "epoch": 1.5356083086053411, "grad_norm": 0.799032332620406, "learning_rate": 4.251837789856013e-06, "loss": 0.388, "step": 6210 }, { "epoch": 1.5358555885262115, "grad_norm": 0.7975272670275316, "learning_rate": 4.251605892925096e-06, "loss": 0.4137, "step": 6211 }, { "epoch": 1.536102868447082, "grad_norm": 0.751769531648611, "learning_rate": 4.2513739663868995e-06, "loss": 0.3987, "step": 6212 }, { "epoch": 1.5363501483679525, "grad_norm": 0.7574017399358111, "learning_rate": 4.251142010245345e-06, "loss": 0.3968, "step": 6213 }, { "epoch": 1.536597428288823, "grad_norm": 0.7789356970810858, "learning_rate": 4.250910024504354e-06, "loss": 0.3718, "step": 6214 }, { "epoch": 1.5368447082096934, "grad_norm": 0.7904701718069009, "learning_rate": 4.2506780091678455e-06, "loss": 0.3671, "step": 6215 }, { "epoch": 1.5370919881305638, "grad_norm": 0.7773028221724643, "learning_rate": 4.250445964239744e-06, "loss": 0.3763, "step": 6216 }, { "epoch": 1.5373392680514342, "grad_norm": 0.8102894939897367, "learning_rate": 4.250213889723969e-06, "loss": 0.3628, "step": 6217 }, { "epoch": 1.5375865479723045, "grad_norm": 0.8007623253599726, "learning_rate": 4.249981785624445e-06, "loss": 0.3295, "step": 6218 }, { "epoch": 1.537833827893175, "grad_norm": 0.774261440435981, "learning_rate": 4.249749651945094e-06, "loss": 0.3821, "step": 6219 }, { "epoch": 1.5380811078140455, "grad_norm": 0.7472464858625096, "learning_rate": 4.249517488689842e-06, "loss": 0.4039, "step": 6220 }, { "epoch": 1.5383283877349159, "grad_norm": 0.7795649373187207, "learning_rate": 4.24928529586261e-06, "loss": 0.3441, "step": 6221 }, { "epoch": 1.5385756676557865, "grad_norm": 0.7803340686615338, "learning_rate": 4.249053073467325e-06, "loss": 0.3839, "step": 6222 }, { "epoch": 1.5388229475766568, "grad_norm": 0.7569537967537963, "learning_rate": 4.248820821507912e-06, "loss": 0.3863, "step": 6223 }, { "epoch": 1.5390702274975272, "grad_norm": 0.793468132244622, "learning_rate": 4.248588539988296e-06, "loss": 0.3705, "step": 6224 }, { "epoch": 1.5393175074183976, "grad_norm": 0.785787606303264, "learning_rate": 4.248356228912404e-06, "loss": 0.3778, "step": 6225 }, { "epoch": 1.539564787339268, "grad_norm": 0.7667621318236404, "learning_rate": 4.248123888284162e-06, "loss": 0.3681, "step": 6226 }, { "epoch": 1.5398120672601385, "grad_norm": 0.766709412672787, "learning_rate": 4.247891518107499e-06, "loss": 0.3634, "step": 6227 }, { "epoch": 1.540059347181009, "grad_norm": 0.792037782044854, "learning_rate": 4.24765911838634e-06, "loss": 0.3639, "step": 6228 }, { "epoch": 1.5403066271018795, "grad_norm": 0.7562486806641074, "learning_rate": 4.2474266891246146e-06, "loss": 0.3667, "step": 6229 }, { "epoch": 1.5405539070227499, "grad_norm": 0.7689167068820432, "learning_rate": 4.247194230326252e-06, "loss": 0.3508, "step": 6230 }, { "epoch": 1.5408011869436202, "grad_norm": 0.7962805892539311, "learning_rate": 4.246961741995182e-06, "loss": 0.3649, "step": 6231 }, { "epoch": 1.5410484668644906, "grad_norm": 0.808327677399416, "learning_rate": 4.246729224135331e-06, "loss": 0.3757, "step": 6232 }, { "epoch": 1.541295746785361, "grad_norm": 0.7939395425647082, "learning_rate": 4.246496676750633e-06, "loss": 0.3958, "step": 6233 }, { "epoch": 1.5415430267062313, "grad_norm": 0.7546085843763278, "learning_rate": 4.246264099845017e-06, "loss": 0.338, "step": 6234 }, { "epoch": 1.541790306627102, "grad_norm": 0.7767257652870975, "learning_rate": 4.246031493422415e-06, "loss": 0.3893, "step": 6235 }, { "epoch": 1.5420375865479723, "grad_norm": 0.7678053830056679, "learning_rate": 4.245798857486757e-06, "loss": 0.385, "step": 6236 }, { "epoch": 1.5422848664688429, "grad_norm": 0.7794503802349565, "learning_rate": 4.245566192041978e-06, "loss": 0.3478, "step": 6237 }, { "epoch": 1.5425321463897133, "grad_norm": 0.7305133897045314, "learning_rate": 4.245333497092008e-06, "loss": 0.3592, "step": 6238 }, { "epoch": 1.5427794263105836, "grad_norm": 0.7729355324224405, "learning_rate": 4.245100772640781e-06, "loss": 0.3617, "step": 6239 }, { "epoch": 1.543026706231454, "grad_norm": 0.8056572883912286, "learning_rate": 4.244868018692233e-06, "loss": 0.3821, "step": 6240 }, { "epoch": 1.5432739861523244, "grad_norm": 0.7971392930395684, "learning_rate": 4.244635235250295e-06, "loss": 0.3638, "step": 6241 }, { "epoch": 1.5435212660731947, "grad_norm": 0.7677167629930116, "learning_rate": 4.244402422318904e-06, "loss": 0.3674, "step": 6242 }, { "epoch": 1.5437685459940653, "grad_norm": 0.800907485039191, "learning_rate": 4.244169579901994e-06, "loss": 0.3662, "step": 6243 }, { "epoch": 1.5440158259149357, "grad_norm": 0.7443175076506262, "learning_rate": 4.243936708003501e-06, "loss": 0.3898, "step": 6244 }, { "epoch": 1.5442631058358063, "grad_norm": 0.777405472798028, "learning_rate": 4.243703806627361e-06, "loss": 0.3835, "step": 6245 }, { "epoch": 1.5445103857566767, "grad_norm": 0.7576791200980652, "learning_rate": 4.2434708757775115e-06, "loss": 0.399, "step": 6246 }, { "epoch": 1.544757665677547, "grad_norm": 0.7601984338144303, "learning_rate": 4.243237915457888e-06, "loss": 0.3866, "step": 6247 }, { "epoch": 1.5450049455984174, "grad_norm": 0.7802134079653746, "learning_rate": 4.2430049256724305e-06, "loss": 0.3783, "step": 6248 }, { "epoch": 1.5452522255192878, "grad_norm": 0.7618817635370613, "learning_rate": 4.2427719064250765e-06, "loss": 0.3558, "step": 6249 }, { "epoch": 1.5454995054401581, "grad_norm": 0.8306848889414888, "learning_rate": 4.242538857719763e-06, "loss": 0.3654, "step": 6250 }, { "epoch": 1.5457467853610287, "grad_norm": 0.8182762531751381, "learning_rate": 4.242305779560432e-06, "loss": 0.3678, "step": 6251 }, { "epoch": 1.545994065281899, "grad_norm": 0.7517259687796941, "learning_rate": 4.242072671951021e-06, "loss": 0.3358, "step": 6252 }, { "epoch": 1.5462413452027697, "grad_norm": 0.8001436444930253, "learning_rate": 4.241839534895471e-06, "loss": 0.3837, "step": 6253 }, { "epoch": 1.54648862512364, "grad_norm": 0.7832350335796066, "learning_rate": 4.241606368397722e-06, "loss": 0.3523, "step": 6254 }, { "epoch": 1.5467359050445104, "grad_norm": 0.7997025565166457, "learning_rate": 4.241373172461717e-06, "loss": 0.3751, "step": 6255 }, { "epoch": 1.5469831849653808, "grad_norm": 0.7678656526490936, "learning_rate": 4.241139947091396e-06, "loss": 0.3738, "step": 6256 }, { "epoch": 1.5472304648862512, "grad_norm": 0.7907075663700592, "learning_rate": 4.240906692290701e-06, "loss": 0.3638, "step": 6257 }, { "epoch": 1.5474777448071215, "grad_norm": 0.7763908853281443, "learning_rate": 4.240673408063577e-06, "loss": 0.3596, "step": 6258 }, { "epoch": 1.5477250247279921, "grad_norm": 0.7857340312567335, "learning_rate": 4.2404400944139645e-06, "loss": 0.4073, "step": 6259 }, { "epoch": 1.5479723046488625, "grad_norm": 0.7759433202162459, "learning_rate": 4.240206751345809e-06, "loss": 0.383, "step": 6260 }, { "epoch": 1.548219584569733, "grad_norm": 0.7999699703759791, "learning_rate": 4.239973378863053e-06, "loss": 0.3463, "step": 6261 }, { "epoch": 1.5484668644906034, "grad_norm": 0.769337781408425, "learning_rate": 4.239739976969643e-06, "loss": 0.4255, "step": 6262 }, { "epoch": 1.5487141444114738, "grad_norm": 0.7915433476619885, "learning_rate": 4.239506545669523e-06, "loss": 0.3955, "step": 6263 }, { "epoch": 1.5489614243323442, "grad_norm": 0.8102012254961218, "learning_rate": 4.239273084966639e-06, "loss": 0.3859, "step": 6264 }, { "epoch": 1.5492087042532146, "grad_norm": 0.7823147493075941, "learning_rate": 4.2390395948649365e-06, "loss": 0.3755, "step": 6265 }, { "epoch": 1.549455984174085, "grad_norm": 0.7816940034683049, "learning_rate": 4.238806075368364e-06, "loss": 0.3675, "step": 6266 }, { "epoch": 1.5497032640949555, "grad_norm": 0.752937345223362, "learning_rate": 4.238572526480867e-06, "loss": 0.3947, "step": 6267 }, { "epoch": 1.5499505440158259, "grad_norm": 0.7925184119048592, "learning_rate": 4.238338948206394e-06, "loss": 0.3634, "step": 6268 }, { "epoch": 1.5501978239366965, "grad_norm": 0.7740326811087748, "learning_rate": 4.238105340548892e-06, "loss": 0.3852, "step": 6269 }, { "epoch": 1.5504451038575668, "grad_norm": 0.751359243845578, "learning_rate": 4.237871703512312e-06, "loss": 0.3787, "step": 6270 }, { "epoch": 1.5506923837784372, "grad_norm": 0.7738808286350727, "learning_rate": 4.2376380371006e-06, "loss": 0.359, "step": 6271 }, { "epoch": 1.5509396636993076, "grad_norm": 0.7520673507032563, "learning_rate": 4.237404341317708e-06, "loss": 0.4002, "step": 6272 }, { "epoch": 1.551186943620178, "grad_norm": 0.7692951519691322, "learning_rate": 4.2371706161675855e-06, "loss": 0.4022, "step": 6273 }, { "epoch": 1.5514342235410483, "grad_norm": 0.7684468244892125, "learning_rate": 4.236936861654183e-06, "loss": 0.3633, "step": 6274 }, { "epoch": 1.551681503461919, "grad_norm": 0.7968705693122405, "learning_rate": 4.236703077781452e-06, "loss": 0.3704, "step": 6275 }, { "epoch": 1.5519287833827893, "grad_norm": 0.7362427313978651, "learning_rate": 4.236469264553343e-06, "loss": 0.3496, "step": 6276 }, { "epoch": 1.5521760633036599, "grad_norm": 0.7850049901479373, "learning_rate": 4.236235421973809e-06, "loss": 0.3621, "step": 6277 }, { "epoch": 1.5524233432245302, "grad_norm": 0.7790116985480459, "learning_rate": 4.236001550046803e-06, "loss": 0.3773, "step": 6278 }, { "epoch": 1.5526706231454006, "grad_norm": 0.7173673342330735, "learning_rate": 4.235767648776278e-06, "loss": 0.3647, "step": 6279 }, { "epoch": 1.552917903066271, "grad_norm": 0.7942934081133302, "learning_rate": 4.235533718166188e-06, "loss": 0.3777, "step": 6280 }, { "epoch": 1.5531651829871413, "grad_norm": 0.7442632735530732, "learning_rate": 4.2352997582204845e-06, "loss": 0.3902, "step": 6281 }, { "epoch": 1.5534124629080117, "grad_norm": 0.7731274246508707, "learning_rate": 4.235065768943126e-06, "loss": 0.3703, "step": 6282 }, { "epoch": 1.5536597428288823, "grad_norm": 0.7770784907197068, "learning_rate": 4.234831750338064e-06, "loss": 0.4088, "step": 6283 }, { "epoch": 1.5539070227497527, "grad_norm": 0.8001931851199924, "learning_rate": 4.234597702409257e-06, "loss": 0.4056, "step": 6284 }, { "epoch": 1.5541543026706233, "grad_norm": 0.7592356929368401, "learning_rate": 4.2343636251606594e-06, "loss": 0.3776, "step": 6285 }, { "epoch": 1.5544015825914936, "grad_norm": 0.7914795970766962, "learning_rate": 4.234129518596229e-06, "loss": 0.3503, "step": 6286 }, { "epoch": 1.554648862512364, "grad_norm": 0.731884526934121, "learning_rate": 4.23389538271992e-06, "loss": 0.3749, "step": 6287 }, { "epoch": 1.5548961424332344, "grad_norm": 0.7710386064510834, "learning_rate": 4.233661217535694e-06, "loss": 0.3542, "step": 6288 }, { "epoch": 1.5551434223541047, "grad_norm": 0.7651321043191052, "learning_rate": 4.233427023047507e-06, "loss": 0.384, "step": 6289 }, { "epoch": 1.5553907022749751, "grad_norm": 0.7782772890035883, "learning_rate": 4.233192799259318e-06, "loss": 0.3667, "step": 6290 }, { "epoch": 1.5556379821958457, "grad_norm": 0.8051505548992122, "learning_rate": 4.232958546175084e-06, "loss": 0.375, "step": 6291 }, { "epoch": 1.555885262116716, "grad_norm": 0.7459496835318311, "learning_rate": 4.232724263798769e-06, "loss": 0.3614, "step": 6292 }, { "epoch": 1.5561325420375867, "grad_norm": 0.7592911708562895, "learning_rate": 4.232489952134329e-06, "loss": 0.3464, "step": 6293 }, { "epoch": 1.556379821958457, "grad_norm": 0.8175292940914679, "learning_rate": 4.232255611185726e-06, "loss": 0.3781, "step": 6294 }, { "epoch": 1.5566271018793274, "grad_norm": 0.8147302330458426, "learning_rate": 4.232021240956921e-06, "loss": 0.3733, "step": 6295 }, { "epoch": 1.5568743818001978, "grad_norm": 0.7814784942848721, "learning_rate": 4.231786841451877e-06, "loss": 0.3504, "step": 6296 }, { "epoch": 1.5571216617210681, "grad_norm": 0.7700232379044383, "learning_rate": 4.231552412674553e-06, "loss": 0.3902, "step": 6297 }, { "epoch": 1.5573689416419385, "grad_norm": 0.796637158009648, "learning_rate": 4.231317954628914e-06, "loss": 0.3563, "step": 6298 }, { "epoch": 1.557616221562809, "grad_norm": 0.7566104043009374, "learning_rate": 4.231083467318922e-06, "loss": 0.3945, "step": 6299 }, { "epoch": 1.5578635014836797, "grad_norm": 0.76900962250095, "learning_rate": 4.230848950748541e-06, "loss": 0.3807, "step": 6300 }, { "epoch": 1.55811078140455, "grad_norm": 0.7708949204801354, "learning_rate": 4.230614404921734e-06, "loss": 0.3597, "step": 6301 }, { "epoch": 1.5583580613254204, "grad_norm": 0.7831806490181457, "learning_rate": 4.230379829842467e-06, "loss": 0.3617, "step": 6302 }, { "epoch": 1.5586053412462908, "grad_norm": 0.7607240500988549, "learning_rate": 4.230145225514703e-06, "loss": 0.3341, "step": 6303 }, { "epoch": 1.5588526211671612, "grad_norm": 0.7782856242381475, "learning_rate": 4.229910591942411e-06, "loss": 0.3652, "step": 6304 }, { "epoch": 1.5590999010880315, "grad_norm": 0.7698781564037144, "learning_rate": 4.2296759291295534e-06, "loss": 0.3676, "step": 6305 }, { "epoch": 1.5593471810089021, "grad_norm": 0.8195908231596121, "learning_rate": 4.229441237080099e-06, "loss": 0.3722, "step": 6306 }, { "epoch": 1.5595944609297725, "grad_norm": 0.7906949723820191, "learning_rate": 4.229206515798012e-06, "loss": 0.3966, "step": 6307 }, { "epoch": 1.559841740850643, "grad_norm": 0.809756409987822, "learning_rate": 4.228971765287263e-06, "loss": 0.3478, "step": 6308 }, { "epoch": 1.5600890207715135, "grad_norm": 0.7656371609874385, "learning_rate": 4.228736985551819e-06, "loss": 0.3611, "step": 6309 }, { "epoch": 1.5603363006923838, "grad_norm": 0.7688666763571264, "learning_rate": 4.228502176595648e-06, "loss": 0.3759, "step": 6310 }, { "epoch": 1.5605835806132542, "grad_norm": 0.7731552630301288, "learning_rate": 4.228267338422718e-06, "loss": 0.3562, "step": 6311 }, { "epoch": 1.5608308605341246, "grad_norm": 0.7878386189412722, "learning_rate": 4.228032471037001e-06, "loss": 0.3641, "step": 6312 }, { "epoch": 1.561078140454995, "grad_norm": 0.790814405097217, "learning_rate": 4.227797574442465e-06, "loss": 0.3566, "step": 6313 }, { "epoch": 1.5613254203758655, "grad_norm": 0.771165924203214, "learning_rate": 4.227562648643081e-06, "loss": 0.381, "step": 6314 }, { "epoch": 1.561572700296736, "grad_norm": 0.7772248860157766, "learning_rate": 4.22732769364282e-06, "loss": 0.3757, "step": 6315 }, { "epoch": 1.5618199802176065, "grad_norm": 0.7783285419721535, "learning_rate": 4.227092709445652e-06, "loss": 0.3727, "step": 6316 }, { "epoch": 1.5620672601384769, "grad_norm": 0.7779340433084706, "learning_rate": 4.226857696055553e-06, "loss": 0.3578, "step": 6317 }, { "epoch": 1.5623145400593472, "grad_norm": 0.7580876444099343, "learning_rate": 4.226622653476491e-06, "loss": 0.3687, "step": 6318 }, { "epoch": 1.5625618199802176, "grad_norm": 0.7635747854356978, "learning_rate": 4.22638758171244e-06, "loss": 0.3471, "step": 6319 }, { "epoch": 1.562809099901088, "grad_norm": 0.7728913628650974, "learning_rate": 4.2261524807673744e-06, "loss": 0.3736, "step": 6320 }, { "epoch": 1.5630563798219583, "grad_norm": 0.7716670762950005, "learning_rate": 4.225917350645269e-06, "loss": 0.4157, "step": 6321 }, { "epoch": 1.563303659742829, "grad_norm": 0.7874053610845828, "learning_rate": 4.225682191350096e-06, "loss": 0.3705, "step": 6322 }, { "epoch": 1.5635509396636993, "grad_norm": 0.7864350183321268, "learning_rate": 4.225447002885831e-06, "loss": 0.3615, "step": 6323 }, { "epoch": 1.5637982195845699, "grad_norm": 0.7933211064087715, "learning_rate": 4.225211785256449e-06, "loss": 0.3799, "step": 6324 }, { "epoch": 1.5640454995054403, "grad_norm": 0.7790858016546929, "learning_rate": 4.224976538465927e-06, "loss": 0.3752, "step": 6325 }, { "epoch": 1.5642927794263106, "grad_norm": 0.7899524192771772, "learning_rate": 4.2247412625182405e-06, "loss": 0.3965, "step": 6326 }, { "epoch": 1.564540059347181, "grad_norm": 0.7862297952481558, "learning_rate": 4.224505957417367e-06, "loss": 0.3764, "step": 6327 }, { "epoch": 1.5647873392680514, "grad_norm": 0.7951313431728206, "learning_rate": 4.224270623167283e-06, "loss": 0.3589, "step": 6328 }, { "epoch": 1.5650346191889217, "grad_norm": 0.8208627684405196, "learning_rate": 4.224035259771967e-06, "loss": 0.3691, "step": 6329 }, { "epoch": 1.5652818991097923, "grad_norm": 0.7627645154893068, "learning_rate": 4.223799867235398e-06, "loss": 0.3743, "step": 6330 }, { "epoch": 1.5655291790306627, "grad_norm": 0.7895227003632047, "learning_rate": 4.2235644455615525e-06, "loss": 0.375, "step": 6331 }, { "epoch": 1.5657764589515333, "grad_norm": 0.7877495972250088, "learning_rate": 4.223328994754412e-06, "loss": 0.37, "step": 6332 }, { "epoch": 1.5660237388724036, "grad_norm": 0.758358631067232, "learning_rate": 4.223093514817955e-06, "loss": 0.3886, "step": 6333 }, { "epoch": 1.566271018793274, "grad_norm": 0.7911555265207226, "learning_rate": 4.222858005756164e-06, "loss": 0.3726, "step": 6334 }, { "epoch": 1.5665182987141444, "grad_norm": 0.7822752876618456, "learning_rate": 4.222622467573016e-06, "loss": 0.3719, "step": 6335 }, { "epoch": 1.5667655786350148, "grad_norm": 0.7521909055111662, "learning_rate": 4.222386900272496e-06, "loss": 0.3662, "step": 6336 }, { "epoch": 1.5670128585558851, "grad_norm": 0.8089895746183244, "learning_rate": 4.222151303858584e-06, "loss": 0.3727, "step": 6337 }, { "epoch": 1.5672601384767557, "grad_norm": 0.7816065248453298, "learning_rate": 4.221915678335262e-06, "loss": 0.3615, "step": 6338 }, { "epoch": 1.567507418397626, "grad_norm": 0.7889396215394516, "learning_rate": 4.221680023706513e-06, "loss": 0.3497, "step": 6339 }, { "epoch": 1.5677546983184967, "grad_norm": 0.8151954163055456, "learning_rate": 4.2214443399763215e-06, "loss": 0.363, "step": 6340 }, { "epoch": 1.568001978239367, "grad_norm": 0.775432738255676, "learning_rate": 4.22120862714867e-06, "loss": 0.3772, "step": 6341 }, { "epoch": 1.5682492581602374, "grad_norm": 0.7819569559850131, "learning_rate": 4.220972885227542e-06, "loss": 0.3628, "step": 6342 }, { "epoch": 1.5684965380811078, "grad_norm": 0.7896759936707712, "learning_rate": 4.220737114216925e-06, "loss": 0.3322, "step": 6343 }, { "epoch": 1.5687438180019782, "grad_norm": 0.7792711873973955, "learning_rate": 4.220501314120802e-06, "loss": 0.3636, "step": 6344 }, { "epoch": 1.5689910979228485, "grad_norm": 0.7627391829516568, "learning_rate": 4.220265484943158e-06, "loss": 0.3594, "step": 6345 }, { "epoch": 1.5692383778437191, "grad_norm": 0.816893387453078, "learning_rate": 4.220029626687981e-06, "loss": 0.3773, "step": 6346 }, { "epoch": 1.5694856577645895, "grad_norm": 0.774907294158934, "learning_rate": 4.219793739359257e-06, "loss": 0.3667, "step": 6347 }, { "epoch": 1.56973293768546, "grad_norm": 0.7915226203546838, "learning_rate": 4.219557822960975e-06, "loss": 0.3869, "step": 6348 }, { "epoch": 1.5699802176063304, "grad_norm": 0.7983208675850375, "learning_rate": 4.219321877497119e-06, "loss": 0.3975, "step": 6349 }, { "epoch": 1.5702274975272008, "grad_norm": 0.7777751952903091, "learning_rate": 4.219085902971679e-06, "loss": 0.3896, "step": 6350 }, { "epoch": 1.5704747774480712, "grad_norm": 0.7612228484994086, "learning_rate": 4.218849899388644e-06, "loss": 0.3517, "step": 6351 }, { "epoch": 1.5707220573689415, "grad_norm": 0.7642870663375274, "learning_rate": 4.218613866752004e-06, "loss": 0.3496, "step": 6352 }, { "epoch": 1.570969337289812, "grad_norm": 0.7627654535943694, "learning_rate": 4.218377805065747e-06, "loss": 0.388, "step": 6353 }, { "epoch": 1.5712166172106825, "grad_norm": 0.7947333208426629, "learning_rate": 4.218141714333863e-06, "loss": 0.3749, "step": 6354 }, { "epoch": 1.5714638971315529, "grad_norm": 0.736865566961618, "learning_rate": 4.217905594560344e-06, "loss": 0.3887, "step": 6355 }, { "epoch": 1.5717111770524235, "grad_norm": 0.7519367980507369, "learning_rate": 4.217669445749181e-06, "loss": 0.3722, "step": 6356 }, { "epoch": 1.5719584569732938, "grad_norm": 0.7462429016539935, "learning_rate": 4.217433267904364e-06, "loss": 0.372, "step": 6357 }, { "epoch": 1.5722057368941642, "grad_norm": 0.7749571163529954, "learning_rate": 4.217197061029886e-06, "loss": 0.3568, "step": 6358 }, { "epoch": 1.5724530168150346, "grad_norm": 0.7747896138107505, "learning_rate": 4.21696082512974e-06, "loss": 0.3394, "step": 6359 }, { "epoch": 1.572700296735905, "grad_norm": 0.7788953950884815, "learning_rate": 4.21672456020792e-06, "loss": 0.3585, "step": 6360 }, { "epoch": 1.5729475766567753, "grad_norm": 0.7514623091632487, "learning_rate": 4.216488266268417e-06, "loss": 0.3873, "step": 6361 }, { "epoch": 1.573194856577646, "grad_norm": 0.7785136712879568, "learning_rate": 4.216251943315227e-06, "loss": 0.3471, "step": 6362 }, { "epoch": 1.5734421364985163, "grad_norm": 0.7893895570014621, "learning_rate": 4.216015591352344e-06, "loss": 0.3695, "step": 6363 }, { "epoch": 1.5736894164193869, "grad_norm": 0.7905636111000112, "learning_rate": 4.215779210383763e-06, "loss": 0.3639, "step": 6364 }, { "epoch": 1.5739366963402572, "grad_norm": 0.7883346028964607, "learning_rate": 4.21554280041348e-06, "loss": 0.3863, "step": 6365 }, { "epoch": 1.5741839762611276, "grad_norm": 0.7652971799193422, "learning_rate": 4.21530636144549e-06, "loss": 0.3815, "step": 6366 }, { "epoch": 1.574431256181998, "grad_norm": 0.7805445939583068, "learning_rate": 4.215069893483791e-06, "loss": 0.3511, "step": 6367 }, { "epoch": 1.5746785361028683, "grad_norm": 0.7515241051729142, "learning_rate": 4.214833396532378e-06, "loss": 0.3867, "step": 6368 }, { "epoch": 1.5749258160237387, "grad_norm": 0.8046275932968091, "learning_rate": 4.21459687059525e-06, "loss": 0.3678, "step": 6369 }, { "epoch": 1.5751730959446093, "grad_norm": 0.7292626732714689, "learning_rate": 4.214360315676405e-06, "loss": 0.3827, "step": 6370 }, { "epoch": 1.5754203758654797, "grad_norm": 0.7439359130022896, "learning_rate": 4.21412373177984e-06, "loss": 0.3703, "step": 6371 }, { "epoch": 1.5756676557863503, "grad_norm": 0.7697273780962904, "learning_rate": 4.213887118909556e-06, "loss": 0.3733, "step": 6372 }, { "epoch": 1.5759149357072206, "grad_norm": 0.7959612107762682, "learning_rate": 4.213650477069552e-06, "loss": 0.3664, "step": 6373 }, { "epoch": 1.576162215628091, "grad_norm": 0.7946094680735852, "learning_rate": 4.213413806263827e-06, "loss": 0.3226, "step": 6374 }, { "epoch": 1.5764094955489614, "grad_norm": 0.7858375178301397, "learning_rate": 4.213177106496381e-06, "loss": 0.3292, "step": 6375 }, { "epoch": 1.5766567754698317, "grad_norm": 0.7774927978880786, "learning_rate": 4.212940377771216e-06, "loss": 0.3549, "step": 6376 }, { "epoch": 1.576904055390702, "grad_norm": 0.7586512690156004, "learning_rate": 4.212703620092334e-06, "loss": 0.4024, "step": 6377 }, { "epoch": 1.5771513353115727, "grad_norm": 0.7554304864573931, "learning_rate": 4.2124668334637355e-06, "loss": 0.3677, "step": 6378 }, { "epoch": 1.5773986152324433, "grad_norm": 0.7633082449241473, "learning_rate": 4.212230017889424e-06, "loss": 0.3692, "step": 6379 }, { "epoch": 1.5776458951533137, "grad_norm": 0.748525677006286, "learning_rate": 4.211993173373401e-06, "loss": 0.36, "step": 6380 }, { "epoch": 1.577893175074184, "grad_norm": 0.7489551937455744, "learning_rate": 4.211756299919671e-06, "loss": 0.3859, "step": 6381 }, { "epoch": 1.5781404549950544, "grad_norm": 0.7622555229540056, "learning_rate": 4.211519397532238e-06, "loss": 0.358, "step": 6382 }, { "epoch": 1.5783877349159248, "grad_norm": 0.7813081729696283, "learning_rate": 4.211282466215105e-06, "loss": 0.3644, "step": 6383 }, { "epoch": 1.5786350148367951, "grad_norm": 0.8185668940577417, "learning_rate": 4.211045505972279e-06, "loss": 0.3506, "step": 6384 }, { "epoch": 1.5788822947576657, "grad_norm": 0.815664540600793, "learning_rate": 4.210808516807763e-06, "loss": 0.3529, "step": 6385 }, { "epoch": 1.579129574678536, "grad_norm": 0.7575778189707025, "learning_rate": 4.210571498725564e-06, "loss": 0.3936, "step": 6386 }, { "epoch": 1.5793768545994067, "grad_norm": 0.7699453607881769, "learning_rate": 4.210334451729689e-06, "loss": 0.3401, "step": 6387 }, { "epoch": 1.579624134520277, "grad_norm": 0.77702665048728, "learning_rate": 4.210097375824144e-06, "loss": 0.3621, "step": 6388 }, { "epoch": 1.5798714144411474, "grad_norm": 0.8278294942400759, "learning_rate": 4.209860271012936e-06, "loss": 0.3587, "step": 6389 }, { "epoch": 1.5801186943620178, "grad_norm": 0.73875799553122, "learning_rate": 4.2096231373000714e-06, "loss": 0.3621, "step": 6390 }, { "epoch": 1.5803659742828882, "grad_norm": 0.7836281310193499, "learning_rate": 4.209385974689562e-06, "loss": 0.3799, "step": 6391 }, { "epoch": 1.5806132542037585, "grad_norm": 0.7925524897036469, "learning_rate": 4.209148783185414e-06, "loss": 0.379, "step": 6392 }, { "epoch": 1.5808605341246291, "grad_norm": 0.8001296635701929, "learning_rate": 4.208911562791638e-06, "loss": 0.3784, "step": 6393 }, { "epoch": 1.5811078140454995, "grad_norm": 0.7732908782841879, "learning_rate": 4.208674313512242e-06, "loss": 0.3647, "step": 6394 }, { "epoch": 1.58135509396637, "grad_norm": 0.7812695871661243, "learning_rate": 4.2084370353512375e-06, "loss": 0.3898, "step": 6395 }, { "epoch": 1.5816023738872405, "grad_norm": 0.7871217395274125, "learning_rate": 4.208199728312635e-06, "loss": 0.3856, "step": 6396 }, { "epoch": 1.5818496538081108, "grad_norm": 0.8172550517492383, "learning_rate": 4.207962392400445e-06, "loss": 0.3765, "step": 6397 }, { "epoch": 1.5820969337289812, "grad_norm": 0.745374987808505, "learning_rate": 4.207725027618681e-06, "loss": 0.3965, "step": 6398 }, { "epoch": 1.5823442136498516, "grad_norm": 0.7448328264157851, "learning_rate": 4.207487633971353e-06, "loss": 0.3469, "step": 6399 }, { "epoch": 1.582591493570722, "grad_norm": 0.7584384792234328, "learning_rate": 4.207250211462474e-06, "loss": 0.4129, "step": 6400 }, { "epoch": 1.5828387734915925, "grad_norm": 0.79626303032794, "learning_rate": 4.207012760096059e-06, "loss": 0.345, "step": 6401 }, { "epoch": 1.583086053412463, "grad_norm": 0.7393326597959936, "learning_rate": 4.20677527987612e-06, "loss": 0.3604, "step": 6402 }, { "epoch": 1.5833333333333335, "grad_norm": 0.7439766819143045, "learning_rate": 4.206537770806671e-06, "loss": 0.3588, "step": 6403 }, { "epoch": 1.5835806132542039, "grad_norm": 0.7722113099871267, "learning_rate": 4.2063002328917265e-06, "loss": 0.3303, "step": 6404 }, { "epoch": 1.5838278931750742, "grad_norm": 0.7773743163356608, "learning_rate": 4.2060626661353035e-06, "loss": 0.364, "step": 6405 }, { "epoch": 1.5840751730959446, "grad_norm": 0.836370569274264, "learning_rate": 4.205825070541415e-06, "loss": 0.3331, "step": 6406 }, { "epoch": 1.584322453016815, "grad_norm": 0.801983565196525, "learning_rate": 4.205587446114079e-06, "loss": 0.3516, "step": 6407 }, { "epoch": 1.5845697329376853, "grad_norm": 0.7974776605889261, "learning_rate": 4.20534979285731e-06, "loss": 0.3474, "step": 6408 }, { "epoch": 1.584817012858556, "grad_norm": 0.7429353964291947, "learning_rate": 4.205112110775128e-06, "loss": 0.3552, "step": 6409 }, { "epoch": 1.5850642927794263, "grad_norm": 0.7923398373547462, "learning_rate": 4.204874399871548e-06, "loss": 0.3689, "step": 6410 }, { "epoch": 1.5853115727002969, "grad_norm": 0.7294329865972179, "learning_rate": 4.20463666015059e-06, "loss": 0.3681, "step": 6411 }, { "epoch": 1.5855588526211672, "grad_norm": 0.761856607034988, "learning_rate": 4.20439889161627e-06, "loss": 0.3457, "step": 6412 }, { "epoch": 1.5858061325420376, "grad_norm": 0.7864458995091066, "learning_rate": 4.204161094272608e-06, "loss": 0.3704, "step": 6413 }, { "epoch": 1.586053412462908, "grad_norm": 0.7548595525780619, "learning_rate": 4.203923268123625e-06, "loss": 0.3777, "step": 6414 }, { "epoch": 1.5863006923837784, "grad_norm": 0.7795860610598434, "learning_rate": 4.20368541317334e-06, "loss": 0.3589, "step": 6415 }, { "epoch": 1.5865479723046487, "grad_norm": 0.7780270316250296, "learning_rate": 4.203447529425772e-06, "loss": 0.3883, "step": 6416 }, { "epoch": 1.5867952522255193, "grad_norm": 0.7745417827943019, "learning_rate": 4.2032096168849444e-06, "loss": 0.3635, "step": 6417 }, { "epoch": 1.5870425321463897, "grad_norm": 0.8073756161117522, "learning_rate": 4.202971675554877e-06, "loss": 0.3738, "step": 6418 }, { "epoch": 1.5872898120672603, "grad_norm": 0.7597149356622898, "learning_rate": 4.202733705439592e-06, "loss": 0.3658, "step": 6419 }, { "epoch": 1.5875370919881306, "grad_norm": 0.7966768979774023, "learning_rate": 4.202495706543112e-06, "loss": 0.3588, "step": 6420 }, { "epoch": 1.587784371909001, "grad_norm": 0.7771385647332084, "learning_rate": 4.202257678869459e-06, "loss": 0.3816, "step": 6421 }, { "epoch": 1.5880316518298714, "grad_norm": 0.7897211229531774, "learning_rate": 4.2020196224226576e-06, "loss": 0.3679, "step": 6422 }, { "epoch": 1.5882789317507418, "grad_norm": 0.7697531489635653, "learning_rate": 4.2017815372067315e-06, "loss": 0.3553, "step": 6423 }, { "epoch": 1.5885262116716121, "grad_norm": 0.771272457375711, "learning_rate": 4.201543423225705e-06, "loss": 0.3746, "step": 6424 }, { "epoch": 1.5887734915924827, "grad_norm": 0.7703893121032136, "learning_rate": 4.201305280483602e-06, "loss": 0.3742, "step": 6425 }, { "epoch": 1.589020771513353, "grad_norm": 0.7652385577915212, "learning_rate": 4.201067108984449e-06, "loss": 0.3818, "step": 6426 }, { "epoch": 1.5892680514342237, "grad_norm": 0.7744146642636255, "learning_rate": 4.20082890873227e-06, "loss": 0.3682, "step": 6427 }, { "epoch": 1.589515331355094, "grad_norm": 0.7327156944629393, "learning_rate": 4.2005906797310934e-06, "loss": 0.3975, "step": 6428 }, { "epoch": 1.5897626112759644, "grad_norm": 0.8037121686381721, "learning_rate": 4.200352421984946e-06, "loss": 0.3796, "step": 6429 }, { "epoch": 1.5900098911968348, "grad_norm": 0.7799428699847921, "learning_rate": 4.200114135497853e-06, "loss": 0.3692, "step": 6430 }, { "epoch": 1.5902571711177051, "grad_norm": 0.7569159080943401, "learning_rate": 4.199875820273843e-06, "loss": 0.3794, "step": 6431 }, { "epoch": 1.5905044510385755, "grad_norm": 0.7653280621304537, "learning_rate": 4.199637476316946e-06, "loss": 0.342, "step": 6432 }, { "epoch": 1.590751730959446, "grad_norm": 0.7741272888295035, "learning_rate": 4.199399103631188e-06, "loss": 0.3745, "step": 6433 }, { "epoch": 1.5909990108803165, "grad_norm": 0.7685003726994714, "learning_rate": 4.199160702220601e-06, "loss": 0.3414, "step": 6434 }, { "epoch": 1.591246290801187, "grad_norm": 0.7691916735744259, "learning_rate": 4.198922272089211e-06, "loss": 0.3662, "step": 6435 }, { "epoch": 1.5914935707220574, "grad_norm": 0.7911960913004916, "learning_rate": 4.198683813241051e-06, "loss": 0.4079, "step": 6436 }, { "epoch": 1.5917408506429278, "grad_norm": 0.771258227917006, "learning_rate": 4.198445325680152e-06, "loss": 0.3737, "step": 6437 }, { "epoch": 1.5919881305637982, "grad_norm": 0.7759457856776081, "learning_rate": 4.198206809410544e-06, "loss": 0.3666, "step": 6438 }, { "epoch": 1.5922354104846685, "grad_norm": 0.807431806251582, "learning_rate": 4.197968264436257e-06, "loss": 0.3849, "step": 6439 }, { "epoch": 1.592482690405539, "grad_norm": 0.7814024256289971, "learning_rate": 4.197729690761326e-06, "loss": 0.3783, "step": 6440 }, { "epoch": 1.5927299703264095, "grad_norm": 0.7829506183793827, "learning_rate": 4.1974910883897824e-06, "loss": 0.3623, "step": 6441 }, { "epoch": 1.5929772502472799, "grad_norm": 0.7530245696396372, "learning_rate": 4.197252457325659e-06, "loss": 0.3795, "step": 6442 }, { "epoch": 1.5932245301681505, "grad_norm": 0.7658943843269894, "learning_rate": 4.1970137975729904e-06, "loss": 0.4106, "step": 6443 }, { "epoch": 1.5934718100890208, "grad_norm": 0.754807967008384, "learning_rate": 4.196775109135809e-06, "loss": 0.3671, "step": 6444 }, { "epoch": 1.5937190900098912, "grad_norm": 0.7797785866364985, "learning_rate": 4.196536392018151e-06, "loss": 0.3825, "step": 6445 }, { "epoch": 1.5939663699307616, "grad_norm": 0.7627662967479398, "learning_rate": 4.1962976462240505e-06, "loss": 0.3616, "step": 6446 }, { "epoch": 1.594213649851632, "grad_norm": 0.7848808482536511, "learning_rate": 4.196058871757542e-06, "loss": 0.3768, "step": 6447 }, { "epoch": 1.5944609297725023, "grad_norm": 0.7831166681970118, "learning_rate": 4.195820068622665e-06, "loss": 0.3828, "step": 6448 }, { "epoch": 1.594708209693373, "grad_norm": 0.7539331548228781, "learning_rate": 4.195581236823452e-06, "loss": 0.3743, "step": 6449 }, { "epoch": 1.5949554896142433, "grad_norm": 0.7983107055808297, "learning_rate": 4.195342376363942e-06, "loss": 0.3703, "step": 6450 }, { "epoch": 1.5952027695351139, "grad_norm": 0.7601266754351131, "learning_rate": 4.195103487248171e-06, "loss": 0.3893, "step": 6451 }, { "epoch": 1.5954500494559842, "grad_norm": 0.7402315695556846, "learning_rate": 4.1948645694801796e-06, "loss": 0.3639, "step": 6452 }, { "epoch": 1.5956973293768546, "grad_norm": 0.7919977924861503, "learning_rate": 4.194625623064003e-06, "loss": 0.3501, "step": 6453 }, { "epoch": 1.595944609297725, "grad_norm": 0.7626484431102543, "learning_rate": 4.194386648003684e-06, "loss": 0.3288, "step": 6454 }, { "epoch": 1.5961918892185953, "grad_norm": 0.7680240118056006, "learning_rate": 4.194147644303257e-06, "loss": 0.4186, "step": 6455 }, { "epoch": 1.596439169139466, "grad_norm": 0.7417209715652979, "learning_rate": 4.193908611966766e-06, "loss": 0.381, "step": 6456 }, { "epoch": 1.5966864490603363, "grad_norm": 0.803074148996737, "learning_rate": 4.1936695509982496e-06, "loss": 0.3518, "step": 6457 }, { "epoch": 1.596933728981207, "grad_norm": 0.7926289178684669, "learning_rate": 4.193430461401749e-06, "loss": 0.3879, "step": 6458 }, { "epoch": 1.5971810089020773, "grad_norm": 0.796812084565938, "learning_rate": 4.193191343181305e-06, "loss": 0.3548, "step": 6459 }, { "epoch": 1.5974282888229476, "grad_norm": 0.7572479085590834, "learning_rate": 4.1929521963409606e-06, "loss": 0.3728, "step": 6460 }, { "epoch": 1.597675568743818, "grad_norm": 0.7844511557487907, "learning_rate": 4.192713020884756e-06, "loss": 0.3659, "step": 6461 }, { "epoch": 1.5979228486646884, "grad_norm": 0.8220131344884863, "learning_rate": 4.192473816816737e-06, "loss": 0.3889, "step": 6462 }, { "epoch": 1.5981701285855587, "grad_norm": 0.7687962888562877, "learning_rate": 4.192234584140943e-06, "loss": 0.383, "step": 6463 }, { "epoch": 1.5984174085064293, "grad_norm": 0.7477443875832368, "learning_rate": 4.191995322861421e-06, "loss": 0.381, "step": 6464 }, { "epoch": 1.5986646884272997, "grad_norm": 0.7942336972418195, "learning_rate": 4.191756032982215e-06, "loss": 0.3464, "step": 6465 }, { "epoch": 1.5989119683481703, "grad_norm": 0.7372303137692642, "learning_rate": 4.1915167145073675e-06, "loss": 0.3627, "step": 6466 }, { "epoch": 1.5991592482690407, "grad_norm": 0.7687719213118099, "learning_rate": 4.191277367440925e-06, "loss": 0.404, "step": 6467 }, { "epoch": 1.599406528189911, "grad_norm": 0.7774279478473115, "learning_rate": 4.191037991786934e-06, "loss": 0.3618, "step": 6468 }, { "epoch": 1.5996538081107814, "grad_norm": 0.8567670691107414, "learning_rate": 4.190798587549439e-06, "loss": 0.3639, "step": 6469 }, { "epoch": 1.5999010880316518, "grad_norm": 0.7930584156482083, "learning_rate": 4.190559154732488e-06, "loss": 0.3889, "step": 6470 }, { "epoch": 1.6001483679525221, "grad_norm": 0.8139283373541343, "learning_rate": 4.190319693340126e-06, "loss": 0.3376, "step": 6471 }, { "epoch": 1.6003956478733927, "grad_norm": 0.7910523674582416, "learning_rate": 4.190080203376403e-06, "loss": 0.356, "step": 6472 }, { "epoch": 1.600642927794263, "grad_norm": 0.7622844144017935, "learning_rate": 4.189840684845367e-06, "loss": 0.3531, "step": 6473 }, { "epoch": 1.6008902077151337, "grad_norm": 0.7699055987117901, "learning_rate": 4.189601137751065e-06, "loss": 0.3679, "step": 6474 }, { "epoch": 1.601137487636004, "grad_norm": 0.8047511148474487, "learning_rate": 4.189361562097547e-06, "loss": 0.3877, "step": 6475 }, { "epoch": 1.6013847675568744, "grad_norm": 0.7753043443872449, "learning_rate": 4.189121957888862e-06, "loss": 0.3628, "step": 6476 }, { "epoch": 1.6016320474777448, "grad_norm": 0.7741912796989697, "learning_rate": 4.1888823251290615e-06, "loss": 0.3738, "step": 6477 }, { "epoch": 1.6018793273986152, "grad_norm": 0.795362740323005, "learning_rate": 4.188642663822193e-06, "loss": 0.3617, "step": 6478 }, { "epoch": 1.6021266073194855, "grad_norm": 0.8038960427273518, "learning_rate": 4.188402973972311e-06, "loss": 0.3493, "step": 6479 }, { "epoch": 1.6023738872403561, "grad_norm": 0.8002872835263772, "learning_rate": 4.1881632555834644e-06, "loss": 0.3669, "step": 6480 }, { "epoch": 1.6026211671612265, "grad_norm": 0.7981526081982089, "learning_rate": 4.187923508659706e-06, "loss": 0.3618, "step": 6481 }, { "epoch": 1.602868447082097, "grad_norm": 0.759055655045073, "learning_rate": 4.187683733205089e-06, "loss": 0.3891, "step": 6482 }, { "epoch": 1.6031157270029674, "grad_norm": 0.7940542943226044, "learning_rate": 4.1874439292236645e-06, "loss": 0.3699, "step": 6483 }, { "epoch": 1.6033630069238378, "grad_norm": 0.7815216049199761, "learning_rate": 4.187204096719488e-06, "loss": 0.3764, "step": 6484 }, { "epoch": 1.6036102868447082, "grad_norm": 0.7708728182021267, "learning_rate": 4.186964235696612e-06, "loss": 0.3835, "step": 6485 }, { "epoch": 1.6038575667655786, "grad_norm": 0.7949973084633778, "learning_rate": 4.1867243461590915e-06, "loss": 0.3744, "step": 6486 }, { "epoch": 1.604104846686449, "grad_norm": 0.7882077307750832, "learning_rate": 4.18648442811098e-06, "loss": 0.351, "step": 6487 }, { "epoch": 1.6043521266073195, "grad_norm": 0.7728962439518577, "learning_rate": 4.186244481556334e-06, "loss": 0.3746, "step": 6488 }, { "epoch": 1.6045994065281899, "grad_norm": 0.8174733792967105, "learning_rate": 4.18600450649921e-06, "loss": 0.3699, "step": 6489 }, { "epoch": 1.6048466864490605, "grad_norm": 0.8153764016058016, "learning_rate": 4.185764502943663e-06, "loss": 0.3708, "step": 6490 }, { "epoch": 1.6050939663699308, "grad_norm": 0.8038567547807687, "learning_rate": 4.18552447089375e-06, "loss": 0.3964, "step": 6491 }, { "epoch": 1.6053412462908012, "grad_norm": 0.7879853348576894, "learning_rate": 4.18528441035353e-06, "loss": 0.3747, "step": 6492 }, { "epoch": 1.6055885262116716, "grad_norm": 0.7687968540029664, "learning_rate": 4.185044321327057e-06, "loss": 0.3607, "step": 6493 }, { "epoch": 1.605835806132542, "grad_norm": 0.7816643622375332, "learning_rate": 4.1848042038183925e-06, "loss": 0.3541, "step": 6494 }, { "epoch": 1.6060830860534123, "grad_norm": 0.7751801534342962, "learning_rate": 4.184564057831594e-06, "loss": 0.3597, "step": 6495 }, { "epoch": 1.606330365974283, "grad_norm": 0.781019869045202, "learning_rate": 4.18432388337072e-06, "loss": 0.3954, "step": 6496 }, { "epoch": 1.6065776458951533, "grad_norm": 0.7646144317754546, "learning_rate": 4.184083680439832e-06, "loss": 0.3533, "step": 6497 }, { "epoch": 1.6068249258160239, "grad_norm": 0.7859753677731842, "learning_rate": 4.183843449042989e-06, "loss": 0.377, "step": 6498 }, { "epoch": 1.6070722057368942, "grad_norm": 0.7668710553318931, "learning_rate": 4.183603189184251e-06, "loss": 0.3796, "step": 6499 }, { "epoch": 1.6073194856577646, "grad_norm": 0.8017200888391119, "learning_rate": 4.1833629008676795e-06, "loss": 0.3629, "step": 6500 }, { "epoch": 1.607566765578635, "grad_norm": 0.8171959955664494, "learning_rate": 4.183122584097337e-06, "loss": 0.3632, "step": 6501 }, { "epoch": 1.6078140454995054, "grad_norm": 0.8241626179053188, "learning_rate": 4.182882238877284e-06, "loss": 0.3794, "step": 6502 }, { "epoch": 1.6080613254203757, "grad_norm": 0.7885557774401981, "learning_rate": 4.1826418652115856e-06, "loss": 0.3889, "step": 6503 }, { "epoch": 1.6083086053412463, "grad_norm": 0.7887134309940849, "learning_rate": 4.182401463104301e-06, "loss": 0.3482, "step": 6504 }, { "epoch": 1.6085558852621167, "grad_norm": 0.7578685388014297, "learning_rate": 4.182161032559497e-06, "loss": 0.39, "step": 6505 }, { "epoch": 1.6088031651829873, "grad_norm": 0.8029287584691536, "learning_rate": 4.181920573581237e-06, "loss": 0.3559, "step": 6506 }, { "epoch": 1.6090504451038576, "grad_norm": 0.7915836521288884, "learning_rate": 4.181680086173584e-06, "loss": 0.3571, "step": 6507 }, { "epoch": 1.609297725024728, "grad_norm": 0.7952017951356946, "learning_rate": 4.181439570340604e-06, "loss": 0.3703, "step": 6508 }, { "epoch": 1.6095450049455984, "grad_norm": 0.7782540508435584, "learning_rate": 4.181199026086362e-06, "loss": 0.3463, "step": 6509 }, { "epoch": 1.6097922848664687, "grad_norm": 0.7768744003671728, "learning_rate": 4.180958453414924e-06, "loss": 0.3749, "step": 6510 }, { "epoch": 1.6100395647873391, "grad_norm": 0.8024315993414298, "learning_rate": 4.180717852330358e-06, "loss": 0.3596, "step": 6511 }, { "epoch": 1.6102868447082097, "grad_norm": 0.7962565483727742, "learning_rate": 4.180477222836728e-06, "loss": 0.3756, "step": 6512 }, { "epoch": 1.61053412462908, "grad_norm": 0.7998847713758572, "learning_rate": 4.180236564938103e-06, "loss": 0.3479, "step": 6513 }, { "epoch": 1.6107814045499507, "grad_norm": 0.806858619124711, "learning_rate": 4.1799958786385505e-06, "loss": 0.3474, "step": 6514 }, { "epoch": 1.611028684470821, "grad_norm": 0.7820795313405432, "learning_rate": 4.179755163942139e-06, "loss": 0.3662, "step": 6515 }, { "epoch": 1.6112759643916914, "grad_norm": 0.7483903615787333, "learning_rate": 4.179514420852937e-06, "loss": 0.3844, "step": 6516 }, { "epoch": 1.6115232443125618, "grad_norm": 0.7907188375382053, "learning_rate": 4.179273649375014e-06, "loss": 0.3817, "step": 6517 }, { "epoch": 1.6117705242334321, "grad_norm": 0.7445075284802566, "learning_rate": 4.179032849512441e-06, "loss": 0.3693, "step": 6518 }, { "epoch": 1.6120178041543025, "grad_norm": 0.7585526149487934, "learning_rate": 4.178792021269285e-06, "loss": 0.3734, "step": 6519 }, { "epoch": 1.612265084075173, "grad_norm": 0.7571962899026257, "learning_rate": 4.178551164649619e-06, "loss": 0.3773, "step": 6520 }, { "epoch": 1.6125123639960435, "grad_norm": 0.7658639776714464, "learning_rate": 4.178310279657514e-06, "loss": 0.3867, "step": 6521 }, { "epoch": 1.612759643916914, "grad_norm": 0.7783392133819018, "learning_rate": 4.178069366297042e-06, "loss": 0.3857, "step": 6522 }, { "epoch": 1.6130069238377844, "grad_norm": 0.7731274553497984, "learning_rate": 4.177828424572274e-06, "loss": 0.329, "step": 6523 }, { "epoch": 1.6132542037586548, "grad_norm": 0.7725346587033379, "learning_rate": 4.177587454487283e-06, "loss": 0.3938, "step": 6524 }, { "epoch": 1.6135014836795252, "grad_norm": 0.8093234570799442, "learning_rate": 4.177346456046143e-06, "loss": 0.376, "step": 6525 }, { "epoch": 1.6137487636003955, "grad_norm": 0.7514790357874798, "learning_rate": 4.177105429252927e-06, "loss": 0.39, "step": 6526 }, { "epoch": 1.613996043521266, "grad_norm": 0.7803720333969721, "learning_rate": 4.176864374111708e-06, "loss": 0.3757, "step": 6527 }, { "epoch": 1.6142433234421365, "grad_norm": 0.7899244578272242, "learning_rate": 4.176623290626562e-06, "loss": 0.3358, "step": 6528 }, { "epoch": 1.6144906033630069, "grad_norm": 0.798401812125719, "learning_rate": 4.176382178801563e-06, "loss": 0.3376, "step": 6529 }, { "epoch": 1.6147378832838775, "grad_norm": 0.7435269480053011, "learning_rate": 4.176141038640788e-06, "loss": 0.3773, "step": 6530 }, { "epoch": 1.6149851632047478, "grad_norm": 0.7751187298864275, "learning_rate": 4.175899870148311e-06, "loss": 0.3832, "step": 6531 }, { "epoch": 1.6152324431256182, "grad_norm": 0.7964343470839508, "learning_rate": 4.175658673328209e-06, "loss": 0.3864, "step": 6532 }, { "epoch": 1.6154797230464886, "grad_norm": 0.7825092194521035, "learning_rate": 4.175417448184561e-06, "loss": 0.3549, "step": 6533 }, { "epoch": 1.615727002967359, "grad_norm": 0.7849631688346875, "learning_rate": 4.175176194721442e-06, "loss": 0.3558, "step": 6534 }, { "epoch": 1.6159742828882295, "grad_norm": 0.7996433034493756, "learning_rate": 4.17493491294293e-06, "loss": 0.353, "step": 6535 }, { "epoch": 1.6162215628091, "grad_norm": 0.8026931089106729, "learning_rate": 4.174693602853105e-06, "loss": 0.3691, "step": 6536 }, { "epoch": 1.6164688427299705, "grad_norm": 0.7729312716289806, "learning_rate": 4.174452264456045e-06, "loss": 0.3418, "step": 6537 }, { "epoch": 1.6167161226508409, "grad_norm": 0.7830155121303025, "learning_rate": 4.1742108977558285e-06, "loss": 0.4129, "step": 6538 }, { "epoch": 1.6169634025717112, "grad_norm": 0.7765629277442521, "learning_rate": 4.173969502756537e-06, "loss": 0.3769, "step": 6539 }, { "epoch": 1.6172106824925816, "grad_norm": 0.7341535017170654, "learning_rate": 4.173728079462248e-06, "loss": 0.3734, "step": 6540 }, { "epoch": 1.617457962413452, "grad_norm": 0.814844644921821, "learning_rate": 4.173486627877046e-06, "loss": 0.394, "step": 6541 }, { "epoch": 1.6177052423343223, "grad_norm": 0.7834529278829478, "learning_rate": 4.173245148005009e-06, "loss": 0.3608, "step": 6542 }, { "epoch": 1.617952522255193, "grad_norm": 0.7573461969077054, "learning_rate": 4.17300363985022e-06, "loss": 0.3657, "step": 6543 }, { "epoch": 1.6181998021760633, "grad_norm": 0.785093994989005, "learning_rate": 4.172762103416762e-06, "loss": 0.3652, "step": 6544 }, { "epoch": 1.6184470820969339, "grad_norm": 0.7965435815249788, "learning_rate": 4.172520538708717e-06, "loss": 0.371, "step": 6545 }, { "epoch": 1.6186943620178043, "grad_norm": 0.8230025567551037, "learning_rate": 4.172278945730167e-06, "loss": 0.3504, "step": 6546 }, { "epoch": 1.6189416419386746, "grad_norm": 0.7592547945385816, "learning_rate": 4.172037324485198e-06, "loss": 0.3738, "step": 6547 }, { "epoch": 1.619188921859545, "grad_norm": 0.8007511624950716, "learning_rate": 4.171795674977892e-06, "loss": 0.3303, "step": 6548 }, { "epoch": 1.6194362017804154, "grad_norm": 0.784386360093995, "learning_rate": 4.171553997212334e-06, "loss": 0.3802, "step": 6549 }, { "epoch": 1.6196834817012857, "grad_norm": 0.7654228209065834, "learning_rate": 4.171312291192611e-06, "loss": 0.3825, "step": 6550 }, { "epoch": 1.6199307616221563, "grad_norm": 0.7857845069168679, "learning_rate": 4.171070556922805e-06, "loss": 0.3862, "step": 6551 }, { "epoch": 1.6201780415430267, "grad_norm": 0.7812464151919272, "learning_rate": 4.170828794407005e-06, "loss": 0.3751, "step": 6552 }, { "epoch": 1.6204253214638973, "grad_norm": 0.759420652791139, "learning_rate": 4.170587003649296e-06, "loss": 0.3472, "step": 6553 }, { "epoch": 1.6206726013847677, "grad_norm": 0.7814059264292587, "learning_rate": 4.170345184653766e-06, "loss": 0.3591, "step": 6554 }, { "epoch": 1.620919881305638, "grad_norm": 0.801820759107527, "learning_rate": 4.170103337424502e-06, "loss": 0.3634, "step": 6555 }, { "epoch": 1.6211671612265084, "grad_norm": 0.7821675736962501, "learning_rate": 4.169861461965592e-06, "loss": 0.3837, "step": 6556 }, { "epoch": 1.6214144411473788, "grad_norm": 0.777816656909823, "learning_rate": 4.169619558281124e-06, "loss": 0.3852, "step": 6557 }, { "epoch": 1.6216617210682491, "grad_norm": 0.7663062362472419, "learning_rate": 4.1693776263751875e-06, "loss": 0.3562, "step": 6558 }, { "epoch": 1.6219090009891197, "grad_norm": 0.7658174632533151, "learning_rate": 4.16913566625187e-06, "loss": 0.3622, "step": 6559 }, { "epoch": 1.62215628090999, "grad_norm": 0.7632408924776578, "learning_rate": 4.168893677915265e-06, "loss": 0.3394, "step": 6560 }, { "epoch": 1.6224035608308607, "grad_norm": 0.7913637370291615, "learning_rate": 4.168651661369459e-06, "loss": 0.3528, "step": 6561 }, { "epoch": 1.622650840751731, "grad_norm": 0.761358930374269, "learning_rate": 4.168409616618545e-06, "loss": 0.3787, "step": 6562 }, { "epoch": 1.6228981206726014, "grad_norm": 0.7894631637850145, "learning_rate": 4.168167543666614e-06, "loss": 0.3917, "step": 6563 }, { "epoch": 1.6231454005934718, "grad_norm": 0.802041550250857, "learning_rate": 4.167925442517757e-06, "loss": 0.3934, "step": 6564 }, { "epoch": 1.6233926805143422, "grad_norm": 0.7959185559473047, "learning_rate": 4.167683313176067e-06, "loss": 0.3538, "step": 6565 }, { "epoch": 1.6236399604352125, "grad_norm": 0.7397368397764701, "learning_rate": 4.167441155645635e-06, "loss": 0.3952, "step": 6566 }, { "epoch": 1.6238872403560831, "grad_norm": 0.7788678880533607, "learning_rate": 4.167198969930557e-06, "loss": 0.3745, "step": 6567 }, { "epoch": 1.6241345202769535, "grad_norm": 0.7566717862412129, "learning_rate": 4.166956756034924e-06, "loss": 0.3386, "step": 6568 }, { "epoch": 1.624381800197824, "grad_norm": 0.7808493739112745, "learning_rate": 4.1667145139628315e-06, "loss": 0.3643, "step": 6569 }, { "epoch": 1.6246290801186944, "grad_norm": 0.8017369011652434, "learning_rate": 4.1664722437183755e-06, "loss": 0.4078, "step": 6570 }, { "epoch": 1.6248763600395648, "grad_norm": 0.786091141819244, "learning_rate": 4.166229945305647e-06, "loss": 0.3493, "step": 6571 }, { "epoch": 1.6251236399604352, "grad_norm": 0.7603176837540065, "learning_rate": 4.165987618728747e-06, "loss": 0.3659, "step": 6572 }, { "epoch": 1.6253709198813056, "grad_norm": 0.7484970366460793, "learning_rate": 4.1657452639917665e-06, "loss": 0.3985, "step": 6573 }, { "epoch": 1.625618199802176, "grad_norm": 0.797735833783814, "learning_rate": 4.165502881098804e-06, "loss": 0.3501, "step": 6574 }, { "epoch": 1.6258654797230465, "grad_norm": 0.7760749162567088, "learning_rate": 4.165260470053958e-06, "loss": 0.3809, "step": 6575 }, { "epoch": 1.6261127596439169, "grad_norm": 0.7932282421008757, "learning_rate": 4.165018030861323e-06, "loss": 0.3856, "step": 6576 }, { "epoch": 1.6263600395647875, "grad_norm": 0.7764587292633846, "learning_rate": 4.164775563524999e-06, "loss": 0.4068, "step": 6577 }, { "epoch": 1.6266073194856578, "grad_norm": 0.7766844881962046, "learning_rate": 4.1645330680490845e-06, "loss": 0.361, "step": 6578 }, { "epoch": 1.6268545994065282, "grad_norm": 0.7940637672685334, "learning_rate": 4.164290544437677e-06, "loss": 0.3632, "step": 6579 }, { "epoch": 1.6271018793273986, "grad_norm": 0.7916681400718711, "learning_rate": 4.164047992694877e-06, "loss": 0.3947, "step": 6580 }, { "epoch": 1.627349159248269, "grad_norm": 0.7665100672831624, "learning_rate": 4.163805412824783e-06, "loss": 0.3686, "step": 6581 }, { "epoch": 1.6275964391691393, "grad_norm": 0.7463667383199731, "learning_rate": 4.163562804831497e-06, "loss": 0.3567, "step": 6582 }, { "epoch": 1.62784371909001, "grad_norm": 0.772836674846759, "learning_rate": 4.163320168719119e-06, "loss": 0.3573, "step": 6583 }, { "epoch": 1.6280909990108803, "grad_norm": 0.7823942100996084, "learning_rate": 4.163077504491751e-06, "loss": 0.3928, "step": 6584 }, { "epoch": 1.6283382789317509, "grad_norm": 0.7894765007177402, "learning_rate": 4.162834812153493e-06, "loss": 0.3983, "step": 6585 }, { "epoch": 1.6285855588526212, "grad_norm": 0.8279158800783958, "learning_rate": 4.162592091708448e-06, "loss": 0.3867, "step": 6586 }, { "epoch": 1.6288328387734916, "grad_norm": 0.7530937573079554, "learning_rate": 4.16234934316072e-06, "loss": 0.3551, "step": 6587 }, { "epoch": 1.629080118694362, "grad_norm": 0.7842887930363651, "learning_rate": 4.16210656651441e-06, "loss": 0.3549, "step": 6588 }, { "epoch": 1.6293273986152323, "grad_norm": 0.8166740019733313, "learning_rate": 4.161863761773623e-06, "loss": 0.358, "step": 6589 }, { "epoch": 1.6295746785361027, "grad_norm": 0.792436175650329, "learning_rate": 4.161620928942464e-06, "loss": 0.34, "step": 6590 }, { "epoch": 1.6298219584569733, "grad_norm": 0.7709339908207171, "learning_rate": 4.161378068025035e-06, "loss": 0.3464, "step": 6591 }, { "epoch": 1.6300692383778437, "grad_norm": 0.7978518296475964, "learning_rate": 4.161135179025444e-06, "loss": 0.3528, "step": 6592 }, { "epoch": 1.6303165182987143, "grad_norm": 0.7457461622551066, "learning_rate": 4.1608922619477935e-06, "loss": 0.3705, "step": 6593 }, { "epoch": 1.6305637982195846, "grad_norm": 0.7449216364364831, "learning_rate": 4.160649316796192e-06, "loss": 0.3662, "step": 6594 }, { "epoch": 1.630811078140455, "grad_norm": 0.7925954268375928, "learning_rate": 4.160406343574745e-06, "loss": 0.3567, "step": 6595 }, { "epoch": 1.6310583580613254, "grad_norm": 0.7503371495548458, "learning_rate": 4.160163342287558e-06, "loss": 0.3897, "step": 6596 }, { "epoch": 1.6313056379821957, "grad_norm": 0.7941535298129292, "learning_rate": 4.159920312938741e-06, "loss": 0.3767, "step": 6597 }, { "epoch": 1.6315529179030661, "grad_norm": 0.7498773458035257, "learning_rate": 4.159677255532402e-06, "loss": 0.3859, "step": 6598 }, { "epoch": 1.6318001978239367, "grad_norm": 0.804309208796496, "learning_rate": 4.159434170072646e-06, "loss": 0.3827, "step": 6599 }, { "epoch": 1.632047477744807, "grad_norm": 0.7989364764650462, "learning_rate": 4.159191056563586e-06, "loss": 0.3697, "step": 6600 }, { "epoch": 1.6322947576656777, "grad_norm": 0.8024593223681382, "learning_rate": 4.158947915009328e-06, "loss": 0.3612, "step": 6601 }, { "epoch": 1.632542037586548, "grad_norm": 0.7808893338536192, "learning_rate": 4.158704745413984e-06, "loss": 0.3752, "step": 6602 }, { "epoch": 1.6327893175074184, "grad_norm": 0.7666191855183968, "learning_rate": 4.158461547781663e-06, "loss": 0.3638, "step": 6603 }, { "epoch": 1.6330365974282888, "grad_norm": 0.7505010214067886, "learning_rate": 4.158218322116477e-06, "loss": 0.3861, "step": 6604 }, { "epoch": 1.6332838773491591, "grad_norm": 0.8013950502352145, "learning_rate": 4.157975068422535e-06, "loss": 0.3626, "step": 6605 }, { "epoch": 1.6335311572700295, "grad_norm": 0.7621578891329653, "learning_rate": 4.157731786703952e-06, "loss": 0.3681, "step": 6606 }, { "epoch": 1.6337784371909, "grad_norm": 0.8031094362093028, "learning_rate": 4.157488476964837e-06, "loss": 0.3365, "step": 6607 }, { "epoch": 1.6340257171117705, "grad_norm": 0.7976389197325177, "learning_rate": 4.157245139209305e-06, "loss": 0.3351, "step": 6608 }, { "epoch": 1.634272997032641, "grad_norm": 0.7834843412155627, "learning_rate": 4.157001773441467e-06, "loss": 0.3345, "step": 6609 }, { "epoch": 1.6345202769535114, "grad_norm": 0.8030345474208987, "learning_rate": 4.156758379665439e-06, "loss": 0.4005, "step": 6610 }, { "epoch": 1.6347675568743818, "grad_norm": 0.7718495359003413, "learning_rate": 4.156514957885333e-06, "loss": 0.3579, "step": 6611 }, { "epoch": 1.6350148367952522, "grad_norm": 0.7568233132812655, "learning_rate": 4.156271508105264e-06, "loss": 0.3649, "step": 6612 }, { "epoch": 1.6352621167161225, "grad_norm": 0.7581876667937875, "learning_rate": 4.1560280303293475e-06, "loss": 0.3803, "step": 6613 }, { "epoch": 1.6355093966369931, "grad_norm": 0.7550903646678931, "learning_rate": 4.155784524561699e-06, "loss": 0.3907, "step": 6614 }, { "epoch": 1.6357566765578635, "grad_norm": 0.7759249553054927, "learning_rate": 4.155540990806435e-06, "loss": 0.3619, "step": 6615 }, { "epoch": 1.636003956478734, "grad_norm": 0.7479014334012715, "learning_rate": 4.155297429067671e-06, "loss": 0.3711, "step": 6616 }, { "epoch": 1.6362512363996045, "grad_norm": 0.7483030951909093, "learning_rate": 4.155053839349523e-06, "loss": 0.4095, "step": 6617 }, { "epoch": 1.6364985163204748, "grad_norm": 0.7858573610590913, "learning_rate": 4.15481022165611e-06, "loss": 0.3724, "step": 6618 }, { "epoch": 1.6367457962413452, "grad_norm": 0.7761120930773092, "learning_rate": 4.15456657599155e-06, "loss": 0.3339, "step": 6619 }, { "epoch": 1.6369930761622156, "grad_norm": 0.8011440861469366, "learning_rate": 4.154322902359961e-06, "loss": 0.3674, "step": 6620 }, { "epoch": 1.637240356083086, "grad_norm": 0.772543402399608, "learning_rate": 4.154079200765461e-06, "loss": 0.3603, "step": 6621 }, { "epoch": 1.6374876360039565, "grad_norm": 0.7687649880195133, "learning_rate": 4.153835471212169e-06, "loss": 0.3486, "step": 6622 }, { "epoch": 1.637734915924827, "grad_norm": 0.7641167651417369, "learning_rate": 4.153591713704206e-06, "loss": 0.413, "step": 6623 }, { "epoch": 1.6379821958456975, "grad_norm": 0.7601962218992611, "learning_rate": 4.153347928245691e-06, "loss": 0.3846, "step": 6624 }, { "epoch": 1.6382294757665679, "grad_norm": 0.7928109479858753, "learning_rate": 4.153104114840747e-06, "loss": 0.3754, "step": 6625 }, { "epoch": 1.6384767556874382, "grad_norm": 0.7694058091343746, "learning_rate": 4.152860273493492e-06, "loss": 0.3772, "step": 6626 }, { "epoch": 1.6387240356083086, "grad_norm": 0.7594909636207792, "learning_rate": 4.15261640420805e-06, "loss": 0.3534, "step": 6627 }, { "epoch": 1.638971315529179, "grad_norm": 0.8002038920710788, "learning_rate": 4.152372506988544e-06, "loss": 0.3794, "step": 6628 }, { "epoch": 1.6392185954500493, "grad_norm": 0.7547115244453144, "learning_rate": 4.152128581839092e-06, "loss": 0.3524, "step": 6629 }, { "epoch": 1.63946587537092, "grad_norm": 0.8059659415632907, "learning_rate": 4.151884628763822e-06, "loss": 0.3824, "step": 6630 }, { "epoch": 1.6397131552917903, "grad_norm": 0.7817484333866705, "learning_rate": 4.151640647766854e-06, "loss": 0.354, "step": 6631 }, { "epoch": 1.6399604352126609, "grad_norm": 0.7848416072563933, "learning_rate": 4.151396638852314e-06, "loss": 0.3599, "step": 6632 }, { "epoch": 1.6402077151335313, "grad_norm": 0.7381109827780001, "learning_rate": 4.151152602024327e-06, "loss": 0.3526, "step": 6633 }, { "epoch": 1.6404549950544016, "grad_norm": 0.7408883159129851, "learning_rate": 4.150908537287015e-06, "loss": 0.3641, "step": 6634 }, { "epoch": 1.640702274975272, "grad_norm": 0.7764443740008291, "learning_rate": 4.150664444644506e-06, "loss": 0.3847, "step": 6635 }, { "epoch": 1.6409495548961424, "grad_norm": 0.7770578775893883, "learning_rate": 4.150420324100925e-06, "loss": 0.407, "step": 6636 }, { "epoch": 1.6411968348170127, "grad_norm": 0.7704643754151688, "learning_rate": 4.150176175660399e-06, "loss": 0.3644, "step": 6637 }, { "epoch": 1.6414441147378833, "grad_norm": 0.7990548409505585, "learning_rate": 4.149931999327055e-06, "loss": 0.3675, "step": 6638 }, { "epoch": 1.6416913946587537, "grad_norm": 0.7597671588789775, "learning_rate": 4.149687795105018e-06, "loss": 0.372, "step": 6639 }, { "epoch": 1.6419386745796243, "grad_norm": 0.7730304933590324, "learning_rate": 4.149443562998419e-06, "loss": 0.3803, "step": 6640 }, { "epoch": 1.6421859545004946, "grad_norm": 0.777485399952902, "learning_rate": 4.149199303011383e-06, "loss": 0.3807, "step": 6641 }, { "epoch": 1.642433234421365, "grad_norm": 0.795346843207346, "learning_rate": 4.1489550151480414e-06, "loss": 0.3694, "step": 6642 }, { "epoch": 1.6426805143422354, "grad_norm": 0.7859332551589144, "learning_rate": 4.148710699412522e-06, "loss": 0.3826, "step": 6643 }, { "epoch": 1.6429277942631058, "grad_norm": 0.8125557389771046, "learning_rate": 4.148466355808955e-06, "loss": 0.3538, "step": 6644 }, { "epoch": 1.6431750741839761, "grad_norm": 0.7931676187947898, "learning_rate": 4.148221984341471e-06, "loss": 0.3536, "step": 6645 }, { "epoch": 1.6434223541048467, "grad_norm": 0.8129073171765568, "learning_rate": 4.147977585014198e-06, "loss": 0.3643, "step": 6646 }, { "epoch": 1.643669634025717, "grad_norm": 0.7860499796724447, "learning_rate": 4.1477331578312705e-06, "loss": 0.3701, "step": 6647 }, { "epoch": 1.6439169139465877, "grad_norm": 0.7550880251698127, "learning_rate": 4.147488702796818e-06, "loss": 0.3749, "step": 6648 }, { "epoch": 1.644164193867458, "grad_norm": 0.7752826050671672, "learning_rate": 4.147244219914973e-06, "loss": 0.3583, "step": 6649 }, { "epoch": 1.6444114737883284, "grad_norm": 0.7535258424400659, "learning_rate": 4.146999709189868e-06, "loss": 0.3744, "step": 6650 }, { "epoch": 1.6446587537091988, "grad_norm": 0.8138410422989789, "learning_rate": 4.146755170625636e-06, "loss": 0.3786, "step": 6651 }, { "epoch": 1.6449060336300692, "grad_norm": 0.7625535325924326, "learning_rate": 4.14651060422641e-06, "loss": 0.3785, "step": 6652 }, { "epoch": 1.6451533135509395, "grad_norm": 0.8324705872629392, "learning_rate": 4.146266009996325e-06, "loss": 0.3617, "step": 6653 }, { "epoch": 1.6454005934718101, "grad_norm": 0.7891219894616447, "learning_rate": 4.146021387939515e-06, "loss": 0.3385, "step": 6654 }, { "epoch": 1.6456478733926805, "grad_norm": 0.8024066821100267, "learning_rate": 4.145776738060113e-06, "loss": 0.3442, "step": 6655 }, { "epoch": 1.645895153313551, "grad_norm": 0.769496268772205, "learning_rate": 4.145532060362257e-06, "loss": 0.3351, "step": 6656 }, { "epoch": 1.6461424332344214, "grad_norm": 0.7500532584080575, "learning_rate": 4.14528735485008e-06, "loss": 0.4013, "step": 6657 }, { "epoch": 1.6463897131552918, "grad_norm": 0.756760091020438, "learning_rate": 4.1450426215277215e-06, "loss": 0.3612, "step": 6658 }, { "epoch": 1.6466369930761622, "grad_norm": 0.7845973149070005, "learning_rate": 4.144797860399316e-06, "loss": 0.3798, "step": 6659 }, { "epoch": 1.6468842729970326, "grad_norm": 0.7569889901510168, "learning_rate": 4.144553071469001e-06, "loss": 0.3932, "step": 6660 }, { "epoch": 1.647131552917903, "grad_norm": 0.7787014416232189, "learning_rate": 4.144308254740913e-06, "loss": 0.3731, "step": 6661 }, { "epoch": 1.6473788328387735, "grad_norm": 0.7481030633143924, "learning_rate": 4.144063410219193e-06, "loss": 0.4048, "step": 6662 }, { "epoch": 1.6476261127596439, "grad_norm": 0.7924094449652909, "learning_rate": 4.143818537907978e-06, "loss": 0.4162, "step": 6663 }, { "epoch": 1.6478733926805145, "grad_norm": 0.7518699451853108, "learning_rate": 4.143573637811406e-06, "loss": 0.3625, "step": 6664 }, { "epoch": 1.6481206726013848, "grad_norm": 0.7605051283972848, "learning_rate": 4.143328709933618e-06, "loss": 0.3744, "step": 6665 }, { "epoch": 1.6483679525222552, "grad_norm": 0.8039656139637356, "learning_rate": 4.143083754278754e-06, "loss": 0.3629, "step": 6666 }, { "epoch": 1.6486152324431256, "grad_norm": 0.7817602520725602, "learning_rate": 4.142838770850954e-06, "loss": 0.3631, "step": 6667 }, { "epoch": 1.648862512363996, "grad_norm": 0.7799648934299573, "learning_rate": 4.142593759654359e-06, "loss": 0.3677, "step": 6668 }, { "epoch": 1.6491097922848663, "grad_norm": 0.7533876065756595, "learning_rate": 4.14234872069311e-06, "loss": 0.372, "step": 6669 }, { "epoch": 1.649357072205737, "grad_norm": 0.7465422074285688, "learning_rate": 4.1421036539713496e-06, "loss": 0.3632, "step": 6670 }, { "epoch": 1.6496043521266073, "grad_norm": 0.7794428117744102, "learning_rate": 4.141858559493221e-06, "loss": 0.3602, "step": 6671 }, { "epoch": 1.6498516320474779, "grad_norm": 0.7508256745197475, "learning_rate": 4.141613437262864e-06, "loss": 0.3587, "step": 6672 }, { "epoch": 1.6500989119683482, "grad_norm": 0.7574058672321415, "learning_rate": 4.141368287284424e-06, "loss": 0.375, "step": 6673 }, { "epoch": 1.6503461918892186, "grad_norm": 0.7714453702452474, "learning_rate": 4.141123109562045e-06, "loss": 0.368, "step": 6674 }, { "epoch": 1.650593471810089, "grad_norm": 0.7599700272096709, "learning_rate": 4.140877904099871e-06, "loss": 0.3669, "step": 6675 }, { "epoch": 1.6508407517309593, "grad_norm": 0.780197142975183, "learning_rate": 4.140632670902046e-06, "loss": 0.3789, "step": 6676 }, { "epoch": 1.6510880316518297, "grad_norm": 0.78839773889891, "learning_rate": 4.140387409972716e-06, "loss": 0.3634, "step": 6677 }, { "epoch": 1.6513353115727003, "grad_norm": 0.767537602815221, "learning_rate": 4.140142121316026e-06, "loss": 0.3638, "step": 6678 }, { "epoch": 1.6515825914935707, "grad_norm": 0.7611988992554405, "learning_rate": 4.139896804936122e-06, "loss": 0.3441, "step": 6679 }, { "epoch": 1.6518298714144413, "grad_norm": 0.7454422342207341, "learning_rate": 4.13965146083715e-06, "loss": 0.3752, "step": 6680 }, { "epoch": 1.6520771513353116, "grad_norm": 0.7640820999128499, "learning_rate": 4.139406089023259e-06, "loss": 0.3485, "step": 6681 }, { "epoch": 1.652324431256182, "grad_norm": 0.8110929813525174, "learning_rate": 4.139160689498595e-06, "loss": 0.4098, "step": 6682 }, { "epoch": 1.6525717111770524, "grad_norm": 0.7948606088948243, "learning_rate": 4.138915262267307e-06, "loss": 0.3972, "step": 6683 }, { "epoch": 1.6528189910979227, "grad_norm": 0.7853893677030981, "learning_rate": 4.138669807333542e-06, "loss": 0.3643, "step": 6684 }, { "epoch": 1.653066271018793, "grad_norm": 0.7726620538805388, "learning_rate": 4.1384243247014515e-06, "loss": 0.3756, "step": 6685 }, { "epoch": 1.6533135509396637, "grad_norm": 0.7989898482381433, "learning_rate": 4.1381788143751814e-06, "loss": 0.3678, "step": 6686 }, { "epoch": 1.653560830860534, "grad_norm": 0.8065143771467421, "learning_rate": 4.1379332763588834e-06, "loss": 0.3513, "step": 6687 }, { "epoch": 1.6538081107814047, "grad_norm": 0.7559682733668793, "learning_rate": 4.137687710656707e-06, "loss": 0.3476, "step": 6688 }, { "epoch": 1.654055390702275, "grad_norm": 0.7439744237277218, "learning_rate": 4.137442117272805e-06, "loss": 0.356, "step": 6689 }, { "epoch": 1.6543026706231454, "grad_norm": 0.7457890716100732, "learning_rate": 4.137196496211326e-06, "loss": 0.3887, "step": 6690 }, { "epoch": 1.6545499505440158, "grad_norm": 0.7781253972555253, "learning_rate": 4.136950847476424e-06, "loss": 0.3692, "step": 6691 }, { "epoch": 1.6547972304648861, "grad_norm": 0.7929475000792862, "learning_rate": 4.13670517107225e-06, "loss": 0.4024, "step": 6692 }, { "epoch": 1.6550445103857567, "grad_norm": 0.7746353390506288, "learning_rate": 4.1364594670029576e-06, "loss": 0.3724, "step": 6693 }, { "epoch": 1.655291790306627, "grad_norm": 0.7658326578499578, "learning_rate": 4.136213735272699e-06, "loss": 0.368, "step": 6694 }, { "epoch": 1.6555390702274977, "grad_norm": 0.7508219324831267, "learning_rate": 4.135967975885627e-06, "loss": 0.3447, "step": 6695 }, { "epoch": 1.655786350148368, "grad_norm": 0.7510695857821048, "learning_rate": 4.135722188845896e-06, "loss": 0.362, "step": 6696 }, { "epoch": 1.6560336300692384, "grad_norm": 0.7651601465488482, "learning_rate": 4.135476374157662e-06, "loss": 0.3596, "step": 6697 }, { "epoch": 1.6562809099901088, "grad_norm": 0.7514016422765147, "learning_rate": 4.135230531825079e-06, "loss": 0.4036, "step": 6698 }, { "epoch": 1.6565281899109792, "grad_norm": 0.7822945291853071, "learning_rate": 4.1349846618523036e-06, "loss": 0.3473, "step": 6699 }, { "epoch": 1.6567754698318495, "grad_norm": 0.8003065579795783, "learning_rate": 4.134738764243489e-06, "loss": 0.3642, "step": 6700 }, { "epoch": 1.6570227497527201, "grad_norm": 0.812947232856654, "learning_rate": 4.134492839002794e-06, "loss": 0.3892, "step": 6701 }, { "epoch": 1.6572700296735905, "grad_norm": 0.8343482810160195, "learning_rate": 4.134246886134375e-06, "loss": 0.3382, "step": 6702 }, { "epoch": 1.657517309594461, "grad_norm": 0.7540389313752899, "learning_rate": 4.134000905642389e-06, "loss": 0.4003, "step": 6703 }, { "epoch": 1.6577645895153315, "grad_norm": 0.7850879768478018, "learning_rate": 4.133754897530993e-06, "loss": 0.3933, "step": 6704 }, { "epoch": 1.6580118694362018, "grad_norm": 0.7835028571148235, "learning_rate": 4.133508861804347e-06, "loss": 0.3557, "step": 6705 }, { "epoch": 1.6582591493570722, "grad_norm": 0.7937109147986788, "learning_rate": 4.13326279846661e-06, "loss": 0.4011, "step": 6706 }, { "epoch": 1.6585064292779426, "grad_norm": 0.7652630821505204, "learning_rate": 4.133016707521937e-06, "loss": 0.3693, "step": 6707 }, { "epoch": 1.658753709198813, "grad_norm": 0.7872391051504758, "learning_rate": 4.132770588974493e-06, "loss": 0.3472, "step": 6708 }, { "epoch": 1.6590009891196835, "grad_norm": 0.7737228341828423, "learning_rate": 4.132524442828436e-06, "loss": 0.341, "step": 6709 }, { "epoch": 1.659248269040554, "grad_norm": 0.7674860013552977, "learning_rate": 4.132278269087925e-06, "loss": 0.3664, "step": 6710 }, { "epoch": 1.6594955489614245, "grad_norm": 0.8007833512295761, "learning_rate": 4.132032067757124e-06, "loss": 0.3351, "step": 6711 }, { "epoch": 1.6597428288822949, "grad_norm": 0.7707564799077532, "learning_rate": 4.131785838840191e-06, "loss": 0.3729, "step": 6712 }, { "epoch": 1.6599901088031652, "grad_norm": 0.8016597900655299, "learning_rate": 4.1315395823412916e-06, "loss": 0.3678, "step": 6713 }, { "epoch": 1.6602373887240356, "grad_norm": 0.7930320094660072, "learning_rate": 4.131293298264586e-06, "loss": 0.3757, "step": 6714 }, { "epoch": 1.660484668644906, "grad_norm": 0.7496271491179527, "learning_rate": 4.131046986614238e-06, "loss": 0.3632, "step": 6715 }, { "epoch": 1.6607319485657763, "grad_norm": 0.7846615002486915, "learning_rate": 4.1308006473944105e-06, "loss": 0.3484, "step": 6716 }, { "epoch": 1.660979228486647, "grad_norm": 0.7903791852235255, "learning_rate": 4.1305542806092675e-06, "loss": 0.3161, "step": 6717 }, { "epoch": 1.6612265084075173, "grad_norm": 0.7777546600061166, "learning_rate": 4.130307886262973e-06, "loss": 0.3671, "step": 6718 }, { "epoch": 1.6614737883283879, "grad_norm": 0.7720627916966857, "learning_rate": 4.130061464359694e-06, "loss": 0.3912, "step": 6719 }, { "epoch": 1.6617210682492582, "grad_norm": 0.7568413453704113, "learning_rate": 4.129815014903593e-06, "loss": 0.3631, "step": 6720 }, { "epoch": 1.6619683481701286, "grad_norm": 0.775954794262503, "learning_rate": 4.129568537898836e-06, "loss": 0.4054, "step": 6721 }, { "epoch": 1.662215628090999, "grad_norm": 0.7580062824775112, "learning_rate": 4.129322033349591e-06, "loss": 0.3744, "step": 6722 }, { "epoch": 1.6624629080118694, "grad_norm": 0.7927017938056489, "learning_rate": 4.129075501260022e-06, "loss": 0.3738, "step": 6723 }, { "epoch": 1.6627101879327397, "grad_norm": 0.7812434706484619, "learning_rate": 4.128828941634299e-06, "loss": 0.3625, "step": 6724 }, { "epoch": 1.6629574678536103, "grad_norm": 0.781538930927741, "learning_rate": 4.128582354476588e-06, "loss": 0.3769, "step": 6725 }, { "epoch": 1.6632047477744807, "grad_norm": 0.7343925351226825, "learning_rate": 4.128335739791058e-06, "loss": 0.3905, "step": 6726 }, { "epoch": 1.6634520276953513, "grad_norm": 0.7863658688816686, "learning_rate": 4.1280890975818744e-06, "loss": 0.3655, "step": 6727 }, { "epoch": 1.6636993076162216, "grad_norm": 0.74027116501358, "learning_rate": 4.127842427853211e-06, "loss": 0.3469, "step": 6728 }, { "epoch": 1.663946587537092, "grad_norm": 0.7623610532414649, "learning_rate": 4.127595730609233e-06, "loss": 0.3637, "step": 6729 }, { "epoch": 1.6641938674579624, "grad_norm": 0.7759179409388686, "learning_rate": 4.127349005854113e-06, "loss": 0.3537, "step": 6730 }, { "epoch": 1.6644411473788328, "grad_norm": 0.779875317295481, "learning_rate": 4.12710225359202e-06, "loss": 0.3391, "step": 6731 }, { "epoch": 1.6646884272997031, "grad_norm": 0.7930172185939713, "learning_rate": 4.126855473827126e-06, "loss": 0.3702, "step": 6732 }, { "epoch": 1.6649357072205737, "grad_norm": 0.7847410058876421, "learning_rate": 4.126608666563602e-06, "loss": 0.3594, "step": 6733 }, { "epoch": 1.665182987141444, "grad_norm": 0.7715178558752019, "learning_rate": 4.126361831805618e-06, "loss": 0.3815, "step": 6734 }, { "epoch": 1.6654302670623147, "grad_norm": 0.7751173979817876, "learning_rate": 4.126114969557349e-06, "loss": 0.3566, "step": 6735 }, { "epoch": 1.665677546983185, "grad_norm": 0.7922956765025191, "learning_rate": 4.125868079822966e-06, "loss": 0.3783, "step": 6736 }, { "epoch": 1.6659248269040554, "grad_norm": 0.7738788482237993, "learning_rate": 4.125621162606641e-06, "loss": 0.3751, "step": 6737 }, { "epoch": 1.6661721068249258, "grad_norm": 0.7592306716220251, "learning_rate": 4.125374217912551e-06, "loss": 0.3577, "step": 6738 }, { "epoch": 1.6664193867457961, "grad_norm": 0.7684610130910087, "learning_rate": 4.125127245744867e-06, "loss": 0.366, "step": 6739 }, { "epoch": 1.6666666666666665, "grad_norm": 0.7909682127997512, "learning_rate": 4.124880246107765e-06, "loss": 0.39, "step": 6740 }, { "epoch": 1.666913946587537, "grad_norm": 0.7744113821373708, "learning_rate": 4.124633219005421e-06, "loss": 0.4188, "step": 6741 }, { "epoch": 1.6671612265084075, "grad_norm": 0.7882668074301632, "learning_rate": 4.124386164442007e-06, "loss": 0.3948, "step": 6742 }, { "epoch": 1.667408506429278, "grad_norm": 0.7996246282515111, "learning_rate": 4.124139082421703e-06, "loss": 0.354, "step": 6743 }, { "epoch": 1.6676557863501484, "grad_norm": 0.7841149603880603, "learning_rate": 4.1238919729486825e-06, "loss": 0.3605, "step": 6744 }, { "epoch": 1.6679030662710188, "grad_norm": 0.7732174960518529, "learning_rate": 4.123644836027124e-06, "loss": 0.36, "step": 6745 }, { "epoch": 1.6681503461918892, "grad_norm": 0.7643198229017921, "learning_rate": 4.123397671661205e-06, "loss": 0.3711, "step": 6746 }, { "epoch": 1.6683976261127595, "grad_norm": 0.7713430515373961, "learning_rate": 4.123150479855101e-06, "loss": 0.3538, "step": 6747 }, { "epoch": 1.66864490603363, "grad_norm": 0.7556496964524059, "learning_rate": 4.122903260612993e-06, "loss": 0.3987, "step": 6748 }, { "epoch": 1.6688921859545005, "grad_norm": 0.7643766272190781, "learning_rate": 4.122656013939058e-06, "loss": 0.393, "step": 6749 }, { "epoch": 1.6691394658753709, "grad_norm": 0.7899779321371286, "learning_rate": 4.122408739837477e-06, "loss": 0.3823, "step": 6750 }, { "epoch": 1.6693867457962415, "grad_norm": 0.7346068702331334, "learning_rate": 4.1221614383124275e-06, "loss": 0.3755, "step": 6751 }, { "epoch": 1.6696340257171118, "grad_norm": 0.7667795309305915, "learning_rate": 4.1219141093680906e-06, "loss": 0.3625, "step": 6752 }, { "epoch": 1.6698813056379822, "grad_norm": 0.7789611559352798, "learning_rate": 4.121666753008647e-06, "loss": 0.3785, "step": 6753 }, { "epoch": 1.6701285855588526, "grad_norm": 0.7771177691382541, "learning_rate": 4.1214193692382775e-06, "loss": 0.3726, "step": 6754 }, { "epoch": 1.670375865479723, "grad_norm": 0.8152025816894256, "learning_rate": 4.121171958061164e-06, "loss": 0.365, "step": 6755 }, { "epoch": 1.6706231454005933, "grad_norm": 0.7465351126295863, "learning_rate": 4.120924519481488e-06, "loss": 0.4046, "step": 6756 }, { "epoch": 1.670870425321464, "grad_norm": 0.7642960927055855, "learning_rate": 4.120677053503432e-06, "loss": 0.3793, "step": 6757 }, { "epoch": 1.6711177052423343, "grad_norm": 0.7564437302858789, "learning_rate": 4.120429560131179e-06, "loss": 0.3341, "step": 6758 }, { "epoch": 1.6713649851632049, "grad_norm": 0.789095562218791, "learning_rate": 4.120182039368913e-06, "loss": 0.3615, "step": 6759 }, { "epoch": 1.6716122650840752, "grad_norm": 0.7613777059744211, "learning_rate": 4.119934491220817e-06, "loss": 0.3788, "step": 6760 }, { "epoch": 1.6718595450049456, "grad_norm": 0.7654501856732779, "learning_rate": 4.119686915691075e-06, "loss": 0.3802, "step": 6761 }, { "epoch": 1.672106824925816, "grad_norm": 0.7607119884674866, "learning_rate": 4.119439312783873e-06, "loss": 0.4144, "step": 6762 }, { "epoch": 1.6723541048466863, "grad_norm": 0.7541366029052827, "learning_rate": 4.1191916825033955e-06, "loss": 0.3669, "step": 6763 }, { "epoch": 1.6726013847675567, "grad_norm": 0.7745907457005295, "learning_rate": 4.118944024853828e-06, "loss": 0.3709, "step": 6764 }, { "epoch": 1.6728486646884273, "grad_norm": 0.7820592016600657, "learning_rate": 4.118696339839357e-06, "loss": 0.3664, "step": 6765 }, { "epoch": 1.673095944609298, "grad_norm": 0.7975363927937225, "learning_rate": 4.118448627464168e-06, "loss": 0.3951, "step": 6766 }, { "epoch": 1.6733432245301683, "grad_norm": 0.7769527656284131, "learning_rate": 4.11820088773245e-06, "loss": 0.3557, "step": 6767 }, { "epoch": 1.6735905044510386, "grad_norm": 0.7781117165511573, "learning_rate": 4.117953120648389e-06, "loss": 0.3674, "step": 6768 }, { "epoch": 1.673837784371909, "grad_norm": 0.7845584714868564, "learning_rate": 4.1177053262161734e-06, "loss": 0.3652, "step": 6769 }, { "epoch": 1.6740850642927794, "grad_norm": 0.780654861130646, "learning_rate": 4.117457504439992e-06, "loss": 0.3342, "step": 6770 }, { "epoch": 1.6743323442136497, "grad_norm": 0.758400839442241, "learning_rate": 4.1172096553240334e-06, "loss": 0.3775, "step": 6771 }, { "epoch": 1.6745796241345203, "grad_norm": 0.779635501581052, "learning_rate": 4.116961778872488e-06, "loss": 0.3873, "step": 6772 }, { "epoch": 1.6748269040553907, "grad_norm": 0.777421470010576, "learning_rate": 4.116713875089543e-06, "loss": 0.357, "step": 6773 }, { "epoch": 1.6750741839762613, "grad_norm": 0.7402290064965504, "learning_rate": 4.1164659439793904e-06, "loss": 0.3721, "step": 6774 }, { "epoch": 1.6753214638971317, "grad_norm": 0.7606824192379439, "learning_rate": 4.116217985546223e-06, "loss": 0.3583, "step": 6775 }, { "epoch": 1.675568743818002, "grad_norm": 0.7754284083947218, "learning_rate": 4.115969999794228e-06, "loss": 0.3628, "step": 6776 }, { "epoch": 1.6758160237388724, "grad_norm": 0.7700535498155154, "learning_rate": 4.1157219867276e-06, "loss": 0.3626, "step": 6777 }, { "epoch": 1.6760633036597428, "grad_norm": 0.7799831619935285, "learning_rate": 4.11547394635053e-06, "loss": 0.3423, "step": 6778 }, { "epoch": 1.6763105835806131, "grad_norm": 0.7939004489645899, "learning_rate": 4.1152258786672105e-06, "loss": 0.3453, "step": 6779 }, { "epoch": 1.6765578635014837, "grad_norm": 0.7828578155730592, "learning_rate": 4.114977783681836e-06, "loss": 0.3512, "step": 6780 }, { "epoch": 1.676805143422354, "grad_norm": 0.7692471493279143, "learning_rate": 4.114729661398597e-06, "loss": 0.3534, "step": 6781 }, { "epoch": 1.6770524233432247, "grad_norm": 0.7803817304596469, "learning_rate": 4.114481511821692e-06, "loss": 0.3682, "step": 6782 }, { "epoch": 1.677299703264095, "grad_norm": 0.7748498696511033, "learning_rate": 4.114233334955311e-06, "loss": 0.3809, "step": 6783 }, { "epoch": 1.6775469831849654, "grad_norm": 0.7783262656453636, "learning_rate": 4.113985130803651e-06, "loss": 0.3485, "step": 6784 }, { "epoch": 1.6777942631058358, "grad_norm": 0.8194284329251744, "learning_rate": 4.1137368993709075e-06, "loss": 0.3949, "step": 6785 }, { "epoch": 1.6780415430267062, "grad_norm": 0.793303995289666, "learning_rate": 4.113488640661276e-06, "loss": 0.3563, "step": 6786 }, { "epoch": 1.6782888229475765, "grad_norm": 0.7535460067369611, "learning_rate": 4.113240354678952e-06, "loss": 0.3807, "step": 6787 }, { "epoch": 1.6785361028684471, "grad_norm": 0.774739143839503, "learning_rate": 4.112992041428134e-06, "loss": 0.3488, "step": 6788 }, { "epoch": 1.6787833827893175, "grad_norm": 0.7773272133159755, "learning_rate": 4.112743700913019e-06, "loss": 0.3643, "step": 6789 }, { "epoch": 1.679030662710188, "grad_norm": 0.7460412944328989, "learning_rate": 4.112495333137803e-06, "loss": 0.3593, "step": 6790 }, { "epoch": 1.6792779426310585, "grad_norm": 0.7465908220610171, "learning_rate": 4.112246938106685e-06, "loss": 0.3694, "step": 6791 }, { "epoch": 1.6795252225519288, "grad_norm": 0.7920767328549951, "learning_rate": 4.111998515823864e-06, "loss": 0.3844, "step": 6792 }, { "epoch": 1.6797725024727992, "grad_norm": 0.7930964458909333, "learning_rate": 4.1117500662935385e-06, "loss": 0.3815, "step": 6793 }, { "epoch": 1.6800197823936696, "grad_norm": 0.7935580789124886, "learning_rate": 4.111501589519908e-06, "loss": 0.4076, "step": 6794 }, { "epoch": 1.68026706231454, "grad_norm": 0.7472496090984891, "learning_rate": 4.111253085507174e-06, "loss": 0.3605, "step": 6795 }, { "epoch": 1.6805143422354105, "grad_norm": 0.7593119664639646, "learning_rate": 4.1110045542595355e-06, "loss": 0.3619, "step": 6796 }, { "epoch": 1.6807616221562809, "grad_norm": 0.7738991699532524, "learning_rate": 4.110755995781193e-06, "loss": 0.3642, "step": 6797 }, { "epoch": 1.6810089020771515, "grad_norm": 0.7983894575641497, "learning_rate": 4.1105074100763484e-06, "loss": 0.3479, "step": 6798 }, { "epoch": 1.6812561819980218, "grad_norm": 0.7541045006982412, "learning_rate": 4.110258797149205e-06, "loss": 0.3668, "step": 6799 }, { "epoch": 1.6815034619188922, "grad_norm": 0.7759120804929042, "learning_rate": 4.1100101570039615e-06, "loss": 0.3798, "step": 6800 }, { "epoch": 1.6817507418397626, "grad_norm": 0.7650254004191417, "learning_rate": 4.109761489644825e-06, "loss": 0.3946, "step": 6801 }, { "epoch": 1.681998021760633, "grad_norm": 0.790069495725842, "learning_rate": 4.109512795075996e-06, "loss": 0.3481, "step": 6802 }, { "epoch": 1.6822453016815033, "grad_norm": 0.7781830032966695, "learning_rate": 4.109264073301679e-06, "loss": 0.362, "step": 6803 }, { "epoch": 1.682492581602374, "grad_norm": 0.7824330228618519, "learning_rate": 4.109015324326076e-06, "loss": 0.3576, "step": 6804 }, { "epoch": 1.6827398615232443, "grad_norm": 0.7401219563426451, "learning_rate": 4.108766548153396e-06, "loss": 0.3741, "step": 6805 }, { "epoch": 1.6829871414441149, "grad_norm": 0.7749188106496967, "learning_rate": 4.10851774478784e-06, "loss": 0.3564, "step": 6806 }, { "epoch": 1.6832344213649852, "grad_norm": 0.8156966065941833, "learning_rate": 4.1082689142336166e-06, "loss": 0.3683, "step": 6807 }, { "epoch": 1.6834817012858556, "grad_norm": 0.7897736144734177, "learning_rate": 4.108020056494928e-06, "loss": 0.3518, "step": 6808 }, { "epoch": 1.683728981206726, "grad_norm": 0.7657235962813265, "learning_rate": 4.1077711715759845e-06, "loss": 0.3537, "step": 6809 }, { "epoch": 1.6839762611275964, "grad_norm": 0.7658073507585909, "learning_rate": 4.107522259480991e-06, "loss": 0.375, "step": 6810 }, { "epoch": 1.6842235410484667, "grad_norm": 0.7939181320615777, "learning_rate": 4.107273320214155e-06, "loss": 0.3765, "step": 6811 }, { "epoch": 1.6844708209693373, "grad_norm": 0.7446748209266905, "learning_rate": 4.107024353779684e-06, "loss": 0.3972, "step": 6812 }, { "epoch": 1.6847181008902077, "grad_norm": 0.7804972690366893, "learning_rate": 4.1067753601817874e-06, "loss": 0.3613, "step": 6813 }, { "epoch": 1.6849653808110783, "grad_norm": 0.7677541018811758, "learning_rate": 4.106526339424672e-06, "loss": 0.3758, "step": 6814 }, { "epoch": 1.6852126607319486, "grad_norm": 0.7810873669442249, "learning_rate": 4.10627729151255e-06, "loss": 0.3358, "step": 6815 }, { "epoch": 1.685459940652819, "grad_norm": 0.7571187735474829, "learning_rate": 4.106028216449629e-06, "loss": 0.3433, "step": 6816 }, { "epoch": 1.6857072205736894, "grad_norm": 0.8068695192121957, "learning_rate": 4.1057791142401186e-06, "loss": 0.3893, "step": 6817 }, { "epoch": 1.6859545004945597, "grad_norm": 0.7898066286713468, "learning_rate": 4.10552998488823e-06, "loss": 0.3879, "step": 6818 }, { "epoch": 1.6862017804154301, "grad_norm": 0.8251192456735383, "learning_rate": 4.105280828398175e-06, "loss": 0.3792, "step": 6819 }, { "epoch": 1.6864490603363007, "grad_norm": 0.7884594740130225, "learning_rate": 4.105031644774164e-06, "loss": 0.3889, "step": 6820 }, { "epoch": 1.686696340257171, "grad_norm": 0.8458369709767818, "learning_rate": 4.10478243402041e-06, "loss": 0.3796, "step": 6821 }, { "epoch": 1.6869436201780417, "grad_norm": 0.7622145411103807, "learning_rate": 4.104533196141124e-06, "loss": 0.3776, "step": 6822 }, { "epoch": 1.687190900098912, "grad_norm": 0.7548255346916738, "learning_rate": 4.1042839311405195e-06, "loss": 0.3877, "step": 6823 }, { "epoch": 1.6874381800197824, "grad_norm": 0.7747421942083091, "learning_rate": 4.104034639022811e-06, "loss": 0.3674, "step": 6824 }, { "epoch": 1.6876854599406528, "grad_norm": 0.7552602306829063, "learning_rate": 4.10378531979221e-06, "loss": 0.3842, "step": 6825 }, { "epoch": 1.6879327398615231, "grad_norm": 0.7742437062743861, "learning_rate": 4.103535973452932e-06, "loss": 0.3643, "step": 6826 }, { "epoch": 1.6881800197823935, "grad_norm": 0.7809991923567002, "learning_rate": 4.103286600009192e-06, "loss": 0.3602, "step": 6827 }, { "epoch": 1.688427299703264, "grad_norm": 0.758650203812773, "learning_rate": 4.103037199465205e-06, "loss": 0.3671, "step": 6828 }, { "epoch": 1.6886745796241345, "grad_norm": 0.754326359084176, "learning_rate": 4.102787771825186e-06, "loss": 0.3845, "step": 6829 }, { "epoch": 1.688921859545005, "grad_norm": 0.7713153219810283, "learning_rate": 4.102538317093351e-06, "loss": 0.3723, "step": 6830 }, { "epoch": 1.6891691394658754, "grad_norm": 0.7954227957213958, "learning_rate": 4.102288835273918e-06, "loss": 0.3265, "step": 6831 }, { "epoch": 1.6894164193867458, "grad_norm": 0.7648278531944628, "learning_rate": 4.102039326371101e-06, "loss": 0.369, "step": 6832 }, { "epoch": 1.6896636993076162, "grad_norm": 0.7781935895396228, "learning_rate": 4.101789790389121e-06, "loss": 0.3756, "step": 6833 }, { "epoch": 1.6899109792284865, "grad_norm": 0.7632764130515566, "learning_rate": 4.1015402273321936e-06, "loss": 0.3652, "step": 6834 }, { "epoch": 1.690158259149357, "grad_norm": 0.7812597554536533, "learning_rate": 4.101290637204538e-06, "loss": 0.3505, "step": 6835 }, { "epoch": 1.6904055390702275, "grad_norm": 0.7932265341572854, "learning_rate": 4.101041020010373e-06, "loss": 0.3717, "step": 6836 }, { "epoch": 1.6906528189910979, "grad_norm": 0.7312117773428601, "learning_rate": 4.1007913757539164e-06, "loss": 0.356, "step": 6837 }, { "epoch": 1.6909000989119685, "grad_norm": 0.7478849847026467, "learning_rate": 4.10054170443939e-06, "loss": 0.3876, "step": 6838 }, { "epoch": 1.6911473788328388, "grad_norm": 0.7630741420213187, "learning_rate": 4.100292006071014e-06, "loss": 0.3625, "step": 6839 }, { "epoch": 1.6913946587537092, "grad_norm": 0.7597441163966658, "learning_rate": 4.100042280653006e-06, "loss": 0.3837, "step": 6840 }, { "epoch": 1.6916419386745796, "grad_norm": 0.7757823652453952, "learning_rate": 4.0997925281895915e-06, "loss": 0.337, "step": 6841 }, { "epoch": 1.69188921859545, "grad_norm": 0.7539263042734787, "learning_rate": 4.099542748684989e-06, "loss": 0.3579, "step": 6842 }, { "epoch": 1.6921364985163203, "grad_norm": 0.7718710241531876, "learning_rate": 4.09929294214342e-06, "loss": 0.3579, "step": 6843 }, { "epoch": 1.692383778437191, "grad_norm": 0.7737411179363308, "learning_rate": 4.09904310856911e-06, "loss": 0.3666, "step": 6844 }, { "epoch": 1.6926310583580615, "grad_norm": 0.7808171834999194, "learning_rate": 4.098793247966279e-06, "loss": 0.3653, "step": 6845 }, { "epoch": 1.6928783382789319, "grad_norm": 0.80280274563415, "learning_rate": 4.098543360339152e-06, "loss": 0.3569, "step": 6846 }, { "epoch": 1.6931256181998022, "grad_norm": 0.7564382826999684, "learning_rate": 4.098293445691953e-06, "loss": 0.3389, "step": 6847 }, { "epoch": 1.6933728981206726, "grad_norm": 0.7264809108521235, "learning_rate": 4.098043504028906e-06, "loss": 0.3968, "step": 6848 }, { "epoch": 1.693620178041543, "grad_norm": 0.7655653132354476, "learning_rate": 4.097793535354234e-06, "loss": 0.3553, "step": 6849 }, { "epoch": 1.6938674579624133, "grad_norm": 0.7607676432732207, "learning_rate": 4.0975435396721645e-06, "loss": 0.3707, "step": 6850 }, { "epoch": 1.694114737883284, "grad_norm": 0.7738923432847333, "learning_rate": 4.0972935169869225e-06, "loss": 0.3751, "step": 6851 }, { "epoch": 1.6943620178041543, "grad_norm": 0.7751052552171689, "learning_rate": 4.097043467302732e-06, "loss": 0.338, "step": 6852 }, { "epoch": 1.6946092977250249, "grad_norm": 0.7909829309933709, "learning_rate": 4.096793390623824e-06, "loss": 0.3434, "step": 6853 }, { "epoch": 1.6948565776458953, "grad_norm": 0.8515195159821857, "learning_rate": 4.096543286954422e-06, "loss": 0.3563, "step": 6854 }, { "epoch": 1.6951038575667656, "grad_norm": 0.7781738957723506, "learning_rate": 4.096293156298755e-06, "loss": 0.3513, "step": 6855 }, { "epoch": 1.695351137487636, "grad_norm": 0.786259581601314, "learning_rate": 4.09604299866105e-06, "loss": 0.3659, "step": 6856 }, { "epoch": 1.6955984174085064, "grad_norm": 0.7856127015395366, "learning_rate": 4.095792814045535e-06, "loss": 0.3413, "step": 6857 }, { "epoch": 1.6958456973293767, "grad_norm": 0.7567189403704796, "learning_rate": 4.095542602456441e-06, "loss": 0.3665, "step": 6858 }, { "epoch": 1.6960929772502473, "grad_norm": 0.7829803312869794, "learning_rate": 4.095292363897995e-06, "loss": 0.356, "step": 6859 }, { "epoch": 1.6963402571711177, "grad_norm": 0.7783402337654084, "learning_rate": 4.095042098374429e-06, "loss": 0.4062, "step": 6860 }, { "epoch": 1.6965875370919883, "grad_norm": 0.7719727650596084, "learning_rate": 4.094791805889971e-06, "loss": 0.3762, "step": 6861 }, { "epoch": 1.6968348170128587, "grad_norm": 0.7538060265422741, "learning_rate": 4.094541486448853e-06, "loss": 0.3457, "step": 6862 }, { "epoch": 1.697082096933729, "grad_norm": 0.7803706475234192, "learning_rate": 4.094291140055306e-06, "loss": 0.388, "step": 6863 }, { "epoch": 1.6973293768545994, "grad_norm": 0.7768501933781822, "learning_rate": 4.094040766713561e-06, "loss": 0.3739, "step": 6864 }, { "epoch": 1.6975766567754698, "grad_norm": 0.7881530415236903, "learning_rate": 4.09379036642785e-06, "loss": 0.364, "step": 6865 }, { "epoch": 1.6978239366963401, "grad_norm": 0.784461045174359, "learning_rate": 4.093539939202407e-06, "loss": 0.3697, "step": 6866 }, { "epoch": 1.6980712166172107, "grad_norm": 0.7526995444730522, "learning_rate": 4.093289485041463e-06, "loss": 0.3733, "step": 6867 }, { "epoch": 1.698318496538081, "grad_norm": 0.737304757568076, "learning_rate": 4.093039003949253e-06, "loss": 0.3532, "step": 6868 }, { "epoch": 1.6985657764589517, "grad_norm": 0.8017760030113679, "learning_rate": 4.0927884959300095e-06, "loss": 0.3731, "step": 6869 }, { "epoch": 1.698813056379822, "grad_norm": 0.7346618673773516, "learning_rate": 4.092537960987968e-06, "loss": 0.3728, "step": 6870 }, { "epoch": 1.6990603363006924, "grad_norm": 0.8111365530926959, "learning_rate": 4.092287399127363e-06, "loss": 0.3427, "step": 6871 }, { "epoch": 1.6993076162215628, "grad_norm": 0.7896270049930395, "learning_rate": 4.092036810352429e-06, "loss": 0.3515, "step": 6872 }, { "epoch": 1.6995548961424332, "grad_norm": 0.7470398841354595, "learning_rate": 4.091786194667402e-06, "loss": 0.3503, "step": 6873 }, { "epoch": 1.6998021760633035, "grad_norm": 0.774442228755584, "learning_rate": 4.091535552076518e-06, "loss": 0.3622, "step": 6874 }, { "epoch": 1.7000494559841741, "grad_norm": 0.7606586678546672, "learning_rate": 4.091284882584016e-06, "loss": 0.3429, "step": 6875 }, { "epoch": 1.7002967359050445, "grad_norm": 0.8121940091301255, "learning_rate": 4.091034186194129e-06, "loss": 0.3639, "step": 6876 }, { "epoch": 1.700544015825915, "grad_norm": 0.7943593198626219, "learning_rate": 4.090783462911098e-06, "loss": 0.3696, "step": 6877 }, { "epoch": 1.7007912957467854, "grad_norm": 0.7347631371423126, "learning_rate": 4.0905327127391575e-06, "loss": 0.356, "step": 6878 }, { "epoch": 1.7010385756676558, "grad_norm": 0.7598762088075274, "learning_rate": 4.090281935682548e-06, "loss": 0.3521, "step": 6879 }, { "epoch": 1.7012858555885262, "grad_norm": 0.7550187055634684, "learning_rate": 4.09003113174551e-06, "loss": 0.3512, "step": 6880 }, { "epoch": 1.7015331355093966, "grad_norm": 0.7597664204548819, "learning_rate": 4.08978030093228e-06, "loss": 0.3705, "step": 6881 }, { "epoch": 1.701780415430267, "grad_norm": 0.7862586831086366, "learning_rate": 4.089529443247099e-06, "loss": 0.343, "step": 6882 }, { "epoch": 1.7020276953511375, "grad_norm": 0.7830940223930252, "learning_rate": 4.0892785586942065e-06, "loss": 0.3702, "step": 6883 }, { "epoch": 1.7022749752720079, "grad_norm": 0.7629015040676601, "learning_rate": 4.089027647277845e-06, "loss": 0.3563, "step": 6884 }, { "epoch": 1.7025222551928785, "grad_norm": 0.8006850921494197, "learning_rate": 4.088776709002253e-06, "loss": 0.3291, "step": 6885 }, { "epoch": 1.7027695351137488, "grad_norm": 0.7507392107392267, "learning_rate": 4.088525743871674e-06, "loss": 0.3683, "step": 6886 }, { "epoch": 1.7030168150346192, "grad_norm": 0.7673005571666215, "learning_rate": 4.088274751890349e-06, "loss": 0.3632, "step": 6887 }, { "epoch": 1.7032640949554896, "grad_norm": 0.7837161957367774, "learning_rate": 4.088023733062521e-06, "loss": 0.3739, "step": 6888 }, { "epoch": 1.70351137487636, "grad_norm": 0.7644486611056448, "learning_rate": 4.0877726873924335e-06, "loss": 0.394, "step": 6889 }, { "epoch": 1.7037586547972303, "grad_norm": 0.7711436042396053, "learning_rate": 4.08752161488433e-06, "loss": 0.3804, "step": 6890 }, { "epoch": 1.704005934718101, "grad_norm": 0.7547251638763387, "learning_rate": 4.087270515542453e-06, "loss": 0.3698, "step": 6891 }, { "epoch": 1.7042532146389713, "grad_norm": 0.7683369517780322, "learning_rate": 4.087019389371048e-06, "loss": 0.3709, "step": 6892 }, { "epoch": 1.7045004945598419, "grad_norm": 0.752689965615487, "learning_rate": 4.086768236374358e-06, "loss": 0.3512, "step": 6893 }, { "epoch": 1.7047477744807122, "grad_norm": 0.7561426566516867, "learning_rate": 4.086517056556631e-06, "loss": 0.378, "step": 6894 }, { "epoch": 1.7049950544015826, "grad_norm": 0.8028894692170766, "learning_rate": 4.08626584992211e-06, "loss": 0.3385, "step": 6895 }, { "epoch": 1.705242334322453, "grad_norm": 0.7608645501716909, "learning_rate": 4.086014616475043e-06, "loss": 0.3664, "step": 6896 }, { "epoch": 1.7054896142433233, "grad_norm": 0.8054526210054541, "learning_rate": 4.085763356219677e-06, "loss": 0.3488, "step": 6897 }, { "epoch": 1.7057368941641937, "grad_norm": 0.7813958881079314, "learning_rate": 4.085512069160256e-06, "loss": 0.3576, "step": 6898 }, { "epoch": 1.7059841740850643, "grad_norm": 0.8078916968898548, "learning_rate": 4.08526075530103e-06, "loss": 0.3853, "step": 6899 }, { "epoch": 1.7062314540059347, "grad_norm": 0.7760197845311633, "learning_rate": 4.0850094146462475e-06, "loss": 0.3551, "step": 6900 }, { "epoch": 1.7064787339268053, "grad_norm": 0.763983800480479, "learning_rate": 4.084758047200155e-06, "loss": 0.3633, "step": 6901 }, { "epoch": 1.7067260138476756, "grad_norm": 0.7699753102922031, "learning_rate": 4.084506652967002e-06, "loss": 0.3597, "step": 6902 }, { "epoch": 1.706973293768546, "grad_norm": 0.7562395760782918, "learning_rate": 4.0842552319510375e-06, "loss": 0.3361, "step": 6903 }, { "epoch": 1.7072205736894164, "grad_norm": 0.7555199176492264, "learning_rate": 4.084003784156513e-06, "loss": 0.3694, "step": 6904 }, { "epoch": 1.7074678536102867, "grad_norm": 0.7515918947874032, "learning_rate": 4.083752309587676e-06, "loss": 0.3729, "step": 6905 }, { "epoch": 1.7077151335311571, "grad_norm": 0.7853451114526137, "learning_rate": 4.083500808248779e-06, "loss": 0.3546, "step": 6906 }, { "epoch": 1.7079624134520277, "grad_norm": 0.7799825798604438, "learning_rate": 4.083249280144073e-06, "loss": 0.3785, "step": 6907 }, { "epoch": 1.708209693372898, "grad_norm": 0.8247431500295573, "learning_rate": 4.082997725277809e-06, "loss": 0.3637, "step": 6908 }, { "epoch": 1.7084569732937687, "grad_norm": 0.7946437883773425, "learning_rate": 4.082746143654239e-06, "loss": 0.3587, "step": 6909 }, { "epoch": 1.708704253214639, "grad_norm": 0.7990767001304133, "learning_rate": 4.082494535277616e-06, "loss": 0.3604, "step": 6910 }, { "epoch": 1.7089515331355094, "grad_norm": 0.7767882558968677, "learning_rate": 4.082242900152194e-06, "loss": 0.3436, "step": 6911 }, { "epoch": 1.7091988130563798, "grad_norm": 0.7834270076304838, "learning_rate": 4.081991238282223e-06, "loss": 0.342, "step": 6912 }, { "epoch": 1.7094460929772501, "grad_norm": 0.7792489787752981, "learning_rate": 4.081739549671959e-06, "loss": 0.364, "step": 6913 }, { "epoch": 1.7096933728981205, "grad_norm": 0.7431001478908503, "learning_rate": 4.0814878343256566e-06, "loss": 0.3697, "step": 6914 }, { "epoch": 1.709940652818991, "grad_norm": 0.797711980040918, "learning_rate": 4.08123609224757e-06, "loss": 0.384, "step": 6915 }, { "epoch": 1.7101879327398615, "grad_norm": 0.8069114189412001, "learning_rate": 4.080984323441954e-06, "loss": 0.3956, "step": 6916 }, { "epoch": 1.710435212660732, "grad_norm": 0.753925757186855, "learning_rate": 4.080732527913065e-06, "loss": 0.3664, "step": 6917 }, { "epoch": 1.7106824925816024, "grad_norm": 0.7744061135125553, "learning_rate": 4.080480705665159e-06, "loss": 0.3801, "step": 6918 }, { "epoch": 1.7109297725024728, "grad_norm": 0.7689629608749334, "learning_rate": 4.080228856702492e-06, "loss": 0.3702, "step": 6919 }, { "epoch": 1.7111770524233432, "grad_norm": 0.7566538602366784, "learning_rate": 4.079976981029321e-06, "loss": 0.3661, "step": 6920 }, { "epoch": 1.7114243323442135, "grad_norm": 0.7486698875551699, "learning_rate": 4.0797250786499045e-06, "loss": 0.3774, "step": 6921 }, { "epoch": 1.7116716122650841, "grad_norm": 0.7740232737968732, "learning_rate": 4.079473149568498e-06, "loss": 0.3722, "step": 6922 }, { "epoch": 1.7119188921859545, "grad_norm": 0.8005937860609753, "learning_rate": 4.0792211937893635e-06, "loss": 0.3538, "step": 6923 }, { "epoch": 1.712166172106825, "grad_norm": 0.7927401588381437, "learning_rate": 4.078969211316756e-06, "loss": 0.3606, "step": 6924 }, { "epoch": 1.7124134520276955, "grad_norm": 0.7948598614476975, "learning_rate": 4.078717202154937e-06, "loss": 0.3431, "step": 6925 }, { "epoch": 1.7126607319485658, "grad_norm": 0.8062345747705367, "learning_rate": 4.078465166308166e-06, "loss": 0.3468, "step": 6926 }, { "epoch": 1.7129080118694362, "grad_norm": 0.7895270395980503, "learning_rate": 4.078213103780702e-06, "loss": 0.351, "step": 6927 }, { "epoch": 1.7131552917903066, "grad_norm": 0.7840710083523917, "learning_rate": 4.077961014576807e-06, "loss": 0.3484, "step": 6928 }, { "epoch": 1.713402571711177, "grad_norm": 0.7809353906948471, "learning_rate": 4.077708898700742e-06, "loss": 0.3561, "step": 6929 }, { "epoch": 1.7136498516320475, "grad_norm": 0.7533193315153158, "learning_rate": 4.077456756156767e-06, "loss": 0.353, "step": 6930 }, { "epoch": 1.713897131552918, "grad_norm": 0.7821829789432027, "learning_rate": 4.077204586949145e-06, "loss": 0.3448, "step": 6931 }, { "epoch": 1.7141444114737885, "grad_norm": 0.762037693992641, "learning_rate": 4.0769523910821375e-06, "loss": 0.3749, "step": 6932 }, { "epoch": 1.7143916913946589, "grad_norm": 0.7583478775356401, "learning_rate": 4.076700168560009e-06, "loss": 0.4019, "step": 6933 }, { "epoch": 1.7146389713155292, "grad_norm": 0.7855303798433295, "learning_rate": 4.076447919387022e-06, "loss": 0.3467, "step": 6934 }, { "epoch": 1.7148862512363996, "grad_norm": 0.7664226163184932, "learning_rate": 4.07619564356744e-06, "loss": 0.3379, "step": 6935 }, { "epoch": 1.71513353115727, "grad_norm": 0.7968026538508066, "learning_rate": 4.075943341105527e-06, "loss": 0.3839, "step": 6936 }, { "epoch": 1.7153808110781403, "grad_norm": 0.8007179100036671, "learning_rate": 4.0756910120055496e-06, "loss": 0.3415, "step": 6937 }, { "epoch": 1.715628090999011, "grad_norm": 0.7597113442274294, "learning_rate": 4.075438656271769e-06, "loss": 0.379, "step": 6938 }, { "epoch": 1.7158753709198813, "grad_norm": 0.7550298127102274, "learning_rate": 4.075186273908455e-06, "loss": 0.3587, "step": 6939 }, { "epoch": 1.7161226508407519, "grad_norm": 0.7500569072716838, "learning_rate": 4.074933864919871e-06, "loss": 0.379, "step": 6940 }, { "epoch": 1.7163699307616223, "grad_norm": 0.7737309772084685, "learning_rate": 4.074681429310285e-06, "loss": 0.3738, "step": 6941 }, { "epoch": 1.7166172106824926, "grad_norm": 0.7753930637823759, "learning_rate": 4.074428967083961e-06, "loss": 0.3619, "step": 6942 }, { "epoch": 1.716864490603363, "grad_norm": 0.7715435354456546, "learning_rate": 4.07417647824517e-06, "loss": 0.3782, "step": 6943 }, { "epoch": 1.7171117705242334, "grad_norm": 0.7925464564941586, "learning_rate": 4.073923962798177e-06, "loss": 0.3533, "step": 6944 }, { "epoch": 1.7173590504451037, "grad_norm": 0.772370767477761, "learning_rate": 4.073671420747253e-06, "loss": 0.3462, "step": 6945 }, { "epoch": 1.7176063303659743, "grad_norm": 0.785898333365632, "learning_rate": 4.0734188520966634e-06, "loss": 0.3758, "step": 6946 }, { "epoch": 1.7178536102868447, "grad_norm": 0.7635541382922489, "learning_rate": 4.07316625685068e-06, "loss": 0.3594, "step": 6947 }, { "epoch": 1.7181008902077153, "grad_norm": 0.7774871500279409, "learning_rate": 4.072913635013572e-06, "loss": 0.3437, "step": 6948 }, { "epoch": 1.7183481701285857, "grad_norm": 0.7718404803767875, "learning_rate": 4.072660986589608e-06, "loss": 0.3664, "step": 6949 }, { "epoch": 1.718595450049456, "grad_norm": 0.7966906537959726, "learning_rate": 4.0724083115830606e-06, "loss": 0.3503, "step": 6950 }, { "epoch": 1.7188427299703264, "grad_norm": 0.7848007918379526, "learning_rate": 4.072155609998199e-06, "loss": 0.3342, "step": 6951 }, { "epoch": 1.7190900098911968, "grad_norm": 0.7656960731598338, "learning_rate": 4.071902881839294e-06, "loss": 0.3576, "step": 6952 }, { "epoch": 1.7193372898120671, "grad_norm": 0.7948502013180656, "learning_rate": 4.07165012711062e-06, "loss": 0.3279, "step": 6953 }, { "epoch": 1.7195845697329377, "grad_norm": 0.7889308813238782, "learning_rate": 4.071397345816447e-06, "loss": 0.3541, "step": 6954 }, { "epoch": 1.719831849653808, "grad_norm": 0.7817695367128196, "learning_rate": 4.07114453796105e-06, "loss": 0.3548, "step": 6955 }, { "epoch": 1.7200791295746787, "grad_norm": 0.7599966197180804, "learning_rate": 4.0708917035487e-06, "loss": 0.3743, "step": 6956 }, { "epoch": 1.720326409495549, "grad_norm": 0.7577689832820313, "learning_rate": 4.0706388425836715e-06, "loss": 0.3602, "step": 6957 }, { "epoch": 1.7205736894164194, "grad_norm": 0.7717292453748348, "learning_rate": 4.070385955070239e-06, "loss": 0.3859, "step": 6958 }, { "epoch": 1.7208209693372898, "grad_norm": 0.7789100563605627, "learning_rate": 4.070133041012677e-06, "loss": 0.3285, "step": 6959 }, { "epoch": 1.7210682492581602, "grad_norm": 0.7844231463183191, "learning_rate": 4.06988010041526e-06, "loss": 0.3632, "step": 6960 }, { "epoch": 1.7213155291790305, "grad_norm": 0.8144989982400906, "learning_rate": 4.069627133282263e-06, "loss": 0.3363, "step": 6961 }, { "epoch": 1.7215628090999011, "grad_norm": 0.8039171593715706, "learning_rate": 4.069374139617962e-06, "loss": 0.396, "step": 6962 }, { "epoch": 1.7218100890207715, "grad_norm": 0.7706479971264336, "learning_rate": 4.069121119426634e-06, "loss": 0.3732, "step": 6963 }, { "epoch": 1.722057368941642, "grad_norm": 0.7644613018084148, "learning_rate": 4.068868072712557e-06, "loss": 0.3522, "step": 6964 }, { "epoch": 1.7223046488625124, "grad_norm": 0.746812881803705, "learning_rate": 4.068614999480006e-06, "loss": 0.3895, "step": 6965 }, { "epoch": 1.7225519287833828, "grad_norm": 0.7746872265333469, "learning_rate": 4.068361899733259e-06, "loss": 0.3638, "step": 6966 }, { "epoch": 1.7227992087042532, "grad_norm": 0.7576294278120879, "learning_rate": 4.068108773476595e-06, "loss": 0.3854, "step": 6967 }, { "epoch": 1.7230464886251236, "grad_norm": 0.7844538481869971, "learning_rate": 4.067855620714293e-06, "loss": 0.36, "step": 6968 }, { "epoch": 1.723293768545994, "grad_norm": 0.7776847651819359, "learning_rate": 4.06760244145063e-06, "loss": 0.3774, "step": 6969 }, { "epoch": 1.7235410484668645, "grad_norm": 0.7796432311572153, "learning_rate": 4.067349235689887e-06, "loss": 0.3633, "step": 6970 }, { "epoch": 1.7237883283877349, "grad_norm": 0.7601971403084555, "learning_rate": 4.0670960034363435e-06, "loss": 0.3935, "step": 6971 }, { "epoch": 1.7240356083086055, "grad_norm": 0.7865822302904907, "learning_rate": 4.0668427446942805e-06, "loss": 0.3557, "step": 6972 }, { "epoch": 1.7242828882294758, "grad_norm": 0.7934673534311637, "learning_rate": 4.066589459467977e-06, "loss": 0.3522, "step": 6973 }, { "epoch": 1.7245301681503462, "grad_norm": 0.7789810904498446, "learning_rate": 4.066336147761718e-06, "loss": 0.3772, "step": 6974 }, { "epoch": 1.7247774480712166, "grad_norm": 0.7719303910925079, "learning_rate": 4.066082809579781e-06, "loss": 0.3783, "step": 6975 }, { "epoch": 1.725024727992087, "grad_norm": 0.7931806500147879, "learning_rate": 4.0658294449264505e-06, "loss": 0.3692, "step": 6976 }, { "epoch": 1.7252720079129573, "grad_norm": 0.7700320065600561, "learning_rate": 4.0655760538060085e-06, "loss": 0.409, "step": 6977 }, { "epoch": 1.725519287833828, "grad_norm": 0.771716252277789, "learning_rate": 4.0653226362227386e-06, "loss": 0.392, "step": 6978 }, { "epoch": 1.7257665677546983, "grad_norm": 0.7598791914585432, "learning_rate": 4.065069192180922e-06, "loss": 0.3678, "step": 6979 }, { "epoch": 1.7260138476755689, "grad_norm": 0.7698714263481536, "learning_rate": 4.064815721684846e-06, "loss": 0.4084, "step": 6980 }, { "epoch": 1.7262611275964392, "grad_norm": 0.741503696617465, "learning_rate": 4.064562224738793e-06, "loss": 0.3883, "step": 6981 }, { "epoch": 1.7265084075173096, "grad_norm": 0.7876131202314963, "learning_rate": 4.064308701347049e-06, "loss": 0.3625, "step": 6982 }, { "epoch": 1.72675568743818, "grad_norm": 0.8033727547333067, "learning_rate": 4.064055151513898e-06, "loss": 0.3639, "step": 6983 }, { "epoch": 1.7270029673590503, "grad_norm": 0.7658897236915391, "learning_rate": 4.0638015752436264e-06, "loss": 0.3635, "step": 6984 }, { "epoch": 1.7272502472799207, "grad_norm": 0.7527157222498757, "learning_rate": 4.063547972540521e-06, "loss": 0.3811, "step": 6985 }, { "epoch": 1.7274975272007913, "grad_norm": 0.7737666486231553, "learning_rate": 4.063294343408865e-06, "loss": 0.3578, "step": 6986 }, { "epoch": 1.7277448071216617, "grad_norm": 0.7772203011876646, "learning_rate": 4.063040687852951e-06, "loss": 0.3592, "step": 6987 }, { "epoch": 1.7279920870425323, "grad_norm": 0.8185144928801814, "learning_rate": 4.0627870058770624e-06, "loss": 0.399, "step": 6988 }, { "epoch": 1.7282393669634026, "grad_norm": 0.798194284142356, "learning_rate": 4.062533297485489e-06, "loss": 0.3802, "step": 6989 }, { "epoch": 1.728486646884273, "grad_norm": 0.7946148366575118, "learning_rate": 4.062279562682518e-06, "loss": 0.3587, "step": 6990 }, { "epoch": 1.7287339268051434, "grad_norm": 0.7599512639475916, "learning_rate": 4.06202580147244e-06, "loss": 0.369, "step": 6991 }, { "epoch": 1.7289812067260137, "grad_norm": 0.7849117056435625, "learning_rate": 4.061772013859543e-06, "loss": 0.3664, "step": 6992 }, { "epoch": 1.729228486646884, "grad_norm": 0.7632791327539057, "learning_rate": 4.061518199848117e-06, "loss": 0.3499, "step": 6993 }, { "epoch": 1.7294757665677547, "grad_norm": 0.7500825263360519, "learning_rate": 4.061264359442452e-06, "loss": 0.389, "step": 6994 }, { "epoch": 1.729723046488625, "grad_norm": 0.7570864742792838, "learning_rate": 4.061010492646839e-06, "loss": 0.364, "step": 6995 }, { "epoch": 1.7299703264094957, "grad_norm": 0.8045068795847844, "learning_rate": 4.060756599465569e-06, "loss": 0.3372, "step": 6996 }, { "epoch": 1.730217606330366, "grad_norm": 0.7580175277067982, "learning_rate": 4.060502679902933e-06, "loss": 0.3712, "step": 6997 }, { "epoch": 1.7304648862512364, "grad_norm": 0.7975880052095747, "learning_rate": 4.060248733963225e-06, "loss": 0.3512, "step": 6998 }, { "epoch": 1.7307121661721068, "grad_norm": 0.7863311232612694, "learning_rate": 4.059994761650735e-06, "loss": 0.3776, "step": 6999 }, { "epoch": 1.7309594460929771, "grad_norm": 0.7643074274107566, "learning_rate": 4.059740762969756e-06, "loss": 0.3411, "step": 7000 }, { "epoch": 1.7312067260138477, "grad_norm": 0.7700502924003946, "learning_rate": 4.0594867379245825e-06, "loss": 0.3567, "step": 7001 }, { "epoch": 1.731454005934718, "grad_norm": 0.7587858256763951, "learning_rate": 4.059232686519508e-06, "loss": 0.3625, "step": 7002 }, { "epoch": 1.7317012858555887, "grad_norm": 0.753533039213561, "learning_rate": 4.058978608758828e-06, "loss": 0.3698, "step": 7003 }, { "epoch": 1.731948565776459, "grad_norm": 0.7695171493569588, "learning_rate": 4.058724504646834e-06, "loss": 0.3861, "step": 7004 }, { "epoch": 1.7321958456973294, "grad_norm": 0.7744008632342444, "learning_rate": 4.058470374187824e-06, "loss": 0.3448, "step": 7005 }, { "epoch": 1.7324431256181998, "grad_norm": 0.78218527242023, "learning_rate": 4.058216217386091e-06, "loss": 0.3484, "step": 7006 }, { "epoch": 1.7326904055390702, "grad_norm": 0.7826793661836767, "learning_rate": 4.057962034245934e-06, "loss": 0.3739, "step": 7007 }, { "epoch": 1.7329376854599405, "grad_norm": 0.7680297657268716, "learning_rate": 4.0577078247716476e-06, "loss": 0.3643, "step": 7008 }, { "epoch": 1.7331849653808111, "grad_norm": 0.7900648941988906, "learning_rate": 4.057453588967528e-06, "loss": 0.406, "step": 7009 }, { "epoch": 1.7334322453016815, "grad_norm": 0.7405859984349653, "learning_rate": 4.057199326837874e-06, "loss": 0.3592, "step": 7010 }, { "epoch": 1.733679525222552, "grad_norm": 0.7366302286460359, "learning_rate": 4.056945038386983e-06, "loss": 0.3895, "step": 7011 }, { "epoch": 1.7339268051434225, "grad_norm": 0.7905261484030558, "learning_rate": 4.056690723619153e-06, "loss": 0.3559, "step": 7012 }, { "epoch": 1.7341740850642928, "grad_norm": 0.7628454397376258, "learning_rate": 4.056436382538683e-06, "loss": 0.3514, "step": 7013 }, { "epoch": 1.7344213649851632, "grad_norm": 0.7469436153230911, "learning_rate": 4.056182015149872e-06, "loss": 0.3908, "step": 7014 }, { "epoch": 1.7346686449060336, "grad_norm": 0.7946341207371491, "learning_rate": 4.055927621457019e-06, "loss": 0.3734, "step": 7015 }, { "epoch": 1.734915924826904, "grad_norm": 0.7758047445196575, "learning_rate": 4.055673201464424e-06, "loss": 0.3537, "step": 7016 }, { "epoch": 1.7351632047477745, "grad_norm": 0.7810388831926307, "learning_rate": 4.055418755176388e-06, "loss": 0.362, "step": 7017 }, { "epoch": 1.735410484668645, "grad_norm": 0.766762703508843, "learning_rate": 4.055164282597213e-06, "loss": 0.3547, "step": 7018 }, { "epoch": 1.7356577645895155, "grad_norm": 0.7852700677799317, "learning_rate": 4.054909783731198e-06, "loss": 0.3669, "step": 7019 }, { "epoch": 1.7359050445103859, "grad_norm": 0.7597425590706075, "learning_rate": 4.054655258582646e-06, "loss": 0.3556, "step": 7020 }, { "epoch": 1.7361523244312562, "grad_norm": 0.7978730307378277, "learning_rate": 4.054400707155859e-06, "loss": 0.3518, "step": 7021 }, { "epoch": 1.7363996043521266, "grad_norm": 0.7600438850784719, "learning_rate": 4.05414612945514e-06, "loss": 0.3584, "step": 7022 }, { "epoch": 1.736646884272997, "grad_norm": 0.8061649004823592, "learning_rate": 4.053891525484791e-06, "loss": 0.3618, "step": 7023 }, { "epoch": 1.7368941641938673, "grad_norm": 0.7613658968682742, "learning_rate": 4.053636895249118e-06, "loss": 0.3758, "step": 7024 }, { "epoch": 1.737141444114738, "grad_norm": 0.7567948074775, "learning_rate": 4.053382238752421e-06, "loss": 0.3581, "step": 7025 }, { "epoch": 1.7373887240356083, "grad_norm": 0.7416665761047087, "learning_rate": 4.053127555999009e-06, "loss": 0.3345, "step": 7026 }, { "epoch": 1.7376360039564789, "grad_norm": 0.8046796399576006, "learning_rate": 4.052872846993184e-06, "loss": 0.3519, "step": 7027 }, { "epoch": 1.7378832838773492, "grad_norm": 0.782170421727744, "learning_rate": 4.052618111739252e-06, "loss": 0.3831, "step": 7028 }, { "epoch": 1.7381305637982196, "grad_norm": 0.7635947888786893, "learning_rate": 4.052363350241519e-06, "loss": 0.3496, "step": 7029 }, { "epoch": 1.73837784371909, "grad_norm": 0.7849316870073001, "learning_rate": 4.052108562504291e-06, "loss": 0.3969, "step": 7030 }, { "epoch": 1.7386251236399604, "grad_norm": 0.7772635301494616, "learning_rate": 4.0518537485318745e-06, "loss": 0.3798, "step": 7031 }, { "epoch": 1.7388724035608307, "grad_norm": 0.744543231064669, "learning_rate": 4.051598908328577e-06, "loss": 0.3436, "step": 7032 }, { "epoch": 1.7391196834817013, "grad_norm": 0.7527937908598009, "learning_rate": 4.051344041898706e-06, "loss": 0.3705, "step": 7033 }, { "epoch": 1.7393669634025717, "grad_norm": 0.7552517844894254, "learning_rate": 4.05108914924657e-06, "loss": 0.3778, "step": 7034 }, { "epoch": 1.7396142433234423, "grad_norm": 0.7546862733518336, "learning_rate": 4.050834230376475e-06, "loss": 0.3802, "step": 7035 }, { "epoch": 1.7398615232443126, "grad_norm": 0.7262399724558687, "learning_rate": 4.050579285292733e-06, "loss": 0.3725, "step": 7036 }, { "epoch": 1.740108803165183, "grad_norm": 0.7932520612001878, "learning_rate": 4.050324313999652e-06, "loss": 0.3687, "step": 7037 }, { "epoch": 1.7403560830860534, "grad_norm": 0.7825661402191781, "learning_rate": 4.050069316501541e-06, "loss": 0.3499, "step": 7038 }, { "epoch": 1.7406033630069238, "grad_norm": 0.7695482271929012, "learning_rate": 4.0498142928027114e-06, "loss": 0.3645, "step": 7039 }, { "epoch": 1.7408506429277941, "grad_norm": 0.7857814985698663, "learning_rate": 4.049559242907473e-06, "loss": 0.3796, "step": 7040 }, { "epoch": 1.7410979228486647, "grad_norm": 0.806741152135066, "learning_rate": 4.049304166820138e-06, "loss": 0.3562, "step": 7041 }, { "epoch": 1.741345202769535, "grad_norm": 0.7509628427196419, "learning_rate": 4.049049064545017e-06, "loss": 0.341, "step": 7042 }, { "epoch": 1.7415924826904057, "grad_norm": 0.7695824709320216, "learning_rate": 4.048793936086423e-06, "loss": 0.3496, "step": 7043 }, { "epoch": 1.741839762611276, "grad_norm": 0.7649115181903443, "learning_rate": 4.048538781448666e-06, "loss": 0.3581, "step": 7044 }, { "epoch": 1.7420870425321464, "grad_norm": 0.7662167683301415, "learning_rate": 4.048283600636061e-06, "loss": 0.3633, "step": 7045 }, { "epoch": 1.7423343224530168, "grad_norm": 0.8068278730405143, "learning_rate": 4.048028393652921e-06, "loss": 0.3463, "step": 7046 }, { "epoch": 1.7425816023738872, "grad_norm": 0.789690596809118, "learning_rate": 4.04777316050356e-06, "loss": 0.3509, "step": 7047 }, { "epoch": 1.7428288822947575, "grad_norm": 0.7877335014419636, "learning_rate": 4.04751790119229e-06, "loss": 0.3917, "step": 7048 }, { "epoch": 1.743076162215628, "grad_norm": 0.7586808515943586, "learning_rate": 4.0472626157234295e-06, "loss": 0.3773, "step": 7049 }, { "epoch": 1.7433234421364985, "grad_norm": 0.765149068879448, "learning_rate": 4.0470073041012905e-06, "loss": 0.3481, "step": 7050 }, { "epoch": 1.743570722057369, "grad_norm": 0.7687964533305375, "learning_rate": 4.046751966330189e-06, "loss": 0.3603, "step": 7051 }, { "epoch": 1.7438180019782394, "grad_norm": 0.7349398566589338, "learning_rate": 4.0464966024144425e-06, "loss": 0.3891, "step": 7052 }, { "epoch": 1.7440652818991098, "grad_norm": 0.7713786471198739, "learning_rate": 4.0462412123583656e-06, "loss": 0.3876, "step": 7053 }, { "epoch": 1.7443125618199802, "grad_norm": 0.7780188480273936, "learning_rate": 4.0459857961662756e-06, "loss": 0.3646, "step": 7054 }, { "epoch": 1.7445598417408505, "grad_norm": 0.7612541326884265, "learning_rate": 4.045730353842491e-06, "loss": 0.3687, "step": 7055 }, { "epoch": 1.744807121661721, "grad_norm": 0.793326742574365, "learning_rate": 4.0454748853913274e-06, "loss": 0.3667, "step": 7056 }, { "epoch": 1.7450544015825915, "grad_norm": 0.8103454555471244, "learning_rate": 4.045219390817105e-06, "loss": 0.3272, "step": 7057 }, { "epoch": 1.7453016815034619, "grad_norm": 0.810322004325931, "learning_rate": 4.044963870124141e-06, "loss": 0.3585, "step": 7058 }, { "epoch": 1.7455489614243325, "grad_norm": 0.77999674182318, "learning_rate": 4.044708323316755e-06, "loss": 0.3603, "step": 7059 }, { "epoch": 1.7457962413452028, "grad_norm": 0.7803283597412688, "learning_rate": 4.044452750399267e-06, "loss": 0.4064, "step": 7060 }, { "epoch": 1.7460435212660732, "grad_norm": 0.7848513238280478, "learning_rate": 4.044197151375995e-06, "loss": 0.3647, "step": 7061 }, { "epoch": 1.7462908011869436, "grad_norm": 0.8008432191070298, "learning_rate": 4.043941526251262e-06, "loss": 0.3517, "step": 7062 }, { "epoch": 1.746538081107814, "grad_norm": 0.7961161114268391, "learning_rate": 4.043685875029387e-06, "loss": 0.3529, "step": 7063 }, { "epoch": 1.7467853610286843, "grad_norm": 0.7623150137725012, "learning_rate": 4.043430197714693e-06, "loss": 0.3941, "step": 7064 }, { "epoch": 1.747032640949555, "grad_norm": 0.7752733515385228, "learning_rate": 4.0431744943114994e-06, "loss": 0.3637, "step": 7065 }, { "epoch": 1.7472799208704253, "grad_norm": 0.7533298352830594, "learning_rate": 4.04291876482413e-06, "loss": 0.3623, "step": 7066 }, { "epoch": 1.7475272007912959, "grad_norm": 0.7937362461039762, "learning_rate": 4.042663009256906e-06, "loss": 0.3669, "step": 7067 }, { "epoch": 1.7477744807121662, "grad_norm": 0.8426290342488143, "learning_rate": 4.042407227614153e-06, "loss": 0.3547, "step": 7068 }, { "epoch": 1.7480217606330366, "grad_norm": 0.7733467251071312, "learning_rate": 4.042151419900191e-06, "loss": 0.3859, "step": 7069 }, { "epoch": 1.748269040553907, "grad_norm": 0.7869830428410212, "learning_rate": 4.041895586119346e-06, "loss": 0.3738, "step": 7070 }, { "epoch": 1.7485163204747773, "grad_norm": 0.770152550698399, "learning_rate": 4.041639726275942e-06, "loss": 0.3447, "step": 7071 }, { "epoch": 1.7487636003956477, "grad_norm": 0.7764701867561041, "learning_rate": 4.0413838403743045e-06, "loss": 0.3582, "step": 7072 }, { "epoch": 1.7490108803165183, "grad_norm": 0.77303839565855, "learning_rate": 4.041127928418758e-06, "loss": 0.3573, "step": 7073 }, { "epoch": 1.7492581602373887, "grad_norm": 0.7941741515673854, "learning_rate": 4.0408719904136265e-06, "loss": 0.3404, "step": 7074 }, { "epoch": 1.7495054401582593, "grad_norm": 0.7442998308725417, "learning_rate": 4.040616026363239e-06, "loss": 0.3659, "step": 7075 }, { "epoch": 1.7497527200791296, "grad_norm": 0.7952897462142476, "learning_rate": 4.040360036271921e-06, "loss": 0.3456, "step": 7076 }, { "epoch": 1.75, "grad_norm": 0.7707889267560707, "learning_rate": 4.040104020143999e-06, "loss": 0.359, "step": 7077 }, { "epoch": 1.7502472799208704, "grad_norm": 0.7718345811656522, "learning_rate": 4.0398479779838005e-06, "loss": 0.3257, "step": 7078 }, { "epoch": 1.7504945598417407, "grad_norm": 0.7521232413714659, "learning_rate": 4.039591909795654e-06, "loss": 0.3319, "step": 7079 }, { "epoch": 1.7507418397626113, "grad_norm": 0.7784396840921822, "learning_rate": 4.0393358155838865e-06, "loss": 0.3424, "step": 7080 }, { "epoch": 1.7509891196834817, "grad_norm": 0.7366915287243174, "learning_rate": 4.039079695352829e-06, "loss": 0.3675, "step": 7081 }, { "epoch": 1.7512363996043523, "grad_norm": 0.7972489403050761, "learning_rate": 4.038823549106808e-06, "loss": 0.3337, "step": 7082 }, { "epoch": 1.7514836795252227, "grad_norm": 0.786195385793335, "learning_rate": 4.038567376850155e-06, "loss": 0.3406, "step": 7083 }, { "epoch": 1.751730959446093, "grad_norm": 0.7744510796632099, "learning_rate": 4.038311178587199e-06, "loss": 0.3484, "step": 7084 }, { "epoch": 1.7519782393669634, "grad_norm": 0.7756821493015428, "learning_rate": 4.038054954322272e-06, "loss": 0.3781, "step": 7085 }, { "epoch": 1.7522255192878338, "grad_norm": 0.7829253366401063, "learning_rate": 4.037798704059702e-06, "loss": 0.3541, "step": 7086 }, { "epoch": 1.7524727992087041, "grad_norm": 0.756802854719484, "learning_rate": 4.037542427803825e-06, "loss": 0.3877, "step": 7087 }, { "epoch": 1.7527200791295747, "grad_norm": 0.7776926135745864, "learning_rate": 4.037286125558968e-06, "loss": 0.3701, "step": 7088 }, { "epoch": 1.752967359050445, "grad_norm": 0.7813882450402585, "learning_rate": 4.037029797329466e-06, "loss": 0.3645, "step": 7089 }, { "epoch": 1.7532146389713157, "grad_norm": 0.8073485768722412, "learning_rate": 4.036773443119651e-06, "loss": 0.3717, "step": 7090 }, { "epoch": 1.753461918892186, "grad_norm": 0.7493965611921639, "learning_rate": 4.036517062933856e-06, "loss": 0.3714, "step": 7091 }, { "epoch": 1.7537091988130564, "grad_norm": 0.7755456866906866, "learning_rate": 4.036260656776416e-06, "loss": 0.3506, "step": 7092 }, { "epoch": 1.7539564787339268, "grad_norm": 0.762870991175556, "learning_rate": 4.036004224651663e-06, "loss": 0.3673, "step": 7093 }, { "epoch": 1.7542037586547972, "grad_norm": 0.7934778828317254, "learning_rate": 4.0357477665639325e-06, "loss": 0.3808, "step": 7094 }, { "epoch": 1.7544510385756675, "grad_norm": 0.7631137269890108, "learning_rate": 4.035491282517558e-06, "loss": 0.3566, "step": 7095 }, { "epoch": 1.7546983184965381, "grad_norm": 0.7729357420117107, "learning_rate": 4.0352347725168776e-06, "loss": 0.3312, "step": 7096 }, { "epoch": 1.7549455984174085, "grad_norm": 0.7501112624543017, "learning_rate": 4.034978236566224e-06, "loss": 0.3256, "step": 7097 }, { "epoch": 1.755192878338279, "grad_norm": 0.7865779053575136, "learning_rate": 4.0347216746699355e-06, "loss": 0.3756, "step": 7098 }, { "epoch": 1.7554401582591495, "grad_norm": 0.7756603924220048, "learning_rate": 4.034465086832349e-06, "loss": 0.3757, "step": 7099 }, { "epoch": 1.7556874381800198, "grad_norm": 0.7774563565492126, "learning_rate": 4.0342084730578e-06, "loss": 0.3667, "step": 7100 }, { "epoch": 1.7559347181008902, "grad_norm": 0.8088370376191526, "learning_rate": 4.033951833350626e-06, "loss": 0.383, "step": 7101 }, { "epoch": 1.7561819980217606, "grad_norm": 0.7964722164024227, "learning_rate": 4.033695167715167e-06, "loss": 0.3533, "step": 7102 }, { "epoch": 1.756429277942631, "grad_norm": 0.7991700945044148, "learning_rate": 4.033438476155759e-06, "loss": 0.3549, "step": 7103 }, { "epoch": 1.7566765578635015, "grad_norm": 0.7386686944039567, "learning_rate": 4.033181758676743e-06, "loss": 0.3687, "step": 7104 }, { "epoch": 1.756923837784372, "grad_norm": 0.7894615403734903, "learning_rate": 4.0329250152824564e-06, "loss": 0.3639, "step": 7105 }, { "epoch": 1.7571711177052425, "grad_norm": 0.7733035403454106, "learning_rate": 4.03266824597724e-06, "loss": 0.3607, "step": 7106 }, { "epoch": 1.7574183976261128, "grad_norm": 0.7560245339628656, "learning_rate": 4.0324114507654336e-06, "loss": 0.3893, "step": 7107 }, { "epoch": 1.7576656775469832, "grad_norm": 0.7828721253315771, "learning_rate": 4.032154629651378e-06, "loss": 0.3579, "step": 7108 }, { "epoch": 1.7579129574678536, "grad_norm": 0.7851030192722324, "learning_rate": 4.031897782639414e-06, "loss": 0.386, "step": 7109 }, { "epoch": 1.758160237388724, "grad_norm": 0.8530210486858115, "learning_rate": 4.0316409097338835e-06, "loss": 0.3587, "step": 7110 }, { "epoch": 1.7584075173095943, "grad_norm": 0.7656053652829985, "learning_rate": 4.031384010939128e-06, "loss": 0.3829, "step": 7111 }, { "epoch": 1.758654797230465, "grad_norm": 0.8061596982694799, "learning_rate": 4.03112708625949e-06, "loss": 0.3427, "step": 7112 }, { "epoch": 1.7589020771513353, "grad_norm": 0.7801349226205111, "learning_rate": 4.030870135699312e-06, "loss": 0.3736, "step": 7113 }, { "epoch": 1.7591493570722059, "grad_norm": 0.7835573380239258, "learning_rate": 4.030613159262937e-06, "loss": 0.3508, "step": 7114 }, { "epoch": 1.7593966369930762, "grad_norm": 0.7552495332971592, "learning_rate": 4.03035615695471e-06, "loss": 0.3619, "step": 7115 }, { "epoch": 1.7596439169139466, "grad_norm": 0.7759150316735609, "learning_rate": 4.030099128778974e-06, "loss": 0.3511, "step": 7116 }, { "epoch": 1.759891196834817, "grad_norm": 0.768909137578835, "learning_rate": 4.029842074740074e-06, "loss": 0.3878, "step": 7117 }, { "epoch": 1.7601384767556874, "grad_norm": 0.776517820110676, "learning_rate": 4.029584994842353e-06, "loss": 0.3575, "step": 7118 }, { "epoch": 1.7603857566765577, "grad_norm": 0.7499995339560291, "learning_rate": 4.02932788909016e-06, "loss": 0.3702, "step": 7119 }, { "epoch": 1.7606330365974283, "grad_norm": 0.8117226549888129, "learning_rate": 4.029070757487837e-06, "loss": 0.3717, "step": 7120 }, { "epoch": 1.7608803165182987, "grad_norm": 0.774768522768347, "learning_rate": 4.028813600039734e-06, "loss": 0.3723, "step": 7121 }, { "epoch": 1.7611275964391693, "grad_norm": 0.7830744693793902, "learning_rate": 4.028556416750195e-06, "loss": 0.3707, "step": 7122 }, { "epoch": 1.7613748763600396, "grad_norm": 0.8063336193839151, "learning_rate": 4.028299207623568e-06, "loss": 0.3621, "step": 7123 }, { "epoch": 1.76162215628091, "grad_norm": 0.78249923333124, "learning_rate": 4.028041972664201e-06, "loss": 0.3724, "step": 7124 }, { "epoch": 1.7618694362017804, "grad_norm": 0.8180824727348749, "learning_rate": 4.027784711876443e-06, "loss": 0.3579, "step": 7125 }, { "epoch": 1.7621167161226508, "grad_norm": 0.7618220274159694, "learning_rate": 4.027527425264639e-06, "loss": 0.3661, "step": 7126 }, { "epoch": 1.7623639960435211, "grad_norm": 0.8179946177945342, "learning_rate": 4.027270112833141e-06, "loss": 0.3287, "step": 7127 }, { "epoch": 1.7626112759643917, "grad_norm": 0.7484619326335189, "learning_rate": 4.027012774586297e-06, "loss": 0.3829, "step": 7128 }, { "epoch": 1.762858555885262, "grad_norm": 0.7540004726898177, "learning_rate": 4.026755410528458e-06, "loss": 0.3776, "step": 7129 }, { "epoch": 1.7631058358061327, "grad_norm": 0.7711187456968842, "learning_rate": 4.0264980206639725e-06, "loss": 0.3394, "step": 7130 }, { "epoch": 1.763353115727003, "grad_norm": 0.7473951949264402, "learning_rate": 4.026240604997192e-06, "loss": 0.3901, "step": 7131 }, { "epoch": 1.7636003956478734, "grad_norm": 0.7989829277110387, "learning_rate": 4.025983163532469e-06, "loss": 0.3977, "step": 7132 }, { "epoch": 1.7638476755687438, "grad_norm": 0.7797091036472027, "learning_rate": 4.025725696274152e-06, "loss": 0.3978, "step": 7133 }, { "epoch": 1.7640949554896141, "grad_norm": 0.7860608727515578, "learning_rate": 4.025468203226595e-06, "loss": 0.3561, "step": 7134 }, { "epoch": 1.7643422354104845, "grad_norm": 0.791808316359867, "learning_rate": 4.025210684394149e-06, "loss": 0.3525, "step": 7135 }, { "epoch": 1.764589515331355, "grad_norm": 0.7688232354804099, "learning_rate": 4.02495313978117e-06, "loss": 0.3728, "step": 7136 }, { "epoch": 1.7648367952522255, "grad_norm": 0.7605786910269777, "learning_rate": 4.024695569392008e-06, "loss": 0.3619, "step": 7137 }, { "epoch": 1.765084075173096, "grad_norm": 0.8071158821005167, "learning_rate": 4.024437973231018e-06, "loss": 0.3675, "step": 7138 }, { "epoch": 1.7653313550939664, "grad_norm": 0.7751902937909572, "learning_rate": 4.0241803513025525e-06, "loss": 0.3773, "step": 7139 }, { "epoch": 1.7655786350148368, "grad_norm": 0.818815345566327, "learning_rate": 4.023922703610968e-06, "loss": 0.3759, "step": 7140 }, { "epoch": 1.7658259149357072, "grad_norm": 0.7846530949729205, "learning_rate": 4.023665030160619e-06, "loss": 0.3589, "step": 7141 }, { "epoch": 1.7660731948565775, "grad_norm": 0.7687673135470421, "learning_rate": 4.023407330955861e-06, "loss": 0.3761, "step": 7142 }, { "epoch": 1.766320474777448, "grad_norm": 0.7593172800608468, "learning_rate": 4.02314960600105e-06, "loss": 0.362, "step": 7143 }, { "epoch": 1.7665677546983185, "grad_norm": 0.7834701043762171, "learning_rate": 4.0228918553005415e-06, "loss": 0.3487, "step": 7144 }, { "epoch": 1.7668150346191889, "grad_norm": 0.7649404272785514, "learning_rate": 4.022634078858694e-06, "loss": 0.357, "step": 7145 }, { "epoch": 1.7670623145400595, "grad_norm": 0.7719024720825843, "learning_rate": 4.022376276679862e-06, "loss": 0.3842, "step": 7146 }, { "epoch": 1.7673095944609298, "grad_norm": 0.8051753826754433, "learning_rate": 4.022118448768405e-06, "loss": 0.3687, "step": 7147 }, { "epoch": 1.7675568743818002, "grad_norm": 0.7835094168533011, "learning_rate": 4.021860595128682e-06, "loss": 0.3346, "step": 7148 }, { "epoch": 1.7678041543026706, "grad_norm": 0.7776223049560492, "learning_rate": 4.021602715765048e-06, "loss": 0.3928, "step": 7149 }, { "epoch": 1.768051434223541, "grad_norm": 0.7310363239874548, "learning_rate": 4.0213448106818646e-06, "loss": 0.3721, "step": 7150 }, { "epoch": 1.7682987141444113, "grad_norm": 0.791046240926661, "learning_rate": 4.021086879883491e-06, "loss": 0.3633, "step": 7151 }, { "epoch": 1.768545994065282, "grad_norm": 0.7729303606555586, "learning_rate": 4.020828923374286e-06, "loss": 0.3427, "step": 7152 }, { "epoch": 1.7687932739861523, "grad_norm": 0.7726513517208957, "learning_rate": 4.020570941158612e-06, "loss": 0.3415, "step": 7153 }, { "epoch": 1.7690405539070229, "grad_norm": 0.7796262841245385, "learning_rate": 4.0203129332408255e-06, "loss": 0.3547, "step": 7154 }, { "epoch": 1.7692878338278932, "grad_norm": 0.76837479659239, "learning_rate": 4.020054899625292e-06, "loss": 0.3506, "step": 7155 }, { "epoch": 1.7695351137487636, "grad_norm": 0.7584672450235203, "learning_rate": 4.01979684031637e-06, "loss": 0.3641, "step": 7156 }, { "epoch": 1.769782393669634, "grad_norm": 0.7692639400504407, "learning_rate": 4.019538755318424e-06, "loss": 0.3509, "step": 7157 }, { "epoch": 1.7700296735905043, "grad_norm": 0.778020622964941, "learning_rate": 4.019280644635814e-06, "loss": 0.3681, "step": 7158 }, { "epoch": 1.770276953511375, "grad_norm": 0.7754782034540367, "learning_rate": 4.019022508272904e-06, "loss": 0.3565, "step": 7159 }, { "epoch": 1.7705242334322453, "grad_norm": 0.7845526636611452, "learning_rate": 4.018764346234058e-06, "loss": 0.3581, "step": 7160 }, { "epoch": 1.770771513353116, "grad_norm": 0.8181166987438833, "learning_rate": 4.018506158523637e-06, "loss": 0.3503, "step": 7161 }, { "epoch": 1.7710187932739863, "grad_norm": 0.7888188812786758, "learning_rate": 4.018247945146008e-06, "loss": 0.3567, "step": 7162 }, { "epoch": 1.7712660731948566, "grad_norm": 0.7669210459537595, "learning_rate": 4.0179897061055336e-06, "loss": 0.3371, "step": 7163 }, { "epoch": 1.771513353115727, "grad_norm": 0.7584271055644959, "learning_rate": 4.017731441406581e-06, "loss": 0.3656, "step": 7164 }, { "epoch": 1.7717606330365974, "grad_norm": 0.7683636550367243, "learning_rate": 4.017473151053514e-06, "loss": 0.3459, "step": 7165 }, { "epoch": 1.7720079129574677, "grad_norm": 0.7599990635358396, "learning_rate": 4.017214835050698e-06, "loss": 0.3748, "step": 7166 }, { "epoch": 1.7722551928783383, "grad_norm": 0.7747254956222718, "learning_rate": 4.016956493402501e-06, "loss": 0.3604, "step": 7167 }, { "epoch": 1.7725024727992087, "grad_norm": 0.7634114858899789, "learning_rate": 4.016698126113288e-06, "loss": 0.3631, "step": 7168 }, { "epoch": 1.7727497527200793, "grad_norm": 0.7778024303479925, "learning_rate": 4.0164397331874274e-06, "loss": 0.3965, "step": 7169 }, { "epoch": 1.7729970326409497, "grad_norm": 0.7840119491533951, "learning_rate": 4.0161813146292866e-06, "loss": 0.3621, "step": 7170 }, { "epoch": 1.77324431256182, "grad_norm": 0.7729527835195451, "learning_rate": 4.015922870443233e-06, "loss": 0.3414, "step": 7171 }, { "epoch": 1.7734915924826904, "grad_norm": 0.7924300549925892, "learning_rate": 4.015664400633636e-06, "loss": 0.3337, "step": 7172 }, { "epoch": 1.7737388724035608, "grad_norm": 0.7936694476248601, "learning_rate": 4.015405905204864e-06, "loss": 0.3713, "step": 7173 }, { "epoch": 1.7739861523244311, "grad_norm": 0.783389054821287, "learning_rate": 4.015147384161285e-06, "loss": 0.3866, "step": 7174 }, { "epoch": 1.7742334322453017, "grad_norm": 0.7765930998234429, "learning_rate": 4.0148888375072715e-06, "loss": 0.335, "step": 7175 }, { "epoch": 1.774480712166172, "grad_norm": 0.7963588878529659, "learning_rate": 4.014630265247191e-06, "loss": 0.3731, "step": 7176 }, { "epoch": 1.7747279920870427, "grad_norm": 0.7967857337885036, "learning_rate": 4.014371667385417e-06, "loss": 0.3673, "step": 7177 }, { "epoch": 1.774975272007913, "grad_norm": 0.770816933981129, "learning_rate": 4.014113043926318e-06, "loss": 0.3714, "step": 7178 }, { "epoch": 1.7752225519287834, "grad_norm": 0.7659609650155458, "learning_rate": 4.013854394874266e-06, "loss": 0.3758, "step": 7179 }, { "epoch": 1.7754698318496538, "grad_norm": 0.7888235275989428, "learning_rate": 4.013595720233634e-06, "loss": 0.3485, "step": 7180 }, { "epoch": 1.7757171117705242, "grad_norm": 0.7510317479289003, "learning_rate": 4.013337020008793e-06, "loss": 0.3631, "step": 7181 }, { "epoch": 1.7759643916913945, "grad_norm": 0.7741162507974444, "learning_rate": 4.013078294204118e-06, "loss": 0.3605, "step": 7182 }, { "epoch": 1.7762116716122651, "grad_norm": 0.7421914005782982, "learning_rate": 4.012819542823979e-06, "loss": 0.3704, "step": 7183 }, { "epoch": 1.7764589515331355, "grad_norm": 0.7789898939754114, "learning_rate": 4.0125607658727525e-06, "loss": 0.3611, "step": 7184 }, { "epoch": 1.776706231454006, "grad_norm": 0.776337466996287, "learning_rate": 4.012301963354811e-06, "loss": 0.4354, "step": 7185 }, { "epoch": 1.7769535113748764, "grad_norm": 0.7701083633555839, "learning_rate": 4.01204313527453e-06, "loss": 0.3227, "step": 7186 }, { "epoch": 1.7772007912957468, "grad_norm": 0.7731666246015851, "learning_rate": 4.011784281636284e-06, "loss": 0.3483, "step": 7187 }, { "epoch": 1.7774480712166172, "grad_norm": 0.7610158000665733, "learning_rate": 4.0115254024444484e-06, "loss": 0.3458, "step": 7188 }, { "epoch": 1.7776953511374876, "grad_norm": 0.7534438847155026, "learning_rate": 4.011266497703399e-06, "loss": 0.384, "step": 7189 }, { "epoch": 1.777942631058358, "grad_norm": 0.7459210961047691, "learning_rate": 4.011007567417511e-06, "loss": 0.3764, "step": 7190 }, { "epoch": 1.7781899109792285, "grad_norm": 0.7561332473963742, "learning_rate": 4.010748611591163e-06, "loss": 0.3455, "step": 7191 }, { "epoch": 1.7784371909000989, "grad_norm": 0.7451519576736813, "learning_rate": 4.010489630228731e-06, "loss": 0.3741, "step": 7192 }, { "epoch": 1.7786844708209695, "grad_norm": 0.7614353440538016, "learning_rate": 4.010230623334593e-06, "loss": 0.3705, "step": 7193 }, { "epoch": 1.7789317507418398, "grad_norm": 0.8059430809117466, "learning_rate": 4.009971590913127e-06, "loss": 0.3672, "step": 7194 }, { "epoch": 1.7791790306627102, "grad_norm": 0.766181169374355, "learning_rate": 4.00971253296871e-06, "loss": 0.3672, "step": 7195 }, { "epoch": 1.7794263105835806, "grad_norm": 0.7845575913699476, "learning_rate": 4.009453449505724e-06, "loss": 0.3431, "step": 7196 }, { "epoch": 1.779673590504451, "grad_norm": 0.7904401883638362, "learning_rate": 4.009194340528544e-06, "loss": 0.3766, "step": 7197 }, { "epoch": 1.7799208704253213, "grad_norm": 0.7615537319260887, "learning_rate": 4.008935206041553e-06, "loss": 0.3802, "step": 7198 }, { "epoch": 1.780168150346192, "grad_norm": 0.7772651265577275, "learning_rate": 4.0086760460491304e-06, "loss": 0.3589, "step": 7199 }, { "epoch": 1.7804154302670623, "grad_norm": 0.7434904006781674, "learning_rate": 4.008416860555656e-06, "loss": 0.3488, "step": 7200 }, { "epoch": 1.7806627101879329, "grad_norm": 0.7928024226365127, "learning_rate": 4.008157649565511e-06, "loss": 0.3595, "step": 7201 }, { "epoch": 1.7809099901088032, "grad_norm": 0.7341006797674348, "learning_rate": 4.007898413083078e-06, "loss": 0.3997, "step": 7202 }, { "epoch": 1.7811572700296736, "grad_norm": 0.7739088108251398, "learning_rate": 4.007639151112737e-06, "loss": 0.3682, "step": 7203 }, { "epoch": 1.781404549950544, "grad_norm": 0.7653019544143933, "learning_rate": 4.0073798636588725e-06, "loss": 0.3935, "step": 7204 }, { "epoch": 1.7816518298714143, "grad_norm": 0.7549758813627072, "learning_rate": 4.007120550725864e-06, "loss": 0.3548, "step": 7205 }, { "epoch": 1.7818991097922847, "grad_norm": 0.7565662469917791, "learning_rate": 4.006861212318099e-06, "loss": 0.3684, "step": 7206 }, { "epoch": 1.7821463897131553, "grad_norm": 0.7908244791229436, "learning_rate": 4.006601848439957e-06, "loss": 0.3556, "step": 7207 }, { "epoch": 1.7823936696340257, "grad_norm": 0.7764043257211857, "learning_rate": 4.006342459095824e-06, "loss": 0.361, "step": 7208 }, { "epoch": 1.7826409495548963, "grad_norm": 0.7989668355360754, "learning_rate": 4.006083044290085e-06, "loss": 0.3668, "step": 7209 }, { "epoch": 1.7828882294757666, "grad_norm": 0.7591537076877543, "learning_rate": 4.005823604027122e-06, "loss": 0.3578, "step": 7210 }, { "epoch": 1.783135509396637, "grad_norm": 0.7927965162960998, "learning_rate": 4.005564138311324e-06, "loss": 0.349, "step": 7211 }, { "epoch": 1.7833827893175074, "grad_norm": 0.8006939538708137, "learning_rate": 4.005304647147075e-06, "loss": 0.3579, "step": 7212 }, { "epoch": 1.7836300692383777, "grad_norm": 0.770693840784817, "learning_rate": 4.00504513053876e-06, "loss": 0.38, "step": 7213 }, { "epoch": 1.7838773491592481, "grad_norm": 0.7665915324620091, "learning_rate": 4.0047855884907675e-06, "loss": 0.3633, "step": 7214 }, { "epoch": 1.7841246290801187, "grad_norm": 0.7768734374756951, "learning_rate": 4.004526021007484e-06, "loss": 0.4024, "step": 7215 }, { "epoch": 1.784371909000989, "grad_norm": 0.7717258304505581, "learning_rate": 4.004266428093296e-06, "loss": 0.3774, "step": 7216 }, { "epoch": 1.7846191889218597, "grad_norm": 0.7834872580513829, "learning_rate": 4.004006809752593e-06, "loss": 0.3417, "step": 7217 }, { "epoch": 1.78486646884273, "grad_norm": 0.7535837584246923, "learning_rate": 4.003747165989762e-06, "loss": 0.3405, "step": 7218 }, { "epoch": 1.7851137487636004, "grad_norm": 0.7929454842683011, "learning_rate": 4.003487496809193e-06, "loss": 0.3388, "step": 7219 }, { "epoch": 1.7853610286844708, "grad_norm": 0.7612879965432602, "learning_rate": 4.003227802215273e-06, "loss": 0.3826, "step": 7220 }, { "epoch": 1.7856083086053411, "grad_norm": 0.7775964466129224, "learning_rate": 4.002968082212394e-06, "loss": 0.3507, "step": 7221 }, { "epoch": 1.7858555885262115, "grad_norm": 0.7816696934502708, "learning_rate": 4.002708336804945e-06, "loss": 0.3871, "step": 7222 }, { "epoch": 1.786102868447082, "grad_norm": 0.8247348082542035, "learning_rate": 4.002448565997316e-06, "loss": 0.3867, "step": 7223 }, { "epoch": 1.7863501483679525, "grad_norm": 0.7669580275342224, "learning_rate": 4.002188769793899e-06, "loss": 0.3688, "step": 7224 }, { "epoch": 1.786597428288823, "grad_norm": 0.7770139015186015, "learning_rate": 4.001928948199083e-06, "loss": 0.3824, "step": 7225 }, { "epoch": 1.7868447082096934, "grad_norm": 0.7559105763754408, "learning_rate": 4.001669101217264e-06, "loss": 0.3677, "step": 7226 }, { "epoch": 1.7870919881305638, "grad_norm": 0.7692066683722873, "learning_rate": 4.0014092288528296e-06, "loss": 0.3545, "step": 7227 }, { "epoch": 1.7873392680514342, "grad_norm": 0.7665557769214384, "learning_rate": 4.001149331110175e-06, "loss": 0.3581, "step": 7228 }, { "epoch": 1.7875865479723045, "grad_norm": 0.7839594331147018, "learning_rate": 4.000889407993692e-06, "loss": 0.3461, "step": 7229 }, { "epoch": 1.787833827893175, "grad_norm": 0.7801462205714615, "learning_rate": 4.000629459507776e-06, "loss": 0.3213, "step": 7230 }, { "epoch": 1.7880811078140455, "grad_norm": 0.7389738087834039, "learning_rate": 4.000369485656819e-06, "loss": 0.4221, "step": 7231 }, { "epoch": 1.7883283877349159, "grad_norm": 0.7733785377856531, "learning_rate": 4.000109486445216e-06, "loss": 0.3477, "step": 7232 }, { "epoch": 1.7885756676557865, "grad_norm": 0.7782670332477107, "learning_rate": 3.999849461877361e-06, "loss": 0.3506, "step": 7233 }, { "epoch": 1.7888229475766568, "grad_norm": 0.7672299830559178, "learning_rate": 3.99958941195765e-06, "loss": 0.3645, "step": 7234 }, { "epoch": 1.7890702274975272, "grad_norm": 0.7711107925383033, "learning_rate": 3.999329336690478e-06, "loss": 0.3488, "step": 7235 }, { "epoch": 1.7893175074183976, "grad_norm": 0.7832042369882355, "learning_rate": 3.999069236080243e-06, "loss": 0.3751, "step": 7236 }, { "epoch": 1.789564787339268, "grad_norm": 0.7567824724647196, "learning_rate": 3.998809110131338e-06, "loss": 0.3714, "step": 7237 }, { "epoch": 1.7898120672601385, "grad_norm": 0.7994000817184734, "learning_rate": 3.9985489588481644e-06, "loss": 0.3736, "step": 7238 }, { "epoch": 1.790059347181009, "grad_norm": 0.7597628160872285, "learning_rate": 3.998288782235115e-06, "loss": 0.3496, "step": 7239 }, { "epoch": 1.7903066271018795, "grad_norm": 0.7631172510144894, "learning_rate": 3.99802858029659e-06, "loss": 0.3872, "step": 7240 }, { "epoch": 1.7905539070227499, "grad_norm": 0.7961647187843612, "learning_rate": 3.997768353036987e-06, "loss": 0.3369, "step": 7241 }, { "epoch": 1.7908011869436202, "grad_norm": 0.7744069605973951, "learning_rate": 3.997508100460705e-06, "loss": 0.3413, "step": 7242 }, { "epoch": 1.7910484668644906, "grad_norm": 0.8181074444937908, "learning_rate": 3.9972478225721425e-06, "loss": 0.3732, "step": 7243 }, { "epoch": 1.791295746785361, "grad_norm": 0.7843529075050465, "learning_rate": 3.996987519375699e-06, "loss": 0.3447, "step": 7244 }, { "epoch": 1.7915430267062313, "grad_norm": 0.7995236105395649, "learning_rate": 3.996727190875774e-06, "loss": 0.3449, "step": 7245 }, { "epoch": 1.791790306627102, "grad_norm": 0.7837098336522321, "learning_rate": 3.996466837076769e-06, "loss": 0.3521, "step": 7246 }, { "epoch": 1.7920375865479723, "grad_norm": 0.7563848725121606, "learning_rate": 3.996206457983084e-06, "loss": 0.3736, "step": 7247 }, { "epoch": 1.7922848664688429, "grad_norm": 0.803387433322257, "learning_rate": 3.995946053599121e-06, "loss": 0.3594, "step": 7248 }, { "epoch": 1.7925321463897133, "grad_norm": 0.8068519325776944, "learning_rate": 3.9956856239292804e-06, "loss": 0.362, "step": 7249 }, { "epoch": 1.7927794263105836, "grad_norm": 0.773546852493055, "learning_rate": 3.9954251689779645e-06, "loss": 0.3341, "step": 7250 }, { "epoch": 1.793026706231454, "grad_norm": 0.7656604845485208, "learning_rate": 3.995164688749576e-06, "loss": 0.3802, "step": 7251 }, { "epoch": 1.7932739861523244, "grad_norm": 0.7805561669550511, "learning_rate": 3.994904183248518e-06, "loss": 0.3692, "step": 7252 }, { "epoch": 1.7935212660731947, "grad_norm": 0.7581420781282279, "learning_rate": 3.994643652479193e-06, "loss": 0.3758, "step": 7253 }, { "epoch": 1.7937685459940653, "grad_norm": 0.8266538351121558, "learning_rate": 3.994383096446006e-06, "loss": 0.3518, "step": 7254 }, { "epoch": 1.7940158259149357, "grad_norm": 0.8006215029960243, "learning_rate": 3.9941225151533604e-06, "loss": 0.3842, "step": 7255 }, { "epoch": 1.7942631058358063, "grad_norm": 0.78352746870191, "learning_rate": 3.993861908605659e-06, "loss": 0.344, "step": 7256 }, { "epoch": 1.7945103857566767, "grad_norm": 0.7695292567464369, "learning_rate": 3.993601276807311e-06, "loss": 0.3697, "step": 7257 }, { "epoch": 1.794757665677547, "grad_norm": 0.7979833831516164, "learning_rate": 3.993340619762719e-06, "loss": 0.3539, "step": 7258 }, { "epoch": 1.7950049455984174, "grad_norm": 0.7614571549528484, "learning_rate": 3.993079937476289e-06, "loss": 0.3552, "step": 7259 }, { "epoch": 1.7952522255192878, "grad_norm": 0.7996324605337116, "learning_rate": 3.992819229952427e-06, "loss": 0.3354, "step": 7260 }, { "epoch": 1.7954995054401581, "grad_norm": 0.7813322732822093, "learning_rate": 3.992558497195541e-06, "loss": 0.3769, "step": 7261 }, { "epoch": 1.7957467853610287, "grad_norm": 0.7501066172991729, "learning_rate": 3.992297739210037e-06, "loss": 0.3664, "step": 7262 }, { "epoch": 1.795994065281899, "grad_norm": 0.7501307140261373, "learning_rate": 3.992036956000324e-06, "loss": 0.3631, "step": 7263 }, { "epoch": 1.7962413452027697, "grad_norm": 0.7944306068764283, "learning_rate": 3.991776147570809e-06, "loss": 0.3631, "step": 7264 }, { "epoch": 1.79648862512364, "grad_norm": 0.7816757993319166, "learning_rate": 3.991515313925901e-06, "loss": 0.3662, "step": 7265 }, { "epoch": 1.7967359050445104, "grad_norm": 0.7884757205313266, "learning_rate": 3.991254455070007e-06, "loss": 0.3728, "step": 7266 }, { "epoch": 1.7969831849653808, "grad_norm": 0.7556719614912105, "learning_rate": 3.990993571007539e-06, "loss": 0.3847, "step": 7267 }, { "epoch": 1.7972304648862512, "grad_norm": 0.7839522396462182, "learning_rate": 3.990732661742904e-06, "loss": 0.3565, "step": 7268 }, { "epoch": 1.7974777448071215, "grad_norm": 0.8211170733174508, "learning_rate": 3.990471727280514e-06, "loss": 0.3391, "step": 7269 }, { "epoch": 1.7977250247279921, "grad_norm": 0.8154349819515231, "learning_rate": 3.990210767624779e-06, "loss": 0.335, "step": 7270 }, { "epoch": 1.7979723046488625, "grad_norm": 0.7456615256250396, "learning_rate": 3.989949782780111e-06, "loss": 0.3768, "step": 7271 }, { "epoch": 1.798219584569733, "grad_norm": 0.8098551752949212, "learning_rate": 3.989688772750919e-06, "loss": 0.3956, "step": 7272 }, { "epoch": 1.7984668644906034, "grad_norm": 0.747375090547007, "learning_rate": 3.989427737541617e-06, "loss": 0.3828, "step": 7273 }, { "epoch": 1.7987141444114738, "grad_norm": 0.7698057393306261, "learning_rate": 3.989166677156616e-06, "loss": 0.3689, "step": 7274 }, { "epoch": 1.7989614243323442, "grad_norm": 0.7597586253973976, "learning_rate": 3.988905591600331e-06, "loss": 0.3428, "step": 7275 }, { "epoch": 1.7992087042532146, "grad_norm": 0.7559006940983124, "learning_rate": 3.988644480877172e-06, "loss": 0.3769, "step": 7276 }, { "epoch": 1.799455984174085, "grad_norm": 0.7752980113681059, "learning_rate": 3.988383344991554e-06, "loss": 0.3764, "step": 7277 }, { "epoch": 1.7997032640949555, "grad_norm": 0.7940177715644315, "learning_rate": 3.988122183947891e-06, "loss": 0.3403, "step": 7278 }, { "epoch": 1.7999505440158259, "grad_norm": 0.796286068088447, "learning_rate": 3.987860997750597e-06, "loss": 0.3528, "step": 7279 }, { "epoch": 1.8001978239366965, "grad_norm": 0.7270793195683017, "learning_rate": 3.987599786404087e-06, "loss": 0.3863, "step": 7280 }, { "epoch": 1.8004451038575668, "grad_norm": 0.7583060592769464, "learning_rate": 3.987338549912776e-06, "loss": 0.3816, "step": 7281 }, { "epoch": 1.8006923837784372, "grad_norm": 0.7708906836118932, "learning_rate": 3.987077288281081e-06, "loss": 0.3486, "step": 7282 }, { "epoch": 1.8009396636993076, "grad_norm": 0.7969412385736876, "learning_rate": 3.986816001513416e-06, "loss": 0.3533, "step": 7283 }, { "epoch": 1.801186943620178, "grad_norm": 0.7607447507564805, "learning_rate": 3.986554689614199e-06, "loss": 0.3485, "step": 7284 }, { "epoch": 1.8014342235410483, "grad_norm": 0.781703907432996, "learning_rate": 3.986293352587847e-06, "loss": 0.3565, "step": 7285 }, { "epoch": 1.801681503461919, "grad_norm": 0.7675523030851547, "learning_rate": 3.986031990438776e-06, "loss": 0.3606, "step": 7286 }, { "epoch": 1.8019287833827893, "grad_norm": 0.7974067807389401, "learning_rate": 3.985770603171405e-06, "loss": 0.3626, "step": 7287 }, { "epoch": 1.8021760633036599, "grad_norm": 0.7742100397672838, "learning_rate": 3.985509190790152e-06, "loss": 0.3693, "step": 7288 }, { "epoch": 1.8024233432245302, "grad_norm": 0.7628452252023762, "learning_rate": 3.985247753299435e-06, "loss": 0.3586, "step": 7289 }, { "epoch": 1.8026706231454006, "grad_norm": 0.7839105627440003, "learning_rate": 3.984986290703674e-06, "loss": 0.3527, "step": 7290 }, { "epoch": 1.802917903066271, "grad_norm": 0.799878406935487, "learning_rate": 3.9847248030072875e-06, "loss": 0.3778, "step": 7291 }, { "epoch": 1.8031651829871413, "grad_norm": 0.7366582553733413, "learning_rate": 3.984463290214696e-06, "loss": 0.3495, "step": 7292 }, { "epoch": 1.8034124629080117, "grad_norm": 0.7731705621204102, "learning_rate": 3.98420175233032e-06, "loss": 0.3906, "step": 7293 }, { "epoch": 1.8036597428288823, "grad_norm": 0.7888381302885928, "learning_rate": 3.9839401893585805e-06, "loss": 0.3545, "step": 7294 }, { "epoch": 1.8039070227497527, "grad_norm": 0.8009286648984929, "learning_rate": 3.983678601303898e-06, "loss": 0.3533, "step": 7295 }, { "epoch": 1.8041543026706233, "grad_norm": 0.7470545145912033, "learning_rate": 3.983416988170694e-06, "loss": 0.373, "step": 7296 }, { "epoch": 1.8044015825914936, "grad_norm": 0.7579828980959165, "learning_rate": 3.9831553499633915e-06, "loss": 0.3666, "step": 7297 }, { "epoch": 1.804648862512364, "grad_norm": 0.7743325965785397, "learning_rate": 3.982893686686411e-06, "loss": 0.3537, "step": 7298 }, { "epoch": 1.8048961424332344, "grad_norm": 0.8054757237892601, "learning_rate": 3.9826319983441775e-06, "loss": 0.3701, "step": 7299 }, { "epoch": 1.8051434223541047, "grad_norm": 0.7582990988660984, "learning_rate": 3.982370284941113e-06, "loss": 0.3395, "step": 7300 }, { "epoch": 1.8053907022749751, "grad_norm": 0.8717085376649458, "learning_rate": 3.982108546481642e-06, "loss": 0.3529, "step": 7301 }, { "epoch": 1.8056379821958457, "grad_norm": 0.7638597713803753, "learning_rate": 3.981846782970189e-06, "loss": 0.387, "step": 7302 }, { "epoch": 1.805885262116716, "grad_norm": 0.7836529380939338, "learning_rate": 3.9815849944111774e-06, "loss": 0.3696, "step": 7303 }, { "epoch": 1.8061325420375867, "grad_norm": 0.755278447728005, "learning_rate": 3.981323180809033e-06, "loss": 0.3433, "step": 7304 }, { "epoch": 1.806379821958457, "grad_norm": 0.7868069921784078, "learning_rate": 3.98106134216818e-06, "loss": 0.3705, "step": 7305 }, { "epoch": 1.8066271018793274, "grad_norm": 0.7850804710602687, "learning_rate": 3.980799478493046e-06, "loss": 0.3382, "step": 7306 }, { "epoch": 1.8068743818001978, "grad_norm": 0.7877985649333125, "learning_rate": 3.980537589788056e-06, "loss": 0.3378, "step": 7307 }, { "epoch": 1.8071216617210681, "grad_norm": 0.7656799320839324, "learning_rate": 3.980275676057637e-06, "loss": 0.3655, "step": 7308 }, { "epoch": 1.8073689416419385, "grad_norm": 0.763777475430151, "learning_rate": 3.980013737306217e-06, "loss": 0.3929, "step": 7309 }, { "epoch": 1.807616221562809, "grad_norm": 0.7357988515156735, "learning_rate": 3.9797517735382215e-06, "loss": 0.3802, "step": 7310 }, { "epoch": 1.8078635014836797, "grad_norm": 0.7523115587769902, "learning_rate": 3.979489784758082e-06, "loss": 0.3544, "step": 7311 }, { "epoch": 1.80811078140455, "grad_norm": 0.7511522922851073, "learning_rate": 3.9792277709702226e-06, "loss": 0.3546, "step": 7312 }, { "epoch": 1.8083580613254204, "grad_norm": 0.756715512849783, "learning_rate": 3.978965732179074e-06, "loss": 0.3505, "step": 7313 }, { "epoch": 1.8086053412462908, "grad_norm": 0.7713931259357855, "learning_rate": 3.978703668389067e-06, "loss": 0.3619, "step": 7314 }, { "epoch": 1.8088526211671612, "grad_norm": 0.7818924485025746, "learning_rate": 3.978441579604629e-06, "loss": 0.3304, "step": 7315 }, { "epoch": 1.8090999010880315, "grad_norm": 0.7946814025173776, "learning_rate": 3.978179465830191e-06, "loss": 0.3751, "step": 7316 }, { "epoch": 1.8093471810089021, "grad_norm": 0.8134027601588883, "learning_rate": 3.977917327070183e-06, "loss": 0.3546, "step": 7317 }, { "epoch": 1.8095944609297725, "grad_norm": 0.78578562165036, "learning_rate": 3.977655163329036e-06, "loss": 0.3806, "step": 7318 }, { "epoch": 1.809841740850643, "grad_norm": 0.7807230935585607, "learning_rate": 3.977392974611183e-06, "loss": 0.3801, "step": 7319 }, { "epoch": 1.8100890207715135, "grad_norm": 0.744345368266045, "learning_rate": 3.977130760921053e-06, "loss": 0.3787, "step": 7320 }, { "epoch": 1.8103363006923838, "grad_norm": 0.759594152292472, "learning_rate": 3.97686852226308e-06, "loss": 0.3963, "step": 7321 }, { "epoch": 1.8105835806132542, "grad_norm": 0.7478060650410877, "learning_rate": 3.976606258641695e-06, "loss": 0.3924, "step": 7322 }, { "epoch": 1.8108308605341246, "grad_norm": 0.779587224852402, "learning_rate": 3.976343970061334e-06, "loss": 0.3674, "step": 7323 }, { "epoch": 1.811078140454995, "grad_norm": 0.7362485500123143, "learning_rate": 3.976081656526428e-06, "loss": 0.3947, "step": 7324 }, { "epoch": 1.8113254203758655, "grad_norm": 0.7829383625160734, "learning_rate": 3.975819318041411e-06, "loss": 0.3557, "step": 7325 }, { "epoch": 1.811572700296736, "grad_norm": 0.7981777401548518, "learning_rate": 3.975556954610718e-06, "loss": 0.3534, "step": 7326 }, { "epoch": 1.8118199802176065, "grad_norm": 0.7752012893839207, "learning_rate": 3.9752945662387835e-06, "loss": 0.407, "step": 7327 }, { "epoch": 1.8120672601384769, "grad_norm": 0.7973335528415081, "learning_rate": 3.975032152930043e-06, "loss": 0.3512, "step": 7328 }, { "epoch": 1.8123145400593472, "grad_norm": 0.7829333696544042, "learning_rate": 3.9747697146889305e-06, "loss": 0.3919, "step": 7329 }, { "epoch": 1.8125618199802176, "grad_norm": 0.7635667129528829, "learning_rate": 3.974507251519884e-06, "loss": 0.3463, "step": 7330 }, { "epoch": 1.812809099901088, "grad_norm": 0.7563785904337518, "learning_rate": 3.97424476342734e-06, "loss": 0.3526, "step": 7331 }, { "epoch": 1.8130563798219583, "grad_norm": 0.7601901959249305, "learning_rate": 3.973982250415732e-06, "loss": 0.3512, "step": 7332 }, { "epoch": 1.813303659742829, "grad_norm": 0.755399885946024, "learning_rate": 3.973719712489501e-06, "loss": 0.337, "step": 7333 }, { "epoch": 1.8135509396636993, "grad_norm": 0.7898261724455997, "learning_rate": 3.973457149653084e-06, "loss": 0.3688, "step": 7334 }, { "epoch": 1.8137982195845699, "grad_norm": 0.7823649634247314, "learning_rate": 3.973194561910917e-06, "loss": 0.3725, "step": 7335 }, { "epoch": 1.8140454995054403, "grad_norm": 0.7607392144858014, "learning_rate": 3.97293194926744e-06, "loss": 0.3877, "step": 7336 }, { "epoch": 1.8142927794263106, "grad_norm": 0.7595493956053893, "learning_rate": 3.972669311727092e-06, "loss": 0.3822, "step": 7337 }, { "epoch": 1.814540059347181, "grad_norm": 0.7669882760317928, "learning_rate": 3.972406649294313e-06, "loss": 0.3689, "step": 7338 }, { "epoch": 1.8147873392680514, "grad_norm": 0.7840406280104361, "learning_rate": 3.97214396197354e-06, "loss": 0.3698, "step": 7339 }, { "epoch": 1.8150346191889217, "grad_norm": 0.8010993137754997, "learning_rate": 3.971881249769216e-06, "loss": 0.3738, "step": 7340 }, { "epoch": 1.8152818991097923, "grad_norm": 0.7752587505432549, "learning_rate": 3.971618512685781e-06, "loss": 0.3715, "step": 7341 }, { "epoch": 1.8155291790306627, "grad_norm": 0.7699102728476689, "learning_rate": 3.971355750727675e-06, "loss": 0.3776, "step": 7342 }, { "epoch": 1.8157764589515333, "grad_norm": 0.7604088111488593, "learning_rate": 3.9710929638993414e-06, "loss": 0.3442, "step": 7343 }, { "epoch": 1.8160237388724036, "grad_norm": 0.7946031836421397, "learning_rate": 3.97083015220522e-06, "loss": 0.361, "step": 7344 }, { "epoch": 1.816271018793274, "grad_norm": 0.7731903103423462, "learning_rate": 3.970567315649754e-06, "loss": 0.3481, "step": 7345 }, { "epoch": 1.8165182987141444, "grad_norm": 0.761148973711678, "learning_rate": 3.970304454237386e-06, "loss": 0.3526, "step": 7346 }, { "epoch": 1.8167655786350148, "grad_norm": 0.7733114451172943, "learning_rate": 3.970041567972559e-06, "loss": 0.3801, "step": 7347 }, { "epoch": 1.8170128585558851, "grad_norm": 0.7889144486238302, "learning_rate": 3.9697786568597174e-06, "loss": 0.3579, "step": 7348 }, { "epoch": 1.8172601384767557, "grad_norm": 0.7988869597956187, "learning_rate": 3.969515720903303e-06, "loss": 0.3603, "step": 7349 }, { "epoch": 1.817507418397626, "grad_norm": 0.7784847445598695, "learning_rate": 3.969252760107763e-06, "loss": 0.3675, "step": 7350 }, { "epoch": 1.8177546983184967, "grad_norm": 0.7536652201967795, "learning_rate": 3.96898977447754e-06, "loss": 0.3647, "step": 7351 }, { "epoch": 1.818001978239367, "grad_norm": 0.7530291710527278, "learning_rate": 3.96872676401708e-06, "loss": 0.3629, "step": 7352 }, { "epoch": 1.8182492581602374, "grad_norm": 0.770081472175381, "learning_rate": 3.96846372873083e-06, "loss": 0.3578, "step": 7353 }, { "epoch": 1.8184965380811078, "grad_norm": 0.763111669033429, "learning_rate": 3.968200668623234e-06, "loss": 0.3764, "step": 7354 }, { "epoch": 1.8187438180019782, "grad_norm": 0.7865595753899918, "learning_rate": 3.967937583698739e-06, "loss": 0.3767, "step": 7355 }, { "epoch": 1.8189910979228485, "grad_norm": 0.7704495446394058, "learning_rate": 3.9676744739617925e-06, "loss": 0.3596, "step": 7356 }, { "epoch": 1.8192383778437191, "grad_norm": 0.757662772189102, "learning_rate": 3.967411339416841e-06, "loss": 0.348, "step": 7357 }, { "epoch": 1.8194856577645895, "grad_norm": 0.7764518370224494, "learning_rate": 3.967148180068334e-06, "loss": 0.3348, "step": 7358 }, { "epoch": 1.81973293768546, "grad_norm": 0.7760566819478711, "learning_rate": 3.966884995920717e-06, "loss": 0.3867, "step": 7359 }, { "epoch": 1.8199802176063304, "grad_norm": 0.7796506104145298, "learning_rate": 3.966621786978441e-06, "loss": 0.3561, "step": 7360 }, { "epoch": 1.8202274975272008, "grad_norm": 0.7866315107416375, "learning_rate": 3.9663585532459535e-06, "loss": 0.373, "step": 7361 }, { "epoch": 1.8204747774480712, "grad_norm": 0.7772640211670993, "learning_rate": 3.966095294727704e-06, "loss": 0.3661, "step": 7362 }, { "epoch": 1.8207220573689415, "grad_norm": 0.8128214342119254, "learning_rate": 3.965832011428144e-06, "loss": 0.3518, "step": 7363 }, { "epoch": 1.820969337289812, "grad_norm": 0.7728731436256171, "learning_rate": 3.965568703351722e-06, "loss": 0.3627, "step": 7364 }, { "epoch": 1.8212166172106825, "grad_norm": 0.7680703551893431, "learning_rate": 3.965305370502889e-06, "loss": 0.3598, "step": 7365 }, { "epoch": 1.8214638971315529, "grad_norm": 0.7632118133749927, "learning_rate": 3.965042012886097e-06, "loss": 0.3667, "step": 7366 }, { "epoch": 1.8217111770524235, "grad_norm": 0.8048950178436012, "learning_rate": 3.964778630505796e-06, "loss": 0.3139, "step": 7367 }, { "epoch": 1.8219584569732938, "grad_norm": 0.765111122088039, "learning_rate": 3.964515223366439e-06, "loss": 0.3926, "step": 7368 }, { "epoch": 1.8222057368941642, "grad_norm": 0.7914407074915291, "learning_rate": 3.964251791472478e-06, "loss": 0.3787, "step": 7369 }, { "epoch": 1.8224530168150346, "grad_norm": 0.8026903176044379, "learning_rate": 3.963988334828367e-06, "loss": 0.3702, "step": 7370 }, { "epoch": 1.822700296735905, "grad_norm": 0.7641770182759359, "learning_rate": 3.963724853438557e-06, "loss": 0.3932, "step": 7371 }, { "epoch": 1.8229475766567753, "grad_norm": 0.7738920291261927, "learning_rate": 3.9634613473075025e-06, "loss": 0.3864, "step": 7372 }, { "epoch": 1.823194856577646, "grad_norm": 0.7916093014719353, "learning_rate": 3.9631978164396585e-06, "loss": 0.3458, "step": 7373 }, { "epoch": 1.8234421364985163, "grad_norm": 0.7618021811157447, "learning_rate": 3.962934260839478e-06, "loss": 0.3941, "step": 7374 }, { "epoch": 1.8236894164193869, "grad_norm": 0.7434832439852919, "learning_rate": 3.962670680511417e-06, "loss": 0.3383, "step": 7375 }, { "epoch": 1.8239366963402572, "grad_norm": 0.757789641467671, "learning_rate": 3.9624070754599295e-06, "loss": 0.3578, "step": 7376 }, { "epoch": 1.8241839762611276, "grad_norm": 0.7679758397121798, "learning_rate": 3.9621434456894735e-06, "loss": 0.3779, "step": 7377 }, { "epoch": 1.824431256181998, "grad_norm": 0.8081994192448895, "learning_rate": 3.961879791204503e-06, "loss": 0.3342, "step": 7378 }, { "epoch": 1.8246785361028683, "grad_norm": 0.778881333632352, "learning_rate": 3.961616112009474e-06, "loss": 0.3328, "step": 7379 }, { "epoch": 1.8249258160237387, "grad_norm": 0.7746869803586666, "learning_rate": 3.961352408108846e-06, "loss": 0.3629, "step": 7380 }, { "epoch": 1.8251730959446093, "grad_norm": 0.7799733298464923, "learning_rate": 3.961088679507075e-06, "loss": 0.3307, "step": 7381 }, { "epoch": 1.8254203758654797, "grad_norm": 0.7647123904948452, "learning_rate": 3.960824926208618e-06, "loss": 0.3968, "step": 7382 }, { "epoch": 1.8256676557863503, "grad_norm": 0.7765812975624071, "learning_rate": 3.960561148217934e-06, "loss": 0.3546, "step": 7383 }, { "epoch": 1.8259149357072206, "grad_norm": 0.7775750815925597, "learning_rate": 3.960297345539481e-06, "loss": 0.375, "step": 7384 }, { "epoch": 1.826162215628091, "grad_norm": 0.7513863030725382, "learning_rate": 3.96003351817772e-06, "loss": 0.3751, "step": 7385 }, { "epoch": 1.8264094955489614, "grad_norm": 0.7804355197710163, "learning_rate": 3.959769666137109e-06, "loss": 0.3511, "step": 7386 }, { "epoch": 1.8266567754698317, "grad_norm": 0.787099207363827, "learning_rate": 3.9595057894221075e-06, "loss": 0.3445, "step": 7387 }, { "epoch": 1.826904055390702, "grad_norm": 0.7643023553024892, "learning_rate": 3.959241888037177e-06, "loss": 0.3612, "step": 7388 }, { "epoch": 1.8271513353115727, "grad_norm": 0.7788962328553705, "learning_rate": 3.958977961986776e-06, "loss": 0.3738, "step": 7389 }, { "epoch": 1.8273986152324433, "grad_norm": 0.7802383138837474, "learning_rate": 3.958714011275368e-06, "loss": 0.3749, "step": 7390 }, { "epoch": 1.8276458951533137, "grad_norm": 0.7929174798390334, "learning_rate": 3.958450035907413e-06, "loss": 0.3382, "step": 7391 }, { "epoch": 1.827893175074184, "grad_norm": 0.7816420555642002, "learning_rate": 3.9581860358873745e-06, "loss": 0.3514, "step": 7392 }, { "epoch": 1.8281404549950544, "grad_norm": 0.7603791374310098, "learning_rate": 3.957922011219714e-06, "loss": 0.3728, "step": 7393 }, { "epoch": 1.8283877349159248, "grad_norm": 0.8205534093967509, "learning_rate": 3.957657961908894e-06, "loss": 0.3414, "step": 7394 }, { "epoch": 1.8286350148367951, "grad_norm": 0.7627840093177868, "learning_rate": 3.9573938879593774e-06, "loss": 0.3827, "step": 7395 }, { "epoch": 1.8288822947576657, "grad_norm": 0.759225010876482, "learning_rate": 3.957129789375628e-06, "loss": 0.3476, "step": 7396 }, { "epoch": 1.829129574678536, "grad_norm": 0.7904236836259322, "learning_rate": 3.956865666162112e-06, "loss": 0.3612, "step": 7397 }, { "epoch": 1.8293768545994067, "grad_norm": 0.7661490457401557, "learning_rate": 3.956601518323291e-06, "loss": 0.3542, "step": 7398 }, { "epoch": 1.829624134520277, "grad_norm": 0.7828197685143107, "learning_rate": 3.9563373458636305e-06, "loss": 0.3514, "step": 7399 }, { "epoch": 1.8298714144411474, "grad_norm": 0.768537555997495, "learning_rate": 3.956073148787597e-06, "loss": 0.3602, "step": 7400 }, { "epoch": 1.8301186943620178, "grad_norm": 0.7888931388779784, "learning_rate": 3.9558089270996545e-06, "loss": 0.3772, "step": 7401 }, { "epoch": 1.8303659742828882, "grad_norm": 0.7628149504175374, "learning_rate": 3.955544680804271e-06, "loss": 0.3578, "step": 7402 }, { "epoch": 1.8306132542037585, "grad_norm": 0.7673677562241713, "learning_rate": 3.955280409905912e-06, "loss": 0.3463, "step": 7403 }, { "epoch": 1.8308605341246291, "grad_norm": 0.7608153180328122, "learning_rate": 3.955016114409045e-06, "loss": 0.407, "step": 7404 }, { "epoch": 1.8311078140454995, "grad_norm": 0.7687062128701716, "learning_rate": 3.9547517943181355e-06, "loss": 0.3652, "step": 7405 }, { "epoch": 1.83135509396637, "grad_norm": 0.7721075276891288, "learning_rate": 3.954487449637654e-06, "loss": 0.3563, "step": 7406 }, { "epoch": 1.8316023738872405, "grad_norm": 0.7460916947351195, "learning_rate": 3.954223080372067e-06, "loss": 0.3769, "step": 7407 }, { "epoch": 1.8318496538081108, "grad_norm": 0.7527982580239523, "learning_rate": 3.953958686525844e-06, "loss": 0.3817, "step": 7408 }, { "epoch": 1.8320969337289812, "grad_norm": 0.7793799993447887, "learning_rate": 3.953694268103453e-06, "loss": 0.3727, "step": 7409 }, { "epoch": 1.8323442136498516, "grad_norm": 0.7807769648905132, "learning_rate": 3.953429825109364e-06, "loss": 0.3754, "step": 7410 }, { "epoch": 1.832591493570722, "grad_norm": 0.7691274672566375, "learning_rate": 3.953165357548047e-06, "loss": 0.386, "step": 7411 }, { "epoch": 1.8328387734915925, "grad_norm": 0.7700121834521712, "learning_rate": 3.952900865423972e-06, "loss": 0.3832, "step": 7412 }, { "epoch": 1.833086053412463, "grad_norm": 0.7703341639584528, "learning_rate": 3.952636348741611e-06, "loss": 0.34, "step": 7413 }, { "epoch": 1.8333333333333335, "grad_norm": 0.7665277943664461, "learning_rate": 3.9523718075054326e-06, "loss": 0.3909, "step": 7414 }, { "epoch": 1.8335806132542039, "grad_norm": 0.8073785997191603, "learning_rate": 3.9521072417199095e-06, "loss": 0.3549, "step": 7415 }, { "epoch": 1.8338278931750742, "grad_norm": 0.8112946208049985, "learning_rate": 3.951842651389515e-06, "loss": 0.3594, "step": 7416 }, { "epoch": 1.8340751730959446, "grad_norm": 0.8051915768517403, "learning_rate": 3.9515780365187195e-06, "loss": 0.3874, "step": 7417 }, { "epoch": 1.834322453016815, "grad_norm": 0.8165285285824899, "learning_rate": 3.951313397111996e-06, "loss": 0.3723, "step": 7418 }, { "epoch": 1.8345697329376853, "grad_norm": 0.7539554376210432, "learning_rate": 3.951048733173819e-06, "loss": 0.3646, "step": 7419 }, { "epoch": 1.834817012858556, "grad_norm": 0.817443786192304, "learning_rate": 3.950784044708661e-06, "loss": 0.3498, "step": 7420 }, { "epoch": 1.8350642927794263, "grad_norm": 0.7678420723317751, "learning_rate": 3.9505193317209975e-06, "loss": 0.3541, "step": 7421 }, { "epoch": 1.8353115727002969, "grad_norm": 0.7676805525227901, "learning_rate": 3.9502545942153e-06, "loss": 0.3605, "step": 7422 }, { "epoch": 1.8355588526211672, "grad_norm": 0.7992806414594974, "learning_rate": 3.9499898321960465e-06, "loss": 0.372, "step": 7423 }, { "epoch": 1.8358061325420376, "grad_norm": 0.7548914014465345, "learning_rate": 3.949725045667711e-06, "loss": 0.3827, "step": 7424 }, { "epoch": 1.836053412462908, "grad_norm": 0.8281430003759476, "learning_rate": 3.949460234634768e-06, "loss": 0.3508, "step": 7425 }, { "epoch": 1.8363006923837784, "grad_norm": 0.76745964142457, "learning_rate": 3.949195399101695e-06, "loss": 0.3407, "step": 7426 }, { "epoch": 1.8365479723046487, "grad_norm": 0.7922525282112953, "learning_rate": 3.948930539072967e-06, "loss": 0.3639, "step": 7427 }, { "epoch": 1.8367952522255193, "grad_norm": 0.7965088307398251, "learning_rate": 3.9486656545530635e-06, "loss": 0.3327, "step": 7428 }, { "epoch": 1.8370425321463897, "grad_norm": 0.7684733719212643, "learning_rate": 3.94840074554646e-06, "loss": 0.3963, "step": 7429 }, { "epoch": 1.8372898120672603, "grad_norm": 0.7561461461012395, "learning_rate": 3.948135812057634e-06, "loss": 0.3588, "step": 7430 }, { "epoch": 1.8375370919881306, "grad_norm": 0.7791699942826947, "learning_rate": 3.947870854091065e-06, "loss": 0.3818, "step": 7431 }, { "epoch": 1.837784371909001, "grad_norm": 0.788379025175938, "learning_rate": 3.94760587165123e-06, "loss": 0.372, "step": 7432 }, { "epoch": 1.8380316518298714, "grad_norm": 0.7376147345264913, "learning_rate": 3.947340864742609e-06, "loss": 0.3405, "step": 7433 }, { "epoch": 1.8382789317507418, "grad_norm": 0.7668794954747092, "learning_rate": 3.947075833369682e-06, "loss": 0.3869, "step": 7434 }, { "epoch": 1.8385262116716121, "grad_norm": 0.7931966771160707, "learning_rate": 3.946810777536927e-06, "loss": 0.3599, "step": 7435 }, { "epoch": 1.8387734915924827, "grad_norm": 0.7769398840035104, "learning_rate": 3.9465456972488256e-06, "loss": 0.3739, "step": 7436 }, { "epoch": 1.839020771513353, "grad_norm": 0.7604618131665929, "learning_rate": 3.946280592509858e-06, "loss": 0.3347, "step": 7437 }, { "epoch": 1.8392680514342237, "grad_norm": 0.7975679220054768, "learning_rate": 3.946015463324505e-06, "loss": 0.3609, "step": 7438 }, { "epoch": 1.839515331355094, "grad_norm": 0.7509544270543206, "learning_rate": 3.945750309697249e-06, "loss": 0.3859, "step": 7439 }, { "epoch": 1.8397626112759644, "grad_norm": 0.7729653315694721, "learning_rate": 3.94548513163257e-06, "loss": 0.3551, "step": 7440 }, { "epoch": 1.8400098911968348, "grad_norm": 0.7684331265115407, "learning_rate": 3.945219929134953e-06, "loss": 0.3646, "step": 7441 }, { "epoch": 1.8402571711177051, "grad_norm": 0.7823693535833409, "learning_rate": 3.944954702208879e-06, "loss": 0.3666, "step": 7442 }, { "epoch": 1.8405044510385755, "grad_norm": 0.7948794179413049, "learning_rate": 3.944689450858831e-06, "loss": 0.3802, "step": 7443 }, { "epoch": 1.840751730959446, "grad_norm": 0.8241249757793069, "learning_rate": 3.944424175089292e-06, "loss": 0.3569, "step": 7444 }, { "epoch": 1.8409990108803165, "grad_norm": 0.7673447649729003, "learning_rate": 3.944158874904748e-06, "loss": 0.3561, "step": 7445 }, { "epoch": 1.841246290801187, "grad_norm": 0.7731086754938695, "learning_rate": 3.943893550309681e-06, "loss": 0.3885, "step": 7446 }, { "epoch": 1.8414935707220574, "grad_norm": 0.7502121026557462, "learning_rate": 3.943628201308578e-06, "loss": 0.3591, "step": 7447 }, { "epoch": 1.8417408506429278, "grad_norm": 0.7697454893805019, "learning_rate": 3.943362827905923e-06, "loss": 0.3474, "step": 7448 }, { "epoch": 1.8419881305637982, "grad_norm": 0.7515014956354322, "learning_rate": 3.943097430106201e-06, "loss": 0.3567, "step": 7449 }, { "epoch": 1.8422354104846685, "grad_norm": 0.7628959798319226, "learning_rate": 3.942832007913899e-06, "loss": 0.3332, "step": 7450 }, { "epoch": 1.842482690405539, "grad_norm": 0.7902231870722835, "learning_rate": 3.942566561333503e-06, "loss": 0.3549, "step": 7451 }, { "epoch": 1.8427299703264095, "grad_norm": 0.783366566916876, "learning_rate": 3.942301090369499e-06, "loss": 0.3698, "step": 7452 }, { "epoch": 1.8429772502472799, "grad_norm": 0.7929336098995579, "learning_rate": 3.942035595026377e-06, "loss": 0.3519, "step": 7453 }, { "epoch": 1.8432245301681505, "grad_norm": 0.7456871694614432, "learning_rate": 3.941770075308622e-06, "loss": 0.3333, "step": 7454 }, { "epoch": 1.8434718100890208, "grad_norm": 0.7909781335094875, "learning_rate": 3.941504531220722e-06, "loss": 0.3413, "step": 7455 }, { "epoch": 1.8437190900098912, "grad_norm": 0.7563767409453876, "learning_rate": 3.941238962767168e-06, "loss": 0.3616, "step": 7456 }, { "epoch": 1.8439663699307616, "grad_norm": 0.780799547909242, "learning_rate": 3.940973369952446e-06, "loss": 0.3708, "step": 7457 }, { "epoch": 1.844213649851632, "grad_norm": 0.7464199074296541, "learning_rate": 3.940707752781047e-06, "loss": 0.3664, "step": 7458 }, { "epoch": 1.8444609297725023, "grad_norm": 0.729594015830652, "learning_rate": 3.9404421112574594e-06, "loss": 0.3557, "step": 7459 }, { "epoch": 1.844708209693373, "grad_norm": 0.7812223287694934, "learning_rate": 3.940176445386175e-06, "loss": 0.3514, "step": 7460 }, { "epoch": 1.8449554896142433, "grad_norm": 0.7991688005700875, "learning_rate": 3.939910755171683e-06, "loss": 0.3422, "step": 7461 }, { "epoch": 1.8452027695351139, "grad_norm": 0.7643900507069668, "learning_rate": 3.939645040618475e-06, "loss": 0.3595, "step": 7462 }, { "epoch": 1.8454500494559842, "grad_norm": 0.7759140632824818, "learning_rate": 3.939379301731043e-06, "loss": 0.3599, "step": 7463 }, { "epoch": 1.8456973293768546, "grad_norm": 0.7716921189149191, "learning_rate": 3.939113538513877e-06, "loss": 0.3604, "step": 7464 }, { "epoch": 1.845944609297725, "grad_norm": 0.8074630137955849, "learning_rate": 3.93884775097147e-06, "loss": 0.3895, "step": 7465 }, { "epoch": 1.8461918892185953, "grad_norm": 0.7726650073117742, "learning_rate": 3.938581939108315e-06, "loss": 0.3642, "step": 7466 }, { "epoch": 1.846439169139466, "grad_norm": 0.7951020637120377, "learning_rate": 3.938316102928905e-06, "loss": 0.3622, "step": 7467 }, { "epoch": 1.8466864490603363, "grad_norm": 0.7689192071320986, "learning_rate": 3.938050242437733e-06, "loss": 0.3498, "step": 7468 }, { "epoch": 1.846933728981207, "grad_norm": 0.7515058804916465, "learning_rate": 3.937784357639293e-06, "loss": 0.3923, "step": 7469 }, { "epoch": 1.8471810089020773, "grad_norm": 0.7500638458472828, "learning_rate": 3.9375184485380785e-06, "loss": 0.3566, "step": 7470 }, { "epoch": 1.8474282888229476, "grad_norm": 0.7672245410284041, "learning_rate": 3.937252515138586e-06, "loss": 0.3648, "step": 7471 }, { "epoch": 1.847675568743818, "grad_norm": 0.7664908610885849, "learning_rate": 3.936986557445308e-06, "loss": 0.3701, "step": 7472 }, { "epoch": 1.8479228486646884, "grad_norm": 0.7279901992806597, "learning_rate": 3.936720575462742e-06, "loss": 0.398, "step": 7473 }, { "epoch": 1.8481701285855587, "grad_norm": 0.7948258799628871, "learning_rate": 3.936454569195384e-06, "loss": 0.3471, "step": 7474 }, { "epoch": 1.8484174085064293, "grad_norm": 0.777174843887328, "learning_rate": 3.936188538647728e-06, "loss": 0.3764, "step": 7475 }, { "epoch": 1.8486646884272997, "grad_norm": 0.7743888481795572, "learning_rate": 3.9359224838242725e-06, "loss": 0.348, "step": 7476 }, { "epoch": 1.8489119683481703, "grad_norm": 0.7576959217094786, "learning_rate": 3.935656404729516e-06, "loss": 0.3519, "step": 7477 }, { "epoch": 1.8491592482690407, "grad_norm": 0.751287653173154, "learning_rate": 3.935390301367953e-06, "loss": 0.3819, "step": 7478 }, { "epoch": 1.849406528189911, "grad_norm": 0.7611076333405554, "learning_rate": 3.935124173744082e-06, "loss": 0.3538, "step": 7479 }, { "epoch": 1.8496538081107814, "grad_norm": 0.7589538692379122, "learning_rate": 3.9348580218624035e-06, "loss": 0.3748, "step": 7480 }, { "epoch": 1.8499010880316518, "grad_norm": 0.7514243195607219, "learning_rate": 3.934591845727414e-06, "loss": 0.3833, "step": 7481 }, { "epoch": 1.8501483679525221, "grad_norm": 0.7682865841725005, "learning_rate": 3.934325645343614e-06, "loss": 0.3745, "step": 7482 }, { "epoch": 1.8503956478733927, "grad_norm": 0.7823407942564499, "learning_rate": 3.934059420715501e-06, "loss": 0.371, "step": 7483 }, { "epoch": 1.850642927794263, "grad_norm": 0.7934866971321299, "learning_rate": 3.933793171847579e-06, "loss": 0.3608, "step": 7484 }, { "epoch": 1.8508902077151337, "grad_norm": 0.8034894828550645, "learning_rate": 3.933526898744345e-06, "loss": 0.3603, "step": 7485 }, { "epoch": 1.851137487636004, "grad_norm": 0.7560531935469955, "learning_rate": 3.9332606014103e-06, "loss": 0.3578, "step": 7486 }, { "epoch": 1.8513847675568744, "grad_norm": 0.7737780563265462, "learning_rate": 3.932994279849946e-06, "loss": 0.3626, "step": 7487 }, { "epoch": 1.8516320474777448, "grad_norm": 0.7568210011727642, "learning_rate": 3.932727934067785e-06, "loss": 0.3755, "step": 7488 }, { "epoch": 1.8518793273986152, "grad_norm": 0.792350837655994, "learning_rate": 3.932461564068319e-06, "loss": 0.3812, "step": 7489 }, { "epoch": 1.8521266073194855, "grad_norm": 0.8253697568340685, "learning_rate": 3.93219516985605e-06, "loss": 0.3399, "step": 7490 }, { "epoch": 1.8523738872403561, "grad_norm": 0.7458000513389027, "learning_rate": 3.93192875143548e-06, "loss": 0.3588, "step": 7491 }, { "epoch": 1.8526211671612265, "grad_norm": 0.7682188720014556, "learning_rate": 3.931662308811114e-06, "loss": 0.3695, "step": 7492 }, { "epoch": 1.852868447082097, "grad_norm": 0.757826969415619, "learning_rate": 3.931395841987454e-06, "loss": 0.3623, "step": 7493 }, { "epoch": 1.8531157270029674, "grad_norm": 0.7909046030506207, "learning_rate": 3.931129350969005e-06, "loss": 0.3499, "step": 7494 }, { "epoch": 1.8533630069238378, "grad_norm": 0.7900630053192748, "learning_rate": 3.930862835760272e-06, "loss": 0.3485, "step": 7495 }, { "epoch": 1.8536102868447082, "grad_norm": 0.7725053259398369, "learning_rate": 3.9305962963657575e-06, "loss": 0.3709, "step": 7496 }, { "epoch": 1.8538575667655786, "grad_norm": 0.7715798258759305, "learning_rate": 3.93032973278997e-06, "loss": 0.3519, "step": 7497 }, { "epoch": 1.854104846686449, "grad_norm": 0.7930734274015365, "learning_rate": 3.930063145037414e-06, "loss": 0.3417, "step": 7498 }, { "epoch": 1.8543521266073195, "grad_norm": 0.7640462667573006, "learning_rate": 3.9297965331125945e-06, "loss": 0.3765, "step": 7499 }, { "epoch": 1.8545994065281899, "grad_norm": 0.7510865939978151, "learning_rate": 3.929529897020019e-06, "loss": 0.3751, "step": 7500 }, { "epoch": 1.8548466864490605, "grad_norm": 0.7854157920956928, "learning_rate": 3.929263236764194e-06, "loss": 0.3626, "step": 7501 }, { "epoch": 1.8550939663699308, "grad_norm": 0.7797669479081497, "learning_rate": 3.9289965523496274e-06, "loss": 0.3687, "step": 7502 }, { "epoch": 1.8553412462908012, "grad_norm": 0.8012112071687922, "learning_rate": 3.928729843780827e-06, "loss": 0.3535, "step": 7503 }, { "epoch": 1.8555885262116716, "grad_norm": 0.7570291115972011, "learning_rate": 3.9284631110623e-06, "loss": 0.3884, "step": 7504 }, { "epoch": 1.855835806132542, "grad_norm": 0.7460909026885473, "learning_rate": 3.928196354198556e-06, "loss": 0.3456, "step": 7505 }, { "epoch": 1.8560830860534123, "grad_norm": 0.8006096554723997, "learning_rate": 3.927929573194103e-06, "loss": 0.3527, "step": 7506 }, { "epoch": 1.856330365974283, "grad_norm": 0.7472441535636851, "learning_rate": 3.927662768053452e-06, "loss": 0.3879, "step": 7507 }, { "epoch": 1.8565776458951533, "grad_norm": 0.7933316741840656, "learning_rate": 3.927395938781111e-06, "loss": 0.3915, "step": 7508 }, { "epoch": 1.8568249258160239, "grad_norm": 0.7689210683819684, "learning_rate": 3.927129085381591e-06, "loss": 0.3417, "step": 7509 }, { "epoch": 1.8570722057368942, "grad_norm": 0.7675698978151224, "learning_rate": 3.926862207859403e-06, "loss": 0.3834, "step": 7510 }, { "epoch": 1.8573194856577646, "grad_norm": 0.7564636580709976, "learning_rate": 3.9265953062190575e-06, "loss": 0.3593, "step": 7511 }, { "epoch": 1.857566765578635, "grad_norm": 0.759105916569941, "learning_rate": 3.926328380465065e-06, "loss": 0.3457, "step": 7512 }, { "epoch": 1.8578140454995054, "grad_norm": 0.7507058901133974, "learning_rate": 3.926061430601939e-06, "loss": 0.3615, "step": 7513 }, { "epoch": 1.8580613254203757, "grad_norm": 0.7892529964104873, "learning_rate": 3.925794456634191e-06, "loss": 0.3538, "step": 7514 }, { "epoch": 1.8583086053412463, "grad_norm": 0.7752996256309875, "learning_rate": 3.925527458566334e-06, "loss": 0.3775, "step": 7515 }, { "epoch": 1.8585558852621167, "grad_norm": 0.7892757812046036, "learning_rate": 3.925260436402881e-06, "loss": 0.3661, "step": 7516 }, { "epoch": 1.8588031651829873, "grad_norm": 0.7610380989383098, "learning_rate": 3.924993390148344e-06, "loss": 0.3428, "step": 7517 }, { "epoch": 1.8590504451038576, "grad_norm": 0.788677542317239, "learning_rate": 3.924726319807239e-06, "loss": 0.4188, "step": 7518 }, { "epoch": 1.859297725024728, "grad_norm": 0.8101504959400309, "learning_rate": 3.924459225384079e-06, "loss": 0.3305, "step": 7519 }, { "epoch": 1.8595450049455984, "grad_norm": 0.7450039230296542, "learning_rate": 3.924192106883379e-06, "loss": 0.3655, "step": 7520 }, { "epoch": 1.8597922848664687, "grad_norm": 0.7807238000515154, "learning_rate": 3.923924964309654e-06, "loss": 0.3528, "step": 7521 }, { "epoch": 1.8600395647873391, "grad_norm": 0.7729902995812735, "learning_rate": 3.923657797667419e-06, "loss": 0.3674, "step": 7522 }, { "epoch": 1.8602868447082097, "grad_norm": 0.7948886269174196, "learning_rate": 3.923390606961191e-06, "loss": 0.3625, "step": 7523 }, { "epoch": 1.86053412462908, "grad_norm": 0.7448229300693585, "learning_rate": 3.923123392195487e-06, "loss": 0.3709, "step": 7524 }, { "epoch": 1.8607814045499507, "grad_norm": 0.7662338999153109, "learning_rate": 3.922856153374821e-06, "loss": 0.3828, "step": 7525 }, { "epoch": 1.861028684470821, "grad_norm": 0.7760701812589569, "learning_rate": 3.922588890503712e-06, "loss": 0.3436, "step": 7526 }, { "epoch": 1.8612759643916914, "grad_norm": 0.7635650915701457, "learning_rate": 3.922321603586677e-06, "loss": 0.3712, "step": 7527 }, { "epoch": 1.8615232443125618, "grad_norm": 0.7813196630575875, "learning_rate": 3.922054292628235e-06, "loss": 0.3677, "step": 7528 }, { "epoch": 1.8617705242334321, "grad_norm": 0.7696221854168249, "learning_rate": 3.9217869576329025e-06, "loss": 0.3566, "step": 7529 }, { "epoch": 1.8620178041543025, "grad_norm": 0.7685330675242315, "learning_rate": 3.9215195986052e-06, "loss": 0.3747, "step": 7530 }, { "epoch": 1.862265084075173, "grad_norm": 0.8068595378737301, "learning_rate": 3.921252215549645e-06, "loss": 0.3332, "step": 7531 }, { "epoch": 1.8625123639960435, "grad_norm": 0.7750439748643653, "learning_rate": 3.920984808470758e-06, "loss": 0.3519, "step": 7532 }, { "epoch": 1.862759643916914, "grad_norm": 0.823659636530547, "learning_rate": 3.920717377373059e-06, "loss": 0.375, "step": 7533 }, { "epoch": 1.8630069238377844, "grad_norm": 0.7521903711806562, "learning_rate": 3.9204499222610685e-06, "loss": 0.3726, "step": 7534 }, { "epoch": 1.8632542037586548, "grad_norm": 0.7592016915848203, "learning_rate": 3.9201824431393065e-06, "loss": 0.359, "step": 7535 }, { "epoch": 1.8635014836795252, "grad_norm": 0.7964987174343484, "learning_rate": 3.919914940012295e-06, "loss": 0.3499, "step": 7536 }, { "epoch": 1.8637487636003955, "grad_norm": 0.7481162562997978, "learning_rate": 3.919647412884555e-06, "loss": 0.3629, "step": 7537 }, { "epoch": 1.863996043521266, "grad_norm": 0.7513726261442457, "learning_rate": 3.919379861760609e-06, "loss": 0.3895, "step": 7538 }, { "epoch": 1.8642433234421365, "grad_norm": 0.7912105004636291, "learning_rate": 3.9191122866449785e-06, "loss": 0.3278, "step": 7539 }, { "epoch": 1.8644906033630069, "grad_norm": 0.8063007435198126, "learning_rate": 3.918844687542188e-06, "loss": 0.3507, "step": 7540 }, { "epoch": 1.8647378832838775, "grad_norm": 0.7863513392243732, "learning_rate": 3.918577064456759e-06, "loss": 0.3728, "step": 7541 }, { "epoch": 1.8649851632047478, "grad_norm": 0.7681286567389638, "learning_rate": 3.9183094173932156e-06, "loss": 0.3521, "step": 7542 }, { "epoch": 1.8652324431256182, "grad_norm": 0.7636705543203555, "learning_rate": 3.9180417463560825e-06, "loss": 0.3502, "step": 7543 }, { "epoch": 1.8654797230464886, "grad_norm": 0.7841981014327039, "learning_rate": 3.917774051349884e-06, "loss": 0.3613, "step": 7544 }, { "epoch": 1.865727002967359, "grad_norm": 0.7844740630298114, "learning_rate": 3.917506332379144e-06, "loss": 0.3469, "step": 7545 }, { "epoch": 1.8659742828882295, "grad_norm": 0.8012543537431414, "learning_rate": 3.917238589448388e-06, "loss": 0.3622, "step": 7546 }, { "epoch": 1.8662215628091, "grad_norm": 0.7792299897061307, "learning_rate": 3.916970822562142e-06, "loss": 0.3507, "step": 7547 }, { "epoch": 1.8664688427299705, "grad_norm": 0.785363581171074, "learning_rate": 3.916703031724933e-06, "loss": 0.3509, "step": 7548 }, { "epoch": 1.8667161226508409, "grad_norm": 0.76408799826388, "learning_rate": 3.916435216941286e-06, "loss": 0.3624, "step": 7549 }, { "epoch": 1.8669634025717112, "grad_norm": 0.7713512732490366, "learning_rate": 3.916167378215727e-06, "loss": 0.3639, "step": 7550 }, { "epoch": 1.8672106824925816, "grad_norm": 0.8215714049411577, "learning_rate": 3.9158995155527865e-06, "loss": 0.3791, "step": 7551 }, { "epoch": 1.867457962413452, "grad_norm": 0.7771190084542922, "learning_rate": 3.915631628956988e-06, "loss": 0.3535, "step": 7552 }, { "epoch": 1.8677052423343223, "grad_norm": 0.7712960670332445, "learning_rate": 3.915363718432864e-06, "loss": 0.3504, "step": 7553 }, { "epoch": 1.867952522255193, "grad_norm": 0.7457421786524694, "learning_rate": 3.91509578398494e-06, "loss": 0.3537, "step": 7554 }, { "epoch": 1.8681998021760633, "grad_norm": 0.7846820707592712, "learning_rate": 3.914827825617745e-06, "loss": 0.3485, "step": 7555 }, { "epoch": 1.8684470820969339, "grad_norm": 0.7614627872105295, "learning_rate": 3.9145598433358095e-06, "loss": 0.3634, "step": 7556 }, { "epoch": 1.8686943620178043, "grad_norm": 0.757163986553527, "learning_rate": 3.914291837143663e-06, "loss": 0.3854, "step": 7557 }, { "epoch": 1.8689416419386746, "grad_norm": 0.780167778261922, "learning_rate": 3.9140238070458344e-06, "loss": 0.3469, "step": 7558 }, { "epoch": 1.869188921859545, "grad_norm": 0.792825392195414, "learning_rate": 3.913755753046855e-06, "loss": 0.3445, "step": 7559 }, { "epoch": 1.8694362017804154, "grad_norm": 0.824229922481347, "learning_rate": 3.9134876751512565e-06, "loss": 0.35, "step": 7560 }, { "epoch": 1.8696834817012857, "grad_norm": 0.76026526041676, "learning_rate": 3.913219573363568e-06, "loss": 0.3736, "step": 7561 }, { "epoch": 1.8699307616221563, "grad_norm": 0.7977632187048497, "learning_rate": 3.912951447688324e-06, "loss": 0.3802, "step": 7562 }, { "epoch": 1.8701780415430267, "grad_norm": 0.8018379248482083, "learning_rate": 3.912683298130054e-06, "loss": 0.3452, "step": 7563 }, { "epoch": 1.8704253214638973, "grad_norm": 0.7833393041056065, "learning_rate": 3.9124151246932925e-06, "loss": 0.3685, "step": 7564 }, { "epoch": 1.8706726013847677, "grad_norm": 0.812925770548959, "learning_rate": 3.912146927382572e-06, "loss": 0.3518, "step": 7565 }, { "epoch": 1.870919881305638, "grad_norm": 0.7785685293109605, "learning_rate": 3.911878706202425e-06, "loss": 0.3813, "step": 7566 }, { "epoch": 1.8711671612265084, "grad_norm": 0.7573502719669485, "learning_rate": 3.911610461157386e-06, "loss": 0.3459, "step": 7567 }, { "epoch": 1.8714144411473788, "grad_norm": 0.7995779144554868, "learning_rate": 3.911342192251988e-06, "loss": 0.3732, "step": 7568 }, { "epoch": 1.8716617210682491, "grad_norm": 0.755574340058085, "learning_rate": 3.9110738994907675e-06, "loss": 0.3872, "step": 7569 }, { "epoch": 1.8719090009891197, "grad_norm": 0.7421540444615651, "learning_rate": 3.910805582878257e-06, "loss": 0.365, "step": 7570 }, { "epoch": 1.87215628090999, "grad_norm": 0.7616834403741762, "learning_rate": 3.910537242418994e-06, "loss": 0.3369, "step": 7571 }, { "epoch": 1.8724035608308607, "grad_norm": 0.7883435892063134, "learning_rate": 3.910268878117514e-06, "loss": 0.3407, "step": 7572 }, { "epoch": 1.872650840751731, "grad_norm": 0.7938674953258386, "learning_rate": 3.91000048997835e-06, "loss": 0.3767, "step": 7573 }, { "epoch": 1.8728981206726014, "grad_norm": 0.778173167253098, "learning_rate": 3.909732078006043e-06, "loss": 0.3798, "step": 7574 }, { "epoch": 1.8731454005934718, "grad_norm": 0.8117365159415827, "learning_rate": 3.909463642205128e-06, "loss": 0.3518, "step": 7575 }, { "epoch": 1.8733926805143422, "grad_norm": 0.7855440945119266, "learning_rate": 3.9091951825801425e-06, "loss": 0.4042, "step": 7576 }, { "epoch": 1.8736399604352125, "grad_norm": 0.7690048339697684, "learning_rate": 3.908926699135623e-06, "loss": 0.3318, "step": 7577 }, { "epoch": 1.8738872403560831, "grad_norm": 0.7760324136808382, "learning_rate": 3.90865819187611e-06, "loss": 0.3866, "step": 7578 }, { "epoch": 1.8741345202769535, "grad_norm": 0.746153338057066, "learning_rate": 3.90838966080614e-06, "loss": 0.3523, "step": 7579 }, { "epoch": 1.874381800197824, "grad_norm": 0.7872026530590983, "learning_rate": 3.908121105930253e-06, "loss": 0.3563, "step": 7580 }, { "epoch": 1.8746290801186944, "grad_norm": 0.7768785399210014, "learning_rate": 3.907852527252988e-06, "loss": 0.3373, "step": 7581 }, { "epoch": 1.8748763600395648, "grad_norm": 0.7545058384755295, "learning_rate": 3.907583924778886e-06, "loss": 0.4015, "step": 7582 }, { "epoch": 1.8751236399604352, "grad_norm": 0.7878766870307871, "learning_rate": 3.907315298512485e-06, "loss": 0.3676, "step": 7583 }, { "epoch": 1.8753709198813056, "grad_norm": 0.8042508252264293, "learning_rate": 3.907046648458326e-06, "loss": 0.362, "step": 7584 }, { "epoch": 1.875618199802176, "grad_norm": 0.8095823112077944, "learning_rate": 3.906777974620953e-06, "loss": 0.3627, "step": 7585 }, { "epoch": 1.8758654797230465, "grad_norm": 0.7715330100978545, "learning_rate": 3.906509277004903e-06, "loss": 0.3742, "step": 7586 }, { "epoch": 1.8761127596439169, "grad_norm": 0.7690048949748682, "learning_rate": 3.9062405556147196e-06, "loss": 0.354, "step": 7587 }, { "epoch": 1.8763600395647875, "grad_norm": 0.7991045145040779, "learning_rate": 3.905971810454947e-06, "loss": 0.3793, "step": 7588 }, { "epoch": 1.8766073194856578, "grad_norm": 0.7749016310374177, "learning_rate": 3.905703041530124e-06, "loss": 0.3501, "step": 7589 }, { "epoch": 1.8768545994065282, "grad_norm": 0.7806930245334618, "learning_rate": 3.905434248844797e-06, "loss": 0.3404, "step": 7590 }, { "epoch": 1.8771018793273986, "grad_norm": 0.7388335260081983, "learning_rate": 3.905165432403507e-06, "loss": 0.3706, "step": 7591 }, { "epoch": 1.877349159248269, "grad_norm": 0.7826311950053788, "learning_rate": 3.904896592210799e-06, "loss": 0.35, "step": 7592 }, { "epoch": 1.8775964391691393, "grad_norm": 0.7684599077870066, "learning_rate": 3.904627728271218e-06, "loss": 0.3683, "step": 7593 }, { "epoch": 1.87784371909001, "grad_norm": 0.769579155437606, "learning_rate": 3.904358840589306e-06, "loss": 0.3688, "step": 7594 }, { "epoch": 1.8780909990108803, "grad_norm": 0.76919737060494, "learning_rate": 3.90408992916961e-06, "loss": 0.3633, "step": 7595 }, { "epoch": 1.8783382789317509, "grad_norm": 0.7724291246040953, "learning_rate": 3.903820994016674e-06, "loss": 0.3713, "step": 7596 }, { "epoch": 1.8785855588526212, "grad_norm": 0.7783816991295779, "learning_rate": 3.903552035135046e-06, "loss": 0.3321, "step": 7597 }, { "epoch": 1.8788328387734916, "grad_norm": 0.8137449625149368, "learning_rate": 3.9032830525292706e-06, "loss": 0.3893, "step": 7598 }, { "epoch": 1.879080118694362, "grad_norm": 0.7493811538893674, "learning_rate": 3.9030140462038945e-06, "loss": 0.3614, "step": 7599 }, { "epoch": 1.8793273986152323, "grad_norm": 0.805407801978579, "learning_rate": 3.902745016163464e-06, "loss": 0.3806, "step": 7600 }, { "epoch": 1.8795746785361027, "grad_norm": 0.7777860274757499, "learning_rate": 3.902475962412529e-06, "loss": 0.3645, "step": 7601 }, { "epoch": 1.8798219584569733, "grad_norm": 0.7629103677951692, "learning_rate": 3.902206884955634e-06, "loss": 0.3557, "step": 7602 }, { "epoch": 1.8800692383778437, "grad_norm": 0.7647451180757912, "learning_rate": 3.901937783797329e-06, "loss": 0.3917, "step": 7603 }, { "epoch": 1.8803165182987143, "grad_norm": 0.7641456772090514, "learning_rate": 3.901668658942164e-06, "loss": 0.4026, "step": 7604 }, { "epoch": 1.8805637982195846, "grad_norm": 0.7988764179563592, "learning_rate": 3.901399510394686e-06, "loss": 0.3424, "step": 7605 }, { "epoch": 1.880811078140455, "grad_norm": 0.770920640651422, "learning_rate": 3.901130338159443e-06, "loss": 0.3591, "step": 7606 }, { "epoch": 1.8810583580613254, "grad_norm": 0.7437027498617683, "learning_rate": 3.9008611422409886e-06, "loss": 0.3884, "step": 7607 }, { "epoch": 1.8813056379821957, "grad_norm": 0.7671457766127537, "learning_rate": 3.90059192264387e-06, "loss": 0.3357, "step": 7608 }, { "epoch": 1.8815529179030661, "grad_norm": 0.7429056884825966, "learning_rate": 3.90032267937264e-06, "loss": 0.3743, "step": 7609 }, { "epoch": 1.8818001978239367, "grad_norm": 0.7453340633181097, "learning_rate": 3.900053412431848e-06, "loss": 0.3514, "step": 7610 }, { "epoch": 1.882047477744807, "grad_norm": 0.739741849363897, "learning_rate": 3.899784121826045e-06, "loss": 0.351, "step": 7611 }, { "epoch": 1.8822947576656777, "grad_norm": 0.7899089453151158, "learning_rate": 3.899514807559784e-06, "loss": 0.3373, "step": 7612 }, { "epoch": 1.882542037586548, "grad_norm": 0.7965046131836103, "learning_rate": 3.899245469637617e-06, "loss": 0.3542, "step": 7613 }, { "epoch": 1.8827893175074184, "grad_norm": 0.7739706176870914, "learning_rate": 3.898976108064097e-06, "loss": 0.3773, "step": 7614 }, { "epoch": 1.8830365974282888, "grad_norm": 0.761106209892257, "learning_rate": 3.898706722843776e-06, "loss": 0.3694, "step": 7615 }, { "epoch": 1.8832838773491591, "grad_norm": 0.7617644359233123, "learning_rate": 3.898437313981208e-06, "loss": 0.3696, "step": 7616 }, { "epoch": 1.8835311572700295, "grad_norm": 0.7510299362066338, "learning_rate": 3.898167881480946e-06, "loss": 0.3678, "step": 7617 }, { "epoch": 1.8837784371909, "grad_norm": 0.7724893638216881, "learning_rate": 3.897898425347545e-06, "loss": 0.3327, "step": 7618 }, { "epoch": 1.8840257171117705, "grad_norm": 0.7853053634074301, "learning_rate": 3.897628945585561e-06, "loss": 0.3199, "step": 7619 }, { "epoch": 1.884272997032641, "grad_norm": 0.76940104129383, "learning_rate": 3.897359442199545e-06, "loss": 0.3971, "step": 7620 }, { "epoch": 1.8845202769535114, "grad_norm": 0.8018667872138527, "learning_rate": 3.897089915194056e-06, "loss": 0.359, "step": 7621 }, { "epoch": 1.8847675568743818, "grad_norm": 0.7939539142048305, "learning_rate": 3.896820364573649e-06, "loss": 0.3774, "step": 7622 }, { "epoch": 1.8850148367952522, "grad_norm": 0.8462460098143587, "learning_rate": 3.8965507903428795e-06, "loss": 0.3536, "step": 7623 }, { "epoch": 1.8852621167161225, "grad_norm": 0.8366698466413852, "learning_rate": 3.896281192506305e-06, "loss": 0.3434, "step": 7624 }, { "epoch": 1.8855093966369931, "grad_norm": 0.8180105070646885, "learning_rate": 3.896011571068481e-06, "loss": 0.3488, "step": 7625 }, { "epoch": 1.8857566765578635, "grad_norm": 0.7528436565165316, "learning_rate": 3.8957419260339665e-06, "loss": 0.3618, "step": 7626 }, { "epoch": 1.886003956478734, "grad_norm": 0.7708492598727843, "learning_rate": 3.8954722574073185e-06, "loss": 0.3547, "step": 7627 }, { "epoch": 1.8862512363996045, "grad_norm": 0.7816176672762191, "learning_rate": 3.895202565193095e-06, "loss": 0.3675, "step": 7628 }, { "epoch": 1.8864985163204748, "grad_norm": 0.7587910859360786, "learning_rate": 3.894932849395855e-06, "loss": 0.3743, "step": 7629 }, { "epoch": 1.8867457962413452, "grad_norm": 0.7564176407042968, "learning_rate": 3.894663110020157e-06, "loss": 0.3756, "step": 7630 }, { "epoch": 1.8869930761622156, "grad_norm": 0.7987116424422377, "learning_rate": 3.894393347070562e-06, "loss": 0.3636, "step": 7631 }, { "epoch": 1.887240356083086, "grad_norm": 0.7932508691227107, "learning_rate": 3.894123560551627e-06, "loss": 0.3347, "step": 7632 }, { "epoch": 1.8874876360039565, "grad_norm": 0.8141932372285466, "learning_rate": 3.893853750467916e-06, "loss": 0.379, "step": 7633 }, { "epoch": 1.887734915924827, "grad_norm": 0.7539626286894375, "learning_rate": 3.893583916823985e-06, "loss": 0.3643, "step": 7634 }, { "epoch": 1.8879821958456975, "grad_norm": 0.753474231813385, "learning_rate": 3.893314059624398e-06, "loss": 0.3342, "step": 7635 }, { "epoch": 1.8882294757665679, "grad_norm": 0.8094797458070742, "learning_rate": 3.893044178873716e-06, "loss": 0.3699, "step": 7636 }, { "epoch": 1.8884767556874382, "grad_norm": 0.7919193938076482, "learning_rate": 3.8927742745765e-06, "loss": 0.3668, "step": 7637 }, { "epoch": 1.8887240356083086, "grad_norm": 0.7840976742849848, "learning_rate": 3.892504346737313e-06, "loss": 0.3731, "step": 7638 }, { "epoch": 1.888971315529179, "grad_norm": 0.7762140811504712, "learning_rate": 3.892234395360718e-06, "loss": 0.3544, "step": 7639 }, { "epoch": 1.8892185954500493, "grad_norm": 0.7929230153791406, "learning_rate": 3.891964420451275e-06, "loss": 0.3411, "step": 7640 }, { "epoch": 1.88946587537092, "grad_norm": 0.7629446781995387, "learning_rate": 3.891694422013551e-06, "loss": 0.3881, "step": 7641 }, { "epoch": 1.8897131552917903, "grad_norm": 0.7671364038467614, "learning_rate": 3.891424400052109e-06, "loss": 0.3291, "step": 7642 }, { "epoch": 1.8899604352126609, "grad_norm": 0.8387476896892823, "learning_rate": 3.891154354571511e-06, "loss": 0.327, "step": 7643 }, { "epoch": 1.8902077151335313, "grad_norm": 0.7590405574775121, "learning_rate": 3.890884285576324e-06, "loss": 0.3559, "step": 7644 }, { "epoch": 1.8904549950544016, "grad_norm": 0.7886184774888598, "learning_rate": 3.890614193071112e-06, "loss": 0.3412, "step": 7645 }, { "epoch": 1.890702274975272, "grad_norm": 0.7789659611493636, "learning_rate": 3.8903440770604396e-06, "loss": 0.3608, "step": 7646 }, { "epoch": 1.8909495548961424, "grad_norm": 0.7647939444264691, "learning_rate": 3.890073937548874e-06, "loss": 0.347, "step": 7647 }, { "epoch": 1.8911968348170127, "grad_norm": 0.7768600081991909, "learning_rate": 3.88980377454098e-06, "loss": 0.3582, "step": 7648 }, { "epoch": 1.8914441147378833, "grad_norm": 0.7652902323260241, "learning_rate": 3.889533588041325e-06, "loss": 0.3424, "step": 7649 }, { "epoch": 1.8916913946587537, "grad_norm": 0.7984918806498922, "learning_rate": 3.889263378054476e-06, "loss": 0.3682, "step": 7650 }, { "epoch": 1.8919386745796243, "grad_norm": 0.8087794936717414, "learning_rate": 3.888993144585001e-06, "loss": 0.3527, "step": 7651 }, { "epoch": 1.8921859545004946, "grad_norm": 0.7688042817996776, "learning_rate": 3.8887228876374645e-06, "loss": 0.3638, "step": 7652 }, { "epoch": 1.892433234421365, "grad_norm": 0.7769850248767669, "learning_rate": 3.888452607216439e-06, "loss": 0.3737, "step": 7653 }, { "epoch": 1.8926805143422354, "grad_norm": 0.7623018189076023, "learning_rate": 3.88818230332649e-06, "loss": 0.3652, "step": 7654 }, { "epoch": 1.8929277942631058, "grad_norm": 0.760380902553752, "learning_rate": 3.8879119759721876e-06, "loss": 0.3401, "step": 7655 }, { "epoch": 1.8931750741839761, "grad_norm": 0.777201368981278, "learning_rate": 3.887641625158101e-06, "loss": 0.3693, "step": 7656 }, { "epoch": 1.8934223541048467, "grad_norm": 0.7541840433974272, "learning_rate": 3.887371250888799e-06, "loss": 0.3535, "step": 7657 }, { "epoch": 1.893669634025717, "grad_norm": 0.7651892419478399, "learning_rate": 3.887100853168854e-06, "loss": 0.3391, "step": 7658 }, { "epoch": 1.8939169139465877, "grad_norm": 0.7559970824374844, "learning_rate": 3.886830432002835e-06, "loss": 0.3336, "step": 7659 }, { "epoch": 1.894164193867458, "grad_norm": 0.7812239605145789, "learning_rate": 3.886559987395313e-06, "loss": 0.3359, "step": 7660 }, { "epoch": 1.8944114737883284, "grad_norm": 0.7880756491155158, "learning_rate": 3.886289519350859e-06, "loss": 0.3577, "step": 7661 }, { "epoch": 1.8946587537091988, "grad_norm": 0.7578380521454351, "learning_rate": 3.886019027874044e-06, "loss": 0.3379, "step": 7662 }, { "epoch": 1.8949060336300692, "grad_norm": 0.7451110475125453, "learning_rate": 3.8857485129694425e-06, "loss": 0.3568, "step": 7663 }, { "epoch": 1.8951533135509395, "grad_norm": 0.7944740209020914, "learning_rate": 3.885477974641625e-06, "loss": 0.3426, "step": 7664 }, { "epoch": 1.8954005934718101, "grad_norm": 0.8070719011975224, "learning_rate": 3.8852074128951655e-06, "loss": 0.3827, "step": 7665 }, { "epoch": 1.8956478733926805, "grad_norm": 0.7709260770973116, "learning_rate": 3.8849368277346365e-06, "loss": 0.3329, "step": 7666 }, { "epoch": 1.895895153313551, "grad_norm": 0.7718939194131059, "learning_rate": 3.884666219164611e-06, "loss": 0.3922, "step": 7667 }, { "epoch": 1.8961424332344214, "grad_norm": 0.7409783861345415, "learning_rate": 3.884395587189666e-06, "loss": 0.3849, "step": 7668 }, { "epoch": 1.8963897131552918, "grad_norm": 0.7961839044240074, "learning_rate": 3.884124931814373e-06, "loss": 0.3807, "step": 7669 }, { "epoch": 1.8966369930761622, "grad_norm": 0.7787127367603112, "learning_rate": 3.883854253043307e-06, "loss": 0.353, "step": 7670 }, { "epoch": 1.8968842729970326, "grad_norm": 0.7643772679170109, "learning_rate": 3.883583550881046e-06, "loss": 0.3662, "step": 7671 }, { "epoch": 1.897131552917903, "grad_norm": 0.7772390832960558, "learning_rate": 3.883312825332163e-06, "loss": 0.3599, "step": 7672 }, { "epoch": 1.8973788328387735, "grad_norm": 0.799191050911357, "learning_rate": 3.883042076401234e-06, "loss": 0.3427, "step": 7673 }, { "epoch": 1.8976261127596439, "grad_norm": 0.7717914577365118, "learning_rate": 3.882771304092838e-06, "loss": 0.3704, "step": 7674 }, { "epoch": 1.8978733926805145, "grad_norm": 0.7315988941727097, "learning_rate": 3.88250050841155e-06, "loss": 0.3869, "step": 7675 }, { "epoch": 1.8981206726013848, "grad_norm": 0.7633843474292158, "learning_rate": 3.882229689361946e-06, "loss": 0.3569, "step": 7676 }, { "epoch": 1.8983679525222552, "grad_norm": 0.7391243815137151, "learning_rate": 3.881958846948606e-06, "loss": 0.3776, "step": 7677 }, { "epoch": 1.8986152324431256, "grad_norm": 0.7605997231402197, "learning_rate": 3.8816879811761066e-06, "loss": 0.3637, "step": 7678 }, { "epoch": 1.898862512363996, "grad_norm": 0.7531144335483578, "learning_rate": 3.881417092049027e-06, "loss": 0.3469, "step": 7679 }, { "epoch": 1.8991097922848663, "grad_norm": 0.7865535211640421, "learning_rate": 3.881146179571946e-06, "loss": 0.3722, "step": 7680 }, { "epoch": 1.899357072205737, "grad_norm": 0.755968715183093, "learning_rate": 3.8808752437494425e-06, "loss": 0.3653, "step": 7681 }, { "epoch": 1.8996043521266073, "grad_norm": 0.740124119465692, "learning_rate": 3.880604284586096e-06, "loss": 0.3717, "step": 7682 }, { "epoch": 1.8998516320474779, "grad_norm": 0.74612814768336, "learning_rate": 3.880333302086486e-06, "loss": 0.3683, "step": 7683 }, { "epoch": 1.9000989119683482, "grad_norm": 0.7713356951684961, "learning_rate": 3.880062296255194e-06, "loss": 0.3706, "step": 7684 }, { "epoch": 1.9003461918892186, "grad_norm": 0.8112186483945594, "learning_rate": 3.879791267096801e-06, "loss": 0.3383, "step": 7685 }, { "epoch": 1.900593471810089, "grad_norm": 0.7682989707328435, "learning_rate": 3.8795202146158875e-06, "loss": 0.3822, "step": 7686 }, { "epoch": 1.9008407517309593, "grad_norm": 0.7668918398950877, "learning_rate": 3.879249138817035e-06, "loss": 0.3558, "step": 7687 }, { "epoch": 1.9010880316518297, "grad_norm": 0.7269660270960058, "learning_rate": 3.878978039704825e-06, "loss": 0.3604, "step": 7688 }, { "epoch": 1.9013353115727003, "grad_norm": 0.7405591925416011, "learning_rate": 3.87870691728384e-06, "loss": 0.3494, "step": 7689 }, { "epoch": 1.9015825914935707, "grad_norm": 0.7550480860256942, "learning_rate": 3.878435771558665e-06, "loss": 0.3599, "step": 7690 }, { "epoch": 1.9018298714144413, "grad_norm": 0.7727493907847726, "learning_rate": 3.87816460253388e-06, "loss": 0.374, "step": 7691 }, { "epoch": 1.9020771513353116, "grad_norm": 0.7648404303665814, "learning_rate": 3.877893410214071e-06, "loss": 0.3726, "step": 7692 }, { "epoch": 1.902324431256182, "grad_norm": 0.7991553067964801, "learning_rate": 3.87762219460382e-06, "loss": 0.3841, "step": 7693 }, { "epoch": 1.9025717111770524, "grad_norm": 0.8019438792291238, "learning_rate": 3.8773509557077125e-06, "loss": 0.3636, "step": 7694 }, { "epoch": 1.9028189910979227, "grad_norm": 0.766549050426859, "learning_rate": 3.877079693530334e-06, "loss": 0.3724, "step": 7695 }, { "epoch": 1.903066271018793, "grad_norm": 0.7597548155266833, "learning_rate": 3.876808408076267e-06, "loss": 0.3499, "step": 7696 }, { "epoch": 1.9033135509396637, "grad_norm": 0.7664737726452326, "learning_rate": 3.876537099350099e-06, "loss": 0.3823, "step": 7697 }, { "epoch": 1.903560830860534, "grad_norm": 0.7514051571138207, "learning_rate": 3.876265767356416e-06, "loss": 0.3648, "step": 7698 }, { "epoch": 1.9038081107814047, "grad_norm": 0.7634792402743859, "learning_rate": 3.875994412099804e-06, "loss": 0.3547, "step": 7699 }, { "epoch": 1.904055390702275, "grad_norm": 0.7792955728692542, "learning_rate": 3.875723033584848e-06, "loss": 0.3395, "step": 7700 }, { "epoch": 1.9043026706231454, "grad_norm": 0.7697739049986629, "learning_rate": 3.875451631816138e-06, "loss": 0.337, "step": 7701 }, { "epoch": 1.9045499505440158, "grad_norm": 0.7984643250219724, "learning_rate": 3.87518020679826e-06, "loss": 0.3409, "step": 7702 }, { "epoch": 1.9047972304648861, "grad_norm": 0.7675280827790124, "learning_rate": 3.874908758535802e-06, "loss": 0.3404, "step": 7703 }, { "epoch": 1.9050445103857567, "grad_norm": 0.8102812137183726, "learning_rate": 3.874637287033352e-06, "loss": 0.3759, "step": 7704 }, { "epoch": 1.905291790306627, "grad_norm": 0.7601236935398131, "learning_rate": 3.8743657922954994e-06, "loss": 0.3632, "step": 7705 }, { "epoch": 1.9055390702274977, "grad_norm": 0.7524143233500237, "learning_rate": 3.874094274326833e-06, "loss": 0.3488, "step": 7706 }, { "epoch": 1.905786350148368, "grad_norm": 0.7483189359316083, "learning_rate": 3.873822733131941e-06, "loss": 0.3968, "step": 7707 }, { "epoch": 1.9060336300692384, "grad_norm": 0.7654322301038505, "learning_rate": 3.8735511687154146e-06, "loss": 0.3589, "step": 7708 }, { "epoch": 1.9062809099901088, "grad_norm": 0.7732450641042011, "learning_rate": 3.873279581081845e-06, "loss": 0.3606, "step": 7709 }, { "epoch": 1.9065281899109792, "grad_norm": 0.7643945965772845, "learning_rate": 3.873007970235821e-06, "loss": 0.3657, "step": 7710 }, { "epoch": 1.9067754698318495, "grad_norm": 0.7582843913265086, "learning_rate": 3.872736336181932e-06, "loss": 0.3621, "step": 7711 }, { "epoch": 1.9070227497527201, "grad_norm": 0.7715122309118245, "learning_rate": 3.872464678924774e-06, "loss": 0.4017, "step": 7712 }, { "epoch": 1.9072700296735905, "grad_norm": 0.8054917547854326, "learning_rate": 3.872192998468935e-06, "loss": 0.3384, "step": 7713 }, { "epoch": 1.907517309594461, "grad_norm": 0.7909690892149642, "learning_rate": 3.87192129481901e-06, "loss": 0.3707, "step": 7714 }, { "epoch": 1.9077645895153315, "grad_norm": 0.7900696637192695, "learning_rate": 3.871649567979589e-06, "loss": 0.3654, "step": 7715 }, { "epoch": 1.9080118694362018, "grad_norm": 0.8002729686223546, "learning_rate": 3.871377817955266e-06, "loss": 0.3565, "step": 7716 }, { "epoch": 1.9082591493570722, "grad_norm": 0.8057480886647806, "learning_rate": 3.871106044750635e-06, "loss": 0.3719, "step": 7717 }, { "epoch": 1.9085064292779426, "grad_norm": 0.7786441021460634, "learning_rate": 3.87083424837029e-06, "loss": 0.3608, "step": 7718 }, { "epoch": 1.908753709198813, "grad_norm": 0.7830506490998296, "learning_rate": 3.870562428818824e-06, "loss": 0.3502, "step": 7719 }, { "epoch": 1.9090009891196835, "grad_norm": 0.7676228328523772, "learning_rate": 3.870290586100831e-06, "loss": 0.3763, "step": 7720 }, { "epoch": 1.909248269040554, "grad_norm": 0.7653177649531618, "learning_rate": 3.870018720220908e-06, "loss": 0.358, "step": 7721 }, { "epoch": 1.9094955489614245, "grad_norm": 0.792217132785999, "learning_rate": 3.869746831183649e-06, "loss": 0.35, "step": 7722 }, { "epoch": 1.9097428288822949, "grad_norm": 0.7824604075269188, "learning_rate": 3.86947491899365e-06, "loss": 0.3679, "step": 7723 }, { "epoch": 1.9099901088031652, "grad_norm": 0.7862243994271736, "learning_rate": 3.8692029836555074e-06, "loss": 0.3869, "step": 7724 }, { "epoch": 1.9102373887240356, "grad_norm": 0.7768884892867929, "learning_rate": 3.868931025173817e-06, "loss": 0.3538, "step": 7725 }, { "epoch": 1.910484668644906, "grad_norm": 0.7798836004594475, "learning_rate": 3.868659043553176e-06, "loss": 0.3829, "step": 7726 }, { "epoch": 1.9107319485657763, "grad_norm": 0.7694486703467301, "learning_rate": 3.868387038798181e-06, "loss": 0.3681, "step": 7727 }, { "epoch": 1.910979228486647, "grad_norm": 0.785565236464694, "learning_rate": 3.868115010913432e-06, "loss": 0.3785, "step": 7728 }, { "epoch": 1.9112265084075173, "grad_norm": 0.7733442066698594, "learning_rate": 3.867842959903525e-06, "loss": 0.3356, "step": 7729 }, { "epoch": 1.9114737883283879, "grad_norm": 0.7396304388334268, "learning_rate": 3.867570885773058e-06, "loss": 0.3649, "step": 7730 }, { "epoch": 1.9117210682492582, "grad_norm": 0.7811486080787703, "learning_rate": 3.867298788526633e-06, "loss": 0.3433, "step": 7731 }, { "epoch": 1.9119683481701286, "grad_norm": 0.7496667170273488, "learning_rate": 3.867026668168846e-06, "loss": 0.3544, "step": 7732 }, { "epoch": 1.912215628090999, "grad_norm": 0.7654806811128165, "learning_rate": 3.8667545247042975e-06, "loss": 0.3629, "step": 7733 }, { "epoch": 1.9124629080118694, "grad_norm": 0.7666639200614865, "learning_rate": 3.866482358137588e-06, "loss": 0.3819, "step": 7734 }, { "epoch": 1.9127101879327397, "grad_norm": 0.7425993056752587, "learning_rate": 3.866210168473317e-06, "loss": 0.3852, "step": 7735 }, { "epoch": 1.9129574678536103, "grad_norm": 0.7555804494916509, "learning_rate": 3.865937955716087e-06, "loss": 0.3828, "step": 7736 }, { "epoch": 1.9132047477744807, "grad_norm": 0.7900150933598433, "learning_rate": 3.865665719870497e-06, "loss": 0.344, "step": 7737 }, { "epoch": 1.9134520276953513, "grad_norm": 0.7662631002557614, "learning_rate": 3.86539346094115e-06, "loss": 0.3856, "step": 7738 }, { "epoch": 1.9136993076162216, "grad_norm": 0.7468964940643352, "learning_rate": 3.8651211789326485e-06, "loss": 0.3474, "step": 7739 }, { "epoch": 1.913946587537092, "grad_norm": 0.7591493134800752, "learning_rate": 3.864848873849594e-06, "loss": 0.3649, "step": 7740 }, { "epoch": 1.9141938674579624, "grad_norm": 0.7779510904272178, "learning_rate": 3.864576545696589e-06, "loss": 0.3716, "step": 7741 }, { "epoch": 1.9144411473788328, "grad_norm": 0.7923358551448468, "learning_rate": 3.864304194478237e-06, "loss": 0.3781, "step": 7742 }, { "epoch": 1.9146884272997031, "grad_norm": 0.7603689078463539, "learning_rate": 3.864031820199141e-06, "loss": 0.3679, "step": 7743 }, { "epoch": 1.9149357072205737, "grad_norm": 0.7877926310022119, "learning_rate": 3.863759422863906e-06, "loss": 0.3765, "step": 7744 }, { "epoch": 1.915182987141444, "grad_norm": 0.7470249994672246, "learning_rate": 3.863487002477136e-06, "loss": 0.3465, "step": 7745 }, { "epoch": 1.9154302670623147, "grad_norm": 0.7630983523599421, "learning_rate": 3.863214559043435e-06, "loss": 0.3896, "step": 7746 }, { "epoch": 1.915677546983185, "grad_norm": 0.7619414379127547, "learning_rate": 3.8629420925674084e-06, "loss": 0.3486, "step": 7747 }, { "epoch": 1.9159248269040554, "grad_norm": 0.765166518782161, "learning_rate": 3.862669603053662e-06, "loss": 0.36, "step": 7748 }, { "epoch": 1.9161721068249258, "grad_norm": 0.7725537996812321, "learning_rate": 3.862397090506802e-06, "loss": 0.3642, "step": 7749 }, { "epoch": 1.9164193867457961, "grad_norm": 0.7917513540993627, "learning_rate": 3.862124554931434e-06, "loss": 0.3449, "step": 7750 }, { "epoch": 1.9166666666666665, "grad_norm": 0.7540072665211388, "learning_rate": 3.861851996332164e-06, "loss": 0.3487, "step": 7751 }, { "epoch": 1.916913946587537, "grad_norm": 0.8013975858023827, "learning_rate": 3.8615794147136e-06, "loss": 0.3722, "step": 7752 }, { "epoch": 1.9171612265084075, "grad_norm": 0.8232706005704993, "learning_rate": 3.86130681008035e-06, "loss": 0.3636, "step": 7753 }, { "epoch": 1.917408506429278, "grad_norm": 0.7393568275000946, "learning_rate": 3.861034182437021e-06, "loss": 0.3742, "step": 7754 }, { "epoch": 1.9176557863501484, "grad_norm": 0.7846882162086641, "learning_rate": 3.860761531788222e-06, "loss": 0.3788, "step": 7755 }, { "epoch": 1.9179030662710188, "grad_norm": 0.7558284121304701, "learning_rate": 3.860488858138559e-06, "loss": 0.3498, "step": 7756 }, { "epoch": 1.9181503461918892, "grad_norm": 0.7722634105614027, "learning_rate": 3.860216161492644e-06, "loss": 0.3366, "step": 7757 }, { "epoch": 1.9183976261127595, "grad_norm": 0.8027741391882139, "learning_rate": 3.859943441855085e-06, "loss": 0.3667, "step": 7758 }, { "epoch": 1.91864490603363, "grad_norm": 0.7677359685612672, "learning_rate": 3.859670699230492e-06, "loss": 0.3732, "step": 7759 }, { "epoch": 1.9188921859545005, "grad_norm": 0.7777686399616697, "learning_rate": 3.859397933623476e-06, "loss": 0.3509, "step": 7760 }, { "epoch": 1.9191394658753709, "grad_norm": 0.7674072600287208, "learning_rate": 3.859125145038646e-06, "loss": 0.3533, "step": 7761 }, { "epoch": 1.9193867457962415, "grad_norm": 0.7790726423105567, "learning_rate": 3.8588523334806135e-06, "loss": 0.3484, "step": 7762 }, { "epoch": 1.9196340257171118, "grad_norm": 0.7537326694735589, "learning_rate": 3.85857949895399e-06, "loss": 0.3419, "step": 7763 }, { "epoch": 1.9198813056379822, "grad_norm": 0.766786892527941, "learning_rate": 3.858306641463388e-06, "loss": 0.4015, "step": 7764 }, { "epoch": 1.9201285855588526, "grad_norm": 0.7816194248250703, "learning_rate": 3.858033761013418e-06, "loss": 0.3615, "step": 7765 }, { "epoch": 1.920375865479723, "grad_norm": 0.793078014394511, "learning_rate": 3.857760857608693e-06, "loss": 0.3536, "step": 7766 }, { "epoch": 1.9206231454005933, "grad_norm": 0.7676924735396191, "learning_rate": 3.857487931253826e-06, "loss": 0.359, "step": 7767 }, { "epoch": 1.920870425321464, "grad_norm": 0.7526610344573188, "learning_rate": 3.8572149819534314e-06, "loss": 0.3736, "step": 7768 }, { "epoch": 1.9211177052423343, "grad_norm": 0.8142498299107604, "learning_rate": 3.8569420097121215e-06, "loss": 0.3488, "step": 7769 }, { "epoch": 1.9213649851632049, "grad_norm": 0.7326356791188622, "learning_rate": 3.8566690145345094e-06, "loss": 0.366, "step": 7770 }, { "epoch": 1.9216122650840752, "grad_norm": 0.7559081699297503, "learning_rate": 3.856395996425212e-06, "loss": 0.3928, "step": 7771 }, { "epoch": 1.9218595450049456, "grad_norm": 0.8037228783191132, "learning_rate": 3.856122955388842e-06, "loss": 0.3351, "step": 7772 }, { "epoch": 1.922106824925816, "grad_norm": 0.7706696019780875, "learning_rate": 3.855849891430017e-06, "loss": 0.3663, "step": 7773 }, { "epoch": 1.9223541048466863, "grad_norm": 0.7292536920167064, "learning_rate": 3.855576804553349e-06, "loss": 0.3809, "step": 7774 }, { "epoch": 1.9226013847675567, "grad_norm": 0.7610676132350019, "learning_rate": 3.855303694763458e-06, "loss": 0.3647, "step": 7775 }, { "epoch": 1.9228486646884273, "grad_norm": 0.7688289032523868, "learning_rate": 3.855030562064957e-06, "loss": 0.3587, "step": 7776 }, { "epoch": 1.923095944609298, "grad_norm": 0.7851007692067695, "learning_rate": 3.8547574064624645e-06, "loss": 0.3874, "step": 7777 }, { "epoch": 1.9233432245301683, "grad_norm": 0.7623106751934833, "learning_rate": 3.854484227960598e-06, "loss": 0.3561, "step": 7778 }, { "epoch": 1.9235905044510386, "grad_norm": 0.7789082222794987, "learning_rate": 3.8542110265639734e-06, "loss": 0.3258, "step": 7779 }, { "epoch": 1.923837784371909, "grad_norm": 0.7460861241591878, "learning_rate": 3.8539378022772095e-06, "loss": 0.3411, "step": 7780 }, { "epoch": 1.9240850642927794, "grad_norm": 0.7885570083140909, "learning_rate": 3.853664555104925e-06, "loss": 0.3721, "step": 7781 }, { "epoch": 1.9243323442136497, "grad_norm": 0.7717307026643284, "learning_rate": 3.8533912850517375e-06, "loss": 0.3466, "step": 7782 }, { "epoch": 1.9245796241345203, "grad_norm": 0.7531010153903558, "learning_rate": 3.853117992122266e-06, "loss": 0.365, "step": 7783 }, { "epoch": 1.9248269040553907, "grad_norm": 0.7966753823459031, "learning_rate": 3.852844676321133e-06, "loss": 0.3603, "step": 7784 }, { "epoch": 1.9250741839762613, "grad_norm": 0.7368729173378272, "learning_rate": 3.852571337652954e-06, "loss": 0.3922, "step": 7785 }, { "epoch": 1.9253214638971317, "grad_norm": 0.7605182383638217, "learning_rate": 3.852297976122352e-06, "loss": 0.3796, "step": 7786 }, { "epoch": 1.925568743818002, "grad_norm": 0.7639017462455427, "learning_rate": 3.852024591733946e-06, "loss": 0.3571, "step": 7787 }, { "epoch": 1.9258160237388724, "grad_norm": 0.7671237081551592, "learning_rate": 3.851751184492359e-06, "loss": 0.3661, "step": 7788 }, { "epoch": 1.9260633036597428, "grad_norm": 0.7478373368002436, "learning_rate": 3.85147775440221e-06, "loss": 0.3659, "step": 7789 }, { "epoch": 1.9263105835806131, "grad_norm": 0.7861856652075442, "learning_rate": 3.851204301468124e-06, "loss": 0.3713, "step": 7790 }, { "epoch": 1.9265578635014837, "grad_norm": 0.77819000295178, "learning_rate": 3.850930825694719e-06, "loss": 0.3802, "step": 7791 }, { "epoch": 1.926805143422354, "grad_norm": 0.7618661253606407, "learning_rate": 3.85065732708662e-06, "loss": 0.3351, "step": 7792 }, { "epoch": 1.9270524233432247, "grad_norm": 0.7955344285564984, "learning_rate": 3.85038380564845e-06, "loss": 0.3825, "step": 7793 }, { "epoch": 1.927299703264095, "grad_norm": 0.7706672051347649, "learning_rate": 3.850110261384833e-06, "loss": 0.3494, "step": 7794 }, { "epoch": 1.9275469831849654, "grad_norm": 0.7711496440244562, "learning_rate": 3.84983669430039e-06, "loss": 0.3468, "step": 7795 }, { "epoch": 1.9277942631058358, "grad_norm": 0.7212753013542836, "learning_rate": 3.849563104399747e-06, "loss": 0.3527, "step": 7796 }, { "epoch": 1.9280415430267062, "grad_norm": 0.7597110492111684, "learning_rate": 3.849289491687529e-06, "loss": 0.3753, "step": 7797 }, { "epoch": 1.9282888229475765, "grad_norm": 0.757211439331712, "learning_rate": 3.849015856168359e-06, "loss": 0.3529, "step": 7798 }, { "epoch": 1.9285361028684471, "grad_norm": 0.77084447756534, "learning_rate": 3.848742197846864e-06, "loss": 0.3563, "step": 7799 }, { "epoch": 1.9287833827893175, "grad_norm": 0.8136018709521278, "learning_rate": 3.848468516727669e-06, "loss": 0.333, "step": 7800 }, { "epoch": 1.929030662710188, "grad_norm": 0.7514320110543244, "learning_rate": 3.8481948128154e-06, "loss": 0.3492, "step": 7801 }, { "epoch": 1.9292779426310585, "grad_norm": 0.7807975430834567, "learning_rate": 3.847921086114683e-06, "loss": 0.3468, "step": 7802 }, { "epoch": 1.9295252225519288, "grad_norm": 0.795463084853757, "learning_rate": 3.847647336630145e-06, "loss": 0.3762, "step": 7803 }, { "epoch": 1.9297725024727992, "grad_norm": 0.740291427659636, "learning_rate": 3.847373564366414e-06, "loss": 0.3631, "step": 7804 }, { "epoch": 1.9300197823936696, "grad_norm": 0.8334693884246338, "learning_rate": 3.8470997693281155e-06, "loss": 0.3543, "step": 7805 }, { "epoch": 1.93026706231454, "grad_norm": 0.7608504940566013, "learning_rate": 3.846825951519879e-06, "loss": 0.3552, "step": 7806 }, { "epoch": 1.9305143422354105, "grad_norm": 0.76037084538734, "learning_rate": 3.846552110946333e-06, "loss": 0.3539, "step": 7807 }, { "epoch": 1.9307616221562809, "grad_norm": 0.7651637455498947, "learning_rate": 3.846278247612106e-06, "loss": 0.3629, "step": 7808 }, { "epoch": 1.9310089020771515, "grad_norm": 0.7726823496388989, "learning_rate": 3.8460043615218275e-06, "loss": 0.3598, "step": 7809 }, { "epoch": 1.9312561819980218, "grad_norm": 0.7846615246597684, "learning_rate": 3.845730452680125e-06, "loss": 0.3737, "step": 7810 }, { "epoch": 1.9315034619188922, "grad_norm": 0.7742861497373384, "learning_rate": 3.84545652109163e-06, "loss": 0.3588, "step": 7811 }, { "epoch": 1.9317507418397626, "grad_norm": 0.7942886821360873, "learning_rate": 3.845182566760974e-06, "loss": 0.3707, "step": 7812 }, { "epoch": 1.931998021760633, "grad_norm": 0.7569555520754684, "learning_rate": 3.844908589692785e-06, "loss": 0.3972, "step": 7813 }, { "epoch": 1.9322453016815033, "grad_norm": 0.7792943726049434, "learning_rate": 3.844634589891695e-06, "loss": 0.383, "step": 7814 }, { "epoch": 1.932492581602374, "grad_norm": 0.7951186741739171, "learning_rate": 3.844360567362336e-06, "loss": 0.3579, "step": 7815 }, { "epoch": 1.9327398615232443, "grad_norm": 0.8058047804339189, "learning_rate": 3.844086522109339e-06, "loss": 0.3718, "step": 7816 }, { "epoch": 1.9329871414441149, "grad_norm": 0.7794637967696673, "learning_rate": 3.843812454137336e-06, "loss": 0.3476, "step": 7817 }, { "epoch": 1.9332344213649852, "grad_norm": 0.7677808984672223, "learning_rate": 3.843538363450961e-06, "loss": 0.3778, "step": 7818 }, { "epoch": 1.9334817012858556, "grad_norm": 0.787788627238028, "learning_rate": 3.843264250054845e-06, "loss": 0.3419, "step": 7819 }, { "epoch": 1.933728981206726, "grad_norm": 0.7447698567462923, "learning_rate": 3.842990113953623e-06, "loss": 0.3465, "step": 7820 }, { "epoch": 1.9339762611275964, "grad_norm": 0.7425263628003451, "learning_rate": 3.842715955151928e-06, "loss": 0.3606, "step": 7821 }, { "epoch": 1.9342235410484667, "grad_norm": 0.7662368572797076, "learning_rate": 3.842441773654394e-06, "loss": 0.3662, "step": 7822 }, { "epoch": 1.9344708209693373, "grad_norm": 0.7654476342875355, "learning_rate": 3.842167569465655e-06, "loss": 0.3507, "step": 7823 }, { "epoch": 1.9347181008902077, "grad_norm": 0.7758699814496822, "learning_rate": 3.8418933425903474e-06, "loss": 0.3362, "step": 7824 }, { "epoch": 1.9349653808110783, "grad_norm": 0.7260005594552964, "learning_rate": 3.841619093033104e-06, "loss": 0.3608, "step": 7825 }, { "epoch": 1.9352126607319486, "grad_norm": 0.7783994991925121, "learning_rate": 3.8413448207985635e-06, "loss": 0.3621, "step": 7826 }, { "epoch": 1.935459940652819, "grad_norm": 0.7771824699369275, "learning_rate": 3.841070525891359e-06, "loss": 0.3666, "step": 7827 }, { "epoch": 1.9357072205736894, "grad_norm": 0.8158124084089838, "learning_rate": 3.84079620831613e-06, "loss": 0.3224, "step": 7828 }, { "epoch": 1.9359545004945597, "grad_norm": 0.8148019857574055, "learning_rate": 3.84052186807751e-06, "loss": 0.3666, "step": 7829 }, { "epoch": 1.9362017804154301, "grad_norm": 0.7477523492151886, "learning_rate": 3.840247505180138e-06, "loss": 0.3583, "step": 7830 }, { "epoch": 1.9364490603363007, "grad_norm": 0.7695371782727671, "learning_rate": 3.839973119628651e-06, "loss": 0.4039, "step": 7831 }, { "epoch": 1.936696340257171, "grad_norm": 0.7496534958448707, "learning_rate": 3.839698711427687e-06, "loss": 0.3497, "step": 7832 }, { "epoch": 1.9369436201780417, "grad_norm": 0.7738644212768457, "learning_rate": 3.8394242805818845e-06, "loss": 0.3451, "step": 7833 }, { "epoch": 1.937190900098912, "grad_norm": 0.7500512446334783, "learning_rate": 3.839149827095881e-06, "loss": 0.3425, "step": 7834 }, { "epoch": 1.9374381800197824, "grad_norm": 0.7665896186248192, "learning_rate": 3.838875350974318e-06, "loss": 0.3781, "step": 7835 }, { "epoch": 1.9376854599406528, "grad_norm": 0.7597404510831184, "learning_rate": 3.838600852221833e-06, "loss": 0.3252, "step": 7836 }, { "epoch": 1.9379327398615231, "grad_norm": 0.7540213966854364, "learning_rate": 3.838326330843068e-06, "loss": 0.3668, "step": 7837 }, { "epoch": 1.9381800197823935, "grad_norm": 0.7675146730873836, "learning_rate": 3.838051786842659e-06, "loss": 0.3818, "step": 7838 }, { "epoch": 1.938427299703264, "grad_norm": 0.8058087888748352, "learning_rate": 3.8377772202252505e-06, "loss": 0.3479, "step": 7839 }, { "epoch": 1.9386745796241345, "grad_norm": 0.7413769970462268, "learning_rate": 3.837502630995482e-06, "loss": 0.3745, "step": 7840 }, { "epoch": 1.938921859545005, "grad_norm": 0.7923752957692709, "learning_rate": 3.837228019157996e-06, "loss": 0.3471, "step": 7841 }, { "epoch": 1.9391691394658754, "grad_norm": 0.7876329872358173, "learning_rate": 3.836953384717432e-06, "loss": 0.354, "step": 7842 }, { "epoch": 1.9394164193867458, "grad_norm": 0.7362614193488641, "learning_rate": 3.836678727678434e-06, "loss": 0.361, "step": 7843 }, { "epoch": 1.9396636993076162, "grad_norm": 0.8089109268757817, "learning_rate": 3.836404048045644e-06, "loss": 0.3847, "step": 7844 }, { "epoch": 1.9399109792284865, "grad_norm": 0.773959625665841, "learning_rate": 3.8361293458237044e-06, "loss": 0.3664, "step": 7845 }, { "epoch": 1.940158259149357, "grad_norm": 0.7545258207436686, "learning_rate": 3.83585462101726e-06, "loss": 0.3684, "step": 7846 }, { "epoch": 1.9404055390702275, "grad_norm": 0.7703434372986174, "learning_rate": 3.835579873630953e-06, "loss": 0.3665, "step": 7847 }, { "epoch": 1.9406528189910979, "grad_norm": 0.7788726745327452, "learning_rate": 3.835305103669427e-06, "loss": 0.3725, "step": 7848 }, { "epoch": 1.9409000989119685, "grad_norm": 0.799477648481557, "learning_rate": 3.835030311137327e-06, "loss": 0.3702, "step": 7849 }, { "epoch": 1.9411473788328388, "grad_norm": 0.7847541847470477, "learning_rate": 3.8347554960392986e-06, "loss": 0.3617, "step": 7850 }, { "epoch": 1.9413946587537092, "grad_norm": 0.7622988367588369, "learning_rate": 3.834480658379987e-06, "loss": 0.3613, "step": 7851 }, { "epoch": 1.9416419386745796, "grad_norm": 0.809759489316568, "learning_rate": 3.834205798164036e-06, "loss": 0.34, "step": 7852 }, { "epoch": 1.94188921859545, "grad_norm": 0.7628355298316196, "learning_rate": 3.833930915396093e-06, "loss": 0.3778, "step": 7853 }, { "epoch": 1.9421364985163203, "grad_norm": 0.7683749674611305, "learning_rate": 3.833656010080805e-06, "loss": 0.351, "step": 7854 }, { "epoch": 1.942383778437191, "grad_norm": 0.7706306700768426, "learning_rate": 3.833381082222817e-06, "loss": 0.3737, "step": 7855 }, { "epoch": 1.9426310583580615, "grad_norm": 0.7663769395509369, "learning_rate": 3.833106131826777e-06, "loss": 0.3595, "step": 7856 }, { "epoch": 1.9428783382789319, "grad_norm": 0.7773315319645896, "learning_rate": 3.832831158897332e-06, "loss": 0.3477, "step": 7857 }, { "epoch": 1.9431256181998022, "grad_norm": 0.8162396419977946, "learning_rate": 3.832556163439131e-06, "loss": 0.351, "step": 7858 }, { "epoch": 1.9433728981206726, "grad_norm": 0.7605947094885269, "learning_rate": 3.83228114545682e-06, "loss": 0.3749, "step": 7859 }, { "epoch": 1.943620178041543, "grad_norm": 0.7504949618212972, "learning_rate": 3.832006104955051e-06, "loss": 0.3694, "step": 7860 }, { "epoch": 1.9438674579624133, "grad_norm": 0.7585718931800385, "learning_rate": 3.831731041938469e-06, "loss": 0.3302, "step": 7861 }, { "epoch": 1.944114737883284, "grad_norm": 0.7779930574452575, "learning_rate": 3.831455956411726e-06, "loss": 0.3722, "step": 7862 }, { "epoch": 1.9443620178041543, "grad_norm": 0.7774956605825094, "learning_rate": 3.8311808483794714e-06, "loss": 0.3767, "step": 7863 }, { "epoch": 1.9446092977250249, "grad_norm": 0.8326315136235396, "learning_rate": 3.830905717846355e-06, "loss": 0.3346, "step": 7864 }, { "epoch": 1.9448565776458953, "grad_norm": 0.7746254232555222, "learning_rate": 3.830630564817027e-06, "loss": 0.3379, "step": 7865 }, { "epoch": 1.9451038575667656, "grad_norm": 0.783495264605004, "learning_rate": 3.8303553892961384e-06, "loss": 0.3226, "step": 7866 }, { "epoch": 1.945351137487636, "grad_norm": 0.7753877581927725, "learning_rate": 3.830080191288342e-06, "loss": 0.354, "step": 7867 }, { "epoch": 1.9455984174085064, "grad_norm": 0.7919387274488173, "learning_rate": 3.829804970798287e-06, "loss": 0.3525, "step": 7868 }, { "epoch": 1.9458456973293767, "grad_norm": 0.7824830637781723, "learning_rate": 3.829529727830627e-06, "loss": 0.3366, "step": 7869 }, { "epoch": 1.9460929772502473, "grad_norm": 0.8052678645633229, "learning_rate": 3.829254462390014e-06, "loss": 0.346, "step": 7870 }, { "epoch": 1.9463402571711177, "grad_norm": 0.7745233158753606, "learning_rate": 3.828979174481101e-06, "loss": 0.3771, "step": 7871 }, { "epoch": 1.9465875370919883, "grad_norm": 0.7567158893133228, "learning_rate": 3.82870386410854e-06, "loss": 0.3777, "step": 7872 }, { "epoch": 1.9468348170128587, "grad_norm": 0.7737924860988743, "learning_rate": 3.828428531276987e-06, "loss": 0.3404, "step": 7873 }, { "epoch": 1.947082096933729, "grad_norm": 0.7774692103062553, "learning_rate": 3.828153175991094e-06, "loss": 0.3506, "step": 7874 }, { "epoch": 1.9473293768545994, "grad_norm": 0.7832247898760165, "learning_rate": 3.8278777982555155e-06, "loss": 0.37, "step": 7875 }, { "epoch": 1.9475766567754698, "grad_norm": 0.7768720117615522, "learning_rate": 3.8276023980749065e-06, "loss": 0.3346, "step": 7876 }, { "epoch": 1.9478239366963401, "grad_norm": 0.7428976340896409, "learning_rate": 3.827326975453922e-06, "loss": 0.3409, "step": 7877 }, { "epoch": 1.9480712166172107, "grad_norm": 0.7835200868480776, "learning_rate": 3.827051530397218e-06, "loss": 0.3792, "step": 7878 }, { "epoch": 1.948318496538081, "grad_norm": 0.7896575987878751, "learning_rate": 3.826776062909448e-06, "loss": 0.3374, "step": 7879 }, { "epoch": 1.9485657764589517, "grad_norm": 0.7659442792153464, "learning_rate": 3.826500572995273e-06, "loss": 0.3826, "step": 7880 }, { "epoch": 1.948813056379822, "grad_norm": 0.7467578378433111, "learning_rate": 3.826225060659345e-06, "loss": 0.3592, "step": 7881 }, { "epoch": 1.9490603363006924, "grad_norm": 0.7500941319792008, "learning_rate": 3.825949525906323e-06, "loss": 0.373, "step": 7882 }, { "epoch": 1.9493076162215628, "grad_norm": 0.76294777659985, "learning_rate": 3.8256739687408625e-06, "loss": 0.3502, "step": 7883 }, { "epoch": 1.9495548961424332, "grad_norm": 0.7678813491775802, "learning_rate": 3.825398389167624e-06, "loss": 0.369, "step": 7884 }, { "epoch": 1.9498021760633035, "grad_norm": 0.7702085232100551, "learning_rate": 3.825122787191264e-06, "loss": 0.3493, "step": 7885 }, { "epoch": 1.9500494559841741, "grad_norm": 0.783029018479124, "learning_rate": 3.8248471628164415e-06, "loss": 0.3513, "step": 7886 }, { "epoch": 1.9502967359050445, "grad_norm": 0.7451195424151129, "learning_rate": 3.824571516047815e-06, "loss": 0.3532, "step": 7887 }, { "epoch": 1.950544015825915, "grad_norm": 0.7774857263179649, "learning_rate": 3.824295846890044e-06, "loss": 0.3571, "step": 7888 }, { "epoch": 1.9507912957467854, "grad_norm": 0.7858894401622692, "learning_rate": 3.824020155347787e-06, "loss": 0.3594, "step": 7889 }, { "epoch": 1.9510385756676558, "grad_norm": 0.7410129889660241, "learning_rate": 3.823744441425706e-06, "loss": 0.3748, "step": 7890 }, { "epoch": 1.9512858555885262, "grad_norm": 0.7435367843097874, "learning_rate": 3.82346870512846e-06, "loss": 0.3419, "step": 7891 }, { "epoch": 1.9515331355093966, "grad_norm": 0.7759965489079463, "learning_rate": 3.82319294646071e-06, "loss": 0.3741, "step": 7892 }, { "epoch": 1.951780415430267, "grad_norm": 0.749824196403074, "learning_rate": 3.822917165427118e-06, "loss": 0.3767, "step": 7893 }, { "epoch": 1.9520276953511375, "grad_norm": 0.7586276639622814, "learning_rate": 3.8226413620323435e-06, "loss": 0.3501, "step": 7894 }, { "epoch": 1.9522749752720079, "grad_norm": 0.7676020587863214, "learning_rate": 3.82236553628105e-06, "loss": 0.3663, "step": 7895 }, { "epoch": 1.9525222551928785, "grad_norm": 0.7633913279040413, "learning_rate": 3.8220896881779e-06, "loss": 0.3435, "step": 7896 }, { "epoch": 1.9527695351137488, "grad_norm": 0.7734700985179953, "learning_rate": 3.821813817727555e-06, "loss": 0.3285, "step": 7897 }, { "epoch": 1.9530168150346192, "grad_norm": 0.7965681991121353, "learning_rate": 3.8215379249346785e-06, "loss": 0.3732, "step": 7898 }, { "epoch": 1.9532640949554896, "grad_norm": 0.7986124132977612, "learning_rate": 3.821262009803934e-06, "loss": 0.3384, "step": 7899 }, { "epoch": 1.95351137487636, "grad_norm": 0.7807517105019565, "learning_rate": 3.820986072339985e-06, "loss": 0.3404, "step": 7900 }, { "epoch": 1.9537586547972303, "grad_norm": 0.8094650652104458, "learning_rate": 3.820710112547496e-06, "loss": 0.3546, "step": 7901 }, { "epoch": 1.954005934718101, "grad_norm": 0.7756295981683475, "learning_rate": 3.820434130431132e-06, "loss": 0.3766, "step": 7902 }, { "epoch": 1.9542532146389713, "grad_norm": 0.7810967181730114, "learning_rate": 3.820158125995557e-06, "loss": 0.3523, "step": 7903 }, { "epoch": 1.9545004945598419, "grad_norm": 0.7795516196612063, "learning_rate": 3.8198820992454354e-06, "loss": 0.3471, "step": 7904 }, { "epoch": 1.9547477744807122, "grad_norm": 0.7529857358903908, "learning_rate": 3.819606050185435e-06, "loss": 0.3511, "step": 7905 }, { "epoch": 1.9549950544015826, "grad_norm": 0.7508593530949317, "learning_rate": 3.819329978820221e-06, "loss": 0.3844, "step": 7906 }, { "epoch": 1.955242334322453, "grad_norm": 0.7583774941279995, "learning_rate": 3.81905388515446e-06, "loss": 0.3257, "step": 7907 }, { "epoch": 1.9554896142433233, "grad_norm": 0.765277337531209, "learning_rate": 3.818777769192818e-06, "loss": 0.3556, "step": 7908 }, { "epoch": 1.9557368941641937, "grad_norm": 0.7600236443057112, "learning_rate": 3.818501630939963e-06, "loss": 0.35, "step": 7909 }, { "epoch": 1.9559841740850643, "grad_norm": 0.7916916498882985, "learning_rate": 3.8182254704005614e-06, "loss": 0.3699, "step": 7910 }, { "epoch": 1.9562314540059347, "grad_norm": 0.7478638409944033, "learning_rate": 3.8179492875792825e-06, "loss": 0.3873, "step": 7911 }, { "epoch": 1.9564787339268053, "grad_norm": 0.7717211762469468, "learning_rate": 3.817673082480794e-06, "loss": 0.3791, "step": 7912 }, { "epoch": 1.9567260138476756, "grad_norm": 0.7639215261660418, "learning_rate": 3.8173968551097655e-06, "loss": 0.3829, "step": 7913 }, { "epoch": 1.956973293768546, "grad_norm": 0.758939917939761, "learning_rate": 3.8171206054708634e-06, "loss": 0.3877, "step": 7914 }, { "epoch": 1.9572205736894164, "grad_norm": 0.7775733526566887, "learning_rate": 3.81684433356876e-06, "loss": 0.367, "step": 7915 }, { "epoch": 1.9574678536102867, "grad_norm": 0.7411123040398683, "learning_rate": 3.816568039408124e-06, "loss": 0.3846, "step": 7916 }, { "epoch": 1.9577151335311571, "grad_norm": 0.7534974715895456, "learning_rate": 3.816291722993625e-06, "loss": 0.3605, "step": 7917 }, { "epoch": 1.9579624134520277, "grad_norm": 0.7802980106785317, "learning_rate": 3.816015384329935e-06, "loss": 0.3549, "step": 7918 }, { "epoch": 1.958209693372898, "grad_norm": 0.8062496399916748, "learning_rate": 3.815739023421724e-06, "loss": 0.3416, "step": 7919 }, { "epoch": 1.9584569732937687, "grad_norm": 0.7728199458363546, "learning_rate": 3.815462640273663e-06, "loss": 0.3467, "step": 7920 }, { "epoch": 1.958704253214639, "grad_norm": 0.7926170176402779, "learning_rate": 3.815186234890423e-06, "loss": 0.3387, "step": 7921 }, { "epoch": 1.9589515331355094, "grad_norm": 0.7583729635336474, "learning_rate": 3.8149098072766784e-06, "loss": 0.3594, "step": 7922 }, { "epoch": 1.9591988130563798, "grad_norm": 0.7811197932190499, "learning_rate": 3.814633357437101e-06, "loss": 0.3732, "step": 7923 }, { "epoch": 1.9594460929772501, "grad_norm": 0.7648371303018133, "learning_rate": 3.8143568853763613e-06, "loss": 0.3962, "step": 7924 }, { "epoch": 1.9596933728981205, "grad_norm": 0.7904439292037829, "learning_rate": 3.814080391099135e-06, "loss": 0.3429, "step": 7925 }, { "epoch": 1.959940652818991, "grad_norm": 0.7766092415436662, "learning_rate": 3.8138038746100955e-06, "loss": 0.3744, "step": 7926 }, { "epoch": 1.9601879327398615, "grad_norm": 0.7816327593068648, "learning_rate": 3.8135273359139152e-06, "loss": 0.3376, "step": 7927 }, { "epoch": 1.960435212660732, "grad_norm": 0.7839395253439827, "learning_rate": 3.8132507750152693e-06, "loss": 0.3549, "step": 7928 }, { "epoch": 1.9606824925816024, "grad_norm": 0.7484978285133097, "learning_rate": 3.812974191918833e-06, "loss": 0.365, "step": 7929 }, { "epoch": 1.9609297725024728, "grad_norm": 0.761322396299957, "learning_rate": 3.8126975866292805e-06, "loss": 0.3642, "step": 7930 }, { "epoch": 1.9611770524233432, "grad_norm": 0.8241662121402238, "learning_rate": 3.8124209591512877e-06, "loss": 0.3413, "step": 7931 }, { "epoch": 1.9614243323442135, "grad_norm": 0.7633100235999162, "learning_rate": 3.8121443094895304e-06, "loss": 0.3886, "step": 7932 }, { "epoch": 1.9616716122650841, "grad_norm": 0.7405253494354218, "learning_rate": 3.811867637648684e-06, "loss": 0.383, "step": 7933 }, { "epoch": 1.9619188921859545, "grad_norm": 0.7405056497457246, "learning_rate": 3.8115909436334264e-06, "loss": 0.3781, "step": 7934 }, { "epoch": 1.962166172106825, "grad_norm": 0.7675761174695119, "learning_rate": 3.811314227448434e-06, "loss": 0.3544, "step": 7935 }, { "epoch": 1.9624134520276955, "grad_norm": 0.7700486565974988, "learning_rate": 3.8110374890983837e-06, "loss": 0.3605, "step": 7936 }, { "epoch": 1.9626607319485658, "grad_norm": 0.7874695870189429, "learning_rate": 3.8107607285879534e-06, "loss": 0.3488, "step": 7937 }, { "epoch": 1.9629080118694362, "grad_norm": 0.7739369966938023, "learning_rate": 3.8104839459218224e-06, "loss": 0.3684, "step": 7938 }, { "epoch": 1.9631552917903066, "grad_norm": 0.8209519133891132, "learning_rate": 3.8102071411046665e-06, "loss": 0.3866, "step": 7939 }, { "epoch": 1.963402571711177, "grad_norm": 0.778593221146525, "learning_rate": 3.8099303141411663e-06, "loss": 0.3444, "step": 7940 }, { "epoch": 1.9636498516320475, "grad_norm": 0.7966914975263187, "learning_rate": 3.8096534650360012e-06, "loss": 0.3554, "step": 7941 }, { "epoch": 1.963897131552918, "grad_norm": 0.7570400284796089, "learning_rate": 3.80937659379385e-06, "loss": 0.3329, "step": 7942 }, { "epoch": 1.9641444114737885, "grad_norm": 0.7737863544151384, "learning_rate": 3.8090997004193942e-06, "loss": 0.3666, "step": 7943 }, { "epoch": 1.9643916913946589, "grad_norm": 0.7912523376853609, "learning_rate": 3.8088227849173108e-06, "loss": 0.3381, "step": 7944 }, { "epoch": 1.9646389713155292, "grad_norm": 0.7805340236175088, "learning_rate": 3.808545847292283e-06, "loss": 0.3517, "step": 7945 }, { "epoch": 1.9648862512363996, "grad_norm": 0.7422847641517919, "learning_rate": 3.808268887548992e-06, "loss": 0.364, "step": 7946 }, { "epoch": 1.96513353115727, "grad_norm": 0.7790387078201796, "learning_rate": 3.8079919056921182e-06, "loss": 0.3682, "step": 7947 }, { "epoch": 1.9653808110781403, "grad_norm": 0.7914670497052861, "learning_rate": 3.8077149017263444e-06, "loss": 0.3704, "step": 7948 }, { "epoch": 1.965628090999011, "grad_norm": 0.8051304459648962, "learning_rate": 3.8074378756563515e-06, "loss": 0.3678, "step": 7949 }, { "epoch": 1.9658753709198813, "grad_norm": 0.7868996265761373, "learning_rate": 3.8071608274868233e-06, "loss": 0.3427, "step": 7950 }, { "epoch": 1.9661226508407519, "grad_norm": 0.779856826771308, "learning_rate": 3.8068837572224416e-06, "loss": 0.3637, "step": 7951 }, { "epoch": 1.9663699307616223, "grad_norm": 0.7455732093879641, "learning_rate": 3.80660666486789e-06, "loss": 0.3545, "step": 7952 }, { "epoch": 1.9666172106824926, "grad_norm": 0.7715470161819376, "learning_rate": 3.806329550427853e-06, "loss": 0.3758, "step": 7953 }, { "epoch": 1.966864490603363, "grad_norm": 0.7872010267033738, "learning_rate": 3.8060524139070142e-06, "loss": 0.3375, "step": 7954 }, { "epoch": 1.9671117705242334, "grad_norm": 0.7627173619489025, "learning_rate": 3.8057752553100574e-06, "loss": 0.3423, "step": 7955 }, { "epoch": 1.9673590504451037, "grad_norm": 0.7817077435699813, "learning_rate": 3.8054980746416688e-06, "loss": 0.3191, "step": 7956 }, { "epoch": 1.9676063303659743, "grad_norm": 0.7582575476510286, "learning_rate": 3.8052208719065315e-06, "loss": 0.3585, "step": 7957 }, { "epoch": 1.9678536102868447, "grad_norm": 0.7522213601977064, "learning_rate": 3.804943647109333e-06, "loss": 0.349, "step": 7958 }, { "epoch": 1.9681008902077153, "grad_norm": 0.7659637002635107, "learning_rate": 3.8046664002547577e-06, "loss": 0.3454, "step": 7959 }, { "epoch": 1.9683481701285857, "grad_norm": 0.7565013026358746, "learning_rate": 3.8043891313474936e-06, "loss": 0.3615, "step": 7960 }, { "epoch": 1.968595450049456, "grad_norm": 0.7471315304500218, "learning_rate": 3.804111840392226e-06, "loss": 0.3696, "step": 7961 }, { "epoch": 1.9688427299703264, "grad_norm": 0.7746622916636341, "learning_rate": 3.8038345273936415e-06, "loss": 0.3525, "step": 7962 }, { "epoch": 1.9690900098911968, "grad_norm": 0.7768681435767831, "learning_rate": 3.80355719235643e-06, "loss": 0.3461, "step": 7963 }, { "epoch": 1.9693372898120671, "grad_norm": 0.7950529955000663, "learning_rate": 3.8032798352852755e-06, "loss": 0.3659, "step": 7964 }, { "epoch": 1.9695845697329377, "grad_norm": 0.7634453582714605, "learning_rate": 3.80300245618487e-06, "loss": 0.3357, "step": 7965 }, { "epoch": 1.969831849653808, "grad_norm": 0.7480079968719103, "learning_rate": 3.8027250550598987e-06, "loss": 0.3662, "step": 7966 }, { "epoch": 1.9700791295746787, "grad_norm": 0.7475283841692999, "learning_rate": 3.8024476319150525e-06, "loss": 0.3727, "step": 7967 }, { "epoch": 1.970326409495549, "grad_norm": 0.7699546504087326, "learning_rate": 3.802170186755021e-06, "loss": 0.3332, "step": 7968 }, { "epoch": 1.9705736894164194, "grad_norm": 0.7597122246058947, "learning_rate": 3.8018927195844925e-06, "loss": 0.3314, "step": 7969 }, { "epoch": 1.9708209693372898, "grad_norm": 0.8068616168685517, "learning_rate": 3.8016152304081574e-06, "loss": 0.3568, "step": 7970 }, { "epoch": 1.9710682492581602, "grad_norm": 0.7666221943407802, "learning_rate": 3.8013377192307064e-06, "loss": 0.3513, "step": 7971 }, { "epoch": 1.9713155291790305, "grad_norm": 0.765701938836362, "learning_rate": 3.8010601860568297e-06, "loss": 0.3719, "step": 7972 }, { "epoch": 1.9715628090999011, "grad_norm": 0.8156655259263106, "learning_rate": 3.8007826308912193e-06, "loss": 0.3482, "step": 7973 }, { "epoch": 1.9718100890207715, "grad_norm": 0.819499502423676, "learning_rate": 3.8005050537385656e-06, "loss": 0.3633, "step": 7974 }, { "epoch": 1.972057368941642, "grad_norm": 0.7897443520400108, "learning_rate": 3.800227454603562e-06, "loss": 0.3593, "step": 7975 }, { "epoch": 1.9723046488625124, "grad_norm": 0.791071300035924, "learning_rate": 3.7999498334908993e-06, "loss": 0.3757, "step": 7976 }, { "epoch": 1.9725519287833828, "grad_norm": 0.7751278627319633, "learning_rate": 3.79967219040527e-06, "loss": 0.395, "step": 7977 }, { "epoch": 1.9727992087042532, "grad_norm": 0.7701002767167324, "learning_rate": 3.7993945253513687e-06, "loss": 0.3445, "step": 7978 }, { "epoch": 1.9730464886251236, "grad_norm": 0.7760556567478782, "learning_rate": 3.7991168383338877e-06, "loss": 0.3498, "step": 7979 }, { "epoch": 1.973293768545994, "grad_norm": 0.776586056356586, "learning_rate": 3.7988391293575207e-06, "loss": 0.3689, "step": 7980 }, { "epoch": 1.9735410484668645, "grad_norm": 0.7632200299221464, "learning_rate": 3.7985613984269617e-06, "loss": 0.3498, "step": 7981 }, { "epoch": 1.9737883283877349, "grad_norm": 0.7532203672411112, "learning_rate": 3.798283645546905e-06, "loss": 0.3983, "step": 7982 }, { "epoch": 1.9740356083086055, "grad_norm": 0.7714218461056945, "learning_rate": 3.798005870722046e-06, "loss": 0.3544, "step": 7983 }, { "epoch": 1.9742828882294758, "grad_norm": 0.7944430967570533, "learning_rate": 3.797728073957079e-06, "loss": 0.3846, "step": 7984 }, { "epoch": 1.9745301681503462, "grad_norm": 0.7811563506406758, "learning_rate": 3.7974502552567015e-06, "loss": 0.351, "step": 7985 }, { "epoch": 1.9747774480712166, "grad_norm": 0.7803327203935243, "learning_rate": 3.7971724146256074e-06, "loss": 0.363, "step": 7986 }, { "epoch": 1.975024727992087, "grad_norm": 0.7397427179845135, "learning_rate": 3.7968945520684946e-06, "loss": 0.3756, "step": 7987 }, { "epoch": 1.9752720079129573, "grad_norm": 0.7636320681425557, "learning_rate": 3.7966166675900586e-06, "loss": 0.4085, "step": 7988 }, { "epoch": 1.975519287833828, "grad_norm": 0.7859780231001176, "learning_rate": 3.7963387611949966e-06, "loss": 0.3795, "step": 7989 }, { "epoch": 1.9757665677546983, "grad_norm": 0.7755986859934767, "learning_rate": 3.796060832888007e-06, "loss": 0.3733, "step": 7990 }, { "epoch": 1.9760138476755689, "grad_norm": 0.7755596809962013, "learning_rate": 3.7957828826737864e-06, "loss": 0.3267, "step": 7991 }, { "epoch": 1.9762611275964392, "grad_norm": 0.7854801586044354, "learning_rate": 3.795504910557034e-06, "loss": 0.3621, "step": 7992 }, { "epoch": 1.9765084075173096, "grad_norm": 0.7818166483670558, "learning_rate": 3.795226916542447e-06, "loss": 0.3577, "step": 7993 }, { "epoch": 1.97675568743818, "grad_norm": 0.7551612791160183, "learning_rate": 3.794948900634726e-06, "loss": 0.3747, "step": 7994 }, { "epoch": 1.9770029673590503, "grad_norm": 0.7897572803119051, "learning_rate": 3.794670862838569e-06, "loss": 0.3605, "step": 7995 }, { "epoch": 1.9772502472799207, "grad_norm": 0.7848824804893049, "learning_rate": 3.7943928031586763e-06, "loss": 0.3379, "step": 7996 }, { "epoch": 1.9774975272007913, "grad_norm": 0.7993405280259536, "learning_rate": 3.7941147215997478e-06, "loss": 0.3444, "step": 7997 }, { "epoch": 1.9777448071216617, "grad_norm": 0.7708272773693086, "learning_rate": 3.7938366181664835e-06, "loss": 0.3342, "step": 7998 }, { "epoch": 1.9779920870425323, "grad_norm": 0.7686450971778381, "learning_rate": 3.793558492863585e-06, "loss": 0.3371, "step": 7999 }, { "epoch": 1.9782393669634026, "grad_norm": 0.7690164348945304, "learning_rate": 3.7932803456957524e-06, "loss": 0.3546, "step": 8000 }, { "epoch": 1.978486646884273, "grad_norm": 0.7664230371282903, "learning_rate": 3.7930021766676883e-06, "loss": 0.3637, "step": 8001 }, { "epoch": 1.9787339268051434, "grad_norm": 0.7648049919355135, "learning_rate": 3.7927239857840935e-06, "loss": 0.3423, "step": 8002 }, { "epoch": 1.9789812067260137, "grad_norm": 0.7510404267059041, "learning_rate": 3.792445773049671e-06, "loss": 0.3738, "step": 8003 }, { "epoch": 1.979228486646884, "grad_norm": 0.7976307233079993, "learning_rate": 3.7921675384691225e-06, "loss": 0.3621, "step": 8004 }, { "epoch": 1.9794757665677547, "grad_norm": 0.7497505196274972, "learning_rate": 3.7918892820471526e-06, "loss": 0.3835, "step": 8005 }, { "epoch": 1.979723046488625, "grad_norm": 0.7898731589824831, "learning_rate": 3.7916110037884636e-06, "loss": 0.3505, "step": 8006 }, { "epoch": 1.9799703264094957, "grad_norm": 0.8357322459716116, "learning_rate": 3.791332703697759e-06, "loss": 0.3776, "step": 8007 }, { "epoch": 1.980217606330366, "grad_norm": 0.7765070279777918, "learning_rate": 3.791054381779743e-06, "loss": 0.3705, "step": 8008 }, { "epoch": 1.9804648862512364, "grad_norm": 0.7727422836819049, "learning_rate": 3.79077603803912e-06, "loss": 0.3443, "step": 8009 }, { "epoch": 1.9807121661721068, "grad_norm": 0.7790057858154583, "learning_rate": 3.790497672480596e-06, "loss": 0.3699, "step": 8010 }, { "epoch": 1.9809594460929771, "grad_norm": 0.7413672643555688, "learning_rate": 3.7902192851088743e-06, "loss": 0.3479, "step": 8011 }, { "epoch": 1.9812067260138477, "grad_norm": 0.7788698113673297, "learning_rate": 3.789940875928662e-06, "loss": 0.3254, "step": 8012 }, { "epoch": 1.981454005934718, "grad_norm": 0.7525769680684532, "learning_rate": 3.789662444944664e-06, "loss": 0.34, "step": 8013 }, { "epoch": 1.9817012858555887, "grad_norm": 0.7833620628774245, "learning_rate": 3.789383992161588e-06, "loss": 0.3411, "step": 8014 }, { "epoch": 1.981948565776459, "grad_norm": 0.7446692167001558, "learning_rate": 3.7891055175841383e-06, "loss": 0.3559, "step": 8015 }, { "epoch": 1.9821958456973294, "grad_norm": 0.7835583328443011, "learning_rate": 3.788827021217025e-06, "loss": 0.3392, "step": 8016 }, { "epoch": 1.9824431256181998, "grad_norm": 0.758998270725149, "learning_rate": 3.788548503064953e-06, "loss": 0.375, "step": 8017 }, { "epoch": 1.9826904055390702, "grad_norm": 0.80805191822548, "learning_rate": 3.7882699631326307e-06, "loss": 0.3262, "step": 8018 }, { "epoch": 1.9829376854599405, "grad_norm": 0.8207766572632098, "learning_rate": 3.7879914014247676e-06, "loss": 0.3732, "step": 8019 }, { "epoch": 1.9831849653808111, "grad_norm": 0.8202477547269553, "learning_rate": 3.7877128179460697e-06, "loss": 0.3331, "step": 8020 }, { "epoch": 1.9834322453016815, "grad_norm": 0.769720391381998, "learning_rate": 3.787434212701249e-06, "loss": 0.3193, "step": 8021 }, { "epoch": 1.983679525222552, "grad_norm": 0.7679292252440842, "learning_rate": 3.787155585695011e-06, "loss": 0.3492, "step": 8022 }, { "epoch": 1.9839268051434225, "grad_norm": 0.7927842124991492, "learning_rate": 3.7868769369320677e-06, "loss": 0.3365, "step": 8023 }, { "epoch": 1.9841740850642928, "grad_norm": 0.821294540349822, "learning_rate": 3.7865982664171296e-06, "loss": 0.354, "step": 8024 }, { "epoch": 1.9844213649851632, "grad_norm": 0.7838552495567704, "learning_rate": 3.7863195741549064e-06, "loss": 0.3445, "step": 8025 }, { "epoch": 1.9846686449060336, "grad_norm": 0.7862684668079565, "learning_rate": 3.7860408601501076e-06, "loss": 0.353, "step": 8026 }, { "epoch": 1.984915924826904, "grad_norm": 0.7472781211844226, "learning_rate": 3.785762124407446e-06, "loss": 0.3456, "step": 8027 }, { "epoch": 1.9851632047477745, "grad_norm": 0.7633629883563285, "learning_rate": 3.785483366931632e-06, "loss": 0.3689, "step": 8028 }, { "epoch": 1.985410484668645, "grad_norm": 0.7420384933908292, "learning_rate": 3.785204587727378e-06, "loss": 0.3596, "step": 8029 }, { "epoch": 1.9856577645895155, "grad_norm": 0.7748825408856977, "learning_rate": 3.7849257867993955e-06, "loss": 0.385, "step": 8030 }, { "epoch": 1.9859050445103859, "grad_norm": 0.7635602304217024, "learning_rate": 3.7846469641523976e-06, "loss": 0.3847, "step": 8031 }, { "epoch": 1.9861523244312562, "grad_norm": 0.8114635395406381, "learning_rate": 3.7843681197910976e-06, "loss": 0.3638, "step": 8032 }, { "epoch": 1.9863996043521266, "grad_norm": 0.7693926101024632, "learning_rate": 3.7840892537202073e-06, "loss": 0.375, "step": 8033 }, { "epoch": 1.986646884272997, "grad_norm": 0.7650067632445352, "learning_rate": 3.7838103659444425e-06, "loss": 0.3784, "step": 8034 }, { "epoch": 1.9868941641938673, "grad_norm": 0.770387613572559, "learning_rate": 3.7835314564685156e-06, "loss": 0.3751, "step": 8035 }, { "epoch": 1.987141444114738, "grad_norm": 0.7634296771072809, "learning_rate": 3.783252525297141e-06, "loss": 0.3198, "step": 8036 }, { "epoch": 1.9873887240356083, "grad_norm": 0.7705874260217943, "learning_rate": 3.7829735724350347e-06, "loss": 0.3697, "step": 8037 }, { "epoch": 1.9876360039564789, "grad_norm": 0.779943819644916, "learning_rate": 3.782694597886911e-06, "loss": 0.353, "step": 8038 }, { "epoch": 1.9878832838773492, "grad_norm": 0.8147202236149634, "learning_rate": 3.782415601657484e-06, "loss": 0.3844, "step": 8039 }, { "epoch": 1.9881305637982196, "grad_norm": 0.7826841725367639, "learning_rate": 3.7821365837514724e-06, "loss": 0.3526, "step": 8040 }, { "epoch": 1.98837784371909, "grad_norm": 0.8188973794893306, "learning_rate": 3.7818575441735904e-06, "loss": 0.371, "step": 8041 }, { "epoch": 1.9886251236399604, "grad_norm": 0.7886564183760332, "learning_rate": 3.7815784829285552e-06, "loss": 0.3331, "step": 8042 }, { "epoch": 1.9888724035608307, "grad_norm": 0.7711487989139294, "learning_rate": 3.7812994000210844e-06, "loss": 0.3457, "step": 8043 }, { "epoch": 1.9891196834817013, "grad_norm": 0.7560395531527948, "learning_rate": 3.781020295455894e-06, "loss": 0.3754, "step": 8044 }, { "epoch": 1.9893669634025717, "grad_norm": 0.7785797909962231, "learning_rate": 3.7807411692377027e-06, "loss": 0.3656, "step": 8045 }, { "epoch": 1.9896142433234423, "grad_norm": 0.7869820104882096, "learning_rate": 3.7804620213712285e-06, "loss": 0.3436, "step": 8046 }, { "epoch": 1.9898615232443126, "grad_norm": 0.7953954460923347, "learning_rate": 3.780182851861188e-06, "loss": 0.3491, "step": 8047 }, { "epoch": 1.990108803165183, "grad_norm": 0.7549939319349205, "learning_rate": 3.7799036607123036e-06, "loss": 0.3763, "step": 8048 }, { "epoch": 1.9903560830860534, "grad_norm": 0.7652426795519158, "learning_rate": 3.779624447929291e-06, "loss": 0.3756, "step": 8049 }, { "epoch": 1.9906033630069238, "grad_norm": 0.8008551762669335, "learning_rate": 3.7793452135168713e-06, "loss": 0.3641, "step": 8050 }, { "epoch": 1.9908506429277941, "grad_norm": 0.8387752795241713, "learning_rate": 3.7790659574797637e-06, "loss": 0.3391, "step": 8051 }, { "epoch": 1.9910979228486647, "grad_norm": 0.7972705450507616, "learning_rate": 3.7787866798226903e-06, "loss": 0.3462, "step": 8052 }, { "epoch": 1.991345202769535, "grad_norm": 0.7709353607702387, "learning_rate": 3.778507380550369e-06, "loss": 0.3843, "step": 8053 }, { "epoch": 1.9915924826904057, "grad_norm": 0.7803412166620947, "learning_rate": 3.7782280596675224e-06, "loss": 0.3344, "step": 8054 }, { "epoch": 1.991839762611276, "grad_norm": 0.8122118583006261, "learning_rate": 3.777948717178872e-06, "loss": 0.3412, "step": 8055 }, { "epoch": 1.9920870425321464, "grad_norm": 0.7409272991553482, "learning_rate": 3.7776693530891383e-06, "loss": 0.3458, "step": 8056 }, { "epoch": 1.9923343224530168, "grad_norm": 0.7670512836498297, "learning_rate": 3.7773899674030447e-06, "loss": 0.3419, "step": 8057 }, { "epoch": 1.9925816023738872, "grad_norm": 0.7685011201855094, "learning_rate": 3.777110560125312e-06, "loss": 0.3325, "step": 8058 }, { "epoch": 1.9928288822947575, "grad_norm": 0.7763533358426448, "learning_rate": 3.7768311312606654e-06, "loss": 0.3684, "step": 8059 }, { "epoch": 1.993076162215628, "grad_norm": 0.7623755134845402, "learning_rate": 3.776551680813826e-06, "loss": 0.349, "step": 8060 }, { "epoch": 1.9933234421364985, "grad_norm": 0.7768521034466981, "learning_rate": 3.7762722087895187e-06, "loss": 0.375, "step": 8061 }, { "epoch": 1.993570722057369, "grad_norm": 0.7882777730656926, "learning_rate": 3.7759927151924652e-06, "loss": 0.3531, "step": 8062 }, { "epoch": 1.9938180019782394, "grad_norm": 0.7686293875654023, "learning_rate": 3.775713200027393e-06, "loss": 0.3735, "step": 8063 }, { "epoch": 1.9940652818991098, "grad_norm": 0.7623246649764027, "learning_rate": 3.775433663299024e-06, "loss": 0.357, "step": 8064 }, { "epoch": 1.9943125618199802, "grad_norm": 0.7846204634877398, "learning_rate": 3.7751541050120848e-06, "loss": 0.353, "step": 8065 }, { "epoch": 1.9945598417408505, "grad_norm": 0.8172290946173688, "learning_rate": 3.7748745251713004e-06, "loss": 0.3716, "step": 8066 }, { "epoch": 1.994807121661721, "grad_norm": 0.7774311438037483, "learning_rate": 3.774594923781395e-06, "loss": 0.3515, "step": 8067 }, { "epoch": 1.9950544015825915, "grad_norm": 0.7556964487243285, "learning_rate": 3.7743153008470974e-06, "loss": 0.3486, "step": 8068 }, { "epoch": 1.9953016815034619, "grad_norm": 0.7602662561289288, "learning_rate": 3.774035656373132e-06, "loss": 0.3748, "step": 8069 }, { "epoch": 1.9955489614243325, "grad_norm": 0.7640743448185953, "learning_rate": 3.7737559903642263e-06, "loss": 0.3799, "step": 8070 }, { "epoch": 1.9957962413452028, "grad_norm": 0.7548044427577385, "learning_rate": 3.7734763028251064e-06, "loss": 0.3685, "step": 8071 }, { "epoch": 1.9960435212660732, "grad_norm": 0.7603133031159608, "learning_rate": 3.773196593760502e-06, "loss": 0.3321, "step": 8072 }, { "epoch": 1.9962908011869436, "grad_norm": 0.7388564211916194, "learning_rate": 3.77291686317514e-06, "loss": 0.3854, "step": 8073 }, { "epoch": 1.996538081107814, "grad_norm": 0.773242283971202, "learning_rate": 3.772637111073748e-06, "loss": 0.3323, "step": 8074 }, { "epoch": 1.9967853610286843, "grad_norm": 0.7807159557553084, "learning_rate": 3.7723573374610555e-06, "loss": 0.3724, "step": 8075 }, { "epoch": 1.997032640949555, "grad_norm": 0.767686187991623, "learning_rate": 3.772077542341791e-06, "loss": 0.3767, "step": 8076 }, { "epoch": 1.9972799208704253, "grad_norm": 0.7669380852652359, "learning_rate": 3.7717977257206846e-06, "loss": 0.3777, "step": 8077 }, { "epoch": 1.9975272007912959, "grad_norm": 0.7710362998599887, "learning_rate": 3.7715178876024645e-06, "loss": 0.3733, "step": 8078 }, { "epoch": 1.9977744807121662, "grad_norm": 0.7793864464426763, "learning_rate": 3.771238027991862e-06, "loss": 0.3488, "step": 8079 }, { "epoch": 1.9980217606330366, "grad_norm": 0.7628899812643342, "learning_rate": 3.7709581468936075e-06, "loss": 0.3756, "step": 8080 }, { "epoch": 1.998269040553907, "grad_norm": 0.7726010466807438, "learning_rate": 3.770678244312431e-06, "loss": 0.333, "step": 8081 }, { "epoch": 1.9985163204747773, "grad_norm": 0.7806698697200825, "learning_rate": 3.770398320253065e-06, "loss": 0.3641, "step": 8082 }, { "epoch": 1.9987636003956477, "grad_norm": 0.7748637845373251, "learning_rate": 3.77011837472024e-06, "loss": 0.3764, "step": 8083 }, { "epoch": 1.9990108803165183, "grad_norm": 0.7343343674479577, "learning_rate": 3.769838407718688e-06, "loss": 0.3821, "step": 8084 }, { "epoch": 1.9992581602373887, "grad_norm": 0.8271213300805976, "learning_rate": 3.7695584192531416e-06, "loss": 0.3215, "step": 8085 }, { "epoch": 1.9995054401582593, "grad_norm": 0.7538269029024902, "learning_rate": 3.769278409328333e-06, "loss": 0.3606, "step": 8086 }, { "epoch": 1.9997527200791296, "grad_norm": 0.8024082088588669, "learning_rate": 3.768998377948996e-06, "loss": 0.3746, "step": 8087 }, { "epoch": 2.0, "grad_norm": 0.7845990241681178, "learning_rate": 3.7687183251198635e-06, "loss": 0.3398, "step": 8088 } ], "logging_steps": 1, "max_steps": 24264, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 4044, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 833449623552000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }