{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4044, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002472799208704253, "grad_norm": 2.5362265715360204, "learning_rate": 5.0000000000000004e-08, "loss": 1.2213, "step": 1 }, { "epoch": 0.0004945598417408506, "grad_norm": 2.6997408857983025, "learning_rate": 1.0000000000000001e-07, "loss": 1.231, "step": 2 }, { "epoch": 0.000741839762611276, "grad_norm": 2.5867181090965676, "learning_rate": 1.5000000000000002e-07, "loss": 1.249, "step": 3 }, { "epoch": 0.0009891196834817012, "grad_norm": 2.357638460408377, "learning_rate": 2.0000000000000002e-07, "loss": 1.2146, "step": 4 }, { "epoch": 0.0012363996043521265, "grad_norm": 2.557414917391326, "learning_rate": 2.5000000000000004e-07, "loss": 1.2872, "step": 5 }, { "epoch": 0.001483679525222552, "grad_norm": 2.3734045786673423, "learning_rate": 3.0000000000000004e-07, "loss": 1.2081, "step": 6 }, { "epoch": 0.0017309594460929772, "grad_norm": 2.621127934186416, "learning_rate": 3.5000000000000004e-07, "loss": 1.2374, "step": 7 }, { "epoch": 0.0019782393669634025, "grad_norm": 2.7716435700320714, "learning_rate": 4.0000000000000003e-07, "loss": 1.2364, "step": 8 }, { "epoch": 0.002225519287833828, "grad_norm": 2.301737328244618, "learning_rate": 4.5000000000000003e-07, "loss": 1.2219, "step": 9 }, { "epoch": 0.002472799208704253, "grad_norm": 2.5519952552831398, "learning_rate": 5.000000000000001e-07, "loss": 1.2372, "step": 10 }, { "epoch": 0.0027200791295746785, "grad_norm": 2.3667701483261667, "learning_rate": 5.5e-07, "loss": 1.1995, "step": 11 }, { "epoch": 0.002967359050445104, "grad_norm": 2.3437329583372604, "learning_rate": 6.000000000000001e-07, "loss": 1.2414, "step": 12 }, { "epoch": 0.003214638971315529, "grad_norm": 2.060580346749902, "learning_rate": 6.5e-07, "loss": 1.2034, "step": 13 }, { "epoch": 0.0034619188921859545, "grad_norm": 2.0573625343235564, "learning_rate": 7.000000000000001e-07, "loss": 1.2025, "step": 14 }, { "epoch": 0.00370919881305638, "grad_norm": 2.374658307654524, "learning_rate": 7.5e-07, "loss": 1.2111, "step": 15 }, { "epoch": 0.003956478733926805, "grad_norm": 2.134642801469451, "learning_rate": 8.000000000000001e-07, "loss": 1.227, "step": 16 }, { "epoch": 0.0042037586547972305, "grad_norm": 2.2681061588439424, "learning_rate": 8.500000000000001e-07, "loss": 1.1964, "step": 17 }, { "epoch": 0.004451038575667656, "grad_norm": 1.8506691640366166, "learning_rate": 9.000000000000001e-07, "loss": 1.1977, "step": 18 }, { "epoch": 0.004698318496538081, "grad_norm": 1.948439188694478, "learning_rate": 9.500000000000001e-07, "loss": 1.2006, "step": 19 }, { "epoch": 0.004945598417408506, "grad_norm": 1.6605613980729736, "learning_rate": 1.0000000000000002e-06, "loss": 1.1666, "step": 20 }, { "epoch": 0.0051928783382789315, "grad_norm": 1.6922633947680867, "learning_rate": 1.0500000000000001e-06, "loss": 1.1989, "step": 21 }, { "epoch": 0.005440158259149357, "grad_norm": 1.5971190787663854, "learning_rate": 1.1e-06, "loss": 1.1874, "step": 22 }, { "epoch": 0.0056874381800197825, "grad_norm": 1.5631402047790957, "learning_rate": 1.1500000000000002e-06, "loss": 1.187, "step": 23 }, { "epoch": 0.005934718100890208, "grad_norm": 1.4965437093134575, "learning_rate": 1.2000000000000002e-06, "loss": 1.1685, "step": 24 }, { "epoch": 0.006181998021760633, "grad_norm": 1.4346303928048703, "learning_rate": 1.25e-06, "loss": 1.1712, "step": 25 }, { "epoch": 0.006429277942631058, "grad_norm": 1.4575013024426748, "learning_rate": 1.3e-06, "loss": 1.1598, "step": 26 }, { "epoch": 0.0066765578635014835, "grad_norm": 1.328401240687131, "learning_rate": 1.3500000000000002e-06, "loss": 1.1159, "step": 27 }, { "epoch": 0.006923837784371909, "grad_norm": 1.4155504497318074, "learning_rate": 1.4000000000000001e-06, "loss": 1.1471, "step": 28 }, { "epoch": 0.0071711177052423344, "grad_norm": 1.2794817067434705, "learning_rate": 1.45e-06, "loss": 1.1289, "step": 29 }, { "epoch": 0.00741839762611276, "grad_norm": 1.2986984654256946, "learning_rate": 1.5e-06, "loss": 1.1103, "step": 30 }, { "epoch": 0.007665677546983185, "grad_norm": 1.1904601251518023, "learning_rate": 1.5500000000000002e-06, "loss": 1.1071, "step": 31 }, { "epoch": 0.00791295746785361, "grad_norm": 1.211314439028396, "learning_rate": 1.6000000000000001e-06, "loss": 1.1066, "step": 32 }, { "epoch": 0.008160237388724036, "grad_norm": 1.1889594075249947, "learning_rate": 1.6500000000000003e-06, "loss": 1.1016, "step": 33 }, { "epoch": 0.008407517309594461, "grad_norm": 1.144650796083838, "learning_rate": 1.7000000000000002e-06, "loss": 1.0846, "step": 34 }, { "epoch": 0.008654797230464886, "grad_norm": 1.1047081836619403, "learning_rate": 1.75e-06, "loss": 1.052, "step": 35 }, { "epoch": 0.008902077151335312, "grad_norm": 1.1224289563936236, "learning_rate": 1.8000000000000001e-06, "loss": 1.083, "step": 36 }, { "epoch": 0.009149357072205737, "grad_norm": 1.165835379811125, "learning_rate": 1.85e-06, "loss": 1.0743, "step": 37 }, { "epoch": 0.009396636993076163, "grad_norm": 1.2275687643208097, "learning_rate": 1.9000000000000002e-06, "loss": 1.0349, "step": 38 }, { "epoch": 0.009643916913946587, "grad_norm": 1.1972161192753212, "learning_rate": 1.9500000000000004e-06, "loss": 1.016, "step": 39 }, { "epoch": 0.009891196834817012, "grad_norm": 1.082058322089044, "learning_rate": 2.0000000000000003e-06, "loss": 1.0162, "step": 40 }, { "epoch": 0.010138476755687438, "grad_norm": 1.0697734561517596, "learning_rate": 2.05e-06, "loss": 1.002, "step": 41 }, { "epoch": 0.010385756676557863, "grad_norm": 1.0899500742647537, "learning_rate": 2.1000000000000002e-06, "loss": 0.9707, "step": 42 }, { "epoch": 0.01063303659742829, "grad_norm": 1.0591744139232069, "learning_rate": 2.15e-06, "loss": 0.9578, "step": 43 }, { "epoch": 0.010880316518298714, "grad_norm": 1.0360370041311253, "learning_rate": 2.2e-06, "loss": 0.9401, "step": 44 }, { "epoch": 0.01112759643916914, "grad_norm": 0.9882030779618665, "learning_rate": 2.25e-06, "loss": 0.9465, "step": 45 }, { "epoch": 0.011374876360039565, "grad_norm": 1.101171577063728, "learning_rate": 2.3000000000000004e-06, "loss": 0.9889, "step": 46 }, { "epoch": 0.01162215628090999, "grad_norm": 0.967072842509527, "learning_rate": 2.35e-06, "loss": 0.9713, "step": 47 }, { "epoch": 0.011869436201780416, "grad_norm": 0.978495110160977, "learning_rate": 2.4000000000000003e-06, "loss": 0.9296, "step": 48 }, { "epoch": 0.01211671612265084, "grad_norm": 0.9606674955474925, "learning_rate": 2.4500000000000003e-06, "loss": 0.9297, "step": 49 }, { "epoch": 0.012363996043521267, "grad_norm": 1.0561055229672853, "learning_rate": 2.5e-06, "loss": 0.8987, "step": 50 }, { "epoch": 0.012611275964391691, "grad_norm": 0.9661450584899874, "learning_rate": 2.55e-06, "loss": 0.9205, "step": 51 }, { "epoch": 0.012858555885262116, "grad_norm": 0.9327207093078266, "learning_rate": 2.6e-06, "loss": 0.9184, "step": 52 }, { "epoch": 0.013105835806132542, "grad_norm": 0.9416813080118679, "learning_rate": 2.6500000000000005e-06, "loss": 0.9, "step": 53 }, { "epoch": 0.013353115727002967, "grad_norm": 0.8793376312707565, "learning_rate": 2.7000000000000004e-06, "loss": 0.8655, "step": 54 }, { "epoch": 0.013600395647873393, "grad_norm": 0.8190962235195288, "learning_rate": 2.7500000000000004e-06, "loss": 0.8803, "step": 55 }, { "epoch": 0.013847675568743818, "grad_norm": 0.8728021412747534, "learning_rate": 2.8000000000000003e-06, "loss": 0.8669, "step": 56 }, { "epoch": 0.014094955489614243, "grad_norm": 0.945032511017986, "learning_rate": 2.85e-06, "loss": 0.8457, "step": 57 }, { "epoch": 0.014342235410484669, "grad_norm": 0.8824615112963289, "learning_rate": 2.9e-06, "loss": 0.8369, "step": 58 }, { "epoch": 0.014589515331355093, "grad_norm": 0.8431299610759388, "learning_rate": 2.95e-06, "loss": 0.848, "step": 59 }, { "epoch": 0.01483679525222552, "grad_norm": 0.8541187789355988, "learning_rate": 3e-06, "loss": 0.8253, "step": 60 }, { "epoch": 0.015084075173095944, "grad_norm": 0.774863931803644, "learning_rate": 3.05e-06, "loss": 0.8426, "step": 61 }, { "epoch": 0.01533135509396637, "grad_norm": 0.7924045073787536, "learning_rate": 3.1000000000000004e-06, "loss": 0.8175, "step": 62 }, { "epoch": 0.015578635014836795, "grad_norm": 0.7945600224177056, "learning_rate": 3.1500000000000003e-06, "loss": 0.8014, "step": 63 }, { "epoch": 0.01582591493570722, "grad_norm": 0.8081849485275809, "learning_rate": 3.2000000000000003e-06, "loss": 0.7814, "step": 64 }, { "epoch": 0.016073194856577645, "grad_norm": 0.7702852209704323, "learning_rate": 3.2500000000000002e-06, "loss": 0.7893, "step": 65 }, { "epoch": 0.016320474777448073, "grad_norm": 0.7383664978090273, "learning_rate": 3.3000000000000006e-06, "loss": 0.7989, "step": 66 }, { "epoch": 0.016567754698318497, "grad_norm": 0.7436837377486586, "learning_rate": 3.3500000000000005e-06, "loss": 0.8058, "step": 67 }, { "epoch": 0.016815034619188922, "grad_norm": 0.7428905105848088, "learning_rate": 3.4000000000000005e-06, "loss": 0.7963, "step": 68 }, { "epoch": 0.017062314540059347, "grad_norm": 0.75729956851682, "learning_rate": 3.45e-06, "loss": 0.7828, "step": 69 }, { "epoch": 0.01730959446092977, "grad_norm": 0.6918807554019548, "learning_rate": 3.5e-06, "loss": 0.79, "step": 70 }, { "epoch": 0.0175568743818002, "grad_norm": 0.7250929492991405, "learning_rate": 3.5500000000000003e-06, "loss": 0.7984, "step": 71 }, { "epoch": 0.017804154302670624, "grad_norm": 0.7523258703265291, "learning_rate": 3.6000000000000003e-06, "loss": 0.7794, "step": 72 }, { "epoch": 0.01805143422354105, "grad_norm": 0.7596189403247041, "learning_rate": 3.65e-06, "loss": 0.7933, "step": 73 }, { "epoch": 0.018298714144411473, "grad_norm": 0.7098250319729716, "learning_rate": 3.7e-06, "loss": 0.7639, "step": 74 }, { "epoch": 0.018545994065281898, "grad_norm": 0.7325182854684962, "learning_rate": 3.7500000000000005e-06, "loss": 0.7613, "step": 75 }, { "epoch": 0.018793273986152326, "grad_norm": 0.7656692952105425, "learning_rate": 3.8000000000000005e-06, "loss": 0.7481, "step": 76 }, { "epoch": 0.01904055390702275, "grad_norm": 0.7299978668203949, "learning_rate": 3.85e-06, "loss": 0.7839, "step": 77 }, { "epoch": 0.019287833827893175, "grad_norm": 0.7157606053500201, "learning_rate": 3.900000000000001e-06, "loss": 0.7474, "step": 78 }, { "epoch": 0.0195351137487636, "grad_norm": 0.7324944560450859, "learning_rate": 3.95e-06, "loss": 0.763, "step": 79 }, { "epoch": 0.019782393669634024, "grad_norm": 0.7816018694216577, "learning_rate": 4.000000000000001e-06, "loss": 0.7219, "step": 80 }, { "epoch": 0.020029673590504452, "grad_norm": 0.7375563499331299, "learning_rate": 4.05e-06, "loss": 0.7654, "step": 81 }, { "epoch": 0.020276953511374877, "grad_norm": 0.7026975821766478, "learning_rate": 4.1e-06, "loss": 0.7733, "step": 82 }, { "epoch": 0.0205242334322453, "grad_norm": 0.6991770359109263, "learning_rate": 4.15e-06, "loss": 0.7457, "step": 83 }, { "epoch": 0.020771513353115726, "grad_norm": 0.7405828710814198, "learning_rate": 4.2000000000000004e-06, "loss": 0.7471, "step": 84 }, { "epoch": 0.02101879327398615, "grad_norm": 0.7172297475686586, "learning_rate": 4.25e-06, "loss": 0.7201, "step": 85 }, { "epoch": 0.02126607319485658, "grad_norm": 0.7029168304540451, "learning_rate": 4.3e-06, "loss": 0.7559, "step": 86 }, { "epoch": 0.021513353115727003, "grad_norm": 0.723359349050235, "learning_rate": 4.350000000000001e-06, "loss": 0.7287, "step": 87 }, { "epoch": 0.021760633036597428, "grad_norm": 0.7357471244599104, "learning_rate": 4.4e-06, "loss": 0.7218, "step": 88 }, { "epoch": 0.022007912957467853, "grad_norm": 0.7747133612650493, "learning_rate": 4.450000000000001e-06, "loss": 0.7442, "step": 89 }, { "epoch": 0.02225519287833828, "grad_norm": 0.6819171919004213, "learning_rate": 4.5e-06, "loss": 0.7168, "step": 90 }, { "epoch": 0.022502472799208705, "grad_norm": 0.6919857424868104, "learning_rate": 4.5500000000000005e-06, "loss": 0.7271, "step": 91 }, { "epoch": 0.02274975272007913, "grad_norm": 0.7250166917172235, "learning_rate": 4.600000000000001e-06, "loss": 0.7335, "step": 92 }, { "epoch": 0.022997032640949554, "grad_norm": 0.760740711812877, "learning_rate": 4.65e-06, "loss": 0.705, "step": 93 }, { "epoch": 0.02324431256181998, "grad_norm": 0.7159571116471827, "learning_rate": 4.7e-06, "loss": 0.7249, "step": 94 }, { "epoch": 0.023491592482690407, "grad_norm": 0.7112056318387862, "learning_rate": 4.75e-06, "loss": 0.7221, "step": 95 }, { "epoch": 0.02373887240356083, "grad_norm": 0.7407900000388679, "learning_rate": 4.800000000000001e-06, "loss": 0.7094, "step": 96 }, { "epoch": 0.023986152324431256, "grad_norm": 0.72179972775552, "learning_rate": 4.85e-06, "loss": 0.6995, "step": 97 }, { "epoch": 0.02423343224530168, "grad_norm": 0.6561750079352092, "learning_rate": 4.9000000000000005e-06, "loss": 0.6889, "step": 98 }, { "epoch": 0.024480712166172106, "grad_norm": 0.7107165656217939, "learning_rate": 4.95e-06, "loss": 0.7065, "step": 99 }, { "epoch": 0.024727992087042534, "grad_norm": 0.7536703835680636, "learning_rate": 5e-06, "loss": 0.738, "step": 100 }, { "epoch": 0.024975272007912958, "grad_norm": 0.7212863280521654, "learning_rate": 4.999999978871334e-06, "loss": 0.7177, "step": 101 }, { "epoch": 0.025222551928783383, "grad_norm": 0.7418268751267123, "learning_rate": 4.9999999154853315e-06, "loss": 0.7188, "step": 102 }, { "epoch": 0.025469831849653807, "grad_norm": 0.7559916354416649, "learning_rate": 4.999999809841997e-06, "loss": 0.7025, "step": 103 }, { "epoch": 0.025717111770524232, "grad_norm": 0.7811551138440429, "learning_rate": 4.999999661941331e-06, "loss": 0.7073, "step": 104 }, { "epoch": 0.02596439169139466, "grad_norm": 0.7497301361586501, "learning_rate": 4.999999471783337e-06, "loss": 0.6692, "step": 105 }, { "epoch": 0.026211671612265085, "grad_norm": 0.822184378582824, "learning_rate": 4.999999239368016e-06, "loss": 0.7119, "step": 106 }, { "epoch": 0.02645895153313551, "grad_norm": 0.8275732064267906, "learning_rate": 4.999998964695375e-06, "loss": 0.724, "step": 107 }, { "epoch": 0.026706231454005934, "grad_norm": 0.7022572200240244, "learning_rate": 4.9999986477654165e-06, "loss": 0.6907, "step": 108 }, { "epoch": 0.02695351137487636, "grad_norm": 0.733280029857595, "learning_rate": 4.999998288578146e-06, "loss": 0.6759, "step": 109 }, { "epoch": 0.027200791295746787, "grad_norm": 0.7526378507475411, "learning_rate": 4.9999978871335695e-06, "loss": 0.6685, "step": 110 }, { "epoch": 0.02744807121661721, "grad_norm": 0.7597255866628462, "learning_rate": 4.999997443431694e-06, "loss": 0.7168, "step": 111 }, { "epoch": 0.027695351137487636, "grad_norm": 0.7397163297259713, "learning_rate": 4.999996957472529e-06, "loss": 0.6834, "step": 112 }, { "epoch": 0.02794263105835806, "grad_norm": 0.7434766561795023, "learning_rate": 4.999996429256079e-06, "loss": 0.6713, "step": 113 }, { "epoch": 0.028189910979228485, "grad_norm": 0.688054583388141, "learning_rate": 4.9999958587823565e-06, "loss": 0.6771, "step": 114 }, { "epoch": 0.028437190900098913, "grad_norm": 0.7807222470327964, "learning_rate": 4.999995246051368e-06, "loss": 0.6803, "step": 115 }, { "epoch": 0.028684470820969338, "grad_norm": 0.7626145030682552, "learning_rate": 4.999994591063126e-06, "loss": 0.6709, "step": 116 }, { "epoch": 0.028931750741839762, "grad_norm": 0.712857303236686, "learning_rate": 4.999993893817641e-06, "loss": 0.6987, "step": 117 }, { "epoch": 0.029179030662710187, "grad_norm": 0.7175670694950973, "learning_rate": 4.999993154314924e-06, "loss": 0.6656, "step": 118 }, { "epoch": 0.02942631058358061, "grad_norm": 0.8126016444934842, "learning_rate": 4.999992372554989e-06, "loss": 0.7236, "step": 119 }, { "epoch": 0.02967359050445104, "grad_norm": 0.737989445104509, "learning_rate": 4.999991548537848e-06, "loss": 0.6728, "step": 120 }, { "epoch": 0.029920870425321464, "grad_norm": 0.8327590394230312, "learning_rate": 4.999990682263516e-06, "loss": 0.6558, "step": 121 }, { "epoch": 0.03016815034619189, "grad_norm": 0.731868642801384, "learning_rate": 4.999989773732007e-06, "loss": 0.6853, "step": 122 }, { "epoch": 0.030415430267062313, "grad_norm": 0.7396773552222567, "learning_rate": 4.999988822943335e-06, "loss": 0.6737, "step": 123 }, { "epoch": 0.03066271018793274, "grad_norm": 0.8637921752011817, "learning_rate": 4.999987829897519e-06, "loss": 0.6965, "step": 124 }, { "epoch": 0.030909990108803166, "grad_norm": 0.7451831927050838, "learning_rate": 4.999986794594574e-06, "loss": 0.706, "step": 125 }, { "epoch": 0.03115727002967359, "grad_norm": 0.7207943261718842, "learning_rate": 4.999985717034517e-06, "loss": 0.7053, "step": 126 }, { "epoch": 0.031404549950544015, "grad_norm": 0.7839501309721785, "learning_rate": 4.999984597217367e-06, "loss": 0.6709, "step": 127 }, { "epoch": 0.03165182987141444, "grad_norm": 0.7595007430844243, "learning_rate": 4.999983435143142e-06, "loss": 0.6786, "step": 128 }, { "epoch": 0.031899109792284865, "grad_norm": 0.7454986705571136, "learning_rate": 4.999982230811864e-06, "loss": 0.6662, "step": 129 }, { "epoch": 0.03214638971315529, "grad_norm": 0.7112711511564064, "learning_rate": 4.9999809842235515e-06, "loss": 0.6699, "step": 130 }, { "epoch": 0.032393669634025714, "grad_norm": 0.7659959221982169, "learning_rate": 4.999979695378226e-06, "loss": 0.6898, "step": 131 }, { "epoch": 0.032640949554896145, "grad_norm": 0.7641892451832198, "learning_rate": 4.999978364275908e-06, "loss": 0.6594, "step": 132 }, { "epoch": 0.03288822947576657, "grad_norm": 0.7591516753531807, "learning_rate": 4.999976990916622e-06, "loss": 0.6683, "step": 133 }, { "epoch": 0.033135509396636995, "grad_norm": 0.746675595620454, "learning_rate": 4.9999755753003905e-06, "loss": 0.641, "step": 134 }, { "epoch": 0.03338278931750742, "grad_norm": 0.7654447092133061, "learning_rate": 4.999974117427238e-06, "loss": 0.6755, "step": 135 }, { "epoch": 0.033630069238377844, "grad_norm": 0.7675507124577308, "learning_rate": 4.999972617297187e-06, "loss": 0.6462, "step": 136 }, { "epoch": 0.03387734915924827, "grad_norm": 0.7331037612104742, "learning_rate": 4.999971074910266e-06, "loss": 0.683, "step": 137 }, { "epoch": 0.03412462908011869, "grad_norm": 0.7870574637199415, "learning_rate": 4.999969490266498e-06, "loss": 0.651, "step": 138 }, { "epoch": 0.03437190900098912, "grad_norm": 0.7088258626006124, "learning_rate": 4.999967863365912e-06, "loss": 0.6635, "step": 139 }, { "epoch": 0.03461918892185954, "grad_norm": 0.727573343943909, "learning_rate": 4.999966194208534e-06, "loss": 0.6466, "step": 140 }, { "epoch": 0.034866468842729974, "grad_norm": 0.7272152984644323, "learning_rate": 4.999964482794394e-06, "loss": 0.6573, "step": 141 }, { "epoch": 0.0351137487636004, "grad_norm": 0.7802927520295474, "learning_rate": 4.999962729123519e-06, "loss": 0.6306, "step": 142 }, { "epoch": 0.03536102868447082, "grad_norm": 0.7516061535051545, "learning_rate": 4.99996093319594e-06, "loss": 0.6544, "step": 143 }, { "epoch": 0.03560830860534125, "grad_norm": 0.7714071852861921, "learning_rate": 4.9999590950116865e-06, "loss": 0.6428, "step": 144 }, { "epoch": 0.03585558852621167, "grad_norm": 0.7888111176086061, "learning_rate": 4.99995721457079e-06, "loss": 0.6436, "step": 145 }, { "epoch": 0.0361028684470821, "grad_norm": 0.8086998275264007, "learning_rate": 4.999955291873282e-06, "loss": 0.6447, "step": 146 }, { "epoch": 0.03635014836795252, "grad_norm": 0.7680250966004125, "learning_rate": 4.999953326919195e-06, "loss": 0.6522, "step": 147 }, { "epoch": 0.036597428288822946, "grad_norm": 0.7903118774805085, "learning_rate": 4.999951319708562e-06, "loss": 0.6537, "step": 148 }, { "epoch": 0.03684470820969337, "grad_norm": 0.7554875512244171, "learning_rate": 4.999949270241418e-06, "loss": 0.6793, "step": 149 }, { "epoch": 0.037091988130563795, "grad_norm": 0.801330159672659, "learning_rate": 4.999947178517798e-06, "loss": 0.6412, "step": 150 }, { "epoch": 0.03733926805143423, "grad_norm": 0.7520769967798256, "learning_rate": 4.999945044537735e-06, "loss": 0.6612, "step": 151 }, { "epoch": 0.03758654797230465, "grad_norm": 0.7142492113389125, "learning_rate": 4.999942868301266e-06, "loss": 0.6648, "step": 152 }, { "epoch": 0.037833827893175076, "grad_norm": 0.7702143241683127, "learning_rate": 4.999940649808429e-06, "loss": 0.6117, "step": 153 }, { "epoch": 0.0380811078140455, "grad_norm": 0.7774985935001439, "learning_rate": 4.999938389059261e-06, "loss": 0.6226, "step": 154 }, { "epoch": 0.038328387734915925, "grad_norm": 0.7960037248541736, "learning_rate": 4.999936086053799e-06, "loss": 0.6324, "step": 155 }, { "epoch": 0.03857566765578635, "grad_norm": 0.7699076528180318, "learning_rate": 4.9999337407920836e-06, "loss": 0.6512, "step": 156 }, { "epoch": 0.038822947576656774, "grad_norm": 0.8048775835752526, "learning_rate": 4.999931353274153e-06, "loss": 0.6475, "step": 157 }, { "epoch": 0.0390702274975272, "grad_norm": 0.8160115405346294, "learning_rate": 4.9999289235000495e-06, "loss": 0.6422, "step": 158 }, { "epoch": 0.039317507418397624, "grad_norm": 0.7275555749791285, "learning_rate": 4.9999264514698124e-06, "loss": 0.6369, "step": 159 }, { "epoch": 0.03956478733926805, "grad_norm": 0.7689207556493872, "learning_rate": 4.999923937183483e-06, "loss": 0.6508, "step": 160 }, { "epoch": 0.03981206726013848, "grad_norm": 0.8348108853775594, "learning_rate": 4.999921380641105e-06, "loss": 0.6671, "step": 161 }, { "epoch": 0.040059347181008904, "grad_norm": 0.7248271704801716, "learning_rate": 4.999918781842722e-06, "loss": 0.6491, "step": 162 }, { "epoch": 0.04030662710187933, "grad_norm": 0.7315284811526714, "learning_rate": 4.999916140788377e-06, "loss": 0.659, "step": 163 }, { "epoch": 0.040553907022749754, "grad_norm": 0.8578196886547811, "learning_rate": 4.999913457478115e-06, "loss": 0.6436, "step": 164 }, { "epoch": 0.04080118694362018, "grad_norm": 0.8963614849939332, "learning_rate": 4.999910731911981e-06, "loss": 0.6514, "step": 165 }, { "epoch": 0.0410484668644906, "grad_norm": 0.7739062484186896, "learning_rate": 4.999907964090022e-06, "loss": 0.6104, "step": 166 }, { "epoch": 0.04129574678536103, "grad_norm": 0.8074813391715496, "learning_rate": 4.999905154012284e-06, "loss": 0.617, "step": 167 }, { "epoch": 0.04154302670623145, "grad_norm": 0.7762599913589127, "learning_rate": 4.999902301678815e-06, "loss": 0.6375, "step": 168 }, { "epoch": 0.04179030662710188, "grad_norm": 0.7748970536822192, "learning_rate": 4.999899407089662e-06, "loss": 0.6184, "step": 169 }, { "epoch": 0.0420375865479723, "grad_norm": 0.7558565035852893, "learning_rate": 4.999896470244875e-06, "loss": 0.6369, "step": 170 }, { "epoch": 0.04228486646884273, "grad_norm": 0.8341248646777939, "learning_rate": 4.999893491144504e-06, "loss": 0.6418, "step": 171 }, { "epoch": 0.04253214638971316, "grad_norm": 0.7710490534111422, "learning_rate": 4.999890469788598e-06, "loss": 0.6382, "step": 172 }, { "epoch": 0.04277942631058358, "grad_norm": 0.7753712584186033, "learning_rate": 4.99988740617721e-06, "loss": 0.6553, "step": 173 }, { "epoch": 0.04302670623145401, "grad_norm": 0.8067411687298269, "learning_rate": 4.999884300310389e-06, "loss": 0.6542, "step": 174 }, { "epoch": 0.04327398615232443, "grad_norm": 0.7965266139793583, "learning_rate": 4.999881152188191e-06, "loss": 0.6297, "step": 175 }, { "epoch": 0.043521266073194856, "grad_norm": 0.7800110890457195, "learning_rate": 4.999877961810667e-06, "loss": 0.6328, "step": 176 }, { "epoch": 0.04376854599406528, "grad_norm": 0.7555029707902032, "learning_rate": 4.99987472917787e-06, "loss": 0.6567, "step": 177 }, { "epoch": 0.044015825914935705, "grad_norm": 0.8247765083336221, "learning_rate": 4.9998714542898566e-06, "loss": 0.6552, "step": 178 }, { "epoch": 0.04426310583580613, "grad_norm": 0.7853035050103945, "learning_rate": 4.999868137146682e-06, "loss": 0.6487, "step": 179 }, { "epoch": 0.04451038575667656, "grad_norm": 0.792588023124722, "learning_rate": 4.999864777748401e-06, "loss": 0.6343, "step": 180 }, { "epoch": 0.044757665677546986, "grad_norm": 0.7562657466952013, "learning_rate": 4.999861376095072e-06, "loss": 0.6372, "step": 181 }, { "epoch": 0.04500494559841741, "grad_norm": 0.7725421864498705, "learning_rate": 4.999857932186751e-06, "loss": 0.5965, "step": 182 }, { "epoch": 0.045252225519287835, "grad_norm": 0.801959316358281, "learning_rate": 4.999854446023496e-06, "loss": 0.6316, "step": 183 }, { "epoch": 0.04549950544015826, "grad_norm": 0.7854145905631205, "learning_rate": 4.999850917605369e-06, "loss": 0.6432, "step": 184 }, { "epoch": 0.045746785361028684, "grad_norm": 0.7625652300415934, "learning_rate": 4.999847346932426e-06, "loss": 0.6409, "step": 185 }, { "epoch": 0.04599406528189911, "grad_norm": 0.7070242573198564, "learning_rate": 4.999843734004729e-06, "loss": 0.6081, "step": 186 }, { "epoch": 0.046241345202769533, "grad_norm": 0.843090988301474, "learning_rate": 4.999840078822339e-06, "loss": 0.6494, "step": 187 }, { "epoch": 0.04648862512363996, "grad_norm": 0.7812267791142424, "learning_rate": 4.9998363813853175e-06, "loss": 0.6243, "step": 188 }, { "epoch": 0.04673590504451038, "grad_norm": 0.7932584261056079, "learning_rate": 4.999832641693727e-06, "loss": 0.6167, "step": 189 }, { "epoch": 0.046983184965380814, "grad_norm": 0.7886667271093307, "learning_rate": 4.999828859747631e-06, "loss": 0.6095, "step": 190 }, { "epoch": 0.04723046488625124, "grad_norm": 0.8023765397410424, "learning_rate": 4.999825035547093e-06, "loss": 0.6256, "step": 191 }, { "epoch": 0.04747774480712166, "grad_norm": 0.7820939097202719, "learning_rate": 4.999821169092178e-06, "loss": 0.6266, "step": 192 }, { "epoch": 0.04772502472799209, "grad_norm": 0.7864299627678912, "learning_rate": 4.9998172603829515e-06, "loss": 0.6168, "step": 193 }, { "epoch": 0.04797230464886251, "grad_norm": 0.7551224745595048, "learning_rate": 4.999813309419479e-06, "loss": 0.6034, "step": 194 }, { "epoch": 0.04821958456973294, "grad_norm": 0.7538626457900578, "learning_rate": 4.999809316201828e-06, "loss": 0.6154, "step": 195 }, { "epoch": 0.04846686449060336, "grad_norm": 0.7449552121210857, "learning_rate": 4.999805280730066e-06, "loss": 0.6332, "step": 196 }, { "epoch": 0.048714144411473786, "grad_norm": 0.8177138196697615, "learning_rate": 4.99980120300426e-06, "loss": 0.6385, "step": 197 }, { "epoch": 0.04896142433234421, "grad_norm": 0.7760055206345433, "learning_rate": 4.99979708302448e-06, "loss": 0.6227, "step": 198 }, { "epoch": 0.049208704253214636, "grad_norm": 0.7599944781350494, "learning_rate": 4.999792920790795e-06, "loss": 0.5753, "step": 199 }, { "epoch": 0.04945598417408507, "grad_norm": 0.7513478755596612, "learning_rate": 4.999788716303276e-06, "loss": 0.5782, "step": 200 }, { "epoch": 0.04970326409495549, "grad_norm": 0.7969737018661929, "learning_rate": 4.999784469561994e-06, "loss": 0.6112, "step": 201 }, { "epoch": 0.049950544015825916, "grad_norm": 0.7685838090874655, "learning_rate": 4.9997801805670204e-06, "loss": 0.6047, "step": 202 }, { "epoch": 0.05019782393669634, "grad_norm": 0.7733765759727036, "learning_rate": 4.9997758493184276e-06, "loss": 0.6378, "step": 203 }, { "epoch": 0.050445103857566766, "grad_norm": 0.7884015810991322, "learning_rate": 4.99977147581629e-06, "loss": 0.6162, "step": 204 }, { "epoch": 0.05069238377843719, "grad_norm": 0.7876715243321647, "learning_rate": 4.999767060060679e-06, "loss": 0.6017, "step": 205 }, { "epoch": 0.050939663699307615, "grad_norm": 0.76121613723978, "learning_rate": 4.999762602051673e-06, "loss": 0.6229, "step": 206 }, { "epoch": 0.05118694362017804, "grad_norm": 0.7837425671543329, "learning_rate": 4.9997581017893436e-06, "loss": 0.6376, "step": 207 }, { "epoch": 0.051434223541048464, "grad_norm": 0.7176819978947346, "learning_rate": 4.999753559273769e-06, "loss": 0.6102, "step": 208 }, { "epoch": 0.05168150346191889, "grad_norm": 0.7929682985852649, "learning_rate": 4.999748974505026e-06, "loss": 0.619, "step": 209 }, { "epoch": 0.05192878338278932, "grad_norm": 0.8166155545756911, "learning_rate": 4.999744347483191e-06, "loss": 0.5881, "step": 210 }, { "epoch": 0.052176063303659745, "grad_norm": 0.7925632446286965, "learning_rate": 4.999739678208343e-06, "loss": 0.6097, "step": 211 }, { "epoch": 0.05242334322453017, "grad_norm": 0.7724585651765387, "learning_rate": 4.99973496668056e-06, "loss": 0.6133, "step": 212 }, { "epoch": 0.052670623145400594, "grad_norm": 0.9395217619393431, "learning_rate": 4.999730212899923e-06, "loss": 0.5962, "step": 213 }, { "epoch": 0.05291790306627102, "grad_norm": 0.7846009290846587, "learning_rate": 4.999725416866512e-06, "loss": 0.5853, "step": 214 }, { "epoch": 0.05316518298714144, "grad_norm": 0.7538698383114204, "learning_rate": 4.999720578580407e-06, "loss": 0.6018, "step": 215 }, { "epoch": 0.05341246290801187, "grad_norm": 0.811056416448552, "learning_rate": 4.999715698041691e-06, "loss": 0.6114, "step": 216 }, { "epoch": 0.05365974282888229, "grad_norm": 0.8205077516037962, "learning_rate": 4.999710775250446e-06, "loss": 0.6306, "step": 217 }, { "epoch": 0.05390702274975272, "grad_norm": 0.8557907074033965, "learning_rate": 4.999705810206755e-06, "loss": 0.607, "step": 218 }, { "epoch": 0.05415430267062315, "grad_norm": 0.8333258626700865, "learning_rate": 4.999700802910702e-06, "loss": 0.6412, "step": 219 }, { "epoch": 0.05440158259149357, "grad_norm": 0.7928603799058208, "learning_rate": 4.999695753362372e-06, "loss": 0.6132, "step": 220 }, { "epoch": 0.054648862512364, "grad_norm": 0.809471093310714, "learning_rate": 4.99969066156185e-06, "loss": 0.5894, "step": 221 }, { "epoch": 0.05489614243323442, "grad_norm": 0.748135133878315, "learning_rate": 4.999685527509223e-06, "loss": 0.5785, "step": 222 }, { "epoch": 0.05514342235410485, "grad_norm": 0.7994273959936483, "learning_rate": 4.9996803512045756e-06, "loss": 0.611, "step": 223 }, { "epoch": 0.05539070227497527, "grad_norm": 0.8196600789722335, "learning_rate": 4.999675132647998e-06, "loss": 0.6116, "step": 224 }, { "epoch": 0.055637982195845696, "grad_norm": 0.7909171176460179, "learning_rate": 4.999669871839577e-06, "loss": 0.6217, "step": 225 }, { "epoch": 0.05588526211671612, "grad_norm": 0.8567648736166711, "learning_rate": 4.999664568779401e-06, "loss": 0.6112, "step": 226 }, { "epoch": 0.056132542037586546, "grad_norm": 0.7690407907423776, "learning_rate": 4.99965922346756e-06, "loss": 0.5871, "step": 227 }, { "epoch": 0.05637982195845697, "grad_norm": 0.7444628727728402, "learning_rate": 4.999653835904145e-06, "loss": 0.5681, "step": 228 }, { "epoch": 0.0566271018793274, "grad_norm": 0.7607893092647576, "learning_rate": 4.999648406089247e-06, "loss": 0.6139, "step": 229 }, { "epoch": 0.056874381800197826, "grad_norm": 0.7777597816934657, "learning_rate": 4.999642934022957e-06, "loss": 0.6332, "step": 230 }, { "epoch": 0.05712166172106825, "grad_norm": 0.82148546775061, "learning_rate": 4.999637419705369e-06, "loss": 0.5841, "step": 231 }, { "epoch": 0.057368941641938676, "grad_norm": 0.8010419071826838, "learning_rate": 4.9996318631365735e-06, "loss": 0.5925, "step": 232 }, { "epoch": 0.0576162215628091, "grad_norm": 0.7530690429731, "learning_rate": 4.9996262643166674e-06, "loss": 0.5951, "step": 233 }, { "epoch": 0.057863501483679525, "grad_norm": 0.7675728737861399, "learning_rate": 4.999620623245743e-06, "loss": 0.5849, "step": 234 }, { "epoch": 0.05811078140454995, "grad_norm": 0.7724654094858604, "learning_rate": 4.999614939923897e-06, "loss": 0.6169, "step": 235 }, { "epoch": 0.058358061325420374, "grad_norm": 0.7978696650914271, "learning_rate": 4.999609214351226e-06, "loss": 0.5964, "step": 236 }, { "epoch": 0.0586053412462908, "grad_norm": 0.7721298767744407, "learning_rate": 4.999603446527826e-06, "loss": 0.6255, "step": 237 }, { "epoch": 0.05885262116716122, "grad_norm": 0.7488208157668992, "learning_rate": 4.999597636453793e-06, "loss": 0.5971, "step": 238 }, { "epoch": 0.059099901088031655, "grad_norm": 0.8049217267637012, "learning_rate": 4.999591784129228e-06, "loss": 0.6221, "step": 239 }, { "epoch": 0.05934718100890208, "grad_norm": 0.7979752646183047, "learning_rate": 4.999585889554227e-06, "loss": 0.6165, "step": 240 }, { "epoch": 0.059594460929772504, "grad_norm": 0.7448820329141358, "learning_rate": 4.999579952728892e-06, "loss": 0.6154, "step": 241 }, { "epoch": 0.05984174085064293, "grad_norm": 0.8083942465217746, "learning_rate": 4.999573973653322e-06, "loss": 0.6086, "step": 242 }, { "epoch": 0.06008902077151335, "grad_norm": 0.7606973644680061, "learning_rate": 4.99956795232762e-06, "loss": 0.5945, "step": 243 }, { "epoch": 0.06033630069238378, "grad_norm": 0.812611247006916, "learning_rate": 4.999561888751885e-06, "loss": 0.5904, "step": 244 }, { "epoch": 0.0605835806132542, "grad_norm": 0.7727468846105328, "learning_rate": 4.9995557829262215e-06, "loss": 0.61, "step": 245 }, { "epoch": 0.06083086053412463, "grad_norm": 0.8133475657364307, "learning_rate": 4.999549634850732e-06, "loss": 0.5946, "step": 246 }, { "epoch": 0.06107814045499505, "grad_norm": 0.815731831489138, "learning_rate": 4.99954344452552e-06, "loss": 0.584, "step": 247 }, { "epoch": 0.06132542037586548, "grad_norm": 0.828023713294489, "learning_rate": 4.999537211950692e-06, "loss": 0.5963, "step": 248 }, { "epoch": 0.06157270029673591, "grad_norm": 0.7659802540309084, "learning_rate": 4.99953093712635e-06, "loss": 0.601, "step": 249 }, { "epoch": 0.06181998021760633, "grad_norm": 0.8129348699504775, "learning_rate": 4.999524620052603e-06, "loss": 0.5957, "step": 250 }, { "epoch": 0.06206726013847676, "grad_norm": 0.8344335464060704, "learning_rate": 4.999518260729557e-06, "loss": 0.5834, "step": 251 }, { "epoch": 0.06231454005934718, "grad_norm": 0.8014316640205381, "learning_rate": 4.999511859157319e-06, "loss": 0.5922, "step": 252 }, { "epoch": 0.0625618199802176, "grad_norm": 0.7753876743359669, "learning_rate": 4.999505415335998e-06, "loss": 0.5933, "step": 253 }, { "epoch": 0.06280909990108803, "grad_norm": 0.7817463507788848, "learning_rate": 4.9994989292657015e-06, "loss": 0.6109, "step": 254 }, { "epoch": 0.06305637982195846, "grad_norm": 0.828120989023736, "learning_rate": 4.99949240094654e-06, "loss": 0.6008, "step": 255 }, { "epoch": 0.06330365974282888, "grad_norm": 0.8407040487143337, "learning_rate": 4.999485830378625e-06, "loss": 0.5813, "step": 256 }, { "epoch": 0.0635509396636993, "grad_norm": 0.8114133897332183, "learning_rate": 4.999479217562066e-06, "loss": 0.5617, "step": 257 }, { "epoch": 0.06379821958456973, "grad_norm": 0.8420383888196065, "learning_rate": 4.999472562496975e-06, "loss": 0.5782, "step": 258 }, { "epoch": 0.06404549950544015, "grad_norm": 0.8146897788412562, "learning_rate": 4.999465865183465e-06, "loss": 0.6189, "step": 259 }, { "epoch": 0.06429277942631058, "grad_norm": 0.8609511362836474, "learning_rate": 4.999459125621649e-06, "loss": 0.6164, "step": 260 }, { "epoch": 0.064540059347181, "grad_norm": 0.8348638690296436, "learning_rate": 4.99945234381164e-06, "loss": 0.5344, "step": 261 }, { "epoch": 0.06478733926805143, "grad_norm": 0.772480507007813, "learning_rate": 4.999445519753555e-06, "loss": 0.5732, "step": 262 }, { "epoch": 0.06503461918892187, "grad_norm": 0.7745975874520878, "learning_rate": 4.999438653447507e-06, "loss": 0.5781, "step": 263 }, { "epoch": 0.06528189910979229, "grad_norm": 0.8688380494260995, "learning_rate": 4.999431744893613e-06, "loss": 0.5795, "step": 264 }, { "epoch": 0.06552917903066272, "grad_norm": 0.8584578845191142, "learning_rate": 4.999424794091989e-06, "loss": 0.6207, "step": 265 }, { "epoch": 0.06577645895153314, "grad_norm": 0.8130950950717318, "learning_rate": 4.9994178010427544e-06, "loss": 0.5666, "step": 266 }, { "epoch": 0.06602373887240356, "grad_norm": 0.8060107564461553, "learning_rate": 4.999410765746026e-06, "loss": 0.5419, "step": 267 }, { "epoch": 0.06627101879327399, "grad_norm": 0.7783985362196268, "learning_rate": 4.999403688201921e-06, "loss": 0.5613, "step": 268 }, { "epoch": 0.06651829871414441, "grad_norm": 0.8888599834984435, "learning_rate": 4.999396568410563e-06, "loss": 0.5607, "step": 269 }, { "epoch": 0.06676557863501484, "grad_norm": 0.8652022418369328, "learning_rate": 4.999389406372069e-06, "loss": 0.6023, "step": 270 }, { "epoch": 0.06701285855588526, "grad_norm": 0.8232756652188512, "learning_rate": 4.999382202086562e-06, "loss": 0.6064, "step": 271 }, { "epoch": 0.06726013847675569, "grad_norm": 0.783102206467771, "learning_rate": 4.9993749555541635e-06, "loss": 0.5697, "step": 272 }, { "epoch": 0.06750741839762611, "grad_norm": 0.8069785228514532, "learning_rate": 4.999367666774995e-06, "loss": 0.6105, "step": 273 }, { "epoch": 0.06775469831849654, "grad_norm": 0.7893414940765818, "learning_rate": 4.99936033574918e-06, "loss": 0.6075, "step": 274 }, { "epoch": 0.06800197823936696, "grad_norm": 0.8117544067424143, "learning_rate": 4.999352962476843e-06, "loss": 0.6306, "step": 275 }, { "epoch": 0.06824925816023739, "grad_norm": 0.7939328491906092, "learning_rate": 4.999345546958109e-06, "loss": 0.5861, "step": 276 }, { "epoch": 0.06849653808110781, "grad_norm": 0.8271668917371283, "learning_rate": 4.999338089193102e-06, "loss": 0.5754, "step": 277 }, { "epoch": 0.06874381800197824, "grad_norm": 0.8290077242370982, "learning_rate": 4.999330589181948e-06, "loss": 0.581, "step": 278 }, { "epoch": 0.06899109792284866, "grad_norm": 0.8239908337223516, "learning_rate": 4.999323046924776e-06, "loss": 0.5883, "step": 279 }, { "epoch": 0.06923837784371908, "grad_norm": 0.7867379951699298, "learning_rate": 4.999315462421711e-06, "loss": 0.5676, "step": 280 }, { "epoch": 0.06948565776458951, "grad_norm": 0.794330642065807, "learning_rate": 4.9993078356728816e-06, "loss": 0.5744, "step": 281 }, { "epoch": 0.06973293768545995, "grad_norm": 0.7920567183819462, "learning_rate": 4.999300166678419e-06, "loss": 0.5884, "step": 282 }, { "epoch": 0.06998021760633037, "grad_norm": 0.7934949697459773, "learning_rate": 4.99929245543845e-06, "loss": 0.6065, "step": 283 }, { "epoch": 0.0702274975272008, "grad_norm": 0.8421805349854998, "learning_rate": 4.999284701953106e-06, "loss": 0.5577, "step": 284 }, { "epoch": 0.07047477744807122, "grad_norm": 0.8124908525725804, "learning_rate": 4.9992769062225185e-06, "loss": 0.6129, "step": 285 }, { "epoch": 0.07072205736894165, "grad_norm": 0.7973073951210279, "learning_rate": 4.999269068246818e-06, "loss": 0.5694, "step": 286 }, { "epoch": 0.07096933728981207, "grad_norm": 0.8076738575720998, "learning_rate": 4.999261188026139e-06, "loss": 0.5669, "step": 287 }, { "epoch": 0.0712166172106825, "grad_norm": 0.7949443064726974, "learning_rate": 4.999253265560614e-06, "loss": 0.5859, "step": 288 }, { "epoch": 0.07146389713155292, "grad_norm": 0.785428609248276, "learning_rate": 4.999245300850375e-06, "loss": 0.5573, "step": 289 }, { "epoch": 0.07171117705242334, "grad_norm": 0.787601105135159, "learning_rate": 4.9992372938955595e-06, "loss": 0.5658, "step": 290 }, { "epoch": 0.07195845697329377, "grad_norm": 0.8181542303646591, "learning_rate": 4.999229244696301e-06, "loss": 0.5991, "step": 291 }, { "epoch": 0.0722057368941642, "grad_norm": 0.8058133731985243, "learning_rate": 4.9992211532527355e-06, "loss": 0.5862, "step": 292 }, { "epoch": 0.07245301681503462, "grad_norm": 0.7995192339831612, "learning_rate": 4.999213019565001e-06, "loss": 0.5738, "step": 293 }, { "epoch": 0.07270029673590504, "grad_norm": 0.83397514939666, "learning_rate": 4.999204843633234e-06, "loss": 0.6086, "step": 294 }, { "epoch": 0.07294757665677547, "grad_norm": 0.8022353428397413, "learning_rate": 4.9991966254575726e-06, "loss": 0.5817, "step": 295 }, { "epoch": 0.07319485657764589, "grad_norm": 0.7857047281743428, "learning_rate": 4.999188365038156e-06, "loss": 0.5678, "step": 296 }, { "epoch": 0.07344213649851632, "grad_norm": 0.8338345529151808, "learning_rate": 4.999180062375124e-06, "loss": 0.5902, "step": 297 }, { "epoch": 0.07368941641938674, "grad_norm": 0.788795960009146, "learning_rate": 4.999171717468617e-06, "loss": 0.5621, "step": 298 }, { "epoch": 0.07393669634025717, "grad_norm": 0.8007543016695616, "learning_rate": 4.999163330318777e-06, "loss": 0.5909, "step": 299 }, { "epoch": 0.07418397626112759, "grad_norm": 0.793092974341733, "learning_rate": 4.999154900925743e-06, "loss": 0.6027, "step": 300 }, { "epoch": 0.07443125618199802, "grad_norm": 0.7973449991777471, "learning_rate": 4.99914642928966e-06, "loss": 0.5768, "step": 301 }, { "epoch": 0.07467853610286845, "grad_norm": 0.8095789680023529, "learning_rate": 4.99913791541067e-06, "loss": 0.5654, "step": 302 }, { "epoch": 0.07492581602373888, "grad_norm": 0.8960253123671273, "learning_rate": 4.9991293592889174e-06, "loss": 0.5943, "step": 303 }, { "epoch": 0.0751730959446093, "grad_norm": 0.8324065932447318, "learning_rate": 4.999120760924547e-06, "loss": 0.5958, "step": 304 }, { "epoch": 0.07542037586547973, "grad_norm": 0.7774414096188346, "learning_rate": 4.999112120317703e-06, "loss": 0.5753, "step": 305 }, { "epoch": 0.07566765578635015, "grad_norm": 0.7876740890991246, "learning_rate": 4.9991034374685335e-06, "loss": 0.5706, "step": 306 }, { "epoch": 0.07591493570722058, "grad_norm": 0.8560375531506265, "learning_rate": 4.9990947123771825e-06, "loss": 0.5766, "step": 307 }, { "epoch": 0.076162215628091, "grad_norm": 0.8459626380953854, "learning_rate": 4.9990859450438e-06, "loss": 0.567, "step": 308 }, { "epoch": 0.07640949554896143, "grad_norm": 0.7880315987652132, "learning_rate": 4.999077135468533e-06, "loss": 0.5995, "step": 309 }, { "epoch": 0.07665677546983185, "grad_norm": 0.8955800578597224, "learning_rate": 4.9990682836515305e-06, "loss": 0.5843, "step": 310 }, { "epoch": 0.07690405539070228, "grad_norm": 0.8617447390840817, "learning_rate": 4.999059389592943e-06, "loss": 0.5698, "step": 311 }, { "epoch": 0.0771513353115727, "grad_norm": 0.8222678465858798, "learning_rate": 4.999050453292918e-06, "loss": 0.5792, "step": 312 }, { "epoch": 0.07739861523244312, "grad_norm": 0.8425038084425801, "learning_rate": 4.999041474751611e-06, "loss": 0.5654, "step": 313 }, { "epoch": 0.07764589515331355, "grad_norm": 0.7939986484417906, "learning_rate": 4.999032453969171e-06, "loss": 0.5844, "step": 314 }, { "epoch": 0.07789317507418397, "grad_norm": 0.8068900357493951, "learning_rate": 4.999023390945749e-06, "loss": 0.5842, "step": 315 }, { "epoch": 0.0781404549950544, "grad_norm": 0.8467336955386116, "learning_rate": 4.9990142856815015e-06, "loss": 0.5829, "step": 316 }, { "epoch": 0.07838773491592482, "grad_norm": 0.833281068277134, "learning_rate": 4.999005138176581e-06, "loss": 0.5519, "step": 317 }, { "epoch": 0.07863501483679525, "grad_norm": 0.8225381669617556, "learning_rate": 4.9989959484311415e-06, "loss": 0.5421, "step": 318 }, { "epoch": 0.07888229475766567, "grad_norm": 0.7395261056652702, "learning_rate": 4.998986716445339e-06, "loss": 0.5629, "step": 319 }, { "epoch": 0.0791295746785361, "grad_norm": 0.7829197261891223, "learning_rate": 4.99897744221933e-06, "loss": 0.5359, "step": 320 }, { "epoch": 0.07937685459940653, "grad_norm": 0.7979559607833192, "learning_rate": 4.998968125753271e-06, "loss": 0.5674, "step": 321 }, { "epoch": 0.07962413452027696, "grad_norm": 0.8490795677253689, "learning_rate": 4.998958767047319e-06, "loss": 0.5803, "step": 322 }, { "epoch": 0.07987141444114738, "grad_norm": 0.8033454860164779, "learning_rate": 4.998949366101631e-06, "loss": 0.577, "step": 323 }, { "epoch": 0.08011869436201781, "grad_norm": 0.8513966976989501, "learning_rate": 4.998939922916368e-06, "loss": 0.6031, "step": 324 }, { "epoch": 0.08036597428288823, "grad_norm": 0.86730395021255, "learning_rate": 4.998930437491689e-06, "loss": 0.5957, "step": 325 }, { "epoch": 0.08061325420375866, "grad_norm": 0.742412468911964, "learning_rate": 4.9989209098277545e-06, "loss": 0.5954, "step": 326 }, { "epoch": 0.08086053412462908, "grad_norm": 0.7672495419030062, "learning_rate": 4.998911339924726e-06, "loss": 0.5563, "step": 327 }, { "epoch": 0.08110781404549951, "grad_norm": 0.7723454315350652, "learning_rate": 4.998901727782763e-06, "loss": 0.5604, "step": 328 }, { "epoch": 0.08135509396636993, "grad_norm": 0.7687143526719443, "learning_rate": 4.99889207340203e-06, "loss": 0.575, "step": 329 }, { "epoch": 0.08160237388724036, "grad_norm": 0.8232392369424539, "learning_rate": 4.99888237678269e-06, "loss": 0.5772, "step": 330 }, { "epoch": 0.08184965380811078, "grad_norm": 0.7766370860070179, "learning_rate": 4.998872637924906e-06, "loss": 0.6117, "step": 331 }, { "epoch": 0.0820969337289812, "grad_norm": 0.7777679878898526, "learning_rate": 4.998862856828844e-06, "loss": 0.5678, "step": 332 }, { "epoch": 0.08234421364985163, "grad_norm": 0.8402362535184865, "learning_rate": 4.998853033494668e-06, "loss": 0.5627, "step": 333 }, { "epoch": 0.08259149357072205, "grad_norm": 0.8106043298842506, "learning_rate": 4.998843167922546e-06, "loss": 0.6011, "step": 334 }, { "epoch": 0.08283877349159248, "grad_norm": 0.8036741854165277, "learning_rate": 4.998833260112642e-06, "loss": 0.5678, "step": 335 }, { "epoch": 0.0830860534124629, "grad_norm": 0.8067745020147583, "learning_rate": 4.998823310065125e-06, "loss": 0.5808, "step": 336 }, { "epoch": 0.08333333333333333, "grad_norm": 0.7877781611533857, "learning_rate": 4.9988133177801625e-06, "loss": 0.5735, "step": 337 }, { "epoch": 0.08358061325420375, "grad_norm": 0.8373696159220352, "learning_rate": 4.9988032832579245e-06, "loss": 0.5613, "step": 338 }, { "epoch": 0.08382789317507418, "grad_norm": 0.8908331464192217, "learning_rate": 4.99879320649858e-06, "loss": 0.5427, "step": 339 }, { "epoch": 0.0840751730959446, "grad_norm": 0.7798491353155664, "learning_rate": 4.9987830875022995e-06, "loss": 0.5979, "step": 340 }, { "epoch": 0.08432245301681504, "grad_norm": 0.7836443525132174, "learning_rate": 4.998772926269254e-06, "loss": 0.5554, "step": 341 }, { "epoch": 0.08456973293768547, "grad_norm": 0.8875330119592878, "learning_rate": 4.998762722799615e-06, "loss": 0.5773, "step": 342 }, { "epoch": 0.08481701285855589, "grad_norm": 0.8458805310586042, "learning_rate": 4.9987524770935546e-06, "loss": 0.5576, "step": 343 }, { "epoch": 0.08506429277942631, "grad_norm": 0.8266480197072542, "learning_rate": 4.998742189151247e-06, "loss": 0.5821, "step": 344 }, { "epoch": 0.08531157270029674, "grad_norm": 0.7601244118882214, "learning_rate": 4.998731858972865e-06, "loss": 0.5631, "step": 345 }, { "epoch": 0.08555885262116716, "grad_norm": 0.8861885854250049, "learning_rate": 4.998721486558584e-06, "loss": 0.556, "step": 346 }, { "epoch": 0.08580613254203759, "grad_norm": 0.8790472177377368, "learning_rate": 4.998711071908579e-06, "loss": 0.5602, "step": 347 }, { "epoch": 0.08605341246290801, "grad_norm": 0.7871616565771263, "learning_rate": 4.998700615023027e-06, "loss": 0.5881, "step": 348 }, { "epoch": 0.08630069238377844, "grad_norm": 0.8602679302620617, "learning_rate": 4.9986901159021036e-06, "loss": 0.5692, "step": 349 }, { "epoch": 0.08654797230464886, "grad_norm": 0.869429029408227, "learning_rate": 4.998679574545986e-06, "loss": 0.555, "step": 350 }, { "epoch": 0.08679525222551929, "grad_norm": 0.8357333919438975, "learning_rate": 4.998668990954854e-06, "loss": 0.5494, "step": 351 }, { "epoch": 0.08704253214638971, "grad_norm": 0.8133863569483283, "learning_rate": 4.998658365128884e-06, "loss": 0.5666, "step": 352 }, { "epoch": 0.08728981206726014, "grad_norm": 0.8325395334428511, "learning_rate": 4.998647697068258e-06, "loss": 0.5591, "step": 353 }, { "epoch": 0.08753709198813056, "grad_norm": 0.8366416586559107, "learning_rate": 4.998636986773156e-06, "loss": 0.5428, "step": 354 }, { "epoch": 0.08778437190900099, "grad_norm": 0.8180821666277713, "learning_rate": 4.9986262342437566e-06, "loss": 0.5565, "step": 355 }, { "epoch": 0.08803165182987141, "grad_norm": 0.8302958602395372, "learning_rate": 4.9986154394802445e-06, "loss": 0.5754, "step": 356 }, { "epoch": 0.08827893175074183, "grad_norm": 0.908023409390746, "learning_rate": 4.998604602482801e-06, "loss": 0.5517, "step": 357 }, { "epoch": 0.08852621167161226, "grad_norm": 0.8461839551694633, "learning_rate": 4.998593723251609e-06, "loss": 0.5716, "step": 358 }, { "epoch": 0.08877349159248268, "grad_norm": 0.863219314588194, "learning_rate": 4.9985828017868534e-06, "loss": 0.5403, "step": 359 }, { "epoch": 0.08902077151335312, "grad_norm": 0.8146799690434612, "learning_rate": 4.998571838088717e-06, "loss": 0.5643, "step": 360 }, { "epoch": 0.08926805143422355, "grad_norm": 0.7935582497855002, "learning_rate": 4.9985608321573864e-06, "loss": 0.5698, "step": 361 }, { "epoch": 0.08951533135509397, "grad_norm": 0.8476507277896043, "learning_rate": 4.998549783993048e-06, "loss": 0.5528, "step": 362 }, { "epoch": 0.0897626112759644, "grad_norm": 0.8208621602814976, "learning_rate": 4.998538693595888e-06, "loss": 0.5786, "step": 363 }, { "epoch": 0.09000989119683482, "grad_norm": 0.882433464188905, "learning_rate": 4.998527560966094e-06, "loss": 0.5727, "step": 364 }, { "epoch": 0.09025717111770525, "grad_norm": 0.9015805731333748, "learning_rate": 4.9985163861038535e-06, "loss": 0.5669, "step": 365 }, { "epoch": 0.09050445103857567, "grad_norm": 0.8174154358479824, "learning_rate": 4.998505169009356e-06, "loss": 0.5541, "step": 366 }, { "epoch": 0.0907517309594461, "grad_norm": 0.7879889004369333, "learning_rate": 4.998493909682791e-06, "loss": 0.5377, "step": 367 }, { "epoch": 0.09099901088031652, "grad_norm": 0.8134430346957511, "learning_rate": 4.99848260812435e-06, "loss": 0.5673, "step": 368 }, { "epoch": 0.09124629080118694, "grad_norm": 0.7822338294968185, "learning_rate": 4.998471264334222e-06, "loss": 0.5747, "step": 369 }, { "epoch": 0.09149357072205737, "grad_norm": 0.846007678741242, "learning_rate": 4.998459878312598e-06, "loss": 0.5382, "step": 370 }, { "epoch": 0.0917408506429278, "grad_norm": 0.7834419615329641, "learning_rate": 4.998448450059674e-06, "loss": 0.5802, "step": 371 }, { "epoch": 0.09198813056379822, "grad_norm": 0.8462903242903708, "learning_rate": 4.998436979575641e-06, "loss": 0.5637, "step": 372 }, { "epoch": 0.09223541048466864, "grad_norm": 0.8512867722701203, "learning_rate": 4.998425466860692e-06, "loss": 0.5289, "step": 373 }, { "epoch": 0.09248269040553907, "grad_norm": 0.8021787954595253, "learning_rate": 4.998413911915025e-06, "loss": 0.5851, "step": 374 }, { "epoch": 0.09272997032640949, "grad_norm": 0.7994136334382167, "learning_rate": 4.998402314738831e-06, "loss": 0.5461, "step": 375 }, { "epoch": 0.09297725024727992, "grad_norm": 0.8676794377546366, "learning_rate": 4.998390675332308e-06, "loss": 0.5374, "step": 376 }, { "epoch": 0.09322453016815034, "grad_norm": 0.8618752560499016, "learning_rate": 4.9983789936956535e-06, "loss": 0.5235, "step": 377 }, { "epoch": 0.09347181008902077, "grad_norm": 0.8116452824280147, "learning_rate": 4.998367269829065e-06, "loss": 0.563, "step": 378 }, { "epoch": 0.09371909000989119, "grad_norm": 0.8601437613985123, "learning_rate": 4.998355503732739e-06, "loss": 0.5612, "step": 379 }, { "epoch": 0.09396636993076163, "grad_norm": 0.8155800361584301, "learning_rate": 4.9983436954068755e-06, "loss": 0.5712, "step": 380 }, { "epoch": 0.09421364985163205, "grad_norm": 0.8124067408872788, "learning_rate": 4.998331844851674e-06, "loss": 0.5323, "step": 381 }, { "epoch": 0.09446092977250248, "grad_norm": 0.8103826549978994, "learning_rate": 4.9983199520673345e-06, "loss": 0.5433, "step": 382 }, { "epoch": 0.0947082096933729, "grad_norm": 0.8323514299027938, "learning_rate": 4.998308017054059e-06, "loss": 0.5691, "step": 383 }, { "epoch": 0.09495548961424333, "grad_norm": 0.8000615986590797, "learning_rate": 4.998296039812047e-06, "loss": 0.5553, "step": 384 }, { "epoch": 0.09520276953511375, "grad_norm": 0.8339284242974745, "learning_rate": 4.9982840203415035e-06, "loss": 0.5579, "step": 385 }, { "epoch": 0.09545004945598418, "grad_norm": 0.8174909593172076, "learning_rate": 4.99827195864263e-06, "loss": 0.5556, "step": 386 }, { "epoch": 0.0956973293768546, "grad_norm": 0.7975261451643757, "learning_rate": 4.998259854715631e-06, "loss": 0.5661, "step": 387 }, { "epoch": 0.09594460929772503, "grad_norm": 0.7916331855598239, "learning_rate": 4.998247708560712e-06, "loss": 0.5409, "step": 388 }, { "epoch": 0.09619188921859545, "grad_norm": 0.8426876014737761, "learning_rate": 4.998235520178076e-06, "loss": 0.5245, "step": 389 }, { "epoch": 0.09643916913946587, "grad_norm": 0.7464229384648007, "learning_rate": 4.998223289567931e-06, "loss": 0.5494, "step": 390 }, { "epoch": 0.0966864490603363, "grad_norm": 0.8575385758360788, "learning_rate": 4.998211016730483e-06, "loss": 0.5247, "step": 391 }, { "epoch": 0.09693372898120672, "grad_norm": 0.8239224282844174, "learning_rate": 4.99819870166594e-06, "loss": 0.5592, "step": 392 }, { "epoch": 0.09718100890207715, "grad_norm": 0.949667436501745, "learning_rate": 4.998186344374509e-06, "loss": 0.5425, "step": 393 }, { "epoch": 0.09742828882294757, "grad_norm": 0.8371983680182382, "learning_rate": 4.9981739448564005e-06, "loss": 0.5675, "step": 394 }, { "epoch": 0.097675568743818, "grad_norm": 0.8190906217429191, "learning_rate": 4.998161503111822e-06, "loss": 0.6126, "step": 395 }, { "epoch": 0.09792284866468842, "grad_norm": 0.814594253339599, "learning_rate": 4.998149019140987e-06, "loss": 0.5475, "step": 396 }, { "epoch": 0.09817012858555885, "grad_norm": 0.881386637857726, "learning_rate": 4.998136492944102e-06, "loss": 0.5577, "step": 397 }, { "epoch": 0.09841740850642927, "grad_norm": 0.8200193748866078, "learning_rate": 4.998123924521383e-06, "loss": 0.5585, "step": 398 }, { "epoch": 0.09866468842729971, "grad_norm": 0.8111722627824661, "learning_rate": 4.99811131387304e-06, "loss": 0.5463, "step": 399 }, { "epoch": 0.09891196834817013, "grad_norm": 0.8941732458650538, "learning_rate": 4.9980986609992865e-06, "loss": 0.5478, "step": 400 }, { "epoch": 0.09915924826904056, "grad_norm": 0.9159041142286112, "learning_rate": 4.998085965900337e-06, "loss": 0.5607, "step": 401 }, { "epoch": 0.09940652818991098, "grad_norm": 0.8149622380453867, "learning_rate": 4.998073228576406e-06, "loss": 0.5563, "step": 402 }, { "epoch": 0.09965380811078141, "grad_norm": 0.8415024715760436, "learning_rate": 4.998060449027709e-06, "loss": 0.5735, "step": 403 }, { "epoch": 0.09990108803165183, "grad_norm": 0.8549093887109618, "learning_rate": 4.998047627254461e-06, "loss": 0.5285, "step": 404 }, { "epoch": 0.10014836795252226, "grad_norm": 0.8654615781606256, "learning_rate": 4.998034763256879e-06, "loss": 0.5543, "step": 405 }, { "epoch": 0.10039564787339268, "grad_norm": 0.8349697955100309, "learning_rate": 4.998021857035181e-06, "loss": 0.5738, "step": 406 }, { "epoch": 0.1006429277942631, "grad_norm": 0.8519615357866014, "learning_rate": 4.998008908589586e-06, "loss": 0.5524, "step": 407 }, { "epoch": 0.10089020771513353, "grad_norm": 0.9177041756133535, "learning_rate": 4.9979959179203095e-06, "loss": 0.5927, "step": 408 }, { "epoch": 0.10113748763600396, "grad_norm": 0.8578558789111481, "learning_rate": 4.997982885027575e-06, "loss": 0.5373, "step": 409 }, { "epoch": 0.10138476755687438, "grad_norm": 0.8249578416532023, "learning_rate": 4.997969809911601e-06, "loss": 0.5582, "step": 410 }, { "epoch": 0.1016320474777448, "grad_norm": 0.8322799534441788, "learning_rate": 4.997956692572609e-06, "loss": 0.5436, "step": 411 }, { "epoch": 0.10187932739861523, "grad_norm": 0.7946202487427521, "learning_rate": 4.9979435330108195e-06, "loss": 0.5561, "step": 412 }, { "epoch": 0.10212660731948565, "grad_norm": 0.8311385424691226, "learning_rate": 4.997930331226456e-06, "loss": 0.518, "step": 413 }, { "epoch": 0.10237388724035608, "grad_norm": 0.8304279162491168, "learning_rate": 4.997917087219741e-06, "loss": 0.5412, "step": 414 }, { "epoch": 0.1026211671612265, "grad_norm": 0.8877748788219127, "learning_rate": 4.9979038009909e-06, "loss": 0.517, "step": 415 }, { "epoch": 0.10286844708209693, "grad_norm": 0.8062638798591899, "learning_rate": 4.997890472540156e-06, "loss": 0.5317, "step": 416 }, { "epoch": 0.10311572700296735, "grad_norm": 0.8638613858007713, "learning_rate": 4.997877101867734e-06, "loss": 0.565, "step": 417 }, { "epoch": 0.10336300692383778, "grad_norm": 0.8549274979700249, "learning_rate": 4.997863688973862e-06, "loss": 0.5333, "step": 418 }, { "epoch": 0.10361028684470822, "grad_norm": 0.8238596502819312, "learning_rate": 4.997850233858765e-06, "loss": 0.5664, "step": 419 }, { "epoch": 0.10385756676557864, "grad_norm": 0.8179769972244695, "learning_rate": 4.99783673652267e-06, "loss": 0.5494, "step": 420 }, { "epoch": 0.10410484668644907, "grad_norm": 0.872760822491279, "learning_rate": 4.997823196965806e-06, "loss": 0.5686, "step": 421 }, { "epoch": 0.10435212660731949, "grad_norm": 0.8247079449845919, "learning_rate": 4.997809615188403e-06, "loss": 0.5761, "step": 422 }, { "epoch": 0.10459940652818991, "grad_norm": 0.8274894849461595, "learning_rate": 4.9977959911906885e-06, "loss": 0.546, "step": 423 }, { "epoch": 0.10484668644906034, "grad_norm": 0.8200195602952358, "learning_rate": 4.997782324972894e-06, "loss": 0.5487, "step": 424 }, { "epoch": 0.10509396636993076, "grad_norm": 0.8424425368017951, "learning_rate": 4.99776861653525e-06, "loss": 0.5284, "step": 425 }, { "epoch": 0.10534124629080119, "grad_norm": 0.8228965203175329, "learning_rate": 4.9977548658779885e-06, "loss": 0.5416, "step": 426 }, { "epoch": 0.10558852621167161, "grad_norm": 0.8329995359888007, "learning_rate": 4.997741073001342e-06, "loss": 0.5271, "step": 427 }, { "epoch": 0.10583580613254204, "grad_norm": 0.7637313695891461, "learning_rate": 4.997727237905543e-06, "loss": 0.5543, "step": 428 }, { "epoch": 0.10608308605341246, "grad_norm": 0.8469415248863096, "learning_rate": 4.9977133605908264e-06, "loss": 0.5194, "step": 429 }, { "epoch": 0.10633036597428289, "grad_norm": 0.8032002919054423, "learning_rate": 4.997699441057427e-06, "loss": 0.5175, "step": 430 }, { "epoch": 0.10657764589515331, "grad_norm": 0.789050525965101, "learning_rate": 4.997685479305577e-06, "loss": 0.5706, "step": 431 }, { "epoch": 0.10682492581602374, "grad_norm": 0.8025536621297401, "learning_rate": 4.997671475335517e-06, "loss": 0.5336, "step": 432 }, { "epoch": 0.10707220573689416, "grad_norm": 0.8376249206772568, "learning_rate": 4.99765742914748e-06, "loss": 0.5502, "step": 433 }, { "epoch": 0.10731948565776459, "grad_norm": 0.8226860382728634, "learning_rate": 4.9976433407417056e-06, "loss": 0.5352, "step": 434 }, { "epoch": 0.10756676557863501, "grad_norm": 0.8196965592986617, "learning_rate": 4.9976292101184305e-06, "loss": 0.5642, "step": 435 }, { "epoch": 0.10781404549950543, "grad_norm": 0.8027935956184445, "learning_rate": 4.997615037277894e-06, "loss": 0.5367, "step": 436 }, { "epoch": 0.10806132542037586, "grad_norm": 0.8412182776715479, "learning_rate": 4.997600822220336e-06, "loss": 0.5545, "step": 437 }, { "epoch": 0.1083086053412463, "grad_norm": 0.7967652156249115, "learning_rate": 4.997586564945998e-06, "loss": 0.538, "step": 438 }, { "epoch": 0.10855588526211672, "grad_norm": 0.8273469063469596, "learning_rate": 4.997572265455118e-06, "loss": 0.5349, "step": 439 }, { "epoch": 0.10880316518298715, "grad_norm": 0.816757505466961, "learning_rate": 4.9975579237479396e-06, "loss": 0.5598, "step": 440 }, { "epoch": 0.10905044510385757, "grad_norm": 0.8179328405367462, "learning_rate": 4.997543539824706e-06, "loss": 0.5461, "step": 441 }, { "epoch": 0.109297725024728, "grad_norm": 0.8095408289755744, "learning_rate": 4.997529113685659e-06, "loss": 0.5515, "step": 442 }, { "epoch": 0.10954500494559842, "grad_norm": 0.8916427729760883, "learning_rate": 4.997514645331042e-06, "loss": 0.5217, "step": 443 }, { "epoch": 0.10979228486646884, "grad_norm": 0.7917353212761363, "learning_rate": 4.9975001347611005e-06, "loss": 0.5371, "step": 444 }, { "epoch": 0.11003956478733927, "grad_norm": 0.8146465036810688, "learning_rate": 4.997485581976079e-06, "loss": 0.5987, "step": 445 }, { "epoch": 0.1102868447082097, "grad_norm": 0.8128773978395029, "learning_rate": 4.997470986976225e-06, "loss": 0.5481, "step": 446 }, { "epoch": 0.11053412462908012, "grad_norm": 0.7961091137875655, "learning_rate": 4.997456349761783e-06, "loss": 0.5582, "step": 447 }, { "epoch": 0.11078140454995054, "grad_norm": 0.813481787539708, "learning_rate": 4.997441670333003e-06, "loss": 0.5443, "step": 448 }, { "epoch": 0.11102868447082097, "grad_norm": 0.8158459985879026, "learning_rate": 4.997426948690131e-06, "loss": 0.5536, "step": 449 }, { "epoch": 0.11127596439169139, "grad_norm": 0.8327146727995968, "learning_rate": 4.997412184833417e-06, "loss": 0.5326, "step": 450 }, { "epoch": 0.11152324431256182, "grad_norm": 0.7905392611324622, "learning_rate": 4.99739737876311e-06, "loss": 0.5415, "step": 451 }, { "epoch": 0.11177052423343224, "grad_norm": 0.832901783841983, "learning_rate": 4.99738253047946e-06, "loss": 0.5263, "step": 452 }, { "epoch": 0.11201780415430267, "grad_norm": 0.8309586500909867, "learning_rate": 4.997367639982719e-06, "loss": 0.564, "step": 453 }, { "epoch": 0.11226508407517309, "grad_norm": 0.7896942371421249, "learning_rate": 4.997352707273138e-06, "loss": 0.5688, "step": 454 }, { "epoch": 0.11251236399604352, "grad_norm": 0.8159292622067303, "learning_rate": 4.9973377323509694e-06, "loss": 0.5397, "step": 455 }, { "epoch": 0.11275964391691394, "grad_norm": 0.8743313979782678, "learning_rate": 4.997322715216467e-06, "loss": 0.5425, "step": 456 }, { "epoch": 0.11300692383778438, "grad_norm": 0.8145131073267003, "learning_rate": 4.997307655869883e-06, "loss": 0.5279, "step": 457 }, { "epoch": 0.1132542037586548, "grad_norm": 0.8498643017964868, "learning_rate": 4.997292554311474e-06, "loss": 0.5439, "step": 458 }, { "epoch": 0.11350148367952523, "grad_norm": 0.876725589506382, "learning_rate": 4.997277410541493e-06, "loss": 0.5584, "step": 459 }, { "epoch": 0.11374876360039565, "grad_norm": 0.8392765984431413, "learning_rate": 4.9972622245601986e-06, "loss": 0.5696, "step": 460 }, { "epoch": 0.11399604352126608, "grad_norm": 0.8074407171452053, "learning_rate": 4.997246996367845e-06, "loss": 0.5587, "step": 461 }, { "epoch": 0.1142433234421365, "grad_norm": 0.8471548223920936, "learning_rate": 4.997231725964692e-06, "loss": 0.5244, "step": 462 }, { "epoch": 0.11449060336300693, "grad_norm": 0.8650548673944289, "learning_rate": 4.9972164133509955e-06, "loss": 0.5689, "step": 463 }, { "epoch": 0.11473788328387735, "grad_norm": 0.8510165250718368, "learning_rate": 4.997201058527016e-06, "loss": 0.5348, "step": 464 }, { "epoch": 0.11498516320474778, "grad_norm": 0.8119157561874207, "learning_rate": 4.997185661493011e-06, "loss": 0.5405, "step": 465 }, { "epoch": 0.1152324431256182, "grad_norm": 0.8063983800119408, "learning_rate": 4.997170222249244e-06, "loss": 0.5366, "step": 466 }, { "epoch": 0.11547972304648862, "grad_norm": 0.8326109923777355, "learning_rate": 4.997154740795972e-06, "loss": 0.5725, "step": 467 }, { "epoch": 0.11572700296735905, "grad_norm": 0.8157543563831605, "learning_rate": 4.99713921713346e-06, "loss": 0.5288, "step": 468 }, { "epoch": 0.11597428288822947, "grad_norm": 0.8035319772872705, "learning_rate": 4.997123651261969e-06, "loss": 0.5257, "step": 469 }, { "epoch": 0.1162215628090999, "grad_norm": 0.7921032125004203, "learning_rate": 4.997108043181762e-06, "loss": 0.5396, "step": 470 }, { "epoch": 0.11646884272997032, "grad_norm": 0.8305520689275583, "learning_rate": 4.9970923928931026e-06, "loss": 0.5511, "step": 471 }, { "epoch": 0.11671612265084075, "grad_norm": 0.8548918005506956, "learning_rate": 4.997076700396256e-06, "loss": 0.5318, "step": 472 }, { "epoch": 0.11696340257171117, "grad_norm": 0.8154176124090147, "learning_rate": 4.997060965691488e-06, "loss": 0.576, "step": 473 }, { "epoch": 0.1172106824925816, "grad_norm": 0.8412409936805049, "learning_rate": 4.9970451887790626e-06, "loss": 0.53, "step": 474 }, { "epoch": 0.11745796241345202, "grad_norm": 0.8226202940805938, "learning_rate": 4.997029369659249e-06, "loss": 0.521, "step": 475 }, { "epoch": 0.11770524233432245, "grad_norm": 0.8110352535755092, "learning_rate": 4.997013508332312e-06, "loss": 0.5311, "step": 476 }, { "epoch": 0.11795252225519288, "grad_norm": 0.86153650176347, "learning_rate": 4.996997604798522e-06, "loss": 0.5388, "step": 477 }, { "epoch": 0.11819980217606331, "grad_norm": 0.8483377928162458, "learning_rate": 4.996981659058146e-06, "loss": 0.5471, "step": 478 }, { "epoch": 0.11844708209693373, "grad_norm": 0.8624161681686172, "learning_rate": 4.9969656711114546e-06, "loss": 0.528, "step": 479 }, { "epoch": 0.11869436201780416, "grad_norm": 0.8400291073199923, "learning_rate": 4.996949640958718e-06, "loss": 0.5488, "step": 480 }, { "epoch": 0.11894164193867458, "grad_norm": 0.8311370315597272, "learning_rate": 4.996933568600206e-06, "loss": 0.5563, "step": 481 }, { "epoch": 0.11918892185954501, "grad_norm": 0.8870073351460207, "learning_rate": 4.996917454036192e-06, "loss": 0.5087, "step": 482 }, { "epoch": 0.11943620178041543, "grad_norm": 0.8713695523993678, "learning_rate": 4.996901297266947e-06, "loss": 0.5275, "step": 483 }, { "epoch": 0.11968348170128586, "grad_norm": 0.9038628652413889, "learning_rate": 4.996885098292745e-06, "loss": 0.5439, "step": 484 }, { "epoch": 0.11993076162215628, "grad_norm": 0.9167026612271514, "learning_rate": 4.99686885711386e-06, "loss": 0.5677, "step": 485 }, { "epoch": 0.1201780415430267, "grad_norm": 0.8720438532820466, "learning_rate": 4.996852573730565e-06, "loss": 0.5236, "step": 486 }, { "epoch": 0.12042532146389713, "grad_norm": 0.7557209197955111, "learning_rate": 4.996836248143138e-06, "loss": 0.5104, "step": 487 }, { "epoch": 0.12067260138476756, "grad_norm": 0.7989706682687766, "learning_rate": 4.996819880351851e-06, "loss": 0.5853, "step": 488 }, { "epoch": 0.12091988130563798, "grad_norm": 0.8984411166999768, "learning_rate": 4.996803470356984e-06, "loss": 0.5287, "step": 489 }, { "epoch": 0.1211671612265084, "grad_norm": 0.8648551900935894, "learning_rate": 4.996787018158813e-06, "loss": 0.5419, "step": 490 }, { "epoch": 0.12141444114737883, "grad_norm": 0.804324060258821, "learning_rate": 4.996770523757616e-06, "loss": 0.5205, "step": 491 }, { "epoch": 0.12166172106824925, "grad_norm": 0.9032754593092449, "learning_rate": 4.996753987153673e-06, "loss": 0.5159, "step": 492 }, { "epoch": 0.12190900098911968, "grad_norm": 0.9533551364220325, "learning_rate": 4.996737408347262e-06, "loss": 0.5226, "step": 493 }, { "epoch": 0.1221562809099901, "grad_norm": 0.8713670416215546, "learning_rate": 4.996720787338663e-06, "loss": 0.5618, "step": 494 }, { "epoch": 0.12240356083086053, "grad_norm": 0.8684842954756598, "learning_rate": 4.996704124128159e-06, "loss": 0.5639, "step": 495 }, { "epoch": 0.12265084075173097, "grad_norm": 0.8952885323193862, "learning_rate": 4.996687418716031e-06, "loss": 0.5079, "step": 496 }, { "epoch": 0.12289812067260139, "grad_norm": 0.8075323957819724, "learning_rate": 4.9966706711025596e-06, "loss": 0.5321, "step": 497 }, { "epoch": 0.12314540059347182, "grad_norm": 0.8494422449585496, "learning_rate": 4.996653881288029e-06, "loss": 0.5001, "step": 498 }, { "epoch": 0.12339268051434224, "grad_norm": 0.8875548481591807, "learning_rate": 4.996637049272724e-06, "loss": 0.5364, "step": 499 }, { "epoch": 0.12363996043521266, "grad_norm": 0.9020942067293436, "learning_rate": 4.996620175056928e-06, "loss": 0.5257, "step": 500 }, { "epoch": 0.12388724035608309, "grad_norm": 0.8675810693187886, "learning_rate": 4.9966032586409264e-06, "loss": 0.5365, "step": 501 }, { "epoch": 0.12413452027695351, "grad_norm": 0.8714654026651982, "learning_rate": 4.996586300025005e-06, "loss": 0.5008, "step": 502 }, { "epoch": 0.12438180019782394, "grad_norm": 0.8399592723027427, "learning_rate": 4.99656929920945e-06, "loss": 0.5151, "step": 503 }, { "epoch": 0.12462908011869436, "grad_norm": 0.7778730144888366, "learning_rate": 4.996552256194551e-06, "loss": 0.5302, "step": 504 }, { "epoch": 0.12487636003956479, "grad_norm": 0.8260924301670444, "learning_rate": 4.996535170980593e-06, "loss": 0.5182, "step": 505 }, { "epoch": 0.1251236399604352, "grad_norm": 0.8999624536805119, "learning_rate": 4.996518043567868e-06, "loss": 0.5232, "step": 506 }, { "epoch": 0.12537091988130564, "grad_norm": 0.8691087323090587, "learning_rate": 4.9965008739566615e-06, "loss": 0.5532, "step": 507 }, { "epoch": 0.12561819980217606, "grad_norm": 0.8672773310284954, "learning_rate": 4.9964836621472674e-06, "loss": 0.5627, "step": 508 }, { "epoch": 0.1258654797230465, "grad_norm": 0.790002975589831, "learning_rate": 4.996466408139975e-06, "loss": 0.5469, "step": 509 }, { "epoch": 0.1261127596439169, "grad_norm": 0.8660874103550955, "learning_rate": 4.996449111935075e-06, "loss": 0.5392, "step": 510 }, { "epoch": 0.12636003956478734, "grad_norm": 0.8196685228026358, "learning_rate": 4.996431773532863e-06, "loss": 0.5347, "step": 511 }, { "epoch": 0.12660731948565776, "grad_norm": 0.8679109946021679, "learning_rate": 4.996414392933629e-06, "loss": 0.5019, "step": 512 }, { "epoch": 0.12685459940652818, "grad_norm": 0.8309043787360044, "learning_rate": 4.996396970137668e-06, "loss": 0.5288, "step": 513 }, { "epoch": 0.1271018793273986, "grad_norm": 0.8410088998221761, "learning_rate": 4.9963795051452736e-06, "loss": 0.5466, "step": 514 }, { "epoch": 0.12734915924826903, "grad_norm": 0.8358751326108991, "learning_rate": 4.996361997956743e-06, "loss": 0.5428, "step": 515 }, { "epoch": 0.12759643916913946, "grad_norm": 0.8819431687194557, "learning_rate": 4.996344448572369e-06, "loss": 0.5364, "step": 516 }, { "epoch": 0.12784371909000988, "grad_norm": 0.8490128009282251, "learning_rate": 4.9963268569924515e-06, "loss": 0.5394, "step": 517 }, { "epoch": 0.1280909990108803, "grad_norm": 0.8244195316603701, "learning_rate": 4.996309223217285e-06, "loss": 0.5176, "step": 518 }, { "epoch": 0.12833827893175073, "grad_norm": 0.8235400710586557, "learning_rate": 4.99629154724717e-06, "loss": 0.5364, "step": 519 }, { "epoch": 0.12858555885262116, "grad_norm": 0.8301561516400873, "learning_rate": 4.996273829082404e-06, "loss": 0.5339, "step": 520 }, { "epoch": 0.12883283877349158, "grad_norm": 0.8383832053104039, "learning_rate": 4.996256068723287e-06, "loss": 0.5079, "step": 521 }, { "epoch": 0.129080118694362, "grad_norm": 0.8312860024324458, "learning_rate": 4.996238266170118e-06, "loss": 0.5199, "step": 522 }, { "epoch": 0.12932739861523243, "grad_norm": 0.8799017293310935, "learning_rate": 4.9962204214232005e-06, "loss": 0.499, "step": 523 }, { "epoch": 0.12957467853610286, "grad_norm": 0.8835474145873032, "learning_rate": 4.996202534482832e-06, "loss": 0.4991, "step": 524 }, { "epoch": 0.1298219584569733, "grad_norm": 0.8039735973515206, "learning_rate": 4.9961846053493194e-06, "loss": 0.5355, "step": 525 }, { "epoch": 0.13006923837784373, "grad_norm": 0.8361160383616958, "learning_rate": 4.9961666340229635e-06, "loss": 0.5466, "step": 526 }, { "epoch": 0.13031651829871416, "grad_norm": 0.8569201635197241, "learning_rate": 4.996148620504067e-06, "loss": 0.5349, "step": 527 }, { "epoch": 0.13056379821958458, "grad_norm": 0.8802630630022769, "learning_rate": 4.996130564792936e-06, "loss": 0.5228, "step": 528 }, { "epoch": 0.130811078140455, "grad_norm": 0.8030195373159202, "learning_rate": 4.996112466889876e-06, "loss": 0.5689, "step": 529 }, { "epoch": 0.13105835806132543, "grad_norm": 0.7747458285259381, "learning_rate": 4.996094326795192e-06, "loss": 0.5297, "step": 530 }, { "epoch": 0.13130563798219586, "grad_norm": 0.808534915335967, "learning_rate": 4.996076144509191e-06, "loss": 0.5333, "step": 531 }, { "epoch": 0.13155291790306628, "grad_norm": 0.8248147055112679, "learning_rate": 4.996057920032179e-06, "loss": 0.5338, "step": 532 }, { "epoch": 0.1318001978239367, "grad_norm": 0.8369441009968227, "learning_rate": 4.996039653364466e-06, "loss": 0.5249, "step": 533 }, { "epoch": 0.13204747774480713, "grad_norm": 0.8122213605964168, "learning_rate": 4.99602134450636e-06, "loss": 0.535, "step": 534 }, { "epoch": 0.13229475766567755, "grad_norm": 0.8156674463115173, "learning_rate": 4.9960029934581706e-06, "loss": 0.5331, "step": 535 }, { "epoch": 0.13254203758654798, "grad_norm": 0.7744707960410743, "learning_rate": 4.9959846002202075e-06, "loss": 0.5362, "step": 536 }, { "epoch": 0.1327893175074184, "grad_norm": 0.8384560596557845, "learning_rate": 4.995966164792782e-06, "loss": 0.5453, "step": 537 }, { "epoch": 0.13303659742828883, "grad_norm": 0.8058804582138253, "learning_rate": 4.9959476871762055e-06, "loss": 0.5157, "step": 538 }, { "epoch": 0.13328387734915925, "grad_norm": 0.8294745593661792, "learning_rate": 4.995929167370791e-06, "loss": 0.5766, "step": 539 }, { "epoch": 0.13353115727002968, "grad_norm": 0.8396457626210513, "learning_rate": 4.99591060537685e-06, "loss": 0.53, "step": 540 }, { "epoch": 0.1337784371909001, "grad_norm": 0.8422569925721224, "learning_rate": 4.995892001194699e-06, "loss": 0.5293, "step": 541 }, { "epoch": 0.13402571711177053, "grad_norm": 0.8097797303451486, "learning_rate": 4.995873354824649e-06, "loss": 0.5193, "step": 542 }, { "epoch": 0.13427299703264095, "grad_norm": 0.8282914521596959, "learning_rate": 4.995854666267017e-06, "loss": 0.517, "step": 543 }, { "epoch": 0.13452027695351138, "grad_norm": 0.8430683316926538, "learning_rate": 4.99583593552212e-06, "loss": 0.5572, "step": 544 }, { "epoch": 0.1347675568743818, "grad_norm": 0.8118636383334119, "learning_rate": 4.995817162590273e-06, "loss": 0.5268, "step": 545 }, { "epoch": 0.13501483679525222, "grad_norm": 0.7958627214231268, "learning_rate": 4.995798347471793e-06, "loss": 0.5271, "step": 546 }, { "epoch": 0.13526211671612265, "grad_norm": 0.8098286264528141, "learning_rate": 4.995779490166999e-06, "loss": 0.5318, "step": 547 }, { "epoch": 0.13550939663699307, "grad_norm": 0.8301867014748663, "learning_rate": 4.995760590676209e-06, "loss": 0.5593, "step": 548 }, { "epoch": 0.1357566765578635, "grad_norm": 0.7834537582380657, "learning_rate": 4.995741648999744e-06, "loss": 0.5145, "step": 549 }, { "epoch": 0.13600395647873392, "grad_norm": 0.8100081692650491, "learning_rate": 4.995722665137923e-06, "loss": 0.5353, "step": 550 }, { "epoch": 0.13625123639960435, "grad_norm": 0.80189918046591, "learning_rate": 4.995703639091067e-06, "loss": 0.5322, "step": 551 }, { "epoch": 0.13649851632047477, "grad_norm": 0.8350298859457816, "learning_rate": 4.995684570859497e-06, "loss": 0.5705, "step": 552 }, { "epoch": 0.1367457962413452, "grad_norm": 0.8182785573997086, "learning_rate": 4.995665460443536e-06, "loss": 0.5083, "step": 553 }, { "epoch": 0.13699307616221562, "grad_norm": 0.9006658583356458, "learning_rate": 4.995646307843508e-06, "loss": 0.54, "step": 554 }, { "epoch": 0.13724035608308605, "grad_norm": 0.7948803191646967, "learning_rate": 4.995627113059734e-06, "loss": 0.5562, "step": 555 }, { "epoch": 0.13748763600395647, "grad_norm": 0.8282246520634873, "learning_rate": 4.995607876092541e-06, "loss": 0.5289, "step": 556 }, { "epoch": 0.1377349159248269, "grad_norm": 0.8931269666220943, "learning_rate": 4.995588596942254e-06, "loss": 0.5478, "step": 557 }, { "epoch": 0.13798219584569732, "grad_norm": 0.8291498125431265, "learning_rate": 4.995569275609197e-06, "loss": 0.5269, "step": 558 }, { "epoch": 0.13822947576656774, "grad_norm": 0.824472874788768, "learning_rate": 4.995549912093698e-06, "loss": 0.5203, "step": 559 }, { "epoch": 0.13847675568743817, "grad_norm": 0.8380697493482778, "learning_rate": 4.995530506396084e-06, "loss": 0.5343, "step": 560 }, { "epoch": 0.1387240356083086, "grad_norm": 0.8197126786744611, "learning_rate": 4.995511058516683e-06, "loss": 0.5252, "step": 561 }, { "epoch": 0.13897131552917902, "grad_norm": 0.8478247444958412, "learning_rate": 4.995491568455824e-06, "loss": 0.5136, "step": 562 }, { "epoch": 0.13921859545004944, "grad_norm": 0.8005226390245985, "learning_rate": 4.9954720362138365e-06, "loss": 0.5269, "step": 563 }, { "epoch": 0.1394658753709199, "grad_norm": 0.8227428725452879, "learning_rate": 4.995452461791049e-06, "loss": 0.5127, "step": 564 }, { "epoch": 0.13971315529179032, "grad_norm": 0.8065785300544993, "learning_rate": 4.995432845187796e-06, "loss": 0.5156, "step": 565 }, { "epoch": 0.13996043521266074, "grad_norm": 0.8094394004280067, "learning_rate": 4.9954131864044055e-06, "loss": 0.5167, "step": 566 }, { "epoch": 0.14020771513353117, "grad_norm": 0.8659413683091203, "learning_rate": 4.995393485441211e-06, "loss": 0.5371, "step": 567 }, { "epoch": 0.1404549950544016, "grad_norm": 0.8210244711493505, "learning_rate": 4.995373742298545e-06, "loss": 0.517, "step": 568 }, { "epoch": 0.14070227497527202, "grad_norm": 0.7525169650037641, "learning_rate": 4.995353956976743e-06, "loss": 0.5185, "step": 569 }, { "epoch": 0.14094955489614244, "grad_norm": 0.844339650153545, "learning_rate": 4.995334129476137e-06, "loss": 0.4857, "step": 570 }, { "epoch": 0.14119683481701287, "grad_norm": 0.8248404365036152, "learning_rate": 4.995314259797065e-06, "loss": 0.5301, "step": 571 }, { "epoch": 0.1414441147378833, "grad_norm": 0.8040746643584687, "learning_rate": 4.99529434793986e-06, "loss": 0.5283, "step": 572 }, { "epoch": 0.14169139465875372, "grad_norm": 0.8189779701343372, "learning_rate": 4.995274393904861e-06, "loss": 0.5132, "step": 573 }, { "epoch": 0.14193867457962414, "grad_norm": 0.8739231936574126, "learning_rate": 4.995254397692403e-06, "loss": 0.497, "step": 574 }, { "epoch": 0.14218595450049457, "grad_norm": 0.8348211644398329, "learning_rate": 4.995234359302825e-06, "loss": 0.5216, "step": 575 }, { "epoch": 0.142433234421365, "grad_norm": 0.8033507653933986, "learning_rate": 4.995214278736467e-06, "loss": 0.5134, "step": 576 }, { "epoch": 0.14268051434223541, "grad_norm": 0.8666478902841073, "learning_rate": 4.9951941559936655e-06, "loss": 0.4913, "step": 577 }, { "epoch": 0.14292779426310584, "grad_norm": 0.8121736161408633, "learning_rate": 4.995173991074764e-06, "loss": 0.5204, "step": 578 }, { "epoch": 0.14317507418397626, "grad_norm": 0.8130603144768949, "learning_rate": 4.995153783980101e-06, "loss": 0.51, "step": 579 }, { "epoch": 0.1434223541048467, "grad_norm": 0.8376602599719988, "learning_rate": 4.995133534710018e-06, "loss": 0.5286, "step": 580 }, { "epoch": 0.1436696340257171, "grad_norm": 0.8028003299616209, "learning_rate": 4.995113243264859e-06, "loss": 0.5154, "step": 581 }, { "epoch": 0.14391691394658754, "grad_norm": 0.7863768226347286, "learning_rate": 4.995092909644966e-06, "loss": 0.5024, "step": 582 }, { "epoch": 0.14416419386745796, "grad_norm": 0.8021001800040607, "learning_rate": 4.995072533850682e-06, "loss": 0.5354, "step": 583 }, { "epoch": 0.1444114737883284, "grad_norm": 0.8402628321453506, "learning_rate": 4.995052115882353e-06, "loss": 0.5249, "step": 584 }, { "epoch": 0.1446587537091988, "grad_norm": 0.8331277575923502, "learning_rate": 4.9950316557403235e-06, "loss": 0.4983, "step": 585 }, { "epoch": 0.14490603363006924, "grad_norm": 0.8188619596157297, "learning_rate": 4.9950111534249375e-06, "loss": 0.5357, "step": 586 }, { "epoch": 0.14515331355093966, "grad_norm": 0.7985060052509381, "learning_rate": 4.994990608936544e-06, "loss": 0.5091, "step": 587 }, { "epoch": 0.14540059347181009, "grad_norm": 0.7655405143056095, "learning_rate": 4.99497002227549e-06, "loss": 0.5356, "step": 588 }, { "epoch": 0.1456478733926805, "grad_norm": 0.8256783148504685, "learning_rate": 4.9949493934421226e-06, "loss": 0.5148, "step": 589 }, { "epoch": 0.14589515331355093, "grad_norm": 0.8228995595458576, "learning_rate": 4.99492872243679e-06, "loss": 0.5198, "step": 590 }, { "epoch": 0.14614243323442136, "grad_norm": 0.7983002453848045, "learning_rate": 4.994908009259843e-06, "loss": 0.5196, "step": 591 }, { "epoch": 0.14638971315529178, "grad_norm": 0.8146201436352071, "learning_rate": 4.994887253911631e-06, "loss": 0.5521, "step": 592 }, { "epoch": 0.1466369930761622, "grad_norm": 0.8252581518039378, "learning_rate": 4.9948664563925054e-06, "loss": 0.5505, "step": 593 }, { "epoch": 0.14688427299703263, "grad_norm": 0.8172097763249438, "learning_rate": 4.994845616702817e-06, "loss": 0.5245, "step": 594 }, { "epoch": 0.14713155291790306, "grad_norm": 0.8308363666090501, "learning_rate": 4.994824734842918e-06, "loss": 0.5291, "step": 595 }, { "epoch": 0.14737883283877348, "grad_norm": 0.8236367133690201, "learning_rate": 4.994803810813161e-06, "loss": 0.5274, "step": 596 }, { "epoch": 0.1476261127596439, "grad_norm": 0.8921360057125517, "learning_rate": 4.9947828446139016e-06, "loss": 0.5239, "step": 597 }, { "epoch": 0.14787339268051433, "grad_norm": 0.8519369558875952, "learning_rate": 4.994761836245492e-06, "loss": 0.5568, "step": 598 }, { "epoch": 0.14812067260138476, "grad_norm": 0.8593306021580139, "learning_rate": 4.994740785708289e-06, "loss": 0.5183, "step": 599 }, { "epoch": 0.14836795252225518, "grad_norm": 0.921637121737583, "learning_rate": 4.994719693002646e-06, "loss": 0.5235, "step": 600 }, { "epoch": 0.1486152324431256, "grad_norm": 0.8399998695591865, "learning_rate": 4.994698558128923e-06, "loss": 0.566, "step": 601 }, { "epoch": 0.14886251236399603, "grad_norm": 0.7904438558914851, "learning_rate": 4.994677381087475e-06, "loss": 0.5184, "step": 602 }, { "epoch": 0.14910979228486648, "grad_norm": 0.8107257376120282, "learning_rate": 4.99465616187866e-06, "loss": 0.5232, "step": 603 }, { "epoch": 0.1493570722057369, "grad_norm": 0.8466653719532753, "learning_rate": 4.994634900502837e-06, "loss": 0.5461, "step": 604 }, { "epoch": 0.14960435212660733, "grad_norm": 0.8332150966857227, "learning_rate": 4.994613596960366e-06, "loss": 0.543, "step": 605 }, { "epoch": 0.14985163204747776, "grad_norm": 0.8494372220903758, "learning_rate": 4.994592251251606e-06, "loss": 0.5414, "step": 606 }, { "epoch": 0.15009891196834818, "grad_norm": 0.836739182677242, "learning_rate": 4.994570863376918e-06, "loss": 0.4868, "step": 607 }, { "epoch": 0.1503461918892186, "grad_norm": 0.8560168407660868, "learning_rate": 4.994549433336664e-06, "loss": 0.5357, "step": 608 }, { "epoch": 0.15059347181008903, "grad_norm": 0.8248533428465916, "learning_rate": 4.9945279611312066e-06, "loss": 0.5371, "step": 609 }, { "epoch": 0.15084075173095945, "grad_norm": 0.8279650051807312, "learning_rate": 4.9945064467609076e-06, "loss": 0.5133, "step": 610 }, { "epoch": 0.15108803165182988, "grad_norm": 0.8207095863487788, "learning_rate": 4.994484890226132e-06, "loss": 0.555, "step": 611 }, { "epoch": 0.1513353115727003, "grad_norm": 0.8473352308749146, "learning_rate": 4.9944632915272426e-06, "loss": 0.5249, "step": 612 }, { "epoch": 0.15158259149357073, "grad_norm": 0.8269313598527205, "learning_rate": 4.994441650664605e-06, "loss": 0.4928, "step": 613 }, { "epoch": 0.15182987141444115, "grad_norm": 0.8051367309430479, "learning_rate": 4.994419967638587e-06, "loss": 0.5126, "step": 614 }, { "epoch": 0.15207715133531158, "grad_norm": 0.9477532042198118, "learning_rate": 4.994398242449552e-06, "loss": 0.5297, "step": 615 }, { "epoch": 0.152324431256182, "grad_norm": 0.8121271507476093, "learning_rate": 4.994376475097869e-06, "loss": 0.5315, "step": 616 }, { "epoch": 0.15257171117705243, "grad_norm": 0.7962233363129194, "learning_rate": 4.994354665583906e-06, "loss": 0.5335, "step": 617 }, { "epoch": 0.15281899109792285, "grad_norm": 0.8342385325804931, "learning_rate": 4.9943328139080304e-06, "loss": 0.5026, "step": 618 }, { "epoch": 0.15306627101879328, "grad_norm": 0.7960492212543056, "learning_rate": 4.994310920070613e-06, "loss": 0.5243, "step": 619 }, { "epoch": 0.1533135509396637, "grad_norm": 0.8219481066829437, "learning_rate": 4.994288984072023e-06, "loss": 0.5422, "step": 620 }, { "epoch": 0.15356083086053413, "grad_norm": 0.8965110210914611, "learning_rate": 4.994267005912631e-06, "loss": 0.4988, "step": 621 }, { "epoch": 0.15380811078140455, "grad_norm": 0.8674803487785987, "learning_rate": 4.994244985592809e-06, "loss": 0.5235, "step": 622 }, { "epoch": 0.15405539070227497, "grad_norm": 0.8294901688893189, "learning_rate": 4.99422292311293e-06, "loss": 0.523, "step": 623 }, { "epoch": 0.1543026706231454, "grad_norm": 0.8573169917869833, "learning_rate": 4.994200818473365e-06, "loss": 0.5161, "step": 624 }, { "epoch": 0.15454995054401582, "grad_norm": 0.8239599780470138, "learning_rate": 4.994178671674489e-06, "loss": 0.5249, "step": 625 }, { "epoch": 0.15479723046488625, "grad_norm": 0.8573459117967988, "learning_rate": 4.994156482716677e-06, "loss": 0.4955, "step": 626 }, { "epoch": 0.15504451038575667, "grad_norm": 0.8243090403433094, "learning_rate": 4.994134251600302e-06, "loss": 0.5008, "step": 627 }, { "epoch": 0.1552917903066271, "grad_norm": 0.8606707487222631, "learning_rate": 4.994111978325741e-06, "loss": 0.5306, "step": 628 }, { "epoch": 0.15553907022749752, "grad_norm": 0.8525860766404425, "learning_rate": 4.99408966289337e-06, "loss": 0.5508, "step": 629 }, { "epoch": 0.15578635014836795, "grad_norm": 0.9058751830432761, "learning_rate": 4.994067305303567e-06, "loss": 0.532, "step": 630 }, { "epoch": 0.15603363006923837, "grad_norm": 0.9144797398646675, "learning_rate": 4.9940449055567096e-06, "loss": 0.5025, "step": 631 }, { "epoch": 0.1562809099901088, "grad_norm": 0.7934911770321739, "learning_rate": 4.994022463653176e-06, "loss": 0.4991, "step": 632 }, { "epoch": 0.15652818991097922, "grad_norm": 0.8508702206108024, "learning_rate": 4.993999979593346e-06, "loss": 0.5186, "step": 633 }, { "epoch": 0.15677546983184965, "grad_norm": 0.9125868217774985, "learning_rate": 4.993977453377599e-06, "loss": 0.5141, "step": 634 }, { "epoch": 0.15702274975272007, "grad_norm": 0.8314643342376442, "learning_rate": 4.993954885006316e-06, "loss": 0.5388, "step": 635 }, { "epoch": 0.1572700296735905, "grad_norm": 0.8567507360554277, "learning_rate": 4.9939322744798795e-06, "loss": 0.5099, "step": 636 }, { "epoch": 0.15751730959446092, "grad_norm": 0.9458345629100596, "learning_rate": 4.9939096217986706e-06, "loss": 0.5329, "step": 637 }, { "epoch": 0.15776458951533134, "grad_norm": 0.8717178654756879, "learning_rate": 4.993886926963072e-06, "loss": 0.5101, "step": 638 }, { "epoch": 0.15801186943620177, "grad_norm": 0.9099670727771642, "learning_rate": 4.993864189973468e-06, "loss": 0.5197, "step": 639 }, { "epoch": 0.1582591493570722, "grad_norm": 0.8690584059000377, "learning_rate": 4.993841410830243e-06, "loss": 0.5129, "step": 640 }, { "epoch": 0.15850642927794262, "grad_norm": 0.8395676636532586, "learning_rate": 4.993818589533781e-06, "loss": 0.5435, "step": 641 }, { "epoch": 0.15875370919881307, "grad_norm": 0.8390162044014295, "learning_rate": 4.993795726084469e-06, "loss": 0.4987, "step": 642 }, { "epoch": 0.1590009891196835, "grad_norm": 0.8628449906727201, "learning_rate": 4.993772820482693e-06, "loss": 0.5581, "step": 643 }, { "epoch": 0.15924826904055392, "grad_norm": 0.8769274046379205, "learning_rate": 4.99374987272884e-06, "loss": 0.5253, "step": 644 }, { "epoch": 0.15949554896142434, "grad_norm": 0.8423197065733172, "learning_rate": 4.9937268828232974e-06, "loss": 0.5182, "step": 645 }, { "epoch": 0.15974282888229477, "grad_norm": 0.8495522941564868, "learning_rate": 4.993703850766455e-06, "loss": 0.5176, "step": 646 }, { "epoch": 0.1599901088031652, "grad_norm": 0.8566253722599317, "learning_rate": 4.993680776558701e-06, "loss": 0.4967, "step": 647 }, { "epoch": 0.16023738872403562, "grad_norm": 0.8578979605277798, "learning_rate": 4.993657660200427e-06, "loss": 0.5321, "step": 648 }, { "epoch": 0.16048466864490604, "grad_norm": 0.8437931759195362, "learning_rate": 4.993634501692022e-06, "loss": 0.527, "step": 649 }, { "epoch": 0.16073194856577647, "grad_norm": 0.8312148356989333, "learning_rate": 4.993611301033878e-06, "loss": 0.5346, "step": 650 }, { "epoch": 0.1609792284866469, "grad_norm": 0.8583661441969466, "learning_rate": 4.993588058226388e-06, "loss": 0.4911, "step": 651 }, { "epoch": 0.16122650840751732, "grad_norm": 0.835364664881372, "learning_rate": 4.9935647732699426e-06, "loss": 0.508, "step": 652 }, { "epoch": 0.16147378832838774, "grad_norm": 0.9088368040786615, "learning_rate": 4.993541446164938e-06, "loss": 0.5452, "step": 653 }, { "epoch": 0.16172106824925817, "grad_norm": 0.901014609282978, "learning_rate": 4.993518076911766e-06, "loss": 0.5111, "step": 654 }, { "epoch": 0.1619683481701286, "grad_norm": 0.8774481271049077, "learning_rate": 4.993494665510825e-06, "loss": 0.5152, "step": 655 }, { "epoch": 0.16221562809099901, "grad_norm": 0.8413271828266707, "learning_rate": 4.993471211962508e-06, "loss": 0.5041, "step": 656 }, { "epoch": 0.16246290801186944, "grad_norm": 0.8387696777792335, "learning_rate": 4.993447716267211e-06, "loss": 0.4912, "step": 657 }, { "epoch": 0.16271018793273986, "grad_norm": 0.8858353120807996, "learning_rate": 4.993424178425334e-06, "loss": 0.5147, "step": 658 }, { "epoch": 0.1629574678536103, "grad_norm": 0.8753424782843612, "learning_rate": 4.9934005984372725e-06, "loss": 0.5108, "step": 659 }, { "epoch": 0.1632047477744807, "grad_norm": 0.8503862309995437, "learning_rate": 4.993376976303426e-06, "loss": 0.5374, "step": 660 }, { "epoch": 0.16345202769535114, "grad_norm": 0.9263303515730487, "learning_rate": 4.9933533120241925e-06, "loss": 0.5227, "step": 661 }, { "epoch": 0.16369930761622156, "grad_norm": 0.8091103017427309, "learning_rate": 4.993329605599974e-06, "loss": 0.4703, "step": 662 }, { "epoch": 0.163946587537092, "grad_norm": 0.8369441099222458, "learning_rate": 4.99330585703117e-06, "loss": 0.5025, "step": 663 }, { "epoch": 0.1641938674579624, "grad_norm": 0.8541506168890921, "learning_rate": 4.993282066318182e-06, "loss": 0.5111, "step": 664 }, { "epoch": 0.16444114737883284, "grad_norm": 0.8193787625042165, "learning_rate": 4.9932582334614124e-06, "loss": 0.5036, "step": 665 }, { "epoch": 0.16468842729970326, "grad_norm": 0.8507283745756844, "learning_rate": 4.993234358461264e-06, "loss": 0.5103, "step": 666 }, { "epoch": 0.16493570722057369, "grad_norm": 0.8357763055370562, "learning_rate": 4.9932104413181405e-06, "loss": 0.5099, "step": 667 }, { "epoch": 0.1651829871414441, "grad_norm": 0.7974423447277945, "learning_rate": 4.9931864820324445e-06, "loss": 0.526, "step": 668 }, { "epoch": 0.16543026706231453, "grad_norm": 0.856566840869885, "learning_rate": 4.993162480604584e-06, "loss": 0.5296, "step": 669 }, { "epoch": 0.16567754698318496, "grad_norm": 0.8202309677412768, "learning_rate": 4.993138437034963e-06, "loss": 0.5371, "step": 670 }, { "epoch": 0.16592482690405538, "grad_norm": 0.8537092958810982, "learning_rate": 4.993114351323987e-06, "loss": 0.5363, "step": 671 }, { "epoch": 0.1661721068249258, "grad_norm": 0.8941502744703057, "learning_rate": 4.993090223472065e-06, "loss": 0.5275, "step": 672 }, { "epoch": 0.16641938674579623, "grad_norm": 0.8602589105442886, "learning_rate": 4.9930660534796046e-06, "loss": 0.5173, "step": 673 }, { "epoch": 0.16666666666666666, "grad_norm": 0.8547239627917911, "learning_rate": 4.993041841347012e-06, "loss": 0.5322, "step": 674 }, { "epoch": 0.16691394658753708, "grad_norm": 0.8148644041571959, "learning_rate": 4.9930175870747e-06, "loss": 0.544, "step": 675 }, { "epoch": 0.1671612265084075, "grad_norm": 0.8546772113475981, "learning_rate": 4.992993290663076e-06, "loss": 0.4969, "step": 676 }, { "epoch": 0.16740850642927793, "grad_norm": 0.9273768329874028, "learning_rate": 4.9929689521125515e-06, "loss": 0.5229, "step": 677 }, { "epoch": 0.16765578635014836, "grad_norm": 0.8370005835873986, "learning_rate": 4.992944571423538e-06, "loss": 0.4871, "step": 678 }, { "epoch": 0.16790306627101878, "grad_norm": 0.8525307890631186, "learning_rate": 4.992920148596447e-06, "loss": 0.5083, "step": 679 }, { "epoch": 0.1681503461918892, "grad_norm": 0.8290265597255343, "learning_rate": 4.9928956836316915e-06, "loss": 0.5309, "step": 680 }, { "epoch": 0.16839762611275966, "grad_norm": 0.8254586728696632, "learning_rate": 4.992871176529686e-06, "loss": 0.5231, "step": 681 }, { "epoch": 0.16864490603363008, "grad_norm": 0.827176590783688, "learning_rate": 4.992846627290844e-06, "loss": 0.5417, "step": 682 }, { "epoch": 0.1688921859545005, "grad_norm": 0.8415975897359966, "learning_rate": 4.99282203591558e-06, "loss": 0.507, "step": 683 }, { "epoch": 0.16913946587537093, "grad_norm": 0.8563520086330875, "learning_rate": 4.99279740240431e-06, "loss": 0.4986, "step": 684 }, { "epoch": 0.16938674579624136, "grad_norm": 0.8614893988702672, "learning_rate": 4.992772726757451e-06, "loss": 0.5088, "step": 685 }, { "epoch": 0.16963402571711178, "grad_norm": 0.7756849470892396, "learning_rate": 4.992748008975419e-06, "loss": 0.5599, "step": 686 }, { "epoch": 0.1698813056379822, "grad_norm": 0.8256108345562647, "learning_rate": 4.992723249058633e-06, "loss": 0.4938, "step": 687 }, { "epoch": 0.17012858555885263, "grad_norm": 0.8319058935689565, "learning_rate": 4.992698447007511e-06, "loss": 0.5157, "step": 688 }, { "epoch": 0.17037586547972305, "grad_norm": 0.8374831493414546, "learning_rate": 4.992673602822472e-06, "loss": 0.5417, "step": 689 }, { "epoch": 0.17062314540059348, "grad_norm": 0.8135538110680443, "learning_rate": 4.992648716503936e-06, "loss": 0.5134, "step": 690 }, { "epoch": 0.1708704253214639, "grad_norm": 0.9041030573320586, "learning_rate": 4.9926237880523235e-06, "loss": 0.5517, "step": 691 }, { "epoch": 0.17111770524233433, "grad_norm": 0.895349923324367, "learning_rate": 4.9925988174680565e-06, "loss": 0.5376, "step": 692 }, { "epoch": 0.17136498516320475, "grad_norm": 0.94778749124042, "learning_rate": 4.992573804751557e-06, "loss": 0.5212, "step": 693 }, { "epoch": 0.17161226508407518, "grad_norm": 0.8730544070580147, "learning_rate": 4.992548749903247e-06, "loss": 0.5119, "step": 694 }, { "epoch": 0.1718595450049456, "grad_norm": 0.8316096946499686, "learning_rate": 4.9925236529235495e-06, "loss": 0.5234, "step": 695 }, { "epoch": 0.17210682492581603, "grad_norm": 0.8724157769080901, "learning_rate": 4.992498513812891e-06, "loss": 0.5287, "step": 696 }, { "epoch": 0.17235410484668645, "grad_norm": 0.919022031040849, "learning_rate": 4.992473332571696e-06, "loss": 0.487, "step": 697 }, { "epoch": 0.17260138476755688, "grad_norm": 0.8603603196815058, "learning_rate": 4.9924481092003874e-06, "loss": 0.5009, "step": 698 }, { "epoch": 0.1728486646884273, "grad_norm": 0.8609657848997537, "learning_rate": 4.992422843699394e-06, "loss": 0.537, "step": 699 }, { "epoch": 0.17309594460929772, "grad_norm": 0.9497038354277341, "learning_rate": 4.992397536069143e-06, "loss": 0.5157, "step": 700 }, { "epoch": 0.17334322453016815, "grad_norm": 0.925901592259855, "learning_rate": 4.99237218631006e-06, "loss": 0.5256, "step": 701 }, { "epoch": 0.17359050445103857, "grad_norm": 0.8059602370847663, "learning_rate": 4.992346794422576e-06, "loss": 0.509, "step": 702 }, { "epoch": 0.173837784371909, "grad_norm": 0.8093787052289616, "learning_rate": 4.992321360407119e-06, "loss": 0.5125, "step": 703 }, { "epoch": 0.17408506429277942, "grad_norm": 0.8354283265994193, "learning_rate": 4.992295884264119e-06, "loss": 0.5071, "step": 704 }, { "epoch": 0.17433234421364985, "grad_norm": 0.8442582808343858, "learning_rate": 4.992270365994006e-06, "loss": 0.53, "step": 705 }, { "epoch": 0.17457962413452027, "grad_norm": 0.7836454779510944, "learning_rate": 4.9922448055972125e-06, "loss": 0.5214, "step": 706 }, { "epoch": 0.1748269040553907, "grad_norm": 0.840735881666944, "learning_rate": 4.99221920307417e-06, "loss": 0.5171, "step": 707 }, { "epoch": 0.17507418397626112, "grad_norm": 0.8829052650768754, "learning_rate": 4.992193558425311e-06, "loss": 0.5168, "step": 708 }, { "epoch": 0.17532146389713155, "grad_norm": 0.8729350937304439, "learning_rate": 4.9921678716510705e-06, "loss": 0.4853, "step": 709 }, { "epoch": 0.17556874381800197, "grad_norm": 0.9103933430094363, "learning_rate": 4.9921421427518804e-06, "loss": 0.4959, "step": 710 }, { "epoch": 0.1758160237388724, "grad_norm": 0.8035835757258305, "learning_rate": 4.992116371728176e-06, "loss": 0.4941, "step": 711 }, { "epoch": 0.17606330365974282, "grad_norm": 0.8305929115660557, "learning_rate": 4.9920905585803945e-06, "loss": 0.4876, "step": 712 }, { "epoch": 0.17631058358061324, "grad_norm": 0.7978374484195752, "learning_rate": 4.992064703308971e-06, "loss": 0.5257, "step": 713 }, { "epoch": 0.17655786350148367, "grad_norm": 0.8418807640743572, "learning_rate": 4.992038805914343e-06, "loss": 0.513, "step": 714 }, { "epoch": 0.1768051434223541, "grad_norm": 0.8330452494475705, "learning_rate": 4.992012866396948e-06, "loss": 0.4956, "step": 715 }, { "epoch": 0.17705242334322452, "grad_norm": 0.8661744791086904, "learning_rate": 4.991986884757224e-06, "loss": 0.4939, "step": 716 }, { "epoch": 0.17729970326409494, "grad_norm": 0.8486526830433232, "learning_rate": 4.991960860995611e-06, "loss": 0.4879, "step": 717 }, { "epoch": 0.17754698318496537, "grad_norm": 0.8352424841247117, "learning_rate": 4.991934795112548e-06, "loss": 0.4961, "step": 718 }, { "epoch": 0.1777942631058358, "grad_norm": 0.8789343341243142, "learning_rate": 4.991908687108477e-06, "loss": 0.4897, "step": 719 }, { "epoch": 0.17804154302670624, "grad_norm": 0.823652151046163, "learning_rate": 4.991882536983839e-06, "loss": 0.512, "step": 720 }, { "epoch": 0.17828882294757667, "grad_norm": 0.8134645433823909, "learning_rate": 4.991856344739073e-06, "loss": 0.5404, "step": 721 }, { "epoch": 0.1785361028684471, "grad_norm": 0.8501968648399841, "learning_rate": 4.991830110374626e-06, "loss": 0.5137, "step": 722 }, { "epoch": 0.17878338278931752, "grad_norm": 0.801152491159058, "learning_rate": 4.991803833890939e-06, "loss": 0.5255, "step": 723 }, { "epoch": 0.17903066271018794, "grad_norm": 0.8249255075508455, "learning_rate": 4.991777515288457e-06, "loss": 0.518, "step": 724 }, { "epoch": 0.17927794263105837, "grad_norm": 0.8299863504018892, "learning_rate": 4.991751154567625e-06, "loss": 0.4974, "step": 725 }, { "epoch": 0.1795252225519288, "grad_norm": 0.902641066855043, "learning_rate": 4.991724751728888e-06, "loss": 0.4966, "step": 726 }, { "epoch": 0.17977250247279922, "grad_norm": 0.8069318501808427, "learning_rate": 4.991698306772692e-06, "loss": 0.5069, "step": 727 }, { "epoch": 0.18001978239366964, "grad_norm": 0.8545994506172374, "learning_rate": 4.991671819699484e-06, "loss": 0.4956, "step": 728 }, { "epoch": 0.18026706231454007, "grad_norm": 0.8164400523644465, "learning_rate": 4.9916452905097135e-06, "loss": 0.5065, "step": 729 }, { "epoch": 0.1805143422354105, "grad_norm": 0.8086218873858831, "learning_rate": 4.991618719203827e-06, "loss": 0.5011, "step": 730 }, { "epoch": 0.18076162215628092, "grad_norm": 0.8552002311074453, "learning_rate": 4.991592105782274e-06, "loss": 0.5104, "step": 731 }, { "epoch": 0.18100890207715134, "grad_norm": 0.8717792545878794, "learning_rate": 4.9915654502455045e-06, "loss": 0.534, "step": 732 }, { "epoch": 0.18125618199802176, "grad_norm": 0.7849441865788286, "learning_rate": 4.9915387525939695e-06, "loss": 0.4982, "step": 733 }, { "epoch": 0.1815034619188922, "grad_norm": 0.8340027119415854, "learning_rate": 4.99151201282812e-06, "loss": 0.5142, "step": 734 }, { "epoch": 0.18175074183976261, "grad_norm": 0.8413817442374992, "learning_rate": 4.991485230948407e-06, "loss": 0.5332, "step": 735 }, { "epoch": 0.18199802176063304, "grad_norm": 0.8452871469316092, "learning_rate": 4.991458406955285e-06, "loss": 0.5022, "step": 736 }, { "epoch": 0.18224530168150346, "grad_norm": 0.872847298100631, "learning_rate": 4.991431540849206e-06, "loss": 0.4867, "step": 737 }, { "epoch": 0.1824925816023739, "grad_norm": 0.8514132061403205, "learning_rate": 4.991404632630625e-06, "loss": 0.5106, "step": 738 }, { "epoch": 0.1827398615232443, "grad_norm": 0.8949447429217282, "learning_rate": 4.991377682299996e-06, "loss": 0.4973, "step": 739 }, { "epoch": 0.18298714144411474, "grad_norm": 0.8326410302920144, "learning_rate": 4.991350689857775e-06, "loss": 0.5125, "step": 740 }, { "epoch": 0.18323442136498516, "grad_norm": 0.8675242195523758, "learning_rate": 4.9913236553044185e-06, "loss": 0.4978, "step": 741 }, { "epoch": 0.1834817012858556, "grad_norm": 0.801469598537518, "learning_rate": 4.991296578640383e-06, "loss": 0.5135, "step": 742 }, { "epoch": 0.183728981206726, "grad_norm": 0.8201428570448723, "learning_rate": 4.991269459866126e-06, "loss": 0.5144, "step": 743 }, { "epoch": 0.18397626112759644, "grad_norm": 0.8623718853955257, "learning_rate": 4.991242298982107e-06, "loss": 0.4808, "step": 744 }, { "epoch": 0.18422354104846686, "grad_norm": 0.7946767655860458, "learning_rate": 4.991215095988784e-06, "loss": 0.5226, "step": 745 }, { "epoch": 0.18447082096933728, "grad_norm": 0.8439539102467273, "learning_rate": 4.991187850886618e-06, "loss": 0.4925, "step": 746 }, { "epoch": 0.1847181008902077, "grad_norm": 0.7919859124879289, "learning_rate": 4.991160563676067e-06, "loss": 0.4916, "step": 747 }, { "epoch": 0.18496538081107813, "grad_norm": 0.8643732765638406, "learning_rate": 4.991133234357595e-06, "loss": 0.4911, "step": 748 }, { "epoch": 0.18521266073194856, "grad_norm": 0.8750805346885794, "learning_rate": 4.9911058629316615e-06, "loss": 0.4655, "step": 749 }, { "epoch": 0.18545994065281898, "grad_norm": 0.8199097338629083, "learning_rate": 4.991078449398732e-06, "loss": 0.523, "step": 750 }, { "epoch": 0.1857072205736894, "grad_norm": 0.8894643167179568, "learning_rate": 4.991050993759268e-06, "loss": 0.4973, "step": 751 }, { "epoch": 0.18595450049455983, "grad_norm": 0.8224624160630544, "learning_rate": 4.991023496013734e-06, "loss": 0.4931, "step": 752 }, { "epoch": 0.18620178041543026, "grad_norm": 0.8215579795094418, "learning_rate": 4.990995956162593e-06, "loss": 0.5023, "step": 753 }, { "epoch": 0.18644906033630068, "grad_norm": 0.8083894480373339, "learning_rate": 4.990968374206314e-06, "loss": 0.5224, "step": 754 }, { "epoch": 0.1866963402571711, "grad_norm": 0.8521252726155766, "learning_rate": 4.9909407501453625e-06, "loss": 0.511, "step": 755 }, { "epoch": 0.18694362017804153, "grad_norm": 0.8703568142440704, "learning_rate": 4.990913083980202e-06, "loss": 0.5258, "step": 756 }, { "epoch": 0.18719090009891196, "grad_norm": 0.84010044458961, "learning_rate": 4.990885375711304e-06, "loss": 0.5004, "step": 757 }, { "epoch": 0.18743818001978238, "grad_norm": 0.8318706342323815, "learning_rate": 4.990857625339135e-06, "loss": 0.5028, "step": 758 }, { "epoch": 0.18768545994065283, "grad_norm": 0.8416207130288796, "learning_rate": 4.9908298328641645e-06, "loss": 0.5053, "step": 759 }, { "epoch": 0.18793273986152326, "grad_norm": 0.8888518172116866, "learning_rate": 4.9908019982868625e-06, "loss": 0.5172, "step": 760 }, { "epoch": 0.18818001978239368, "grad_norm": 0.8502389509777805, "learning_rate": 4.990774121607699e-06, "loss": 0.5059, "step": 761 }, { "epoch": 0.1884272997032641, "grad_norm": 0.8610956280514308, "learning_rate": 4.990746202827145e-06, "loss": 0.535, "step": 762 }, { "epoch": 0.18867457962413453, "grad_norm": 0.7972219564385539, "learning_rate": 4.990718241945673e-06, "loss": 0.5218, "step": 763 }, { "epoch": 0.18892185954500496, "grad_norm": 0.7805412892800699, "learning_rate": 4.990690238963756e-06, "loss": 0.509, "step": 764 }, { "epoch": 0.18916913946587538, "grad_norm": 0.8236493383786434, "learning_rate": 4.990662193881865e-06, "loss": 0.5031, "step": 765 }, { "epoch": 0.1894164193867458, "grad_norm": 0.8999257859616762, "learning_rate": 4.9906341067004784e-06, "loss": 0.488, "step": 766 }, { "epoch": 0.18966369930761623, "grad_norm": 0.8708077868377747, "learning_rate": 4.990605977420067e-06, "loss": 0.5233, "step": 767 }, { "epoch": 0.18991097922848665, "grad_norm": 0.8420116856277404, "learning_rate": 4.990577806041108e-06, "loss": 0.5135, "step": 768 }, { "epoch": 0.19015825914935708, "grad_norm": 0.8548814632547316, "learning_rate": 4.990549592564076e-06, "loss": 0.4867, "step": 769 }, { "epoch": 0.1904055390702275, "grad_norm": 0.8268530338638478, "learning_rate": 4.99052133698945e-06, "loss": 0.4688, "step": 770 }, { "epoch": 0.19065281899109793, "grad_norm": 0.8320420070519919, "learning_rate": 4.990493039317707e-06, "loss": 0.5263, "step": 771 }, { "epoch": 0.19090009891196835, "grad_norm": 0.8365840878509793, "learning_rate": 4.990464699549325e-06, "loss": 0.4931, "step": 772 }, { "epoch": 0.19114737883283878, "grad_norm": 0.8101506431051484, "learning_rate": 4.990436317684782e-06, "loss": 0.5262, "step": 773 }, { "epoch": 0.1913946587537092, "grad_norm": 0.8647254791244104, "learning_rate": 4.990407893724561e-06, "loss": 0.5095, "step": 774 }, { "epoch": 0.19164193867457963, "grad_norm": 0.813120102249058, "learning_rate": 4.990379427669138e-06, "loss": 0.4984, "step": 775 }, { "epoch": 0.19188921859545005, "grad_norm": 0.8042977000267384, "learning_rate": 4.990350919518997e-06, "loss": 0.4978, "step": 776 }, { "epoch": 0.19213649851632048, "grad_norm": 0.8483867646954429, "learning_rate": 4.9903223692746196e-06, "loss": 0.4732, "step": 777 }, { "epoch": 0.1923837784371909, "grad_norm": 0.8647994593068955, "learning_rate": 4.990293776936488e-06, "loss": 0.545, "step": 778 }, { "epoch": 0.19263105835806132, "grad_norm": 0.8740789736428879, "learning_rate": 4.990265142505085e-06, "loss": 0.4954, "step": 779 }, { "epoch": 0.19287833827893175, "grad_norm": 0.8174292157713233, "learning_rate": 4.990236465980896e-06, "loss": 0.4935, "step": 780 }, { "epoch": 0.19312561819980217, "grad_norm": 0.8630949715708419, "learning_rate": 4.990207747364404e-06, "loss": 0.5115, "step": 781 }, { "epoch": 0.1933728981206726, "grad_norm": 0.8452101849069688, "learning_rate": 4.9901789866560955e-06, "loss": 0.5075, "step": 782 }, { "epoch": 0.19362017804154302, "grad_norm": 0.8222049299567881, "learning_rate": 4.990150183856457e-06, "loss": 0.4868, "step": 783 }, { "epoch": 0.19386745796241345, "grad_norm": 0.8045405700762853, "learning_rate": 4.990121338965975e-06, "loss": 0.4996, "step": 784 }, { "epoch": 0.19411473788328387, "grad_norm": 0.839462282900959, "learning_rate": 4.9900924519851354e-06, "loss": 0.513, "step": 785 }, { "epoch": 0.1943620178041543, "grad_norm": 0.8129960974230438, "learning_rate": 4.990063522914429e-06, "loss": 0.5067, "step": 786 }, { "epoch": 0.19460929772502472, "grad_norm": 0.8498358711521139, "learning_rate": 4.990034551754344e-06, "loss": 0.4971, "step": 787 }, { "epoch": 0.19485657764589515, "grad_norm": 0.8342170766695094, "learning_rate": 4.9900055385053696e-06, "loss": 0.5785, "step": 788 }, { "epoch": 0.19510385756676557, "grad_norm": 0.7720839055732265, "learning_rate": 4.9899764831679954e-06, "loss": 0.5091, "step": 789 }, { "epoch": 0.195351137487636, "grad_norm": 0.8448261448148362, "learning_rate": 4.989947385742715e-06, "loss": 0.5128, "step": 790 }, { "epoch": 0.19559841740850642, "grad_norm": 0.8355306276455249, "learning_rate": 4.9899182462300175e-06, "loss": 0.5334, "step": 791 }, { "epoch": 0.19584569732937684, "grad_norm": 0.8108028837754382, "learning_rate": 4.989889064630397e-06, "loss": 0.5037, "step": 792 }, { "epoch": 0.19609297725024727, "grad_norm": 0.8099327440790443, "learning_rate": 4.989859840944346e-06, "loss": 0.5074, "step": 793 }, { "epoch": 0.1963402571711177, "grad_norm": 0.8540803863374226, "learning_rate": 4.989830575172361e-06, "loss": 0.4785, "step": 794 }, { "epoch": 0.19658753709198812, "grad_norm": 0.8361945024114827, "learning_rate": 4.9898012673149325e-06, "loss": 0.4938, "step": 795 }, { "epoch": 0.19683481701285854, "grad_norm": 0.8603101439414645, "learning_rate": 4.989771917372559e-06, "loss": 0.458, "step": 796 }, { "epoch": 0.19708209693372897, "grad_norm": 0.8623322983540171, "learning_rate": 4.989742525345736e-06, "loss": 0.5032, "step": 797 }, { "epoch": 0.19732937685459942, "grad_norm": 0.8473115195100572, "learning_rate": 4.9897130912349585e-06, "loss": 0.4936, "step": 798 }, { "epoch": 0.19757665677546984, "grad_norm": 0.7940684143161678, "learning_rate": 4.9896836150407256e-06, "loss": 0.5473, "step": 799 }, { "epoch": 0.19782393669634027, "grad_norm": 0.8272717457807083, "learning_rate": 4.989654096763537e-06, "loss": 0.5171, "step": 800 }, { "epoch": 0.1980712166172107, "grad_norm": 0.8902992860806108, "learning_rate": 4.989624536403888e-06, "loss": 0.5375, "step": 801 }, { "epoch": 0.19831849653808112, "grad_norm": 0.9101693315058536, "learning_rate": 4.989594933962281e-06, "loss": 0.4882, "step": 802 }, { "epoch": 0.19856577645895154, "grad_norm": 0.823665912051704, "learning_rate": 4.989565289439216e-06, "loss": 0.5004, "step": 803 }, { "epoch": 0.19881305637982197, "grad_norm": 0.8537141855296777, "learning_rate": 4.9895356028351936e-06, "loss": 0.5057, "step": 804 }, { "epoch": 0.1990603363006924, "grad_norm": 0.8499363471459407, "learning_rate": 4.989505874150716e-06, "loss": 0.5051, "step": 805 }, { "epoch": 0.19930761622156282, "grad_norm": 0.8713007651523925, "learning_rate": 4.989476103386285e-06, "loss": 0.5093, "step": 806 }, { "epoch": 0.19955489614243324, "grad_norm": 0.8878831506094113, "learning_rate": 4.9894462905424035e-06, "loss": 0.5067, "step": 807 }, { "epoch": 0.19980217606330367, "grad_norm": 0.8677379139060017, "learning_rate": 4.989416435619577e-06, "loss": 0.5013, "step": 808 }, { "epoch": 0.2000494559841741, "grad_norm": 0.8442842813703827, "learning_rate": 4.98938653861831e-06, "loss": 0.5048, "step": 809 }, { "epoch": 0.20029673590504452, "grad_norm": 0.8111674446939234, "learning_rate": 4.989356599539106e-06, "loss": 0.5167, "step": 810 }, { "epoch": 0.20054401582591494, "grad_norm": 0.8954173542551098, "learning_rate": 4.989326618382471e-06, "loss": 0.5147, "step": 811 }, { "epoch": 0.20079129574678536, "grad_norm": 0.9233165216979647, "learning_rate": 4.9892965951489154e-06, "loss": 0.5064, "step": 812 }, { "epoch": 0.2010385756676558, "grad_norm": 0.9134666739113686, "learning_rate": 4.989266529838943e-06, "loss": 0.5009, "step": 813 }, { "epoch": 0.2012858555885262, "grad_norm": 0.8492218856969446, "learning_rate": 4.989236422453064e-06, "loss": 0.5124, "step": 814 }, { "epoch": 0.20153313550939664, "grad_norm": 0.8652036940944711, "learning_rate": 4.989206272991785e-06, "loss": 0.5366, "step": 815 }, { "epoch": 0.20178041543026706, "grad_norm": 0.8418660565061913, "learning_rate": 4.9891760814556186e-06, "loss": 0.5105, "step": 816 }, { "epoch": 0.2020276953511375, "grad_norm": 0.8878975245185788, "learning_rate": 4.989145847845074e-06, "loss": 0.5132, "step": 817 }, { "epoch": 0.2022749752720079, "grad_norm": 0.8315141988567767, "learning_rate": 4.989115572160661e-06, "loss": 0.5008, "step": 818 }, { "epoch": 0.20252225519287834, "grad_norm": 0.8376037839192603, "learning_rate": 4.989085254402892e-06, "loss": 0.5057, "step": 819 }, { "epoch": 0.20276953511374876, "grad_norm": 0.8037174338863466, "learning_rate": 4.98905489457228e-06, "loss": 0.4975, "step": 820 }, { "epoch": 0.20301681503461919, "grad_norm": 0.8766782392881515, "learning_rate": 4.9890244926693385e-06, "loss": 0.4667, "step": 821 }, { "epoch": 0.2032640949554896, "grad_norm": 0.9181666145995278, "learning_rate": 4.98899404869458e-06, "loss": 0.5197, "step": 822 }, { "epoch": 0.20351137487636003, "grad_norm": 0.8421985589488679, "learning_rate": 4.98896356264852e-06, "loss": 0.4693, "step": 823 }, { "epoch": 0.20375865479723046, "grad_norm": 0.8414578412087721, "learning_rate": 4.988933034531674e-06, "loss": 0.4959, "step": 824 }, { "epoch": 0.20400593471810088, "grad_norm": 0.8207895326319039, "learning_rate": 4.988902464344557e-06, "loss": 0.4968, "step": 825 }, { "epoch": 0.2042532146389713, "grad_norm": 0.8258058754282276, "learning_rate": 4.988871852087687e-06, "loss": 0.4806, "step": 826 }, { "epoch": 0.20450049455984173, "grad_norm": 0.7911811305702641, "learning_rate": 4.988841197761581e-06, "loss": 0.5105, "step": 827 }, { "epoch": 0.20474777448071216, "grad_norm": 0.8067032711527126, "learning_rate": 4.988810501366756e-06, "loss": 0.4988, "step": 828 }, { "epoch": 0.20499505440158258, "grad_norm": 0.8248411254954989, "learning_rate": 4.988779762903733e-06, "loss": 0.4679, "step": 829 }, { "epoch": 0.205242334322453, "grad_norm": 0.8075962457898619, "learning_rate": 4.98874898237303e-06, "loss": 0.4802, "step": 830 }, { "epoch": 0.20548961424332343, "grad_norm": 0.8694350300009082, "learning_rate": 4.988718159775168e-06, "loss": 0.512, "step": 831 }, { "epoch": 0.20573689416419386, "grad_norm": 0.838526356063612, "learning_rate": 4.988687295110667e-06, "loss": 0.4772, "step": 832 }, { "epoch": 0.20598417408506428, "grad_norm": 0.8581376012516917, "learning_rate": 4.98865638838005e-06, "loss": 0.4997, "step": 833 }, { "epoch": 0.2062314540059347, "grad_norm": 0.8508868762945916, "learning_rate": 4.988625439583838e-06, "loss": 0.5016, "step": 834 }, { "epoch": 0.20647873392680513, "grad_norm": 0.8649500124082152, "learning_rate": 4.988594448722556e-06, "loss": 0.4915, "step": 835 }, { "epoch": 0.20672601384767555, "grad_norm": 0.8094411363162982, "learning_rate": 4.988563415796726e-06, "loss": 0.5196, "step": 836 }, { "epoch": 0.206973293768546, "grad_norm": 0.8414623331186055, "learning_rate": 4.988532340806873e-06, "loss": 0.5158, "step": 837 }, { "epoch": 0.20722057368941643, "grad_norm": 0.8243945840705411, "learning_rate": 4.9885012237535235e-06, "loss": 0.4897, "step": 838 }, { "epoch": 0.20746785361028686, "grad_norm": 0.9140469955319724, "learning_rate": 4.988470064637202e-06, "loss": 0.4759, "step": 839 }, { "epoch": 0.20771513353115728, "grad_norm": 0.8558925215879722, "learning_rate": 4.988438863458436e-06, "loss": 0.5119, "step": 840 }, { "epoch": 0.2079624134520277, "grad_norm": 0.815440252440534, "learning_rate": 4.988407620217752e-06, "loss": 0.4945, "step": 841 }, { "epoch": 0.20820969337289813, "grad_norm": 0.8232200098822217, "learning_rate": 4.988376334915679e-06, "loss": 0.4996, "step": 842 }, { "epoch": 0.20845697329376855, "grad_norm": 0.8691564428161908, "learning_rate": 4.988345007552746e-06, "loss": 0.5097, "step": 843 }, { "epoch": 0.20870425321463898, "grad_norm": 0.8264653567410514, "learning_rate": 4.9883136381294816e-06, "loss": 0.5119, "step": 844 }, { "epoch": 0.2089515331355094, "grad_norm": 0.8785002227076522, "learning_rate": 4.988282226646417e-06, "loss": 0.514, "step": 845 }, { "epoch": 0.20919881305637983, "grad_norm": 0.8023528924268092, "learning_rate": 4.988250773104083e-06, "loss": 0.5428, "step": 846 }, { "epoch": 0.20944609297725025, "grad_norm": 0.8211524437917096, "learning_rate": 4.98821927750301e-06, "loss": 0.5256, "step": 847 }, { "epoch": 0.20969337289812068, "grad_norm": 0.8010942784871227, "learning_rate": 4.988187739843731e-06, "loss": 0.5346, "step": 848 }, { "epoch": 0.2099406528189911, "grad_norm": 0.9044984773545841, "learning_rate": 4.988156160126781e-06, "loss": 0.5038, "step": 849 }, { "epoch": 0.21018793273986153, "grad_norm": 0.8706964927904601, "learning_rate": 4.98812453835269e-06, "loss": 0.507, "step": 850 }, { "epoch": 0.21043521266073195, "grad_norm": 0.8736523995044558, "learning_rate": 4.988092874521996e-06, "loss": 0.4939, "step": 851 }, { "epoch": 0.21068249258160238, "grad_norm": 0.8851801188270471, "learning_rate": 4.988061168635232e-06, "loss": 0.5165, "step": 852 }, { "epoch": 0.2109297725024728, "grad_norm": 0.853684837000266, "learning_rate": 4.9880294206929356e-06, "loss": 0.5153, "step": 853 }, { "epoch": 0.21117705242334323, "grad_norm": 0.911771572862549, "learning_rate": 4.9879976306956415e-06, "loss": 0.4859, "step": 854 }, { "epoch": 0.21142433234421365, "grad_norm": 0.8626964859349916, "learning_rate": 4.987965798643889e-06, "loss": 0.4644, "step": 855 }, { "epoch": 0.21167161226508407, "grad_norm": 0.8572993279837756, "learning_rate": 4.987933924538215e-06, "loss": 0.5022, "step": 856 }, { "epoch": 0.2119188921859545, "grad_norm": 0.8963927749480562, "learning_rate": 4.987902008379159e-06, "loss": 0.4736, "step": 857 }, { "epoch": 0.21216617210682492, "grad_norm": 0.8505265096635852, "learning_rate": 4.987870050167259e-06, "loss": 0.509, "step": 858 }, { "epoch": 0.21241345202769535, "grad_norm": 0.8393583930696208, "learning_rate": 4.987838049903058e-06, "loss": 0.5031, "step": 859 }, { "epoch": 0.21266073194856577, "grad_norm": 0.8669348205976165, "learning_rate": 4.987806007587094e-06, "loss": 0.4743, "step": 860 }, { "epoch": 0.2129080118694362, "grad_norm": 0.8272724159339694, "learning_rate": 4.9877739232199095e-06, "loss": 0.5207, "step": 861 }, { "epoch": 0.21315529179030662, "grad_norm": 0.8732070794723419, "learning_rate": 4.987741796802047e-06, "loss": 0.5009, "step": 862 }, { "epoch": 0.21340257171117705, "grad_norm": 0.8813154447433397, "learning_rate": 4.987709628334051e-06, "loss": 0.5135, "step": 863 }, { "epoch": 0.21364985163204747, "grad_norm": 0.8325507743445312, "learning_rate": 4.987677417816462e-06, "loss": 0.5058, "step": 864 }, { "epoch": 0.2138971315529179, "grad_norm": 0.8747157253538062, "learning_rate": 4.987645165249827e-06, "loss": 0.5056, "step": 865 }, { "epoch": 0.21414441147378832, "grad_norm": 0.90232302210331, "learning_rate": 4.987612870634691e-06, "loss": 0.5056, "step": 866 }, { "epoch": 0.21439169139465875, "grad_norm": 0.84923644734889, "learning_rate": 4.987580533971599e-06, "loss": 0.4947, "step": 867 }, { "epoch": 0.21463897131552917, "grad_norm": 0.8698770676663805, "learning_rate": 4.9875481552610975e-06, "loss": 0.5125, "step": 868 }, { "epoch": 0.2148862512363996, "grad_norm": 0.817003058821012, "learning_rate": 4.9875157345037345e-06, "loss": 0.5422, "step": 869 }, { "epoch": 0.21513353115727002, "grad_norm": 0.8417204204341288, "learning_rate": 4.9874832717000576e-06, "loss": 0.4953, "step": 870 }, { "epoch": 0.21538081107814044, "grad_norm": 0.9572380936686788, "learning_rate": 4.9874507668506155e-06, "loss": 0.4844, "step": 871 }, { "epoch": 0.21562809099901087, "grad_norm": 0.834643198655235, "learning_rate": 4.987418219955958e-06, "loss": 0.5242, "step": 872 }, { "epoch": 0.2158753709198813, "grad_norm": 0.8559434479885895, "learning_rate": 4.987385631016635e-06, "loss": 0.5118, "step": 873 }, { "epoch": 0.21612265084075172, "grad_norm": 0.8746611306575874, "learning_rate": 4.987353000033197e-06, "loss": 0.4964, "step": 874 }, { "epoch": 0.21636993076162217, "grad_norm": 0.8810491800616369, "learning_rate": 4.987320327006196e-06, "loss": 0.4804, "step": 875 }, { "epoch": 0.2166172106824926, "grad_norm": 0.857857539044612, "learning_rate": 4.987287611936185e-06, "loss": 0.5073, "step": 876 }, { "epoch": 0.21686449060336302, "grad_norm": 0.8268996691952446, "learning_rate": 4.987254854823715e-06, "loss": 0.5007, "step": 877 }, { "epoch": 0.21711177052423344, "grad_norm": 0.828244801524196, "learning_rate": 4.987222055669342e-06, "loss": 0.5075, "step": 878 }, { "epoch": 0.21735905044510387, "grad_norm": 0.8662331481181559, "learning_rate": 4.987189214473618e-06, "loss": 0.5054, "step": 879 }, { "epoch": 0.2176063303659743, "grad_norm": 0.840848336618183, "learning_rate": 4.987156331237099e-06, "loss": 0.4954, "step": 880 }, { "epoch": 0.21785361028684472, "grad_norm": 0.8368772772807719, "learning_rate": 4.987123405960343e-06, "loss": 0.5116, "step": 881 }, { "epoch": 0.21810089020771514, "grad_norm": 0.8593385012576953, "learning_rate": 4.987090438643904e-06, "loss": 0.5273, "step": 882 }, { "epoch": 0.21834817012858557, "grad_norm": 0.8364107176881544, "learning_rate": 4.98705742928834e-06, "loss": 0.5014, "step": 883 }, { "epoch": 0.218595450049456, "grad_norm": 0.8227427355602717, "learning_rate": 4.987024377894208e-06, "loss": 0.4951, "step": 884 }, { "epoch": 0.21884272997032642, "grad_norm": 0.8150154334336953, "learning_rate": 4.986991284462068e-06, "loss": 0.49, "step": 885 }, { "epoch": 0.21909000989119684, "grad_norm": 0.8449279651032495, "learning_rate": 4.98695814899248e-06, "loss": 0.5147, "step": 886 }, { "epoch": 0.21933728981206727, "grad_norm": 0.8199419337132845, "learning_rate": 4.986924971486001e-06, "loss": 0.5203, "step": 887 }, { "epoch": 0.2195845697329377, "grad_norm": 0.8211194462863136, "learning_rate": 4.986891751943196e-06, "loss": 0.5027, "step": 888 }, { "epoch": 0.21983184965380811, "grad_norm": 0.8510718071011489, "learning_rate": 4.986858490364624e-06, "loss": 0.4842, "step": 889 }, { "epoch": 0.22007912957467854, "grad_norm": 0.8122596001789849, "learning_rate": 4.986825186750846e-06, "loss": 0.4882, "step": 890 }, { "epoch": 0.22032640949554896, "grad_norm": 0.8227992726845009, "learning_rate": 4.986791841102427e-06, "loss": 0.4894, "step": 891 }, { "epoch": 0.2205736894164194, "grad_norm": 0.8330978830853362, "learning_rate": 4.986758453419931e-06, "loss": 0.5047, "step": 892 }, { "epoch": 0.2208209693372898, "grad_norm": 0.8289885814322678, "learning_rate": 4.986725023703921e-06, "loss": 0.5211, "step": 893 }, { "epoch": 0.22106824925816024, "grad_norm": 0.7967487040612072, "learning_rate": 4.986691551954962e-06, "loss": 0.4961, "step": 894 }, { "epoch": 0.22131552917903066, "grad_norm": 0.8663375538553278, "learning_rate": 4.986658038173621e-06, "loss": 0.51, "step": 895 }, { "epoch": 0.2215628090999011, "grad_norm": 0.7906515926897258, "learning_rate": 4.986624482360464e-06, "loss": 0.5029, "step": 896 }, { "epoch": 0.2218100890207715, "grad_norm": 0.7744820254840943, "learning_rate": 4.986590884516057e-06, "loss": 0.5023, "step": 897 }, { "epoch": 0.22205736894164194, "grad_norm": 0.883399410152621, "learning_rate": 4.98655724464097e-06, "loss": 0.4854, "step": 898 }, { "epoch": 0.22230464886251236, "grad_norm": 0.8630333018932411, "learning_rate": 4.98652356273577e-06, "loss": 0.4881, "step": 899 }, { "epoch": 0.22255192878338279, "grad_norm": 0.8277706025700802, "learning_rate": 4.986489838801027e-06, "loss": 0.5004, "step": 900 }, { "epoch": 0.2227992087042532, "grad_norm": 0.8204017170552954, "learning_rate": 4.98645607283731e-06, "loss": 0.4963, "step": 901 }, { "epoch": 0.22304648862512363, "grad_norm": 0.8265861428479575, "learning_rate": 4.986422264845191e-06, "loss": 0.4839, "step": 902 }, { "epoch": 0.22329376854599406, "grad_norm": 0.8276540389714453, "learning_rate": 4.986388414825242e-06, "loss": 0.4946, "step": 903 }, { "epoch": 0.22354104846686448, "grad_norm": 0.8699787108810072, "learning_rate": 4.986354522778033e-06, "loss": 0.5113, "step": 904 }, { "epoch": 0.2237883283877349, "grad_norm": 0.7617126884375469, "learning_rate": 4.986320588704139e-06, "loss": 0.5106, "step": 905 }, { "epoch": 0.22403560830860533, "grad_norm": 0.8589768854675831, "learning_rate": 4.986286612604132e-06, "loss": 0.4754, "step": 906 }, { "epoch": 0.22428288822947576, "grad_norm": 0.8161695889367072, "learning_rate": 4.986252594478588e-06, "loss": 0.4865, "step": 907 }, { "epoch": 0.22453016815034618, "grad_norm": 0.8425510497631642, "learning_rate": 4.98621853432808e-06, "loss": 0.4961, "step": 908 }, { "epoch": 0.2247774480712166, "grad_norm": 0.8942256571041988, "learning_rate": 4.986184432153185e-06, "loss": 0.5114, "step": 909 }, { "epoch": 0.22502472799208703, "grad_norm": 0.8569701182433922, "learning_rate": 4.986150287954479e-06, "loss": 0.4747, "step": 910 }, { "epoch": 0.22527200791295746, "grad_norm": 0.8342740450002151, "learning_rate": 4.986116101732539e-06, "loss": 0.4965, "step": 911 }, { "epoch": 0.22551928783382788, "grad_norm": 0.906710700659202, "learning_rate": 4.986081873487944e-06, "loss": 0.5079, "step": 912 }, { "epoch": 0.2257665677546983, "grad_norm": 0.8145221155521087, "learning_rate": 4.98604760322127e-06, "loss": 0.4913, "step": 913 }, { "epoch": 0.22601384767556876, "grad_norm": 0.8166511496949852, "learning_rate": 4.986013290933099e-06, "loss": 0.472, "step": 914 }, { "epoch": 0.22626112759643918, "grad_norm": 0.914692426611638, "learning_rate": 4.98597893662401e-06, "loss": 0.4695, "step": 915 }, { "epoch": 0.2265084075173096, "grad_norm": 0.7809607473935676, "learning_rate": 4.985944540294584e-06, "loss": 0.5388, "step": 916 }, { "epoch": 0.22675568743818003, "grad_norm": 0.8231717409594672, "learning_rate": 4.9859101019454015e-06, "loss": 0.5249, "step": 917 }, { "epoch": 0.22700296735905046, "grad_norm": 0.8361911011029732, "learning_rate": 4.985875621577045e-06, "loss": 0.5121, "step": 918 }, { "epoch": 0.22725024727992088, "grad_norm": 0.8048366668863958, "learning_rate": 4.985841099190098e-06, "loss": 0.4892, "step": 919 }, { "epoch": 0.2274975272007913, "grad_norm": 0.8660579797696568, "learning_rate": 4.985806534785143e-06, "loss": 0.4799, "step": 920 }, { "epoch": 0.22774480712166173, "grad_norm": 0.8318167298034147, "learning_rate": 4.9857719283627635e-06, "loss": 0.498, "step": 921 }, { "epoch": 0.22799208704253215, "grad_norm": 0.8605410511892199, "learning_rate": 4.985737279923547e-06, "loss": 0.5138, "step": 922 }, { "epoch": 0.22823936696340258, "grad_norm": 0.8703326428337433, "learning_rate": 4.9857025894680775e-06, "loss": 0.4784, "step": 923 }, { "epoch": 0.228486646884273, "grad_norm": 0.8370959017594702, "learning_rate": 4.9856678569969415e-06, "loss": 0.488, "step": 924 }, { "epoch": 0.22873392680514343, "grad_norm": 0.8693400164771248, "learning_rate": 4.985633082510727e-06, "loss": 0.5124, "step": 925 }, { "epoch": 0.22898120672601385, "grad_norm": 0.8636007951346818, "learning_rate": 4.985598266010021e-06, "loss": 0.5016, "step": 926 }, { "epoch": 0.22922848664688428, "grad_norm": 0.8832259920894722, "learning_rate": 4.985563407495411e-06, "loss": 0.4954, "step": 927 }, { "epoch": 0.2294757665677547, "grad_norm": 0.8191758445943106, "learning_rate": 4.985528506967488e-06, "loss": 0.507, "step": 928 }, { "epoch": 0.22972304648862513, "grad_norm": 0.8789118908623763, "learning_rate": 4.985493564426841e-06, "loss": 0.4885, "step": 929 }, { "epoch": 0.22997032640949555, "grad_norm": 0.8924491857440008, "learning_rate": 4.985458579874061e-06, "loss": 0.5033, "step": 930 }, { "epoch": 0.23021760633036598, "grad_norm": 0.8383332268067973, "learning_rate": 4.9854235533097396e-06, "loss": 0.5156, "step": 931 }, { "epoch": 0.2304648862512364, "grad_norm": 0.8767336119514518, "learning_rate": 4.985388484734467e-06, "loss": 0.4849, "step": 932 }, { "epoch": 0.23071216617210683, "grad_norm": 0.8405975137037067, "learning_rate": 4.985353374148838e-06, "loss": 0.506, "step": 933 }, { "epoch": 0.23095944609297725, "grad_norm": 0.8170821427085228, "learning_rate": 4.9853182215534465e-06, "loss": 0.4962, "step": 934 }, { "epoch": 0.23120672601384767, "grad_norm": 0.8314334184214403, "learning_rate": 4.985283026948885e-06, "loss": 0.483, "step": 935 }, { "epoch": 0.2314540059347181, "grad_norm": 0.9084995950646598, "learning_rate": 4.985247790335748e-06, "loss": 0.4794, "step": 936 }, { "epoch": 0.23170128585558852, "grad_norm": 0.8483507056354166, "learning_rate": 4.9852125117146335e-06, "loss": 0.5065, "step": 937 }, { "epoch": 0.23194856577645895, "grad_norm": 0.8402483495826693, "learning_rate": 4.985177191086136e-06, "loss": 0.4915, "step": 938 }, { "epoch": 0.23219584569732937, "grad_norm": 0.823169932427246, "learning_rate": 4.985141828450852e-06, "loss": 0.4918, "step": 939 }, { "epoch": 0.2324431256181998, "grad_norm": 0.8243831987168234, "learning_rate": 4.985106423809381e-06, "loss": 0.4755, "step": 940 }, { "epoch": 0.23269040553907022, "grad_norm": 0.8252991162421606, "learning_rate": 4.98507097716232e-06, "loss": 0.5114, "step": 941 }, { "epoch": 0.23293768545994065, "grad_norm": 0.836106206941783, "learning_rate": 4.98503548851027e-06, "loss": 0.483, "step": 942 }, { "epoch": 0.23318496538081107, "grad_norm": 0.8506488758080649, "learning_rate": 4.984999957853829e-06, "loss": 0.4987, "step": 943 }, { "epoch": 0.2334322453016815, "grad_norm": 0.8289096749494381, "learning_rate": 4.984964385193598e-06, "loss": 0.5003, "step": 944 }, { "epoch": 0.23367952522255192, "grad_norm": 0.8768460912190904, "learning_rate": 4.9849287705301786e-06, "loss": 0.475, "step": 945 }, { "epoch": 0.23392680514342234, "grad_norm": 0.8140839084687289, "learning_rate": 4.984893113864173e-06, "loss": 0.5001, "step": 946 }, { "epoch": 0.23417408506429277, "grad_norm": 0.8527563886559387, "learning_rate": 4.9848574151961835e-06, "loss": 0.5011, "step": 947 }, { "epoch": 0.2344213649851632, "grad_norm": 0.8497649216961695, "learning_rate": 4.984821674526813e-06, "loss": 0.5048, "step": 948 }, { "epoch": 0.23466864490603362, "grad_norm": 0.8118793995529595, "learning_rate": 4.984785891856667e-06, "loss": 0.4742, "step": 949 }, { "epoch": 0.23491592482690404, "grad_norm": 0.8791594821173937, "learning_rate": 4.984750067186349e-06, "loss": 0.4938, "step": 950 }, { "epoch": 0.23516320474777447, "grad_norm": 0.8888145994376402, "learning_rate": 4.984714200516465e-06, "loss": 0.5096, "step": 951 }, { "epoch": 0.2354104846686449, "grad_norm": 0.8348077340788664, "learning_rate": 4.9846782918476225e-06, "loss": 0.4902, "step": 952 }, { "epoch": 0.23565776458951534, "grad_norm": 0.8778483850793724, "learning_rate": 4.9846423411804255e-06, "loss": 0.4926, "step": 953 }, { "epoch": 0.23590504451038577, "grad_norm": 0.8187933596056294, "learning_rate": 4.984606348515485e-06, "loss": 0.4858, "step": 954 }, { "epoch": 0.2361523244312562, "grad_norm": 0.7797555946495061, "learning_rate": 4.984570313853408e-06, "loss": 0.4931, "step": 955 }, { "epoch": 0.23639960435212662, "grad_norm": 0.8552422210148904, "learning_rate": 4.984534237194802e-06, "loss": 0.5172, "step": 956 }, { "epoch": 0.23664688427299704, "grad_norm": 0.8013218564963912, "learning_rate": 4.984498118540279e-06, "loss": 0.4941, "step": 957 }, { "epoch": 0.23689416419386747, "grad_norm": 0.8354238632072138, "learning_rate": 4.984461957890449e-06, "loss": 0.4857, "step": 958 }, { "epoch": 0.2371414441147379, "grad_norm": 0.8201898617265962, "learning_rate": 4.984425755245923e-06, "loss": 0.4968, "step": 959 }, { "epoch": 0.23738872403560832, "grad_norm": 0.8470092115227233, "learning_rate": 4.984389510607313e-06, "loss": 0.4862, "step": 960 }, { "epoch": 0.23763600395647874, "grad_norm": 0.845953728104242, "learning_rate": 4.984353223975231e-06, "loss": 0.486, "step": 961 }, { "epoch": 0.23788328387734917, "grad_norm": 0.8270735966336179, "learning_rate": 4.98431689535029e-06, "loss": 0.5043, "step": 962 }, { "epoch": 0.2381305637982196, "grad_norm": 0.8152648813245965, "learning_rate": 4.984280524733107e-06, "loss": 0.4591, "step": 963 }, { "epoch": 0.23837784371909002, "grad_norm": 0.8230530257009434, "learning_rate": 4.984244112124293e-06, "loss": 0.4709, "step": 964 }, { "epoch": 0.23862512363996044, "grad_norm": 0.8769324474154776, "learning_rate": 4.9842076575244665e-06, "loss": 0.4615, "step": 965 }, { "epoch": 0.23887240356083086, "grad_norm": 0.8170880234645825, "learning_rate": 4.984171160934243e-06, "loss": 0.4801, "step": 966 }, { "epoch": 0.2391196834817013, "grad_norm": 0.8473619382925522, "learning_rate": 4.9841346223542375e-06, "loss": 0.4687, "step": 967 }, { "epoch": 0.23936696340257171, "grad_norm": 0.8226901640257309, "learning_rate": 4.984098041785069e-06, "loss": 0.4927, "step": 968 }, { "epoch": 0.23961424332344214, "grad_norm": 0.9073747433995021, "learning_rate": 4.9840614192273565e-06, "loss": 0.4763, "step": 969 }, { "epoch": 0.23986152324431256, "grad_norm": 0.8676535240669757, "learning_rate": 4.984024754681717e-06, "loss": 0.4699, "step": 970 }, { "epoch": 0.240108803165183, "grad_norm": 0.8592342568796588, "learning_rate": 4.983988048148773e-06, "loss": 0.4589, "step": 971 }, { "epoch": 0.2403560830860534, "grad_norm": 0.8106560924699778, "learning_rate": 4.983951299629142e-06, "loss": 0.4864, "step": 972 }, { "epoch": 0.24060336300692384, "grad_norm": 0.8273722868885549, "learning_rate": 4.983914509123447e-06, "loss": 0.4741, "step": 973 }, { "epoch": 0.24085064292779426, "grad_norm": 0.7817588752032723, "learning_rate": 4.983877676632311e-06, "loss": 0.4825, "step": 974 }, { "epoch": 0.2410979228486647, "grad_norm": 0.7946571630913781, "learning_rate": 4.983840802156353e-06, "loss": 0.5078, "step": 975 }, { "epoch": 0.2413452027695351, "grad_norm": 0.8172722258882839, "learning_rate": 4.983803885696199e-06, "loss": 0.4906, "step": 976 }, { "epoch": 0.24159248269040554, "grad_norm": 0.858810786727647, "learning_rate": 4.983766927252472e-06, "loss": 0.473, "step": 977 }, { "epoch": 0.24183976261127596, "grad_norm": 0.8672721626002375, "learning_rate": 4.983729926825798e-06, "loss": 0.4743, "step": 978 }, { "epoch": 0.24208704253214638, "grad_norm": 0.9329297411602999, "learning_rate": 4.983692884416801e-06, "loss": 0.472, "step": 979 }, { "epoch": 0.2423343224530168, "grad_norm": 0.9015184792086365, "learning_rate": 4.983655800026108e-06, "loss": 0.5101, "step": 980 }, { "epoch": 0.24258160237388723, "grad_norm": 0.8477549099000955, "learning_rate": 4.983618673654344e-06, "loss": 0.5164, "step": 981 }, { "epoch": 0.24282888229475766, "grad_norm": 0.8737374865512695, "learning_rate": 4.983581505302139e-06, "loss": 0.5048, "step": 982 }, { "epoch": 0.24307616221562808, "grad_norm": 0.8492727947637201, "learning_rate": 4.983544294970121e-06, "loss": 0.4872, "step": 983 }, { "epoch": 0.2433234421364985, "grad_norm": 0.8312941393834273, "learning_rate": 4.983507042658917e-06, "loss": 0.4921, "step": 984 }, { "epoch": 0.24357072205736893, "grad_norm": 0.8457934782705832, "learning_rate": 4.983469748369159e-06, "loss": 0.5009, "step": 985 }, { "epoch": 0.24381800197823936, "grad_norm": 0.889062562190951, "learning_rate": 4.983432412101475e-06, "loss": 0.5046, "step": 986 }, { "epoch": 0.24406528189910978, "grad_norm": 0.9200936675121365, "learning_rate": 4.983395033856498e-06, "loss": 0.5063, "step": 987 }, { "epoch": 0.2443125618199802, "grad_norm": 0.865579100161537, "learning_rate": 4.9833576136348595e-06, "loss": 0.4931, "step": 988 }, { "epoch": 0.24455984174085063, "grad_norm": 0.8758672885998708, "learning_rate": 4.983320151437191e-06, "loss": 0.481, "step": 989 }, { "epoch": 0.24480712166172106, "grad_norm": 0.8658318444737918, "learning_rate": 4.983282647264126e-06, "loss": 0.4712, "step": 990 }, { "epoch": 0.24505440158259148, "grad_norm": 0.8382351361641673, "learning_rate": 4.983245101116299e-06, "loss": 0.4911, "step": 991 }, { "epoch": 0.24530168150346193, "grad_norm": 0.8463983674050847, "learning_rate": 4.983207512994345e-06, "loss": 0.5312, "step": 992 }, { "epoch": 0.24554896142433236, "grad_norm": 0.8829203582411207, "learning_rate": 4.983169882898898e-06, "loss": 0.459, "step": 993 }, { "epoch": 0.24579624134520278, "grad_norm": 0.8208662802668374, "learning_rate": 4.983132210830596e-06, "loss": 0.4835, "step": 994 }, { "epoch": 0.2460435212660732, "grad_norm": 0.868902743454076, "learning_rate": 4.983094496790074e-06, "loss": 0.4895, "step": 995 }, { "epoch": 0.24629080118694363, "grad_norm": 0.8525195601900595, "learning_rate": 4.98305674077797e-06, "loss": 0.5008, "step": 996 }, { "epoch": 0.24653808110781406, "grad_norm": 0.8486858338119296, "learning_rate": 4.9830189427949225e-06, "loss": 0.4767, "step": 997 }, { "epoch": 0.24678536102868448, "grad_norm": 0.8399588457998348, "learning_rate": 4.982981102841569e-06, "loss": 0.4807, "step": 998 }, { "epoch": 0.2470326409495549, "grad_norm": 0.8400317988123928, "learning_rate": 4.982943220918552e-06, "loss": 0.4472, "step": 999 }, { "epoch": 0.24727992087042533, "grad_norm": 0.7933513082453183, "learning_rate": 4.982905297026509e-06, "loss": 0.4897, "step": 1000 }, { "epoch": 0.24752720079129575, "grad_norm": 0.8533021805361142, "learning_rate": 4.982867331166083e-06, "loss": 0.4826, "step": 1001 }, { "epoch": 0.24777448071216618, "grad_norm": 0.871571750396739, "learning_rate": 4.982829323337914e-06, "loss": 0.4822, "step": 1002 }, { "epoch": 0.2480217606330366, "grad_norm": 0.8561307423314862, "learning_rate": 4.982791273542646e-06, "loss": 0.4928, "step": 1003 }, { "epoch": 0.24826904055390703, "grad_norm": 0.8162243679119858, "learning_rate": 4.9827531817809215e-06, "loss": 0.4918, "step": 1004 }, { "epoch": 0.24851632047477745, "grad_norm": 0.8091135061963646, "learning_rate": 4.9827150480533835e-06, "loss": 0.5059, "step": 1005 }, { "epoch": 0.24876360039564788, "grad_norm": 0.839406106272644, "learning_rate": 4.982676872360677e-06, "loss": 0.5087, "step": 1006 }, { "epoch": 0.2490108803165183, "grad_norm": 0.8313205468854626, "learning_rate": 4.982638654703449e-06, "loss": 0.4686, "step": 1007 }, { "epoch": 0.24925816023738873, "grad_norm": 0.8271088700543145, "learning_rate": 4.9826003950823445e-06, "loss": 0.4938, "step": 1008 }, { "epoch": 0.24950544015825915, "grad_norm": 0.8405321735427581, "learning_rate": 4.982562093498009e-06, "loss": 0.4876, "step": 1009 }, { "epoch": 0.24975272007912958, "grad_norm": 0.8665303043556788, "learning_rate": 4.982523749951091e-06, "loss": 0.4805, "step": 1010 }, { "epoch": 0.25, "grad_norm": 0.8787115918536075, "learning_rate": 4.982485364442238e-06, "loss": 0.487, "step": 1011 }, { "epoch": 0.2502472799208704, "grad_norm": 0.8409882874323071, "learning_rate": 4.982446936972099e-06, "loss": 0.4678, "step": 1012 }, { "epoch": 0.25049455984174085, "grad_norm": 0.8561426691383394, "learning_rate": 4.982408467541325e-06, "loss": 0.4897, "step": 1013 }, { "epoch": 0.2507418397626113, "grad_norm": 0.8572923571758608, "learning_rate": 4.982369956150563e-06, "loss": 0.4852, "step": 1014 }, { "epoch": 0.2509891196834817, "grad_norm": 0.8981118858205535, "learning_rate": 4.982331402800468e-06, "loss": 0.4807, "step": 1015 }, { "epoch": 0.2512363996043521, "grad_norm": 0.9095037169315664, "learning_rate": 4.982292807491688e-06, "loss": 0.5035, "step": 1016 }, { "epoch": 0.25148367952522255, "grad_norm": 0.8421819705790509, "learning_rate": 4.982254170224878e-06, "loss": 0.4421, "step": 1017 }, { "epoch": 0.251730959446093, "grad_norm": 0.8399733420526356, "learning_rate": 4.982215491000689e-06, "loss": 0.4987, "step": 1018 }, { "epoch": 0.2519782393669634, "grad_norm": 0.8546805880965436, "learning_rate": 4.982176769819777e-06, "loss": 0.494, "step": 1019 }, { "epoch": 0.2522255192878338, "grad_norm": 0.891980552444231, "learning_rate": 4.982138006682795e-06, "loss": 0.4995, "step": 1020 }, { "epoch": 0.25247279920870425, "grad_norm": 0.832920847499595, "learning_rate": 4.982099201590399e-06, "loss": 0.4675, "step": 1021 }, { "epoch": 0.25272007912957467, "grad_norm": 0.8018275844329361, "learning_rate": 4.982060354543244e-06, "loss": 0.5018, "step": 1022 }, { "epoch": 0.2529673590504451, "grad_norm": 0.8518048762893595, "learning_rate": 4.982021465541988e-06, "loss": 0.5046, "step": 1023 }, { "epoch": 0.2532146389713155, "grad_norm": 0.830301537925145, "learning_rate": 4.9819825345872855e-06, "loss": 0.4926, "step": 1024 }, { "epoch": 0.25346191889218594, "grad_norm": 0.8314313821169597, "learning_rate": 4.981943561679799e-06, "loss": 0.4857, "step": 1025 }, { "epoch": 0.25370919881305637, "grad_norm": 0.8085067071753731, "learning_rate": 4.981904546820183e-06, "loss": 0.4997, "step": 1026 }, { "epoch": 0.2539564787339268, "grad_norm": 0.8312408342022083, "learning_rate": 4.981865490009099e-06, "loss": 0.4873, "step": 1027 }, { "epoch": 0.2542037586547972, "grad_norm": 0.8236446385494339, "learning_rate": 4.9818263912472074e-06, "loss": 0.4854, "step": 1028 }, { "epoch": 0.25445103857566764, "grad_norm": 0.8279083883523636, "learning_rate": 4.9817872505351686e-06, "loss": 0.4848, "step": 1029 }, { "epoch": 0.25469831849653807, "grad_norm": 0.8690599083185739, "learning_rate": 4.9817480678736426e-06, "loss": 0.4865, "step": 1030 }, { "epoch": 0.2549455984174085, "grad_norm": 0.8421918735243161, "learning_rate": 4.981708843263295e-06, "loss": 0.4685, "step": 1031 }, { "epoch": 0.2551928783382789, "grad_norm": 0.8407079219400131, "learning_rate": 4.981669576704787e-06, "loss": 0.4956, "step": 1032 }, { "epoch": 0.25544015825914934, "grad_norm": 0.8773337094609159, "learning_rate": 4.9816302681987825e-06, "loss": 0.4724, "step": 1033 }, { "epoch": 0.25568743818001977, "grad_norm": 0.868844911839338, "learning_rate": 4.981590917745945e-06, "loss": 0.4981, "step": 1034 }, { "epoch": 0.2559347181008902, "grad_norm": 0.8666128355880677, "learning_rate": 4.981551525346941e-06, "loss": 0.5125, "step": 1035 }, { "epoch": 0.2561819980217606, "grad_norm": 0.9213503864137835, "learning_rate": 4.9815120910024365e-06, "loss": 0.516, "step": 1036 }, { "epoch": 0.25642927794263104, "grad_norm": 0.8307356659494541, "learning_rate": 4.981472614713096e-06, "loss": 0.5132, "step": 1037 }, { "epoch": 0.25667655786350146, "grad_norm": 0.8297935826184679, "learning_rate": 4.981433096479588e-06, "loss": 0.4802, "step": 1038 }, { "epoch": 0.2569238377843719, "grad_norm": 0.8327277791219414, "learning_rate": 4.981393536302582e-06, "loss": 0.4928, "step": 1039 }, { "epoch": 0.2571711177052423, "grad_norm": 0.8707053568449094, "learning_rate": 4.981353934182745e-06, "loss": 0.4899, "step": 1040 }, { "epoch": 0.25741839762611274, "grad_norm": 0.8652524290884858, "learning_rate": 4.981314290120747e-06, "loss": 0.4886, "step": 1041 }, { "epoch": 0.25766567754698316, "grad_norm": 0.8466892303798033, "learning_rate": 4.981274604117257e-06, "loss": 0.5103, "step": 1042 }, { "epoch": 0.2579129574678536, "grad_norm": 0.9783250538294788, "learning_rate": 4.981234876172947e-06, "loss": 0.4887, "step": 1043 }, { "epoch": 0.258160237388724, "grad_norm": 0.9318808540890211, "learning_rate": 4.981195106288488e-06, "loss": 0.5011, "step": 1044 }, { "epoch": 0.25840751730959444, "grad_norm": 0.8551466560368262, "learning_rate": 4.981155294464552e-06, "loss": 0.5029, "step": 1045 }, { "epoch": 0.25865479723046486, "grad_norm": 0.8793301608280027, "learning_rate": 4.981115440701814e-06, "loss": 0.4742, "step": 1046 }, { "epoch": 0.2589020771513353, "grad_norm": 0.8969182144318821, "learning_rate": 4.981075545000944e-06, "loss": 0.5168, "step": 1047 }, { "epoch": 0.2591493570722057, "grad_norm": 0.8662813622902052, "learning_rate": 4.981035607362619e-06, "loss": 0.4981, "step": 1048 }, { "epoch": 0.25939663699307614, "grad_norm": 0.8852575900920927, "learning_rate": 4.980995627787513e-06, "loss": 0.4845, "step": 1049 }, { "epoch": 0.2596439169139466, "grad_norm": 0.8434609325170388, "learning_rate": 4.980955606276303e-06, "loss": 0.4663, "step": 1050 }, { "epoch": 0.25989119683481704, "grad_norm": 0.8403758855291492, "learning_rate": 4.980915542829664e-06, "loss": 0.4831, "step": 1051 }, { "epoch": 0.26013847675568746, "grad_norm": 0.8877325981859041, "learning_rate": 4.980875437448274e-06, "loss": 0.4785, "step": 1052 }, { "epoch": 0.2603857566765579, "grad_norm": 0.8052008309585926, "learning_rate": 4.98083529013281e-06, "loss": 0.5048, "step": 1053 }, { "epoch": 0.2606330365974283, "grad_norm": 0.8478864104168425, "learning_rate": 4.980795100883953e-06, "loss": 0.4704, "step": 1054 }, { "epoch": 0.26088031651829874, "grad_norm": 0.8794267459522233, "learning_rate": 4.9807548697023795e-06, "loss": 0.4629, "step": 1055 }, { "epoch": 0.26112759643916916, "grad_norm": 0.8058144806983949, "learning_rate": 4.9807145965887705e-06, "loss": 0.4852, "step": 1056 }, { "epoch": 0.2613748763600396, "grad_norm": 0.8843411492674896, "learning_rate": 4.980674281543807e-06, "loss": 0.4644, "step": 1057 }, { "epoch": 0.26162215628091, "grad_norm": 0.8493178218035321, "learning_rate": 4.98063392456817e-06, "loss": 0.4879, "step": 1058 }, { "epoch": 0.26186943620178044, "grad_norm": 0.8356662278593041, "learning_rate": 4.980593525662544e-06, "loss": 0.4703, "step": 1059 }, { "epoch": 0.26211671612265086, "grad_norm": 0.8475187900521053, "learning_rate": 4.980553084827607e-06, "loss": 0.4914, "step": 1060 }, { "epoch": 0.2623639960435213, "grad_norm": 0.8450529868997396, "learning_rate": 4.980512602064047e-06, "loss": 0.4844, "step": 1061 }, { "epoch": 0.2626112759643917, "grad_norm": 0.8079903048049786, "learning_rate": 4.9804720773725465e-06, "loss": 0.4752, "step": 1062 }, { "epoch": 0.26285855588526214, "grad_norm": 0.8511900505503611, "learning_rate": 4.980431510753791e-06, "loss": 0.4774, "step": 1063 }, { "epoch": 0.26310583580613256, "grad_norm": 0.8332435125227167, "learning_rate": 4.980390902208465e-06, "loss": 0.4751, "step": 1064 }, { "epoch": 0.263353115727003, "grad_norm": 0.8536824402482716, "learning_rate": 4.980350251737256e-06, "loss": 0.5205, "step": 1065 }, { "epoch": 0.2636003956478734, "grad_norm": 0.8553553797802288, "learning_rate": 4.980309559340851e-06, "loss": 0.4665, "step": 1066 }, { "epoch": 0.26384767556874383, "grad_norm": 0.8286430821029018, "learning_rate": 4.980268825019939e-06, "loss": 0.4861, "step": 1067 }, { "epoch": 0.26409495548961426, "grad_norm": 0.8349366249443254, "learning_rate": 4.980228048775205e-06, "loss": 0.4921, "step": 1068 }, { "epoch": 0.2643422354104847, "grad_norm": 0.8327046354641331, "learning_rate": 4.980187230607341e-06, "loss": 0.4672, "step": 1069 }, { "epoch": 0.2645895153313551, "grad_norm": 0.8665614695088318, "learning_rate": 4.980146370517037e-06, "loss": 0.4803, "step": 1070 }, { "epoch": 0.26483679525222553, "grad_norm": 0.8410944228997952, "learning_rate": 4.980105468504983e-06, "loss": 0.4753, "step": 1071 }, { "epoch": 0.26508407517309596, "grad_norm": 0.8731177178369249, "learning_rate": 4.9800645245718705e-06, "loss": 0.5105, "step": 1072 }, { "epoch": 0.2653313550939664, "grad_norm": 0.87074377533807, "learning_rate": 4.980023538718392e-06, "loss": 0.4868, "step": 1073 }, { "epoch": 0.2655786350148368, "grad_norm": 0.8497553336959501, "learning_rate": 4.979982510945239e-06, "loss": 0.46, "step": 1074 }, { "epoch": 0.26582591493570723, "grad_norm": 0.8482593664870046, "learning_rate": 4.9799414412531056e-06, "loss": 0.5059, "step": 1075 }, { "epoch": 0.26607319485657766, "grad_norm": 0.8734764426183708, "learning_rate": 4.9799003296426864e-06, "loss": 0.457, "step": 1076 }, { "epoch": 0.2663204747774481, "grad_norm": 0.8762054435854876, "learning_rate": 4.979859176114676e-06, "loss": 0.4828, "step": 1077 }, { "epoch": 0.2665677546983185, "grad_norm": 0.8386188836793864, "learning_rate": 4.979817980669771e-06, "loss": 0.4531, "step": 1078 }, { "epoch": 0.26681503461918893, "grad_norm": 0.8303211743323535, "learning_rate": 4.979776743308667e-06, "loss": 0.4786, "step": 1079 }, { "epoch": 0.26706231454005935, "grad_norm": 0.8763603274871483, "learning_rate": 4.979735464032059e-06, "loss": 0.4729, "step": 1080 }, { "epoch": 0.2673095944609298, "grad_norm": 0.8482768577451538, "learning_rate": 4.979694142840647e-06, "loss": 0.4685, "step": 1081 }, { "epoch": 0.2675568743818002, "grad_norm": 0.8181406667783483, "learning_rate": 4.9796527797351304e-06, "loss": 0.4883, "step": 1082 }, { "epoch": 0.2678041543026706, "grad_norm": 0.820627311447771, "learning_rate": 4.979611374716207e-06, "loss": 0.4595, "step": 1083 }, { "epoch": 0.26805143422354105, "grad_norm": 0.8494425109762445, "learning_rate": 4.979569927784576e-06, "loss": 0.5001, "step": 1084 }, { "epoch": 0.2682987141444115, "grad_norm": 0.8347143974885791, "learning_rate": 4.979528438940938e-06, "loss": 0.4854, "step": 1085 }, { "epoch": 0.2685459940652819, "grad_norm": 0.8293185823179237, "learning_rate": 4.979486908185996e-06, "loss": 0.491, "step": 1086 }, { "epoch": 0.2687932739861523, "grad_norm": 0.8624687861606527, "learning_rate": 4.97944533552045e-06, "loss": 0.4668, "step": 1087 }, { "epoch": 0.26904055390702275, "grad_norm": 0.8647425254764696, "learning_rate": 4.979403720945004e-06, "loss": 0.4785, "step": 1088 }, { "epoch": 0.2692878338278932, "grad_norm": 0.8642500279889467, "learning_rate": 4.979362064460361e-06, "loss": 0.4906, "step": 1089 }, { "epoch": 0.2695351137487636, "grad_norm": 0.8188279970742318, "learning_rate": 4.979320366067226e-06, "loss": 0.4922, "step": 1090 }, { "epoch": 0.269782393669634, "grad_norm": 0.8494061066145452, "learning_rate": 4.979278625766302e-06, "loss": 0.4373, "step": 1091 }, { "epoch": 0.27002967359050445, "grad_norm": 0.8541094055180173, "learning_rate": 4.979236843558296e-06, "loss": 0.4982, "step": 1092 }, { "epoch": 0.2702769535113749, "grad_norm": 0.8901352777542862, "learning_rate": 4.979195019443913e-06, "loss": 0.4895, "step": 1093 }, { "epoch": 0.2705242334322453, "grad_norm": 0.8723236124194512, "learning_rate": 4.9791531534238615e-06, "loss": 0.4876, "step": 1094 }, { "epoch": 0.2707715133531157, "grad_norm": 0.8460275573328927, "learning_rate": 4.9791112454988485e-06, "loss": 0.4582, "step": 1095 }, { "epoch": 0.27101879327398615, "grad_norm": 0.8032142309223071, "learning_rate": 4.979069295669582e-06, "loss": 0.4979, "step": 1096 }, { "epoch": 0.27126607319485657, "grad_norm": 0.8984869165646926, "learning_rate": 4.979027303936771e-06, "loss": 0.4883, "step": 1097 }, { "epoch": 0.271513353115727, "grad_norm": 0.823481716476794, "learning_rate": 4.9789852703011255e-06, "loss": 0.4748, "step": 1098 }, { "epoch": 0.2717606330365974, "grad_norm": 0.8770522684481887, "learning_rate": 4.978943194763356e-06, "loss": 0.4761, "step": 1099 }, { "epoch": 0.27200791295746785, "grad_norm": 0.8827483342198571, "learning_rate": 4.978901077324174e-06, "loss": 0.5047, "step": 1100 }, { "epoch": 0.27225519287833827, "grad_norm": 0.8442680181370851, "learning_rate": 4.978858917984292e-06, "loss": 0.476, "step": 1101 }, { "epoch": 0.2725024727992087, "grad_norm": 0.8093223452069177, "learning_rate": 4.9788167167444206e-06, "loss": 0.4974, "step": 1102 }, { "epoch": 0.2727497527200791, "grad_norm": 0.8520597566674003, "learning_rate": 4.978774473605274e-06, "loss": 0.4953, "step": 1103 }, { "epoch": 0.27299703264094954, "grad_norm": 0.8851794426709616, "learning_rate": 4.978732188567568e-06, "loss": 0.4748, "step": 1104 }, { "epoch": 0.27324431256181997, "grad_norm": 0.8047095063413441, "learning_rate": 4.978689861632016e-06, "loss": 0.4799, "step": 1105 }, { "epoch": 0.2734915924826904, "grad_norm": 0.8036515216440242, "learning_rate": 4.978647492799332e-06, "loss": 0.4623, "step": 1106 }, { "epoch": 0.2737388724035608, "grad_norm": 0.8287473081378811, "learning_rate": 4.978605082070234e-06, "loss": 0.4808, "step": 1107 }, { "epoch": 0.27398615232443124, "grad_norm": 0.7830078111911496, "learning_rate": 4.9785626294454385e-06, "loss": 0.4848, "step": 1108 }, { "epoch": 0.27423343224530167, "grad_norm": 0.8796446581845684, "learning_rate": 4.978520134925663e-06, "loss": 0.4649, "step": 1109 }, { "epoch": 0.2744807121661721, "grad_norm": 0.8207939048014206, "learning_rate": 4.978477598511625e-06, "loss": 0.4956, "step": 1110 }, { "epoch": 0.2747279920870425, "grad_norm": 0.8206309916991434, "learning_rate": 4.978435020204045e-06, "loss": 0.5177, "step": 1111 }, { "epoch": 0.27497527200791294, "grad_norm": 0.8349867209882694, "learning_rate": 4.978392400003642e-06, "loss": 0.4801, "step": 1112 }, { "epoch": 0.27522255192878337, "grad_norm": 0.8467923995371689, "learning_rate": 4.978349737911136e-06, "loss": 0.4868, "step": 1113 }, { "epoch": 0.2754698318496538, "grad_norm": 0.8578801603556364, "learning_rate": 4.9783070339272485e-06, "loss": 0.487, "step": 1114 }, { "epoch": 0.2757171117705242, "grad_norm": 0.7810402137613652, "learning_rate": 4.978264288052701e-06, "loss": 0.4741, "step": 1115 }, { "epoch": 0.27596439169139464, "grad_norm": 0.7896403749725643, "learning_rate": 4.978221500288217e-06, "loss": 0.5014, "step": 1116 }, { "epoch": 0.27621167161226506, "grad_norm": 0.8440418279389584, "learning_rate": 4.978178670634518e-06, "loss": 0.4677, "step": 1117 }, { "epoch": 0.2764589515331355, "grad_norm": 0.8645968361942973, "learning_rate": 4.97813579909233e-06, "loss": 0.4951, "step": 1118 }, { "epoch": 0.2767062314540059, "grad_norm": 0.8850512904631548, "learning_rate": 4.9780928856623765e-06, "loss": 0.4813, "step": 1119 }, { "epoch": 0.27695351137487634, "grad_norm": 0.8682217080189116, "learning_rate": 4.978049930345382e-06, "loss": 0.4832, "step": 1120 }, { "epoch": 0.27720079129574676, "grad_norm": 0.8551141090731345, "learning_rate": 4.978006933142075e-06, "loss": 0.4796, "step": 1121 }, { "epoch": 0.2774480712166172, "grad_norm": 0.8561104145175743, "learning_rate": 4.97796389405318e-06, "loss": 0.4945, "step": 1122 }, { "epoch": 0.2776953511374876, "grad_norm": 0.8408560414663986, "learning_rate": 4.977920813079426e-06, "loss": 0.464, "step": 1123 }, { "epoch": 0.27794263105835804, "grad_norm": 0.8654252795240646, "learning_rate": 4.97787769022154e-06, "loss": 0.4858, "step": 1124 }, { "epoch": 0.27818991097922846, "grad_norm": 0.860275164094403, "learning_rate": 4.9778345254802505e-06, "loss": 0.4902, "step": 1125 }, { "epoch": 0.2784371909000989, "grad_norm": 0.8232668434710275, "learning_rate": 4.977791318856289e-06, "loss": 0.4662, "step": 1126 }, { "epoch": 0.2786844708209693, "grad_norm": 0.8368306472164873, "learning_rate": 4.977748070350385e-06, "loss": 0.4809, "step": 1127 }, { "epoch": 0.2789317507418398, "grad_norm": 0.836130275495686, "learning_rate": 4.977704779963269e-06, "loss": 0.4929, "step": 1128 }, { "epoch": 0.2791790306627102, "grad_norm": 0.9149228882397445, "learning_rate": 4.9776614476956735e-06, "loss": 0.4691, "step": 1129 }, { "epoch": 0.27942631058358064, "grad_norm": 0.8068181975659364, "learning_rate": 4.97761807354833e-06, "loss": 0.4684, "step": 1130 }, { "epoch": 0.27967359050445106, "grad_norm": 0.8422358197300707, "learning_rate": 4.977574657521973e-06, "loss": 0.4761, "step": 1131 }, { "epoch": 0.2799208704253215, "grad_norm": 0.8502642960030118, "learning_rate": 4.977531199617335e-06, "loss": 0.4574, "step": 1132 }, { "epoch": 0.2801681503461919, "grad_norm": 0.8596392419555559, "learning_rate": 4.977487699835151e-06, "loss": 0.4956, "step": 1133 }, { "epoch": 0.28041543026706234, "grad_norm": 0.8457975155369798, "learning_rate": 4.977444158176157e-06, "loss": 0.5049, "step": 1134 }, { "epoch": 0.28066271018793276, "grad_norm": 0.8237071413210715, "learning_rate": 4.9774005746410885e-06, "loss": 0.4795, "step": 1135 }, { "epoch": 0.2809099901088032, "grad_norm": 0.8690081793064807, "learning_rate": 4.977356949230681e-06, "loss": 0.4831, "step": 1136 }, { "epoch": 0.2811572700296736, "grad_norm": 0.8522428926389992, "learning_rate": 4.977313281945674e-06, "loss": 0.4606, "step": 1137 }, { "epoch": 0.28140454995054404, "grad_norm": 0.8686896451812126, "learning_rate": 4.977269572786804e-06, "loss": 0.4681, "step": 1138 }, { "epoch": 0.28165182987141446, "grad_norm": 0.8152365489529878, "learning_rate": 4.9772258217548105e-06, "loss": 0.4911, "step": 1139 }, { "epoch": 0.2818991097922849, "grad_norm": 0.8264760785996421, "learning_rate": 4.977182028850434e-06, "loss": 0.4598, "step": 1140 }, { "epoch": 0.2821463897131553, "grad_norm": 0.8211689788246055, "learning_rate": 4.9771381940744114e-06, "loss": 0.457, "step": 1141 }, { "epoch": 0.28239366963402573, "grad_norm": 0.8623747209430598, "learning_rate": 4.977094317427488e-06, "loss": 0.4527, "step": 1142 }, { "epoch": 0.28264094955489616, "grad_norm": 0.8244132721987166, "learning_rate": 4.977050398910402e-06, "loss": 0.4821, "step": 1143 }, { "epoch": 0.2828882294757666, "grad_norm": 0.88533076955512, "learning_rate": 4.977006438523898e-06, "loss": 0.4614, "step": 1144 }, { "epoch": 0.283135509396637, "grad_norm": 0.91782791472328, "learning_rate": 4.9769624362687175e-06, "loss": 0.5028, "step": 1145 }, { "epoch": 0.28338278931750743, "grad_norm": 0.8483629265141885, "learning_rate": 4.9769183921456045e-06, "loss": 0.4735, "step": 1146 }, { "epoch": 0.28363006923837786, "grad_norm": 0.8051321989492259, "learning_rate": 4.976874306155305e-06, "loss": 0.4827, "step": 1147 }, { "epoch": 0.2838773491592483, "grad_norm": 0.9174584209766616, "learning_rate": 4.9768301782985625e-06, "loss": 0.4711, "step": 1148 }, { "epoch": 0.2841246290801187, "grad_norm": 0.8715629939118485, "learning_rate": 4.9767860085761234e-06, "loss": 0.4716, "step": 1149 }, { "epoch": 0.28437190900098913, "grad_norm": 0.8190866023796931, "learning_rate": 4.9767417969887345e-06, "loss": 0.4717, "step": 1150 }, { "epoch": 0.28461918892185956, "grad_norm": 0.8436727789264102, "learning_rate": 4.976697543537144e-06, "loss": 0.4655, "step": 1151 }, { "epoch": 0.28486646884273, "grad_norm": 0.9569028098073552, "learning_rate": 4.976653248222097e-06, "loss": 0.4459, "step": 1152 }, { "epoch": 0.2851137487636004, "grad_norm": 0.9183949145964179, "learning_rate": 4.976608911044345e-06, "loss": 0.4595, "step": 1153 }, { "epoch": 0.28536102868447083, "grad_norm": 0.8331716153606737, "learning_rate": 4.976564532004636e-06, "loss": 0.4654, "step": 1154 }, { "epoch": 0.28560830860534125, "grad_norm": 0.818296150644725, "learning_rate": 4.976520111103721e-06, "loss": 0.4777, "step": 1155 }, { "epoch": 0.2858555885262117, "grad_norm": 0.88625253346554, "learning_rate": 4.976475648342351e-06, "loss": 0.4807, "step": 1156 }, { "epoch": 0.2861028684470821, "grad_norm": 0.8305715999786408, "learning_rate": 4.976431143721277e-06, "loss": 0.4482, "step": 1157 }, { "epoch": 0.28635014836795253, "grad_norm": 0.8426926584136363, "learning_rate": 4.976386597241251e-06, "loss": 0.4872, "step": 1158 }, { "epoch": 0.28659742828882295, "grad_norm": 0.8366689422958908, "learning_rate": 4.976342008903025e-06, "loss": 0.486, "step": 1159 }, { "epoch": 0.2868447082096934, "grad_norm": 0.8956525760405114, "learning_rate": 4.976297378707355e-06, "loss": 0.4715, "step": 1160 }, { "epoch": 0.2870919881305638, "grad_norm": 0.8272900765226568, "learning_rate": 4.976252706654995e-06, "loss": 0.4691, "step": 1161 }, { "epoch": 0.2873392680514342, "grad_norm": 0.8050824860585051, "learning_rate": 4.976207992746699e-06, "loss": 0.4725, "step": 1162 }, { "epoch": 0.28758654797230465, "grad_norm": 0.8407468844517442, "learning_rate": 4.976163236983223e-06, "loss": 0.4985, "step": 1163 }, { "epoch": 0.2878338278931751, "grad_norm": 0.9432126176475985, "learning_rate": 4.976118439365324e-06, "loss": 0.4755, "step": 1164 }, { "epoch": 0.2880811078140455, "grad_norm": 0.8343734797289791, "learning_rate": 4.976073599893758e-06, "loss": 0.4469, "step": 1165 }, { "epoch": 0.2883283877349159, "grad_norm": 0.80957107924281, "learning_rate": 4.976028718569285e-06, "loss": 0.479, "step": 1166 }, { "epoch": 0.28857566765578635, "grad_norm": 0.7838786090709212, "learning_rate": 4.975983795392662e-06, "loss": 0.4671, "step": 1167 }, { "epoch": 0.2888229475766568, "grad_norm": 0.8892179393889282, "learning_rate": 4.975938830364649e-06, "loss": 0.4629, "step": 1168 }, { "epoch": 0.2890702274975272, "grad_norm": 0.8885646660585704, "learning_rate": 4.975893823486006e-06, "loss": 0.5188, "step": 1169 }, { "epoch": 0.2893175074183976, "grad_norm": 0.8950341542311611, "learning_rate": 4.975848774757493e-06, "loss": 0.4882, "step": 1170 }, { "epoch": 0.28956478733926805, "grad_norm": 0.8153300668207971, "learning_rate": 4.975803684179873e-06, "loss": 0.4822, "step": 1171 }, { "epoch": 0.2898120672601385, "grad_norm": 0.8661852061603399, "learning_rate": 4.975758551753906e-06, "loss": 0.4655, "step": 1172 }, { "epoch": 0.2900593471810089, "grad_norm": 0.8741765156999356, "learning_rate": 4.975713377480357e-06, "loss": 0.4802, "step": 1173 }, { "epoch": 0.2903066271018793, "grad_norm": 0.8232541934414569, "learning_rate": 4.975668161359988e-06, "loss": 0.4991, "step": 1174 }, { "epoch": 0.29055390702274975, "grad_norm": 0.8204906769924353, "learning_rate": 4.9756229033935646e-06, "loss": 0.4593, "step": 1175 }, { "epoch": 0.29080118694362017, "grad_norm": 0.8707746000436765, "learning_rate": 4.97557760358185e-06, "loss": 0.4607, "step": 1176 }, { "epoch": 0.2910484668644906, "grad_norm": 0.9088228180075838, "learning_rate": 4.975532261925612e-06, "loss": 0.479, "step": 1177 }, { "epoch": 0.291295746785361, "grad_norm": 0.8474947512406128, "learning_rate": 4.975486878425616e-06, "loss": 0.449, "step": 1178 }, { "epoch": 0.29154302670623145, "grad_norm": 0.8214047939389825, "learning_rate": 4.975441453082629e-06, "loss": 0.5202, "step": 1179 }, { "epoch": 0.29179030662710187, "grad_norm": 0.874242317071171, "learning_rate": 4.9753959858974195e-06, "loss": 0.5118, "step": 1180 }, { "epoch": 0.2920375865479723, "grad_norm": 0.8628389428690536, "learning_rate": 4.975350476870755e-06, "loss": 0.4553, "step": 1181 }, { "epoch": 0.2922848664688427, "grad_norm": 0.8522753264964066, "learning_rate": 4.975304926003405e-06, "loss": 0.4472, "step": 1182 }, { "epoch": 0.29253214638971314, "grad_norm": 0.8249790271701399, "learning_rate": 4.97525933329614e-06, "loss": 0.4739, "step": 1183 }, { "epoch": 0.29277942631058357, "grad_norm": 0.9003269024269399, "learning_rate": 4.97521369874973e-06, "loss": 0.4815, "step": 1184 }, { "epoch": 0.293026706231454, "grad_norm": 0.920371626612895, "learning_rate": 4.975168022364948e-06, "loss": 0.4552, "step": 1185 }, { "epoch": 0.2932739861523244, "grad_norm": 0.8453419054277556, "learning_rate": 4.975122304142564e-06, "loss": 0.4827, "step": 1186 }, { "epoch": 0.29352126607319484, "grad_norm": 0.893824981194909, "learning_rate": 4.97507654408335e-06, "loss": 0.4593, "step": 1187 }, { "epoch": 0.29376854599406527, "grad_norm": 0.8765416230068477, "learning_rate": 4.9750307421880825e-06, "loss": 0.468, "step": 1188 }, { "epoch": 0.2940158259149357, "grad_norm": 0.7894978603788854, "learning_rate": 4.974984898457534e-06, "loss": 0.486, "step": 1189 }, { "epoch": 0.2942631058358061, "grad_norm": 0.8576759645871942, "learning_rate": 4.9749390128924806e-06, "loss": 0.5149, "step": 1190 }, { "epoch": 0.29451038575667654, "grad_norm": 0.8637297369732642, "learning_rate": 4.9748930854936955e-06, "loss": 0.4778, "step": 1191 }, { "epoch": 0.29475766567754697, "grad_norm": 0.8527005901061284, "learning_rate": 4.974847116261957e-06, "loss": 0.5054, "step": 1192 }, { "epoch": 0.2950049455984174, "grad_norm": 0.815567947874674, "learning_rate": 4.974801105198042e-06, "loss": 0.4802, "step": 1193 }, { "epoch": 0.2952522255192878, "grad_norm": 0.8479219848611034, "learning_rate": 4.974755052302726e-06, "loss": 0.4702, "step": 1194 }, { "epoch": 0.29549950544015824, "grad_norm": 0.8556137872597406, "learning_rate": 4.974708957576791e-06, "loss": 0.4704, "step": 1195 }, { "epoch": 0.29574678536102866, "grad_norm": 0.8398783844649267, "learning_rate": 4.974662821021014e-06, "loss": 0.4854, "step": 1196 }, { "epoch": 0.2959940652818991, "grad_norm": 0.8769661180300431, "learning_rate": 4.974616642636174e-06, "loss": 0.502, "step": 1197 }, { "epoch": 0.2962413452027695, "grad_norm": 0.846002502686499, "learning_rate": 4.974570422423053e-06, "loss": 0.5028, "step": 1198 }, { "epoch": 0.29648862512363994, "grad_norm": 0.8732437761139039, "learning_rate": 4.974524160382433e-06, "loss": 0.4487, "step": 1199 }, { "epoch": 0.29673590504451036, "grad_norm": 0.8534970199568261, "learning_rate": 4.974477856515094e-06, "loss": 0.4772, "step": 1200 }, { "epoch": 0.2969831849653808, "grad_norm": 0.8100385165833147, "learning_rate": 4.97443151082182e-06, "loss": 0.5059, "step": 1201 }, { "epoch": 0.2972304648862512, "grad_norm": 0.8427846867481712, "learning_rate": 4.974385123303394e-06, "loss": 0.4697, "step": 1202 }, { "epoch": 0.29747774480712164, "grad_norm": 0.8733326416501844, "learning_rate": 4.974338693960599e-06, "loss": 0.4638, "step": 1203 }, { "epoch": 0.29772502472799206, "grad_norm": 0.8599299796559156, "learning_rate": 4.974292222794223e-06, "loss": 0.4563, "step": 1204 }, { "epoch": 0.2979723046488625, "grad_norm": 0.8424539008316979, "learning_rate": 4.9742457098050475e-06, "loss": 0.5017, "step": 1205 }, { "epoch": 0.29821958456973297, "grad_norm": 0.8474616781082716, "learning_rate": 4.974199154993862e-06, "loss": 0.4874, "step": 1206 }, { "epoch": 0.2984668644906034, "grad_norm": 0.8432090258049286, "learning_rate": 4.974152558361451e-06, "loss": 0.4536, "step": 1207 }, { "epoch": 0.2987141444114738, "grad_norm": 0.8365560807553833, "learning_rate": 4.9741059199086024e-06, "loss": 0.4971, "step": 1208 }, { "epoch": 0.29896142433234424, "grad_norm": 0.8247988037749963, "learning_rate": 4.974059239636106e-06, "loss": 0.4564, "step": 1209 }, { "epoch": 0.29920870425321466, "grad_norm": 0.8364405143056981, "learning_rate": 4.97401251754475e-06, "loss": 0.539, "step": 1210 }, { "epoch": 0.2994559841740851, "grad_norm": 0.8749742019563281, "learning_rate": 4.973965753635325e-06, "loss": 0.4773, "step": 1211 }, { "epoch": 0.2997032640949555, "grad_norm": 0.8643498007988748, "learning_rate": 4.97391894790862e-06, "loss": 0.4521, "step": 1212 }, { "epoch": 0.29995054401582594, "grad_norm": 0.8051146754308409, "learning_rate": 4.973872100365427e-06, "loss": 0.4659, "step": 1213 }, { "epoch": 0.30019782393669636, "grad_norm": 0.8483014608260354, "learning_rate": 4.973825211006537e-06, "loss": 0.4753, "step": 1214 }, { "epoch": 0.3004451038575668, "grad_norm": 0.8784932670634085, "learning_rate": 4.973778279832744e-06, "loss": 0.4907, "step": 1215 }, { "epoch": 0.3006923837784372, "grad_norm": 0.8310183524315704, "learning_rate": 4.97373130684484e-06, "loss": 0.4672, "step": 1216 }, { "epoch": 0.30093966369930764, "grad_norm": 0.8636283603039391, "learning_rate": 4.973684292043619e-06, "loss": 0.435, "step": 1217 }, { "epoch": 0.30118694362017806, "grad_norm": 0.8389114184976831, "learning_rate": 4.973637235429877e-06, "loss": 0.4993, "step": 1218 }, { "epoch": 0.3014342235410485, "grad_norm": 0.8429199752469544, "learning_rate": 4.973590137004408e-06, "loss": 0.5084, "step": 1219 }, { "epoch": 0.3016815034619189, "grad_norm": 0.8648994179752031, "learning_rate": 4.9735429967680094e-06, "loss": 0.4584, "step": 1220 }, { "epoch": 0.30192878338278933, "grad_norm": 0.8092649880087687, "learning_rate": 4.973495814721477e-06, "loss": 0.4748, "step": 1221 }, { "epoch": 0.30217606330365976, "grad_norm": 0.8045236519854421, "learning_rate": 4.9734485908656075e-06, "loss": 0.5183, "step": 1222 }, { "epoch": 0.3024233432245302, "grad_norm": 0.8375848102184394, "learning_rate": 4.973401325201202e-06, "loss": 0.4846, "step": 1223 }, { "epoch": 0.3026706231454006, "grad_norm": 0.8204333352305591, "learning_rate": 4.9733540177290566e-06, "loss": 0.4735, "step": 1224 }, { "epoch": 0.30291790306627103, "grad_norm": 0.9320660470455538, "learning_rate": 4.973306668449971e-06, "loss": 0.487, "step": 1225 }, { "epoch": 0.30316518298714146, "grad_norm": 0.8512691440015686, "learning_rate": 4.973259277364748e-06, "loss": 0.4564, "step": 1226 }, { "epoch": 0.3034124629080119, "grad_norm": 0.8448716455291453, "learning_rate": 4.973211844474187e-06, "loss": 0.4607, "step": 1227 }, { "epoch": 0.3036597428288823, "grad_norm": 0.9007565601848737, "learning_rate": 4.973164369779089e-06, "loss": 0.4714, "step": 1228 }, { "epoch": 0.30390702274975273, "grad_norm": 0.8980928349718376, "learning_rate": 4.9731168532802586e-06, "loss": 0.4674, "step": 1229 }, { "epoch": 0.30415430267062316, "grad_norm": 0.8184075705545406, "learning_rate": 4.973069294978497e-06, "loss": 0.4683, "step": 1230 }, { "epoch": 0.3044015825914936, "grad_norm": 0.8307595261131485, "learning_rate": 4.973021694874609e-06, "loss": 0.4803, "step": 1231 }, { "epoch": 0.304648862512364, "grad_norm": 0.8995162154565732, "learning_rate": 4.972974052969399e-06, "loss": 0.4909, "step": 1232 }, { "epoch": 0.30489614243323443, "grad_norm": 0.8256622372929362, "learning_rate": 4.972926369263672e-06, "loss": 0.5004, "step": 1233 }, { "epoch": 0.30514342235410485, "grad_norm": 0.7964903751490071, "learning_rate": 4.972878643758234e-06, "loss": 0.5006, "step": 1234 }, { "epoch": 0.3053907022749753, "grad_norm": 0.9183004423758536, "learning_rate": 4.972830876453893e-06, "loss": 0.4817, "step": 1235 }, { "epoch": 0.3056379821958457, "grad_norm": 0.8931482050228089, "learning_rate": 4.972783067351455e-06, "loss": 0.4799, "step": 1236 }, { "epoch": 0.30588526211671613, "grad_norm": 0.9003322117775642, "learning_rate": 4.972735216451728e-06, "loss": 0.4832, "step": 1237 }, { "epoch": 0.30613254203758655, "grad_norm": 0.8025764006110967, "learning_rate": 4.972687323755522e-06, "loss": 0.4695, "step": 1238 }, { "epoch": 0.306379821958457, "grad_norm": 0.8313814899782755, "learning_rate": 4.972639389263645e-06, "loss": 0.5017, "step": 1239 }, { "epoch": 0.3066271018793274, "grad_norm": 0.8085003588929675, "learning_rate": 4.97259141297691e-06, "loss": 0.484, "step": 1240 }, { "epoch": 0.3068743818001978, "grad_norm": 0.8789937098823752, "learning_rate": 4.9725433948961235e-06, "loss": 0.4629, "step": 1241 }, { "epoch": 0.30712166172106825, "grad_norm": 0.8417003920288945, "learning_rate": 4.972495335022101e-06, "loss": 0.4892, "step": 1242 }, { "epoch": 0.3073689416419387, "grad_norm": 0.8296348241115892, "learning_rate": 4.972447233355654e-06, "loss": 0.471, "step": 1243 }, { "epoch": 0.3076162215628091, "grad_norm": 0.8383843142341431, "learning_rate": 4.972399089897594e-06, "loss": 0.5055, "step": 1244 }, { "epoch": 0.3078635014836795, "grad_norm": 0.8684099957350048, "learning_rate": 4.972350904648736e-06, "loss": 0.456, "step": 1245 }, { "epoch": 0.30811078140454995, "grad_norm": 0.8774377280391846, "learning_rate": 4.972302677609895e-06, "loss": 0.4821, "step": 1246 }, { "epoch": 0.3083580613254204, "grad_norm": 0.8757062818355338, "learning_rate": 4.972254408781885e-06, "loss": 0.4662, "step": 1247 }, { "epoch": 0.3086053412462908, "grad_norm": 0.8623536682487267, "learning_rate": 4.972206098165522e-06, "loss": 0.4751, "step": 1248 }, { "epoch": 0.3088526211671612, "grad_norm": 0.8482930842606974, "learning_rate": 4.972157745761624e-06, "loss": 0.4963, "step": 1249 }, { "epoch": 0.30909990108803165, "grad_norm": 0.8841543541026415, "learning_rate": 4.972109351571006e-06, "loss": 0.4845, "step": 1250 }, { "epoch": 0.3093471810089021, "grad_norm": 0.8180021244000164, "learning_rate": 4.972060915594488e-06, "loss": 0.5033, "step": 1251 }, { "epoch": 0.3095944609297725, "grad_norm": 0.8183146518355443, "learning_rate": 4.9720124378328885e-06, "loss": 0.4673, "step": 1252 }, { "epoch": 0.3098417408506429, "grad_norm": 0.9126517291897078, "learning_rate": 4.971963918287026e-06, "loss": 0.4785, "step": 1253 }, { "epoch": 0.31008902077151335, "grad_norm": 0.8475981845701279, "learning_rate": 4.971915356957721e-06, "loss": 0.4515, "step": 1254 }, { "epoch": 0.31033630069238377, "grad_norm": 0.829251256081981, "learning_rate": 4.971866753845794e-06, "loss": 0.4841, "step": 1255 }, { "epoch": 0.3105835806132542, "grad_norm": 0.810109516370709, "learning_rate": 4.971818108952066e-06, "loss": 0.4957, "step": 1256 }, { "epoch": 0.3108308605341246, "grad_norm": 0.8424982626224964, "learning_rate": 4.9717694222773624e-06, "loss": 0.4903, "step": 1257 }, { "epoch": 0.31107814045499504, "grad_norm": 0.8273938509757517, "learning_rate": 4.971720693822503e-06, "loss": 0.4696, "step": 1258 }, { "epoch": 0.31132542037586547, "grad_norm": 0.8584209103624083, "learning_rate": 4.971671923588312e-06, "loss": 0.4694, "step": 1259 }, { "epoch": 0.3115727002967359, "grad_norm": 0.8635606173746708, "learning_rate": 4.971623111575614e-06, "loss": 0.4716, "step": 1260 }, { "epoch": 0.3118199802176063, "grad_norm": 0.8772316694757156, "learning_rate": 4.971574257785234e-06, "loss": 0.4748, "step": 1261 }, { "epoch": 0.31206726013847674, "grad_norm": 0.9346025894181195, "learning_rate": 4.971525362217998e-06, "loss": 0.4853, "step": 1262 }, { "epoch": 0.31231454005934717, "grad_norm": 0.8196238081047258, "learning_rate": 4.971476424874733e-06, "loss": 0.4775, "step": 1263 }, { "epoch": 0.3125618199802176, "grad_norm": 0.8501168249933853, "learning_rate": 4.971427445756265e-06, "loss": 0.485, "step": 1264 }, { "epoch": 0.312809099901088, "grad_norm": 0.8453782824523821, "learning_rate": 4.971378424863423e-06, "loss": 0.4672, "step": 1265 }, { "epoch": 0.31305637982195844, "grad_norm": 0.8316282468393369, "learning_rate": 4.971329362197035e-06, "loss": 0.4592, "step": 1266 }, { "epoch": 0.31330365974282887, "grad_norm": 0.8913623594613578, "learning_rate": 4.97128025775793e-06, "loss": 0.4562, "step": 1267 }, { "epoch": 0.3135509396636993, "grad_norm": 0.8805133912519674, "learning_rate": 4.971231111546939e-06, "loss": 0.4554, "step": 1268 }, { "epoch": 0.3137982195845697, "grad_norm": 0.8378082421317632, "learning_rate": 4.971181923564892e-06, "loss": 0.4898, "step": 1269 }, { "epoch": 0.31404549950544014, "grad_norm": 0.9098239301913463, "learning_rate": 4.97113269381262e-06, "loss": 0.507, "step": 1270 }, { "epoch": 0.31429277942631056, "grad_norm": 0.8805091022613049, "learning_rate": 4.971083422290956e-06, "loss": 0.459, "step": 1271 }, { "epoch": 0.314540059347181, "grad_norm": 0.8558333892198907, "learning_rate": 4.971034109000732e-06, "loss": 0.4786, "step": 1272 }, { "epoch": 0.3147873392680514, "grad_norm": 0.830002999251238, "learning_rate": 4.970984753942783e-06, "loss": 0.4449, "step": 1273 }, { "epoch": 0.31503461918892184, "grad_norm": 0.8401177086898065, "learning_rate": 4.970935357117941e-06, "loss": 0.5094, "step": 1274 }, { "epoch": 0.31528189910979226, "grad_norm": 0.8369130683042246, "learning_rate": 4.9708859185270435e-06, "loss": 0.4823, "step": 1275 }, { "epoch": 0.3155291790306627, "grad_norm": 0.8319862899317088, "learning_rate": 4.970836438170924e-06, "loss": 0.4855, "step": 1276 }, { "epoch": 0.3157764589515331, "grad_norm": 0.8244796578240013, "learning_rate": 4.97078691605042e-06, "loss": 0.4527, "step": 1277 }, { "epoch": 0.31602373887240354, "grad_norm": 0.8239294602758832, "learning_rate": 4.970737352166368e-06, "loss": 0.4307, "step": 1278 }, { "epoch": 0.31627101879327396, "grad_norm": 0.812065916663278, "learning_rate": 4.970687746519607e-06, "loss": 0.4816, "step": 1279 }, { "epoch": 0.3165182987141444, "grad_norm": 0.8330917454579297, "learning_rate": 4.970638099110974e-06, "loss": 0.5176, "step": 1280 }, { "epoch": 0.3167655786350148, "grad_norm": 0.8260932984000681, "learning_rate": 4.970588409941308e-06, "loss": 0.4897, "step": 1281 }, { "epoch": 0.31701285855588524, "grad_norm": 0.8279566577283952, "learning_rate": 4.9705386790114505e-06, "loss": 0.4774, "step": 1282 }, { "epoch": 0.31726013847675566, "grad_norm": 0.8146960068506958, "learning_rate": 4.970488906322241e-06, "loss": 0.4885, "step": 1283 }, { "epoch": 0.31750741839762614, "grad_norm": 0.855983788309996, "learning_rate": 4.970439091874521e-06, "loss": 0.4864, "step": 1284 }, { "epoch": 0.31775469831849656, "grad_norm": 0.8477308612881375, "learning_rate": 4.970389235669133e-06, "loss": 0.4884, "step": 1285 }, { "epoch": 0.318001978239367, "grad_norm": 0.8379968825304376, "learning_rate": 4.97033933770692e-06, "loss": 0.4715, "step": 1286 }, { "epoch": 0.3182492581602374, "grad_norm": 0.8551487364632888, "learning_rate": 4.970289397988724e-06, "loss": 0.4888, "step": 1287 }, { "epoch": 0.31849653808110784, "grad_norm": 0.8350034977215137, "learning_rate": 4.970239416515389e-06, "loss": 0.4842, "step": 1288 }, { "epoch": 0.31874381800197826, "grad_norm": 0.8237669521560471, "learning_rate": 4.970189393287761e-06, "loss": 0.4764, "step": 1289 }, { "epoch": 0.3189910979228487, "grad_norm": 0.8858090914826243, "learning_rate": 4.970139328306686e-06, "loss": 0.4789, "step": 1290 }, { "epoch": 0.3192383778437191, "grad_norm": 0.8106320226692121, "learning_rate": 4.970089221573008e-06, "loss": 0.4689, "step": 1291 }, { "epoch": 0.31948565776458954, "grad_norm": 0.8175678750319051, "learning_rate": 4.970039073087577e-06, "loss": 0.4512, "step": 1292 }, { "epoch": 0.31973293768545996, "grad_norm": 0.7995128955825734, "learning_rate": 4.969988882851238e-06, "loss": 0.4832, "step": 1293 }, { "epoch": 0.3199802176063304, "grad_norm": 0.799837226959758, "learning_rate": 4.969938650864841e-06, "loss": 0.4528, "step": 1294 }, { "epoch": 0.3202274975272008, "grad_norm": 0.834121167692778, "learning_rate": 4.969888377129234e-06, "loss": 0.4597, "step": 1295 }, { "epoch": 0.32047477744807124, "grad_norm": 0.8653081086264667, "learning_rate": 4.969838061645268e-06, "loss": 0.4595, "step": 1296 }, { "epoch": 0.32072205736894166, "grad_norm": 0.8673155533135949, "learning_rate": 4.969787704413792e-06, "loss": 0.4812, "step": 1297 }, { "epoch": 0.3209693372898121, "grad_norm": 0.8122719817063542, "learning_rate": 4.969737305435658e-06, "loss": 0.4781, "step": 1298 }, { "epoch": 0.3212166172106825, "grad_norm": 0.8047716005193889, "learning_rate": 4.969686864711718e-06, "loss": 0.4999, "step": 1299 }, { "epoch": 0.32146389713155293, "grad_norm": 0.8407561134099769, "learning_rate": 4.969636382242825e-06, "loss": 0.4627, "step": 1300 }, { "epoch": 0.32171117705242336, "grad_norm": 0.8419429745734757, "learning_rate": 4.969585858029831e-06, "loss": 0.4729, "step": 1301 }, { "epoch": 0.3219584569732938, "grad_norm": 0.8166830456200092, "learning_rate": 4.96953529207359e-06, "loss": 0.4871, "step": 1302 }, { "epoch": 0.3222057368941642, "grad_norm": 0.7917900543444013, "learning_rate": 4.969484684374959e-06, "loss": 0.4935, "step": 1303 }, { "epoch": 0.32245301681503463, "grad_norm": 0.9590690166755755, "learning_rate": 4.9694340349347904e-06, "loss": 0.429, "step": 1304 }, { "epoch": 0.32270029673590506, "grad_norm": 0.8500944513695028, "learning_rate": 4.969383343753943e-06, "loss": 0.456, "step": 1305 }, { "epoch": 0.3229475766567755, "grad_norm": 0.9050469051293906, "learning_rate": 4.9693326108332716e-06, "loss": 0.4792, "step": 1306 }, { "epoch": 0.3231948565776459, "grad_norm": 0.8340987878049144, "learning_rate": 4.969281836173635e-06, "loss": 0.4722, "step": 1307 }, { "epoch": 0.32344213649851633, "grad_norm": 0.8987401808532778, "learning_rate": 4.969231019775891e-06, "loss": 0.4293, "step": 1308 }, { "epoch": 0.32368941641938676, "grad_norm": 0.8581280019324882, "learning_rate": 4.969180161640898e-06, "loss": 0.4519, "step": 1309 }, { "epoch": 0.3239366963402572, "grad_norm": 0.8465852233831679, "learning_rate": 4.9691292617695165e-06, "loss": 0.4632, "step": 1310 }, { "epoch": 0.3241839762611276, "grad_norm": 0.8182499069557968, "learning_rate": 4.969078320162607e-06, "loss": 0.4673, "step": 1311 }, { "epoch": 0.32443125618199803, "grad_norm": 0.8583292489781121, "learning_rate": 4.969027336821029e-06, "loss": 0.4787, "step": 1312 }, { "epoch": 0.32467853610286845, "grad_norm": 0.9144514645243177, "learning_rate": 4.968976311745647e-06, "loss": 0.4611, "step": 1313 }, { "epoch": 0.3249258160237389, "grad_norm": 0.8542093569498418, "learning_rate": 4.96892524493732e-06, "loss": 0.4807, "step": 1314 }, { "epoch": 0.3251730959446093, "grad_norm": 0.8531682089560951, "learning_rate": 4.968874136396914e-06, "loss": 0.463, "step": 1315 }, { "epoch": 0.3254203758654797, "grad_norm": 0.8740204340015105, "learning_rate": 4.968822986125292e-06, "loss": 0.4723, "step": 1316 }, { "epoch": 0.32566765578635015, "grad_norm": 0.7890203994099924, "learning_rate": 4.968771794123318e-06, "loss": 0.4811, "step": 1317 }, { "epoch": 0.3259149357072206, "grad_norm": 0.8646734697790014, "learning_rate": 4.968720560391859e-06, "loss": 0.4412, "step": 1318 }, { "epoch": 0.326162215628091, "grad_norm": 0.8417898419858085, "learning_rate": 4.968669284931779e-06, "loss": 0.4992, "step": 1319 }, { "epoch": 0.3264094955489614, "grad_norm": 0.8403085258475721, "learning_rate": 4.968617967743945e-06, "loss": 0.4671, "step": 1320 }, { "epoch": 0.32665677546983185, "grad_norm": 0.8161124612225035, "learning_rate": 4.968566608829225e-06, "loss": 0.4612, "step": 1321 }, { "epoch": 0.3269040553907023, "grad_norm": 0.8176140395301841, "learning_rate": 4.968515208188487e-06, "loss": 0.4773, "step": 1322 }, { "epoch": 0.3271513353115727, "grad_norm": 0.8612121577584373, "learning_rate": 4.9684637658226e-06, "loss": 0.4636, "step": 1323 }, { "epoch": 0.3273986152324431, "grad_norm": 0.7636475891146703, "learning_rate": 4.968412281732433e-06, "loss": 0.4591, "step": 1324 }, { "epoch": 0.32764589515331355, "grad_norm": 0.8356407912568218, "learning_rate": 4.968360755918858e-06, "loss": 0.4654, "step": 1325 }, { "epoch": 0.327893175074184, "grad_norm": 0.8130038980473393, "learning_rate": 4.968309188382743e-06, "loss": 0.493, "step": 1326 }, { "epoch": 0.3281404549950544, "grad_norm": 0.8073046510001293, "learning_rate": 4.968257579124962e-06, "loss": 0.4621, "step": 1327 }, { "epoch": 0.3283877349159248, "grad_norm": 0.8016870786489134, "learning_rate": 4.968205928146386e-06, "loss": 0.4643, "step": 1328 }, { "epoch": 0.32863501483679525, "grad_norm": 0.8448895040799353, "learning_rate": 4.968154235447889e-06, "loss": 0.4859, "step": 1329 }, { "epoch": 0.32888229475766567, "grad_norm": 0.8191604076948825, "learning_rate": 4.9681025010303445e-06, "loss": 0.4691, "step": 1330 }, { "epoch": 0.3291295746785361, "grad_norm": 0.8128752296677418, "learning_rate": 4.968050724894626e-06, "loss": 0.4621, "step": 1331 }, { "epoch": 0.3293768545994065, "grad_norm": 0.843228610985233, "learning_rate": 4.9679989070416106e-06, "loss": 0.442, "step": 1332 }, { "epoch": 0.32962413452027695, "grad_norm": 0.7963756355605212, "learning_rate": 4.967947047472172e-06, "loss": 0.4616, "step": 1333 }, { "epoch": 0.32987141444114737, "grad_norm": 0.839654855859208, "learning_rate": 4.967895146187189e-06, "loss": 0.4643, "step": 1334 }, { "epoch": 0.3301186943620178, "grad_norm": 0.8274671903075769, "learning_rate": 4.967843203187537e-06, "loss": 0.4426, "step": 1335 }, { "epoch": 0.3303659742828882, "grad_norm": 0.8799303601806644, "learning_rate": 4.967791218474095e-06, "loss": 0.4644, "step": 1336 }, { "epoch": 0.33061325420375864, "grad_norm": 0.8179823637800756, "learning_rate": 4.967739192047741e-06, "loss": 0.4659, "step": 1337 }, { "epoch": 0.33086053412462907, "grad_norm": 0.8195725524565542, "learning_rate": 4.967687123909355e-06, "loss": 0.5096, "step": 1338 }, { "epoch": 0.3311078140454995, "grad_norm": 0.84197720805029, "learning_rate": 4.9676350140598165e-06, "loss": 0.4727, "step": 1339 }, { "epoch": 0.3313550939663699, "grad_norm": 0.8260250113198276, "learning_rate": 4.9675828625000065e-06, "loss": 0.4779, "step": 1340 }, { "epoch": 0.33160237388724034, "grad_norm": 0.8880207738324564, "learning_rate": 4.967530669230808e-06, "loss": 0.4368, "step": 1341 }, { "epoch": 0.33184965380811077, "grad_norm": 0.8222910498101001, "learning_rate": 4.967478434253101e-06, "loss": 0.4881, "step": 1342 }, { "epoch": 0.3320969337289812, "grad_norm": 0.9239814524826327, "learning_rate": 4.9674261575677696e-06, "loss": 0.4394, "step": 1343 }, { "epoch": 0.3323442136498516, "grad_norm": 0.8162367375969642, "learning_rate": 4.967373839175696e-06, "loss": 0.43, "step": 1344 }, { "epoch": 0.33259149357072204, "grad_norm": 0.8420930848992175, "learning_rate": 4.967321479077768e-06, "loss": 0.4708, "step": 1345 }, { "epoch": 0.33283877349159247, "grad_norm": 0.8299200362774546, "learning_rate": 4.967269077274867e-06, "loss": 0.502, "step": 1346 }, { "epoch": 0.3330860534124629, "grad_norm": 0.8903658765485658, "learning_rate": 4.96721663376788e-06, "loss": 0.4873, "step": 1347 }, { "epoch": 0.3333333333333333, "grad_norm": 0.842432576192086, "learning_rate": 4.967164148557694e-06, "loss": 0.4523, "step": 1348 }, { "epoch": 0.33358061325420374, "grad_norm": 0.8784188816986657, "learning_rate": 4.967111621645195e-06, "loss": 0.4648, "step": 1349 }, { "epoch": 0.33382789317507416, "grad_norm": 0.8890735207690358, "learning_rate": 4.967059053031272e-06, "loss": 0.4735, "step": 1350 }, { "epoch": 0.3340751730959446, "grad_norm": 0.9139484525999664, "learning_rate": 4.967006442716814e-06, "loss": 0.4811, "step": 1351 }, { "epoch": 0.334322453016815, "grad_norm": 0.8432725929225718, "learning_rate": 4.966953790702709e-06, "loss": 0.4814, "step": 1352 }, { "epoch": 0.33456973293768544, "grad_norm": 0.7968350339816171, "learning_rate": 4.9669010969898465e-06, "loss": 0.4945, "step": 1353 }, { "epoch": 0.33481701285855586, "grad_norm": 0.8448439028983895, "learning_rate": 4.966848361579119e-06, "loss": 0.471, "step": 1354 }, { "epoch": 0.3350642927794263, "grad_norm": 0.8700792089452078, "learning_rate": 4.966795584471417e-06, "loss": 0.4778, "step": 1355 }, { "epoch": 0.3353115727002967, "grad_norm": 0.8576573597500617, "learning_rate": 4.966742765667632e-06, "loss": 0.439, "step": 1356 }, { "epoch": 0.33555885262116714, "grad_norm": 0.8412617464584977, "learning_rate": 4.9666899051686565e-06, "loss": 0.4579, "step": 1357 }, { "epoch": 0.33580613254203756, "grad_norm": 0.8660641241003527, "learning_rate": 4.966637002975387e-06, "loss": 0.4947, "step": 1358 }, { "epoch": 0.336053412462908, "grad_norm": 0.8784136045891929, "learning_rate": 4.966584059088714e-06, "loss": 0.4509, "step": 1359 }, { "epoch": 0.3363006923837784, "grad_norm": 0.8575154859392102, "learning_rate": 4.966531073509534e-06, "loss": 0.4628, "step": 1360 }, { "epoch": 0.3365479723046489, "grad_norm": 0.8558968082320106, "learning_rate": 4.966478046238742e-06, "loss": 0.4539, "step": 1361 }, { "epoch": 0.3367952522255193, "grad_norm": 0.8463118657623281, "learning_rate": 4.966424977277236e-06, "loss": 0.4874, "step": 1362 }, { "epoch": 0.33704253214638974, "grad_norm": 0.8582147987913828, "learning_rate": 4.966371866625912e-06, "loss": 0.4827, "step": 1363 }, { "epoch": 0.33728981206726016, "grad_norm": 0.8425557042931806, "learning_rate": 4.966318714285667e-06, "loss": 0.5158, "step": 1364 }, { "epoch": 0.3375370919881306, "grad_norm": 0.8198957499784623, "learning_rate": 4.966265520257399e-06, "loss": 0.4602, "step": 1365 }, { "epoch": 0.337784371909001, "grad_norm": 0.901675824405454, "learning_rate": 4.9662122845420105e-06, "loss": 0.4638, "step": 1366 }, { "epoch": 0.33803165182987144, "grad_norm": 0.8421039706556158, "learning_rate": 4.9661590071403975e-06, "loss": 0.4773, "step": 1367 }, { "epoch": 0.33827893175074186, "grad_norm": 0.8881502365059725, "learning_rate": 4.966105688053462e-06, "loss": 0.4977, "step": 1368 }, { "epoch": 0.3385262116716123, "grad_norm": 0.8286924700026043, "learning_rate": 4.966052327282106e-06, "loss": 0.4528, "step": 1369 }, { "epoch": 0.3387734915924827, "grad_norm": 0.815530177047265, "learning_rate": 4.96599892482723e-06, "loss": 0.4678, "step": 1370 }, { "epoch": 0.33902077151335314, "grad_norm": 0.8498159029250026, "learning_rate": 4.965945480689738e-06, "loss": 0.4797, "step": 1371 }, { "epoch": 0.33926805143422356, "grad_norm": 0.837998064753534, "learning_rate": 4.965891994870533e-06, "loss": 0.4518, "step": 1372 }, { "epoch": 0.339515331355094, "grad_norm": 0.8571090602934665, "learning_rate": 4.965838467370518e-06, "loss": 0.4516, "step": 1373 }, { "epoch": 0.3397626112759644, "grad_norm": 0.9212861435307308, "learning_rate": 4.9657848981905985e-06, "loss": 0.4589, "step": 1374 }, { "epoch": 0.34000989119683483, "grad_norm": 0.8646273616764547, "learning_rate": 4.9657312873316806e-06, "loss": 0.4656, "step": 1375 }, { "epoch": 0.34025717111770526, "grad_norm": 0.8649249854703567, "learning_rate": 4.965677634794671e-06, "loss": 0.4678, "step": 1376 }, { "epoch": 0.3405044510385757, "grad_norm": 0.8280715209748639, "learning_rate": 4.965623940580474e-06, "loss": 0.4806, "step": 1377 }, { "epoch": 0.3407517309594461, "grad_norm": 0.8540637518211892, "learning_rate": 4.965570204689999e-06, "loss": 0.466, "step": 1378 }, { "epoch": 0.34099901088031653, "grad_norm": 0.8722297658167012, "learning_rate": 4.965516427124155e-06, "loss": 0.4912, "step": 1379 }, { "epoch": 0.34124629080118696, "grad_norm": 0.8324087882341675, "learning_rate": 4.965462607883849e-06, "loss": 0.4282, "step": 1380 }, { "epoch": 0.3414935707220574, "grad_norm": 0.8542616511213038, "learning_rate": 4.965408746969993e-06, "loss": 0.4565, "step": 1381 }, { "epoch": 0.3417408506429278, "grad_norm": 0.8897532286442891, "learning_rate": 4.965354844383494e-06, "loss": 0.4476, "step": 1382 }, { "epoch": 0.34198813056379823, "grad_norm": 0.900818254020432, "learning_rate": 4.965300900125267e-06, "loss": 0.4626, "step": 1383 }, { "epoch": 0.34223541048466866, "grad_norm": 0.8121891811895046, "learning_rate": 4.965246914196222e-06, "loss": 0.4723, "step": 1384 }, { "epoch": 0.3424826904055391, "grad_norm": 0.8085099498992192, "learning_rate": 4.965192886597271e-06, "loss": 0.4969, "step": 1385 }, { "epoch": 0.3427299703264095, "grad_norm": 0.8169789083197132, "learning_rate": 4.965138817329328e-06, "loss": 0.4983, "step": 1386 }, { "epoch": 0.34297725024727993, "grad_norm": 0.8413101020719048, "learning_rate": 4.965084706393307e-06, "loss": 0.4624, "step": 1387 }, { "epoch": 0.34322453016815035, "grad_norm": 0.8554114796569622, "learning_rate": 4.965030553790123e-06, "loss": 0.4847, "step": 1388 }, { "epoch": 0.3434718100890208, "grad_norm": 0.883414822387632, "learning_rate": 4.964976359520689e-06, "loss": 0.4873, "step": 1389 }, { "epoch": 0.3437190900098912, "grad_norm": 0.9225841299701298, "learning_rate": 4.964922123585924e-06, "loss": 0.4517, "step": 1390 }, { "epoch": 0.34396636993076163, "grad_norm": 0.836069523426395, "learning_rate": 4.964867845986742e-06, "loss": 0.508, "step": 1391 }, { "epoch": 0.34421364985163205, "grad_norm": 0.8230356770161646, "learning_rate": 4.964813526724064e-06, "loss": 0.4811, "step": 1392 }, { "epoch": 0.3444609297725025, "grad_norm": 0.8144205084918253, "learning_rate": 4.964759165798806e-06, "loss": 0.4746, "step": 1393 }, { "epoch": 0.3447082096933729, "grad_norm": 0.848243953448798, "learning_rate": 4.964704763211886e-06, "loss": 0.4605, "step": 1394 }, { "epoch": 0.3449554896142433, "grad_norm": 0.862441294591035, "learning_rate": 4.964650318964224e-06, "loss": 0.4261, "step": 1395 }, { "epoch": 0.34520276953511375, "grad_norm": 0.823727505626556, "learning_rate": 4.964595833056742e-06, "loss": 0.4542, "step": 1396 }, { "epoch": 0.3454500494559842, "grad_norm": 0.8461579909245877, "learning_rate": 4.964541305490359e-06, "loss": 0.4642, "step": 1397 }, { "epoch": 0.3456973293768546, "grad_norm": 0.8682318696565303, "learning_rate": 4.964486736265998e-06, "loss": 0.4619, "step": 1398 }, { "epoch": 0.345944609297725, "grad_norm": 0.8154599041496469, "learning_rate": 4.964432125384581e-06, "loss": 0.49, "step": 1399 }, { "epoch": 0.34619188921859545, "grad_norm": 0.8521804504909071, "learning_rate": 4.96437747284703e-06, "loss": 0.4339, "step": 1400 }, { "epoch": 0.3464391691394659, "grad_norm": 0.8556600517889699, "learning_rate": 4.964322778654271e-06, "loss": 0.4447, "step": 1401 }, { "epoch": 0.3466864490603363, "grad_norm": 0.8688194992174506, "learning_rate": 4.964268042807227e-06, "loss": 0.4644, "step": 1402 }, { "epoch": 0.3469337289812067, "grad_norm": 0.8017975295806874, "learning_rate": 4.9642132653068224e-06, "loss": 0.4561, "step": 1403 }, { "epoch": 0.34718100890207715, "grad_norm": 0.8536874452765987, "learning_rate": 4.964158446153985e-06, "loss": 0.4648, "step": 1404 }, { "epoch": 0.3474282888229476, "grad_norm": 0.8189127192711046, "learning_rate": 4.964103585349639e-06, "loss": 0.4388, "step": 1405 }, { "epoch": 0.347675568743818, "grad_norm": 0.8705939326174833, "learning_rate": 4.9640486828947146e-06, "loss": 0.4596, "step": 1406 }, { "epoch": 0.3479228486646884, "grad_norm": 0.885106043728194, "learning_rate": 4.963993738790138e-06, "loss": 0.4743, "step": 1407 }, { "epoch": 0.34817012858555885, "grad_norm": 0.8332350836081958, "learning_rate": 4.963938753036839e-06, "loss": 0.4795, "step": 1408 }, { "epoch": 0.34841740850642927, "grad_norm": 0.8151625206936463, "learning_rate": 4.963883725635746e-06, "loss": 0.4588, "step": 1409 }, { "epoch": 0.3486646884272997, "grad_norm": 0.8584186317187182, "learning_rate": 4.963828656587789e-06, "loss": 0.4609, "step": 1410 }, { "epoch": 0.3489119683481701, "grad_norm": 0.8374239839439461, "learning_rate": 4.9637735458939e-06, "loss": 0.4663, "step": 1411 }, { "epoch": 0.34915924826904055, "grad_norm": 0.8335071629302205, "learning_rate": 4.96371839355501e-06, "loss": 0.4976, "step": 1412 }, { "epoch": 0.34940652818991097, "grad_norm": 0.8566093656507006, "learning_rate": 4.96366319957205e-06, "loss": 0.4427, "step": 1413 }, { "epoch": 0.3496538081107814, "grad_norm": 0.8797486028317246, "learning_rate": 4.963607963945954e-06, "loss": 0.4638, "step": 1414 }, { "epoch": 0.3499010880316518, "grad_norm": 0.7973883458360015, "learning_rate": 4.963552686677656e-06, "loss": 0.4756, "step": 1415 }, { "epoch": 0.35014836795252224, "grad_norm": 0.8605355070813538, "learning_rate": 4.963497367768091e-06, "loss": 0.4937, "step": 1416 }, { "epoch": 0.35039564787339267, "grad_norm": 0.8087052810593318, "learning_rate": 4.9634420072181925e-06, "loss": 0.5043, "step": 1417 }, { "epoch": 0.3506429277942631, "grad_norm": 0.8042980991613009, "learning_rate": 4.963386605028897e-06, "loss": 0.4866, "step": 1418 }, { "epoch": 0.3508902077151335, "grad_norm": 0.8397881265192508, "learning_rate": 4.96333116120114e-06, "loss": 0.4775, "step": 1419 }, { "epoch": 0.35113748763600394, "grad_norm": 0.8229969968530699, "learning_rate": 4.963275675735859e-06, "loss": 0.4909, "step": 1420 }, { "epoch": 0.35138476755687437, "grad_norm": 0.8582458934680285, "learning_rate": 4.963220148633994e-06, "loss": 0.4483, "step": 1421 }, { "epoch": 0.3516320474777448, "grad_norm": 0.8509944642538084, "learning_rate": 4.963164579896481e-06, "loss": 0.4931, "step": 1422 }, { "epoch": 0.3518793273986152, "grad_norm": 0.7778259466986965, "learning_rate": 4.963108969524261e-06, "loss": 0.4506, "step": 1423 }, { "epoch": 0.35212660731948564, "grad_norm": 0.806063787046905, "learning_rate": 4.9630533175182714e-06, "loss": 0.4828, "step": 1424 }, { "epoch": 0.35237388724035607, "grad_norm": 0.8595506336846482, "learning_rate": 4.962997623879456e-06, "loss": 0.4606, "step": 1425 }, { "epoch": 0.3526211671612265, "grad_norm": 0.8324046367039577, "learning_rate": 4.962941888608754e-06, "loss": 0.4489, "step": 1426 }, { "epoch": 0.3528684470820969, "grad_norm": 0.8519133065926907, "learning_rate": 4.9628861117071095e-06, "loss": 0.4664, "step": 1427 }, { "epoch": 0.35311572700296734, "grad_norm": 0.7904079696015989, "learning_rate": 4.962830293175463e-06, "loss": 0.4657, "step": 1428 }, { "epoch": 0.35336300692383776, "grad_norm": 0.8232281020750102, "learning_rate": 4.96277443301476e-06, "loss": 0.444, "step": 1429 }, { "epoch": 0.3536102868447082, "grad_norm": 0.8535424230625128, "learning_rate": 4.962718531225942e-06, "loss": 0.465, "step": 1430 }, { "epoch": 0.3538575667655786, "grad_norm": 0.8615311397260789, "learning_rate": 4.962662587809957e-06, "loss": 0.4482, "step": 1431 }, { "epoch": 0.35410484668644904, "grad_norm": 0.8179287695934544, "learning_rate": 4.9626066027677496e-06, "loss": 0.4796, "step": 1432 }, { "epoch": 0.35435212660731946, "grad_norm": 0.8435396498957267, "learning_rate": 4.962550576100265e-06, "loss": 0.4504, "step": 1433 }, { "epoch": 0.3545994065281899, "grad_norm": 0.8045830988187997, "learning_rate": 4.962494507808452e-06, "loss": 0.4718, "step": 1434 }, { "epoch": 0.3548466864490603, "grad_norm": 0.7973282650780453, "learning_rate": 4.962438397893256e-06, "loss": 0.5084, "step": 1435 }, { "epoch": 0.35509396636993074, "grad_norm": 0.81788673220588, "learning_rate": 4.962382246355628e-06, "loss": 0.4762, "step": 1436 }, { "epoch": 0.35534124629080116, "grad_norm": 0.8191220728313339, "learning_rate": 4.962326053196515e-06, "loss": 0.459, "step": 1437 }, { "epoch": 0.3555885262116716, "grad_norm": 0.8418837776836511, "learning_rate": 4.9622698184168684e-06, "loss": 0.4832, "step": 1438 }, { "epoch": 0.35583580613254207, "grad_norm": 0.822644289062169, "learning_rate": 4.962213542017638e-06, "loss": 0.4891, "step": 1439 }, { "epoch": 0.3560830860534125, "grad_norm": 0.8055518425958926, "learning_rate": 4.962157223999774e-06, "loss": 0.4805, "step": 1440 }, { "epoch": 0.3563303659742829, "grad_norm": 0.8663900660198538, "learning_rate": 4.962100864364231e-06, "loss": 0.4414, "step": 1441 }, { "epoch": 0.35657764589515334, "grad_norm": 0.8065354382371167, "learning_rate": 4.962044463111959e-06, "loss": 0.472, "step": 1442 }, { "epoch": 0.35682492581602376, "grad_norm": 0.8429417885865965, "learning_rate": 4.961988020243913e-06, "loss": 0.4802, "step": 1443 }, { "epoch": 0.3570722057368942, "grad_norm": 0.8628035030884751, "learning_rate": 4.961931535761046e-06, "loss": 0.4278, "step": 1444 }, { "epoch": 0.3573194856577646, "grad_norm": 0.8255801822657475, "learning_rate": 4.961875009664313e-06, "loss": 0.4637, "step": 1445 }, { "epoch": 0.35756676557863504, "grad_norm": 0.8435723559389733, "learning_rate": 4.9618184419546705e-06, "loss": 0.4711, "step": 1446 }, { "epoch": 0.35781404549950546, "grad_norm": 0.8287755155885044, "learning_rate": 4.961761832633073e-06, "loss": 0.4893, "step": 1447 }, { "epoch": 0.3580613254203759, "grad_norm": 0.8113313485945742, "learning_rate": 4.961705181700479e-06, "loss": 0.4758, "step": 1448 }, { "epoch": 0.3583086053412463, "grad_norm": 0.8541198849431507, "learning_rate": 4.9616484891578455e-06, "loss": 0.447, "step": 1449 }, { "epoch": 0.35855588526211674, "grad_norm": 0.8277973505342479, "learning_rate": 4.96159175500613e-06, "loss": 0.4556, "step": 1450 }, { "epoch": 0.35880316518298716, "grad_norm": 0.8217363815126292, "learning_rate": 4.9615349792462916e-06, "loss": 0.4682, "step": 1451 }, { "epoch": 0.3590504451038576, "grad_norm": 0.8235924475409444, "learning_rate": 4.961478161879291e-06, "loss": 0.4577, "step": 1452 }, { "epoch": 0.359297725024728, "grad_norm": 0.8366896026328494, "learning_rate": 4.961421302906087e-06, "loss": 0.4536, "step": 1453 }, { "epoch": 0.35954500494559843, "grad_norm": 0.8480005034889219, "learning_rate": 4.961364402327643e-06, "loss": 0.467, "step": 1454 }, { "epoch": 0.35979228486646886, "grad_norm": 0.8056929237512162, "learning_rate": 4.961307460144919e-06, "loss": 0.4733, "step": 1455 }, { "epoch": 0.3600395647873393, "grad_norm": 0.8221098249663793, "learning_rate": 4.9612504763588774e-06, "loss": 0.4598, "step": 1456 }, { "epoch": 0.3602868447082097, "grad_norm": 0.8347778715831121, "learning_rate": 4.961193450970483e-06, "loss": 0.4568, "step": 1457 }, { "epoch": 0.36053412462908013, "grad_norm": 0.8772150797089943, "learning_rate": 4.961136383980697e-06, "loss": 0.4588, "step": 1458 }, { "epoch": 0.36078140454995056, "grad_norm": 0.8530561104086893, "learning_rate": 4.9610792753904866e-06, "loss": 0.4616, "step": 1459 }, { "epoch": 0.361028684470821, "grad_norm": 0.8823335046697693, "learning_rate": 4.961022125200816e-06, "loss": 0.4699, "step": 1460 }, { "epoch": 0.3612759643916914, "grad_norm": 0.8485109471759061, "learning_rate": 4.960964933412652e-06, "loss": 0.4378, "step": 1461 }, { "epoch": 0.36152324431256183, "grad_norm": 0.8898917632495861, "learning_rate": 4.96090770002696e-06, "loss": 0.4654, "step": 1462 }, { "epoch": 0.36177052423343226, "grad_norm": 0.8470917053499859, "learning_rate": 4.9608504250447075e-06, "loss": 0.4553, "step": 1463 }, { "epoch": 0.3620178041543027, "grad_norm": 0.8348048460177506, "learning_rate": 4.960793108466863e-06, "loss": 0.4423, "step": 1464 }, { "epoch": 0.3622650840751731, "grad_norm": 0.8837887760254436, "learning_rate": 4.960735750294397e-06, "loss": 0.437, "step": 1465 }, { "epoch": 0.36251236399604353, "grad_norm": 0.8521679859494443, "learning_rate": 4.960678350528277e-06, "loss": 0.4577, "step": 1466 }, { "epoch": 0.36275964391691395, "grad_norm": 0.7953899887086049, "learning_rate": 4.9606209091694734e-06, "loss": 0.4822, "step": 1467 }, { "epoch": 0.3630069238377844, "grad_norm": 0.7982731717026136, "learning_rate": 4.960563426218957e-06, "loss": 0.4584, "step": 1468 }, { "epoch": 0.3632542037586548, "grad_norm": 0.9145430537767979, "learning_rate": 4.960505901677701e-06, "loss": 0.5103, "step": 1469 }, { "epoch": 0.36350148367952523, "grad_norm": 0.8606249759635389, "learning_rate": 4.9604483355466756e-06, "loss": 0.4532, "step": 1470 }, { "epoch": 0.36374876360039565, "grad_norm": 0.8640284738109312, "learning_rate": 4.960390727826856e-06, "loss": 0.4562, "step": 1471 }, { "epoch": 0.3639960435212661, "grad_norm": 0.8927421670919629, "learning_rate": 4.960333078519214e-06, "loss": 0.4427, "step": 1472 }, { "epoch": 0.3642433234421365, "grad_norm": 0.8209190272202821, "learning_rate": 4.9602753876247244e-06, "loss": 0.4658, "step": 1473 }, { "epoch": 0.3644906033630069, "grad_norm": 0.8506090980497512, "learning_rate": 4.960217655144364e-06, "loss": 0.4903, "step": 1474 }, { "epoch": 0.36473788328387735, "grad_norm": 0.8304617275337773, "learning_rate": 4.960159881079106e-06, "loss": 0.4371, "step": 1475 }, { "epoch": 0.3649851632047478, "grad_norm": 0.8074225851216803, "learning_rate": 4.960102065429929e-06, "loss": 0.4615, "step": 1476 }, { "epoch": 0.3652324431256182, "grad_norm": 0.8003088453641801, "learning_rate": 4.96004420819781e-06, "loss": 0.4602, "step": 1477 }, { "epoch": 0.3654797230464886, "grad_norm": 0.8470939389079107, "learning_rate": 4.959986309383726e-06, "loss": 0.4713, "step": 1478 }, { "epoch": 0.36572700296735905, "grad_norm": 0.830521949089574, "learning_rate": 4.959928368988657e-06, "loss": 0.4811, "step": 1479 }, { "epoch": 0.3659742828882295, "grad_norm": 0.8745328214858118, "learning_rate": 4.959870387013581e-06, "loss": 0.4557, "step": 1480 }, { "epoch": 0.3662215628090999, "grad_norm": 0.847094148012273, "learning_rate": 4.959812363459479e-06, "loss": 0.4655, "step": 1481 }, { "epoch": 0.3664688427299703, "grad_norm": 0.8491449367553962, "learning_rate": 4.959754298327332e-06, "loss": 0.4597, "step": 1482 }, { "epoch": 0.36671612265084075, "grad_norm": 0.8229085286999269, "learning_rate": 4.959696191618119e-06, "loss": 0.4854, "step": 1483 }, { "epoch": 0.3669634025717112, "grad_norm": 0.8702802741898958, "learning_rate": 4.959638043332826e-06, "loss": 0.455, "step": 1484 }, { "epoch": 0.3672106824925816, "grad_norm": 0.8459937215244333, "learning_rate": 4.959579853472434e-06, "loss": 0.4758, "step": 1485 }, { "epoch": 0.367457962413452, "grad_norm": 0.8881342218230955, "learning_rate": 4.959521622037925e-06, "loss": 0.468, "step": 1486 }, { "epoch": 0.36770524233432245, "grad_norm": 0.7847108194328761, "learning_rate": 4.959463349030285e-06, "loss": 0.4896, "step": 1487 }, { "epoch": 0.36795252225519287, "grad_norm": 0.8417601497534091, "learning_rate": 4.959405034450501e-06, "loss": 0.4686, "step": 1488 }, { "epoch": 0.3681998021760633, "grad_norm": 0.8122499845921317, "learning_rate": 4.959346678299555e-06, "loss": 0.4692, "step": 1489 }, { "epoch": 0.3684470820969337, "grad_norm": 0.8580283394958202, "learning_rate": 4.9592882805784345e-06, "loss": 0.4742, "step": 1490 }, { "epoch": 0.36869436201780414, "grad_norm": 0.8428257373547411, "learning_rate": 4.959229841288128e-06, "loss": 0.4675, "step": 1491 }, { "epoch": 0.36894164193867457, "grad_norm": 0.8259183229805209, "learning_rate": 4.959171360429621e-06, "loss": 0.4404, "step": 1492 }, { "epoch": 0.369188921859545, "grad_norm": 0.8126446310683214, "learning_rate": 4.959112838003905e-06, "loss": 0.4687, "step": 1493 }, { "epoch": 0.3694362017804154, "grad_norm": 0.7830986243714438, "learning_rate": 4.959054274011966e-06, "loss": 0.4625, "step": 1494 }, { "epoch": 0.36968348170128584, "grad_norm": 0.8237721672583825, "learning_rate": 4.958995668454796e-06, "loss": 0.4679, "step": 1495 }, { "epoch": 0.36993076162215627, "grad_norm": 0.842776685828888, "learning_rate": 4.958937021333384e-06, "loss": 0.46, "step": 1496 }, { "epoch": 0.3701780415430267, "grad_norm": 0.8364831387221224, "learning_rate": 4.958878332648724e-06, "loss": 0.4899, "step": 1497 }, { "epoch": 0.3704253214638971, "grad_norm": 0.7940034967601184, "learning_rate": 4.958819602401806e-06, "loss": 0.4595, "step": 1498 }, { "epoch": 0.37067260138476754, "grad_norm": 0.8153152604939218, "learning_rate": 4.958760830593621e-06, "loss": 0.4808, "step": 1499 }, { "epoch": 0.37091988130563797, "grad_norm": 0.8603126838223264, "learning_rate": 4.958702017225166e-06, "loss": 0.4643, "step": 1500 }, { "epoch": 0.3711671612265084, "grad_norm": 0.832304898978481, "learning_rate": 4.958643162297434e-06, "loss": 0.4609, "step": 1501 }, { "epoch": 0.3714144411473788, "grad_norm": 0.8437671915588434, "learning_rate": 4.958584265811419e-06, "loss": 0.4607, "step": 1502 }, { "epoch": 0.37166172106824924, "grad_norm": 0.8270741301993446, "learning_rate": 4.958525327768117e-06, "loss": 0.4866, "step": 1503 }, { "epoch": 0.37190900098911966, "grad_norm": 0.8458032983321746, "learning_rate": 4.9584663481685235e-06, "loss": 0.4842, "step": 1504 }, { "epoch": 0.3721562809099901, "grad_norm": 0.8240486908432811, "learning_rate": 4.958407327013637e-06, "loss": 0.4836, "step": 1505 }, { "epoch": 0.3724035608308605, "grad_norm": 0.8103231743244376, "learning_rate": 4.9583482643044535e-06, "loss": 0.4607, "step": 1506 }, { "epoch": 0.37265084075173094, "grad_norm": 0.8309144783040975, "learning_rate": 4.9582891600419714e-06, "loss": 0.4767, "step": 1507 }, { "epoch": 0.37289812067260136, "grad_norm": 0.812854985431406, "learning_rate": 4.958230014227191e-06, "loss": 0.5015, "step": 1508 }, { "epoch": 0.3731454005934718, "grad_norm": 0.8389457059849967, "learning_rate": 4.9581708268611116e-06, "loss": 0.4895, "step": 1509 }, { "epoch": 0.3733926805143422, "grad_norm": 0.8324431688711037, "learning_rate": 4.958111597944734e-06, "loss": 0.4802, "step": 1510 }, { "epoch": 0.37363996043521264, "grad_norm": 0.8858341769964009, "learning_rate": 4.9580523274790585e-06, "loss": 0.4677, "step": 1511 }, { "epoch": 0.37388724035608306, "grad_norm": 0.8346574872673881, "learning_rate": 4.957993015465086e-06, "loss": 0.4434, "step": 1512 }, { "epoch": 0.3741345202769535, "grad_norm": 0.8159836769145766, "learning_rate": 4.957933661903822e-06, "loss": 0.49, "step": 1513 }, { "epoch": 0.3743818001978239, "grad_norm": 0.903045314748104, "learning_rate": 4.957874266796267e-06, "loss": 0.4861, "step": 1514 }, { "epoch": 0.37462908011869434, "grad_norm": 0.8234870808009122, "learning_rate": 4.9578148301434255e-06, "loss": 0.4452, "step": 1515 }, { "epoch": 0.37487636003956476, "grad_norm": 0.8153560688366689, "learning_rate": 4.957755351946303e-06, "loss": 0.4734, "step": 1516 }, { "epoch": 0.37512363996043524, "grad_norm": 0.8400329349725666, "learning_rate": 4.957695832205905e-06, "loss": 0.497, "step": 1517 }, { "epoch": 0.37537091988130566, "grad_norm": 0.8358924451402449, "learning_rate": 4.957636270923237e-06, "loss": 0.4562, "step": 1518 }, { "epoch": 0.3756181998021761, "grad_norm": 0.8432421412883345, "learning_rate": 4.9575766680993056e-06, "loss": 0.4428, "step": 1519 }, { "epoch": 0.3758654797230465, "grad_norm": 0.8160910599610325, "learning_rate": 4.957517023735119e-06, "loss": 0.4627, "step": 1520 }, { "epoch": 0.37611275964391694, "grad_norm": 0.8711532573510746, "learning_rate": 4.957457337831684e-06, "loss": 0.4717, "step": 1521 }, { "epoch": 0.37636003956478736, "grad_norm": 0.8242546795865497, "learning_rate": 4.95739761039001e-06, "loss": 0.4819, "step": 1522 }, { "epoch": 0.3766073194856578, "grad_norm": 0.8613610896652953, "learning_rate": 4.957337841411107e-06, "loss": 0.446, "step": 1523 }, { "epoch": 0.3768545994065282, "grad_norm": 0.864261076328205, "learning_rate": 4.9572780308959865e-06, "loss": 0.4698, "step": 1524 }, { "epoch": 0.37710187932739864, "grad_norm": 0.8614226944088311, "learning_rate": 4.957218178845657e-06, "loss": 0.4808, "step": 1525 }, { "epoch": 0.37734915924826906, "grad_norm": 0.8224765428766275, "learning_rate": 4.957158285261131e-06, "loss": 0.4403, "step": 1526 }, { "epoch": 0.3775964391691395, "grad_norm": 0.8181585369424585, "learning_rate": 4.957098350143422e-06, "loss": 0.4766, "step": 1527 }, { "epoch": 0.3778437190900099, "grad_norm": 0.8096774878681889, "learning_rate": 4.957038373493541e-06, "loss": 0.46, "step": 1528 }, { "epoch": 0.37809099901088034, "grad_norm": 0.8490900193792322, "learning_rate": 4.956978355312505e-06, "loss": 0.4781, "step": 1529 }, { "epoch": 0.37833827893175076, "grad_norm": 0.8322232014452549, "learning_rate": 4.956918295601325e-06, "loss": 0.5009, "step": 1530 }, { "epoch": 0.3785855588526212, "grad_norm": 0.7735197922262507, "learning_rate": 4.956858194361018e-06, "loss": 0.478, "step": 1531 }, { "epoch": 0.3788328387734916, "grad_norm": 0.8585921318091921, "learning_rate": 4.9567980515926e-06, "loss": 0.4856, "step": 1532 }, { "epoch": 0.37908011869436203, "grad_norm": 0.8446307436491305, "learning_rate": 4.956737867297086e-06, "loss": 0.4472, "step": 1533 }, { "epoch": 0.37932739861523246, "grad_norm": 0.8357409701620357, "learning_rate": 4.9566776414754955e-06, "loss": 0.4704, "step": 1534 }, { "epoch": 0.3795746785361029, "grad_norm": 0.883193450189684, "learning_rate": 4.9566173741288445e-06, "loss": 0.4445, "step": 1535 }, { "epoch": 0.3798219584569733, "grad_norm": 0.8362706611107679, "learning_rate": 4.956557065258154e-06, "loss": 0.4763, "step": 1536 }, { "epoch": 0.38006923837784373, "grad_norm": 0.8445029539445396, "learning_rate": 4.956496714864442e-06, "loss": 0.4363, "step": 1537 }, { "epoch": 0.38031651829871416, "grad_norm": 0.9005044139041423, "learning_rate": 4.956436322948728e-06, "loss": 0.4257, "step": 1538 }, { "epoch": 0.3805637982195846, "grad_norm": 0.803480549136306, "learning_rate": 4.956375889512033e-06, "loss": 0.4643, "step": 1539 }, { "epoch": 0.380811078140455, "grad_norm": 0.8944145630210211, "learning_rate": 4.95631541455538e-06, "loss": 0.4649, "step": 1540 }, { "epoch": 0.38105835806132543, "grad_norm": 0.8431926814490958, "learning_rate": 4.956254898079789e-06, "loss": 0.4634, "step": 1541 }, { "epoch": 0.38130563798219586, "grad_norm": 0.8623971408995822, "learning_rate": 4.956194340086284e-06, "loss": 0.473, "step": 1542 }, { "epoch": 0.3815529179030663, "grad_norm": 0.855497209762524, "learning_rate": 4.956133740575889e-06, "loss": 0.4384, "step": 1543 }, { "epoch": 0.3818001978239367, "grad_norm": 0.8154439659482268, "learning_rate": 4.9560730995496285e-06, "loss": 0.4714, "step": 1544 }, { "epoch": 0.38204747774480713, "grad_norm": 0.8117948295753515, "learning_rate": 4.956012417008526e-06, "loss": 0.4573, "step": 1545 }, { "epoch": 0.38229475766567755, "grad_norm": 0.8411563068995113, "learning_rate": 4.95595169295361e-06, "loss": 0.447, "step": 1546 }, { "epoch": 0.382542037586548, "grad_norm": 0.840886345860403, "learning_rate": 4.955890927385903e-06, "loss": 0.4373, "step": 1547 }, { "epoch": 0.3827893175074184, "grad_norm": 0.8749065735486805, "learning_rate": 4.955830120306436e-06, "loss": 0.466, "step": 1548 }, { "epoch": 0.3830365974282888, "grad_norm": 0.8266971574028511, "learning_rate": 4.955769271716234e-06, "loss": 0.4524, "step": 1549 }, { "epoch": 0.38328387734915925, "grad_norm": 0.7856800140279594, "learning_rate": 4.955708381616327e-06, "loss": 0.4661, "step": 1550 }, { "epoch": 0.3835311572700297, "grad_norm": 0.8227838319773383, "learning_rate": 4.955647450007743e-06, "loss": 0.4728, "step": 1551 }, { "epoch": 0.3837784371909001, "grad_norm": 0.855400428650959, "learning_rate": 4.955586476891514e-06, "loss": 0.4523, "step": 1552 }, { "epoch": 0.3840257171117705, "grad_norm": 0.854109040398388, "learning_rate": 4.955525462268669e-06, "loss": 0.4733, "step": 1553 }, { "epoch": 0.38427299703264095, "grad_norm": 0.8476868409597142, "learning_rate": 4.955464406140239e-06, "loss": 0.4342, "step": 1554 }, { "epoch": 0.3845202769535114, "grad_norm": 0.8221087986524699, "learning_rate": 4.955403308507257e-06, "loss": 0.4741, "step": 1555 }, { "epoch": 0.3847675568743818, "grad_norm": 0.8985974432339402, "learning_rate": 4.955342169370755e-06, "loss": 0.4764, "step": 1556 }, { "epoch": 0.3850148367952522, "grad_norm": 0.840966394271443, "learning_rate": 4.955280988731768e-06, "loss": 0.4677, "step": 1557 }, { "epoch": 0.38526211671612265, "grad_norm": 0.855372122677701, "learning_rate": 4.9552197665913284e-06, "loss": 0.4412, "step": 1558 }, { "epoch": 0.3855093966369931, "grad_norm": 0.8491148581437344, "learning_rate": 4.955158502950471e-06, "loss": 0.4804, "step": 1559 }, { "epoch": 0.3857566765578635, "grad_norm": 0.8354735780843734, "learning_rate": 4.955097197810233e-06, "loss": 0.461, "step": 1560 }, { "epoch": 0.3860039564787339, "grad_norm": 0.8421198371822611, "learning_rate": 4.955035851171648e-06, "loss": 0.4553, "step": 1561 }, { "epoch": 0.38625123639960435, "grad_norm": 0.8561721132938402, "learning_rate": 4.954974463035756e-06, "loss": 0.4647, "step": 1562 }, { "epoch": 0.38649851632047477, "grad_norm": 0.8326602684541324, "learning_rate": 4.9549130334035925e-06, "loss": 0.4217, "step": 1563 }, { "epoch": 0.3867457962413452, "grad_norm": 0.8318953344187765, "learning_rate": 4.954851562276196e-06, "loss": 0.476, "step": 1564 }, { "epoch": 0.3869930761622156, "grad_norm": 0.8104714755106442, "learning_rate": 4.954790049654608e-06, "loss": 0.4516, "step": 1565 }, { "epoch": 0.38724035608308605, "grad_norm": 0.8723051321964577, "learning_rate": 4.954728495539865e-06, "loss": 0.483, "step": 1566 }, { "epoch": 0.38748763600395647, "grad_norm": 0.8735820265601282, "learning_rate": 4.954666899933008e-06, "loss": 0.5133, "step": 1567 }, { "epoch": 0.3877349159248269, "grad_norm": 0.8858755298043255, "learning_rate": 4.954605262835079e-06, "loss": 0.4557, "step": 1568 }, { "epoch": 0.3879821958456973, "grad_norm": 0.875902503739144, "learning_rate": 4.954543584247121e-06, "loss": 0.4242, "step": 1569 }, { "epoch": 0.38822947576656774, "grad_norm": 0.7924017320572292, "learning_rate": 4.954481864170175e-06, "loss": 0.4822, "step": 1570 }, { "epoch": 0.38847675568743817, "grad_norm": 0.8362697886438909, "learning_rate": 4.9544201026052845e-06, "loss": 0.4602, "step": 1571 }, { "epoch": 0.3887240356083086, "grad_norm": 0.8916382978285358, "learning_rate": 4.954358299553492e-06, "loss": 0.4405, "step": 1572 }, { "epoch": 0.388971315529179, "grad_norm": 0.8634107397727967, "learning_rate": 4.954296455015846e-06, "loss": 0.456, "step": 1573 }, { "epoch": 0.38921859545004944, "grad_norm": 0.928281717353563, "learning_rate": 4.9542345689933875e-06, "loss": 0.466, "step": 1574 }, { "epoch": 0.38946587537091987, "grad_norm": 0.9131256535394252, "learning_rate": 4.954172641487165e-06, "loss": 0.4358, "step": 1575 }, { "epoch": 0.3897131552917903, "grad_norm": 0.8553374784847576, "learning_rate": 4.954110672498226e-06, "loss": 0.447, "step": 1576 }, { "epoch": 0.3899604352126607, "grad_norm": 0.8617713943665484, "learning_rate": 4.954048662027615e-06, "loss": 0.454, "step": 1577 }, { "epoch": 0.39020771513353114, "grad_norm": 0.8378470097296922, "learning_rate": 4.953986610076383e-06, "loss": 0.457, "step": 1578 }, { "epoch": 0.39045499505440157, "grad_norm": 0.890341315715719, "learning_rate": 4.953924516645578e-06, "loss": 0.4542, "step": 1579 }, { "epoch": 0.390702274975272, "grad_norm": 0.867350939044516, "learning_rate": 4.953862381736249e-06, "loss": 0.4343, "step": 1580 }, { "epoch": 0.3909495548961424, "grad_norm": 0.8877957331446623, "learning_rate": 4.953800205349446e-06, "loss": 0.488, "step": 1581 }, { "epoch": 0.39119683481701284, "grad_norm": 0.8807877558443802, "learning_rate": 4.953737987486221e-06, "loss": 0.4735, "step": 1582 }, { "epoch": 0.39144411473788326, "grad_norm": 0.8782111156976654, "learning_rate": 4.953675728147625e-06, "loss": 0.448, "step": 1583 }, { "epoch": 0.3916913946587537, "grad_norm": 0.8500467289046308, "learning_rate": 4.953613427334711e-06, "loss": 0.4617, "step": 1584 }, { "epoch": 0.3919386745796241, "grad_norm": 0.8409368715056174, "learning_rate": 4.953551085048531e-06, "loss": 0.4684, "step": 1585 }, { "epoch": 0.39218595450049454, "grad_norm": 0.8522470314278595, "learning_rate": 4.95348870129014e-06, "loss": 0.4592, "step": 1586 }, { "epoch": 0.39243323442136496, "grad_norm": 0.8168922995846802, "learning_rate": 4.953426276060592e-06, "loss": 0.4258, "step": 1587 }, { "epoch": 0.3926805143422354, "grad_norm": 0.8256035919532246, "learning_rate": 4.953363809360942e-06, "loss": 0.4665, "step": 1588 }, { "epoch": 0.3929277942631058, "grad_norm": 0.8541007709621282, "learning_rate": 4.953301301192246e-06, "loss": 0.4363, "step": 1589 }, { "epoch": 0.39317507418397624, "grad_norm": 0.8882561921472727, "learning_rate": 4.95323875155556e-06, "loss": 0.4635, "step": 1590 }, { "epoch": 0.39342235410484666, "grad_norm": 0.8501720096698456, "learning_rate": 4.953176160451942e-06, "loss": 0.4653, "step": 1591 }, { "epoch": 0.3936696340257171, "grad_norm": 0.8721229757098223, "learning_rate": 4.95311352788245e-06, "loss": 0.4806, "step": 1592 }, { "epoch": 0.3939169139465875, "grad_norm": 0.8979202349196487, "learning_rate": 4.953050853848143e-06, "loss": 0.454, "step": 1593 }, { "epoch": 0.39416419386745793, "grad_norm": 0.8760747889780947, "learning_rate": 4.9529881383500785e-06, "loss": 0.4763, "step": 1594 }, { "epoch": 0.3944114737883284, "grad_norm": 0.8452437439381595, "learning_rate": 4.9529253813893185e-06, "loss": 0.4282, "step": 1595 }, { "epoch": 0.39465875370919884, "grad_norm": 0.8396178678462056, "learning_rate": 4.952862582966923e-06, "loss": 0.4531, "step": 1596 }, { "epoch": 0.39490603363006926, "grad_norm": 0.8401292338194442, "learning_rate": 4.9527997430839535e-06, "loss": 0.4677, "step": 1597 }, { "epoch": 0.3951533135509397, "grad_norm": 0.8078584240478519, "learning_rate": 4.952736861741473e-06, "loss": 0.4612, "step": 1598 }, { "epoch": 0.3954005934718101, "grad_norm": 0.8580871690645315, "learning_rate": 4.952673938940543e-06, "loss": 0.4561, "step": 1599 }, { "epoch": 0.39564787339268054, "grad_norm": 0.8309114317557393, "learning_rate": 4.952610974682228e-06, "loss": 0.4587, "step": 1600 }, { "epoch": 0.39589515331355096, "grad_norm": 0.8018195354020252, "learning_rate": 4.952547968967592e-06, "loss": 0.4764, "step": 1601 }, { "epoch": 0.3961424332344214, "grad_norm": 0.8227914040250693, "learning_rate": 4.9524849217977e-06, "loss": 0.5004, "step": 1602 }, { "epoch": 0.3963897131552918, "grad_norm": 0.9344301223745453, "learning_rate": 4.952421833173618e-06, "loss": 0.4284, "step": 1603 }, { "epoch": 0.39663699307616224, "grad_norm": 0.8125773670413535, "learning_rate": 4.952358703096412e-06, "loss": 0.4878, "step": 1604 }, { "epoch": 0.39688427299703266, "grad_norm": 0.8794216357490461, "learning_rate": 4.952295531567149e-06, "loss": 0.4417, "step": 1605 }, { "epoch": 0.3971315529179031, "grad_norm": 0.8785534278955001, "learning_rate": 4.952232318586897e-06, "loss": 0.4572, "step": 1606 }, { "epoch": 0.3973788328387735, "grad_norm": 0.8732103433976532, "learning_rate": 4.952169064156724e-06, "loss": 0.4628, "step": 1607 }, { "epoch": 0.39762611275964393, "grad_norm": 0.8388512265824324, "learning_rate": 4.952105768277701e-06, "loss": 0.4819, "step": 1608 }, { "epoch": 0.39787339268051436, "grad_norm": 0.8531960312588226, "learning_rate": 4.9520424309508954e-06, "loss": 0.4596, "step": 1609 }, { "epoch": 0.3981206726013848, "grad_norm": 0.8990481924298742, "learning_rate": 4.951979052177379e-06, "loss": 0.4679, "step": 1610 }, { "epoch": 0.3983679525222552, "grad_norm": 0.9241672808577421, "learning_rate": 4.9519156319582226e-06, "loss": 0.4409, "step": 1611 }, { "epoch": 0.39861523244312563, "grad_norm": 0.8075821047905285, "learning_rate": 4.9518521702945e-06, "loss": 0.4304, "step": 1612 }, { "epoch": 0.39886251236399606, "grad_norm": 0.8439405328812108, "learning_rate": 4.951788667187281e-06, "loss": 0.4412, "step": 1613 }, { "epoch": 0.3991097922848665, "grad_norm": 0.8936493881653371, "learning_rate": 4.95172512263764e-06, "loss": 0.446, "step": 1614 }, { "epoch": 0.3993570722057369, "grad_norm": 0.8718520805400518, "learning_rate": 4.9516615366466535e-06, "loss": 0.4565, "step": 1615 }, { "epoch": 0.39960435212660733, "grad_norm": 0.8336286918355128, "learning_rate": 4.951597909215393e-06, "loss": 0.4722, "step": 1616 }, { "epoch": 0.39985163204747776, "grad_norm": 0.8757209786566342, "learning_rate": 4.951534240344936e-06, "loss": 0.4569, "step": 1617 }, { "epoch": 0.4000989119683482, "grad_norm": 0.8705182757213266, "learning_rate": 4.951470530036358e-06, "loss": 0.4605, "step": 1618 }, { "epoch": 0.4003461918892186, "grad_norm": 0.8640519203759794, "learning_rate": 4.951406778290735e-06, "loss": 0.4554, "step": 1619 }, { "epoch": 0.40059347181008903, "grad_norm": 0.8362305667086866, "learning_rate": 4.951342985109147e-06, "loss": 0.4673, "step": 1620 }, { "epoch": 0.40084075173095945, "grad_norm": 0.855065016510783, "learning_rate": 4.951279150492669e-06, "loss": 0.4581, "step": 1621 }, { "epoch": 0.4010880316518299, "grad_norm": 0.9169850417037374, "learning_rate": 4.9512152744423836e-06, "loss": 0.4765, "step": 1622 }, { "epoch": 0.4013353115727003, "grad_norm": 0.8578287913225426, "learning_rate": 4.951151356959368e-06, "loss": 0.4479, "step": 1623 }, { "epoch": 0.40158259149357073, "grad_norm": 0.9055998939207615, "learning_rate": 4.951087398044702e-06, "loss": 0.4566, "step": 1624 }, { "epoch": 0.40182987141444115, "grad_norm": 0.8854013010161614, "learning_rate": 4.951023397699469e-06, "loss": 0.4654, "step": 1625 }, { "epoch": 0.4020771513353116, "grad_norm": 0.8676151411605866, "learning_rate": 4.9509593559247505e-06, "loss": 0.4685, "step": 1626 }, { "epoch": 0.402324431256182, "grad_norm": 0.8113407989272491, "learning_rate": 4.950895272721627e-06, "loss": 0.4685, "step": 1627 }, { "epoch": 0.4025717111770524, "grad_norm": 0.8210151970043335, "learning_rate": 4.950831148091184e-06, "loss": 0.4517, "step": 1628 }, { "epoch": 0.40281899109792285, "grad_norm": 0.8853716567053723, "learning_rate": 4.950766982034504e-06, "loss": 0.4227, "step": 1629 }, { "epoch": 0.4030662710187933, "grad_norm": 0.9285034946776956, "learning_rate": 4.950702774552671e-06, "loss": 0.4095, "step": 1630 }, { "epoch": 0.4033135509396637, "grad_norm": 0.8081586648223669, "learning_rate": 4.950638525646773e-06, "loss": 0.4483, "step": 1631 }, { "epoch": 0.4035608308605341, "grad_norm": 0.814407403090029, "learning_rate": 4.9505742353178935e-06, "loss": 0.4395, "step": 1632 }, { "epoch": 0.40380811078140455, "grad_norm": 0.9318384755859314, "learning_rate": 4.9505099035671185e-06, "loss": 0.4528, "step": 1633 }, { "epoch": 0.404055390702275, "grad_norm": 0.8556784555850027, "learning_rate": 4.950445530395539e-06, "loss": 0.4551, "step": 1634 }, { "epoch": 0.4043026706231454, "grad_norm": 0.8236028720202845, "learning_rate": 4.9503811158042394e-06, "loss": 0.4969, "step": 1635 }, { "epoch": 0.4045499505440158, "grad_norm": 0.7955100118692611, "learning_rate": 4.9503166597943105e-06, "loss": 0.4911, "step": 1636 }, { "epoch": 0.40479723046488625, "grad_norm": 0.8321762635331581, "learning_rate": 4.950252162366841e-06, "loss": 0.4743, "step": 1637 }, { "epoch": 0.4050445103857567, "grad_norm": 0.892782367227542, "learning_rate": 4.950187623522922e-06, "loss": 0.4635, "step": 1638 }, { "epoch": 0.4052917903066271, "grad_norm": 0.8290384221114429, "learning_rate": 4.950123043263644e-06, "loss": 0.4701, "step": 1639 }, { "epoch": 0.4055390702274975, "grad_norm": 0.8635462718575356, "learning_rate": 4.9500584215900975e-06, "loss": 0.4315, "step": 1640 }, { "epoch": 0.40578635014836795, "grad_norm": 0.8303892565178683, "learning_rate": 4.949993758503376e-06, "loss": 0.4925, "step": 1641 }, { "epoch": 0.40603363006923837, "grad_norm": 0.840727633311021, "learning_rate": 4.949929054004572e-06, "loss": 0.4629, "step": 1642 }, { "epoch": 0.4062809099901088, "grad_norm": 0.8634706522730431, "learning_rate": 4.949864308094779e-06, "loss": 0.4796, "step": 1643 }, { "epoch": 0.4065281899109792, "grad_norm": 0.8361394254501443, "learning_rate": 4.949799520775092e-06, "loss": 0.4568, "step": 1644 }, { "epoch": 0.40677546983184965, "grad_norm": 0.8139921355363459, "learning_rate": 4.9497346920466074e-06, "loss": 0.4557, "step": 1645 }, { "epoch": 0.40702274975272007, "grad_norm": 0.812250032386861, "learning_rate": 4.949669821910418e-06, "loss": 0.4566, "step": 1646 }, { "epoch": 0.4072700296735905, "grad_norm": 0.828761594585985, "learning_rate": 4.949604910367623e-06, "loss": 0.4862, "step": 1647 }, { "epoch": 0.4075173095944609, "grad_norm": 0.8177756231917882, "learning_rate": 4.949539957419317e-06, "loss": 0.4586, "step": 1648 }, { "epoch": 0.40776458951533134, "grad_norm": 0.8214768220432287, "learning_rate": 4.949474963066599e-06, "loss": 0.449, "step": 1649 }, { "epoch": 0.40801186943620177, "grad_norm": 0.8194371482269276, "learning_rate": 4.9494099273105686e-06, "loss": 0.457, "step": 1650 }, { "epoch": 0.4082591493570722, "grad_norm": 0.8244018158911955, "learning_rate": 4.9493448501523245e-06, "loss": 0.4345, "step": 1651 }, { "epoch": 0.4085064292779426, "grad_norm": 0.8009139765863647, "learning_rate": 4.949279731592967e-06, "loss": 0.4791, "step": 1652 }, { "epoch": 0.40875370919881304, "grad_norm": 0.8116159409968832, "learning_rate": 4.949214571633595e-06, "loss": 0.4369, "step": 1653 }, { "epoch": 0.40900098911968347, "grad_norm": 0.8562285011172734, "learning_rate": 4.949149370275311e-06, "loss": 0.4557, "step": 1654 }, { "epoch": 0.4092482690405539, "grad_norm": 0.8191314049207302, "learning_rate": 4.949084127519219e-06, "loss": 0.4683, "step": 1655 }, { "epoch": 0.4094955489614243, "grad_norm": 0.7778969873901285, "learning_rate": 4.949018843366419e-06, "loss": 0.4639, "step": 1656 }, { "epoch": 0.40974282888229474, "grad_norm": 0.806816984603995, "learning_rate": 4.9489535178180155e-06, "loss": 0.4335, "step": 1657 }, { "epoch": 0.40999010880316517, "grad_norm": 0.8215100542029089, "learning_rate": 4.9488881508751135e-06, "loss": 0.4436, "step": 1658 }, { "epoch": 0.4102373887240356, "grad_norm": 0.8281492795541459, "learning_rate": 4.948822742538817e-06, "loss": 0.4521, "step": 1659 }, { "epoch": 0.410484668644906, "grad_norm": 0.897412440002854, "learning_rate": 4.9487572928102315e-06, "loss": 0.4777, "step": 1660 }, { "epoch": 0.41073194856577644, "grad_norm": 0.8231014853022581, "learning_rate": 4.948691801690464e-06, "loss": 0.4668, "step": 1661 }, { "epoch": 0.41097922848664686, "grad_norm": 0.8309257711014859, "learning_rate": 4.948626269180621e-06, "loss": 0.423, "step": 1662 }, { "epoch": 0.4112265084075173, "grad_norm": 0.788186095486044, "learning_rate": 4.94856069528181e-06, "loss": 0.4461, "step": 1663 }, { "epoch": 0.4114737883283877, "grad_norm": 0.8031013505892846, "learning_rate": 4.948495079995139e-06, "loss": 0.4581, "step": 1664 }, { "epoch": 0.41172106824925814, "grad_norm": 0.8214990535999098, "learning_rate": 4.948429423321719e-06, "loss": 0.4386, "step": 1665 }, { "epoch": 0.41196834817012856, "grad_norm": 0.8697067040191242, "learning_rate": 4.9483637252626585e-06, "loss": 0.4274, "step": 1666 }, { "epoch": 0.412215628090999, "grad_norm": 0.8208094818047257, "learning_rate": 4.948297985819067e-06, "loss": 0.4561, "step": 1667 }, { "epoch": 0.4124629080118694, "grad_norm": 0.8199301154122366, "learning_rate": 4.9482322049920575e-06, "loss": 0.4606, "step": 1668 }, { "epoch": 0.41271018793273984, "grad_norm": 0.7865329706718255, "learning_rate": 4.948166382782741e-06, "loss": 0.4564, "step": 1669 }, { "epoch": 0.41295746785361026, "grad_norm": 0.8352567987592875, "learning_rate": 4.948100519192229e-06, "loss": 0.4558, "step": 1670 }, { "epoch": 0.4132047477744807, "grad_norm": 0.8861005482099316, "learning_rate": 4.9480346142216375e-06, "loss": 0.4658, "step": 1671 }, { "epoch": 0.4134520276953511, "grad_norm": 0.852460995747833, "learning_rate": 4.947968667872079e-06, "loss": 0.4487, "step": 1672 }, { "epoch": 0.4136993076162216, "grad_norm": 0.8173953757463533, "learning_rate": 4.947902680144667e-06, "loss": 0.4554, "step": 1673 }, { "epoch": 0.413946587537092, "grad_norm": 0.8668203863267794, "learning_rate": 4.947836651040519e-06, "loss": 0.4846, "step": 1674 }, { "epoch": 0.41419386745796244, "grad_norm": 0.7885997646188458, "learning_rate": 4.94777058056075e-06, "loss": 0.4815, "step": 1675 }, { "epoch": 0.41444114737883286, "grad_norm": 0.8506062284072559, "learning_rate": 4.947704468706477e-06, "loss": 0.4362, "step": 1676 }, { "epoch": 0.4146884272997033, "grad_norm": 0.8786876540859795, "learning_rate": 4.947638315478817e-06, "loss": 0.4119, "step": 1677 }, { "epoch": 0.4149357072205737, "grad_norm": 0.782177966395444, "learning_rate": 4.9475721208788885e-06, "loss": 0.478, "step": 1678 }, { "epoch": 0.41518298714144414, "grad_norm": 0.8478757384596047, "learning_rate": 4.94750588490781e-06, "loss": 0.4391, "step": 1679 }, { "epoch": 0.41543026706231456, "grad_norm": 0.811351078735783, "learning_rate": 4.947439607566703e-06, "loss": 0.4447, "step": 1680 }, { "epoch": 0.415677546983185, "grad_norm": 0.7843424493602333, "learning_rate": 4.947373288856685e-06, "loss": 0.4617, "step": 1681 }, { "epoch": 0.4159248269040554, "grad_norm": 0.8235806060021909, "learning_rate": 4.947306928778879e-06, "loss": 0.4864, "step": 1682 }, { "epoch": 0.41617210682492584, "grad_norm": 0.847868232417344, "learning_rate": 4.947240527334406e-06, "loss": 0.4773, "step": 1683 }, { "epoch": 0.41641938674579626, "grad_norm": 0.8697925069615653, "learning_rate": 4.947174084524387e-06, "loss": 0.4453, "step": 1684 }, { "epoch": 0.4166666666666667, "grad_norm": 0.8142283134114524, "learning_rate": 4.947107600349948e-06, "loss": 0.4721, "step": 1685 }, { "epoch": 0.4169139465875371, "grad_norm": 0.8511529611862372, "learning_rate": 4.947041074812211e-06, "loss": 0.4462, "step": 1686 }, { "epoch": 0.41716122650840753, "grad_norm": 0.8329691094863567, "learning_rate": 4.946974507912301e-06, "loss": 0.4389, "step": 1687 }, { "epoch": 0.41740850642927796, "grad_norm": 0.8428860586768815, "learning_rate": 4.946907899651342e-06, "loss": 0.4514, "step": 1688 }, { "epoch": 0.4176557863501484, "grad_norm": 0.81243219915672, "learning_rate": 4.946841250030461e-06, "loss": 0.4397, "step": 1689 }, { "epoch": 0.4179030662710188, "grad_norm": 0.8532101717804385, "learning_rate": 4.946774559050785e-06, "loss": 0.5014, "step": 1690 }, { "epoch": 0.41815034619188923, "grad_norm": 0.8488612058084708, "learning_rate": 4.9467078267134396e-06, "loss": 0.4745, "step": 1691 }, { "epoch": 0.41839762611275966, "grad_norm": 0.8429030041129258, "learning_rate": 4.946641053019554e-06, "loss": 0.4668, "step": 1692 }, { "epoch": 0.4186449060336301, "grad_norm": 0.8126448430228969, "learning_rate": 4.9465742379702574e-06, "loss": 0.4771, "step": 1693 }, { "epoch": 0.4188921859545005, "grad_norm": 0.8732774473739731, "learning_rate": 4.946507381566677e-06, "loss": 0.4761, "step": 1694 }, { "epoch": 0.41913946587537093, "grad_norm": 0.8717402341975771, "learning_rate": 4.946440483809946e-06, "loss": 0.4822, "step": 1695 }, { "epoch": 0.41938674579624136, "grad_norm": 0.8502511813109619, "learning_rate": 4.946373544701193e-06, "loss": 0.4152, "step": 1696 }, { "epoch": 0.4196340257171118, "grad_norm": 0.8596676366588083, "learning_rate": 4.9463065642415485e-06, "loss": 0.4362, "step": 1697 }, { "epoch": 0.4198813056379822, "grad_norm": 0.8901457121778148, "learning_rate": 4.9462395424321476e-06, "loss": 0.4418, "step": 1698 }, { "epoch": 0.42012858555885263, "grad_norm": 0.9208361238486875, "learning_rate": 4.946172479274121e-06, "loss": 0.4329, "step": 1699 }, { "epoch": 0.42037586547972305, "grad_norm": 0.8547536386643835, "learning_rate": 4.946105374768603e-06, "loss": 0.4873, "step": 1700 }, { "epoch": 0.4206231454005935, "grad_norm": 0.8508039548865888, "learning_rate": 4.9460382289167284e-06, "loss": 0.4251, "step": 1701 }, { "epoch": 0.4208704253214639, "grad_norm": 0.7970088950959086, "learning_rate": 4.945971041719631e-06, "loss": 0.4671, "step": 1702 }, { "epoch": 0.42111770524233433, "grad_norm": 0.7841689731189674, "learning_rate": 4.945903813178447e-06, "loss": 0.467, "step": 1703 }, { "epoch": 0.42136498516320475, "grad_norm": 0.8173705703188501, "learning_rate": 4.945836543294312e-06, "loss": 0.4546, "step": 1704 }, { "epoch": 0.4216122650840752, "grad_norm": 0.8161859578647371, "learning_rate": 4.945769232068364e-06, "loss": 0.4916, "step": 1705 }, { "epoch": 0.4218595450049456, "grad_norm": 0.8198882926959018, "learning_rate": 4.945701879501742e-06, "loss": 0.4505, "step": 1706 }, { "epoch": 0.422106824925816, "grad_norm": 0.8118644016805705, "learning_rate": 4.945634485595582e-06, "loss": 0.4554, "step": 1707 }, { "epoch": 0.42235410484668645, "grad_norm": 0.8913670827889651, "learning_rate": 4.945567050351024e-06, "loss": 0.4465, "step": 1708 }, { "epoch": 0.4226013847675569, "grad_norm": 0.8459259044723303, "learning_rate": 4.945499573769209e-06, "loss": 0.4573, "step": 1709 }, { "epoch": 0.4228486646884273, "grad_norm": 0.826742105916524, "learning_rate": 4.945432055851276e-06, "loss": 0.4777, "step": 1710 }, { "epoch": 0.4230959446092977, "grad_norm": 0.7946204416088612, "learning_rate": 4.945364496598366e-06, "loss": 0.4572, "step": 1711 }, { "epoch": 0.42334322453016815, "grad_norm": 0.8168683419578028, "learning_rate": 4.9452968960116235e-06, "loss": 0.448, "step": 1712 }, { "epoch": 0.4235905044510386, "grad_norm": 0.8297167888254489, "learning_rate": 4.945229254092188e-06, "loss": 0.4519, "step": 1713 }, { "epoch": 0.423837784371909, "grad_norm": 0.8970476493506931, "learning_rate": 4.945161570841205e-06, "loss": 0.4432, "step": 1714 }, { "epoch": 0.4240850642927794, "grad_norm": 0.8465002130673689, "learning_rate": 4.945093846259817e-06, "loss": 0.4476, "step": 1715 }, { "epoch": 0.42433234421364985, "grad_norm": 0.8347667071381571, "learning_rate": 4.9450260803491705e-06, "loss": 0.4394, "step": 1716 }, { "epoch": 0.4245796241345203, "grad_norm": 0.814417603831734, "learning_rate": 4.94495827311041e-06, "loss": 0.4542, "step": 1717 }, { "epoch": 0.4248269040553907, "grad_norm": 0.9018559702844308, "learning_rate": 4.944890424544681e-06, "loss": 0.4449, "step": 1718 }, { "epoch": 0.4250741839762611, "grad_norm": 0.7992025415966338, "learning_rate": 4.944822534653131e-06, "loss": 0.462, "step": 1719 }, { "epoch": 0.42532146389713155, "grad_norm": 0.8037703415889722, "learning_rate": 4.944754603436908e-06, "loss": 0.4583, "step": 1720 }, { "epoch": 0.42556874381800197, "grad_norm": 0.8439242492521162, "learning_rate": 4.94468663089716e-06, "loss": 0.4465, "step": 1721 }, { "epoch": 0.4258160237388724, "grad_norm": 0.8326257136253545, "learning_rate": 4.944618617035035e-06, "loss": 0.4599, "step": 1722 }, { "epoch": 0.4260633036597428, "grad_norm": 0.8225161900870498, "learning_rate": 4.944550561851685e-06, "loss": 0.4424, "step": 1723 }, { "epoch": 0.42631058358061324, "grad_norm": 0.8117853626022742, "learning_rate": 4.944482465348257e-06, "loss": 0.4518, "step": 1724 }, { "epoch": 0.42655786350148367, "grad_norm": 0.8618327982125517, "learning_rate": 4.944414327525904e-06, "loss": 0.4339, "step": 1725 }, { "epoch": 0.4268051434223541, "grad_norm": 0.8386378955346201, "learning_rate": 4.944346148385777e-06, "loss": 0.4841, "step": 1726 }, { "epoch": 0.4270524233432245, "grad_norm": 0.8772973032751197, "learning_rate": 4.9442779279290295e-06, "loss": 0.4892, "step": 1727 }, { "epoch": 0.42729970326409494, "grad_norm": 0.908072104090296, "learning_rate": 4.944209666156814e-06, "loss": 0.457, "step": 1728 }, { "epoch": 0.42754698318496537, "grad_norm": 0.821449218350982, "learning_rate": 4.944141363070284e-06, "loss": 0.4392, "step": 1729 }, { "epoch": 0.4277942631058358, "grad_norm": 0.8316991236817805, "learning_rate": 4.944073018670594e-06, "loss": 0.4764, "step": 1730 }, { "epoch": 0.4280415430267062, "grad_norm": 0.8694517096681799, "learning_rate": 4.9440046329589e-06, "loss": 0.4539, "step": 1731 }, { "epoch": 0.42828882294757664, "grad_norm": 0.8471764393664999, "learning_rate": 4.943936205936359e-06, "loss": 0.4114, "step": 1732 }, { "epoch": 0.42853610286844707, "grad_norm": 0.8183886076647181, "learning_rate": 4.943867737604123e-06, "loss": 0.4322, "step": 1733 }, { "epoch": 0.4287833827893175, "grad_norm": 0.8579245008449513, "learning_rate": 4.943799227963354e-06, "loss": 0.4497, "step": 1734 }, { "epoch": 0.4290306627101879, "grad_norm": 0.8792696834992108, "learning_rate": 4.943730677015209e-06, "loss": 0.4437, "step": 1735 }, { "epoch": 0.42927794263105834, "grad_norm": 0.8196225113446304, "learning_rate": 4.9436620847608455e-06, "loss": 0.4486, "step": 1736 }, { "epoch": 0.42952522255192876, "grad_norm": 0.792556437218192, "learning_rate": 4.943593451201424e-06, "loss": 0.4464, "step": 1737 }, { "epoch": 0.4297725024727992, "grad_norm": 0.7962854260626189, "learning_rate": 4.943524776338104e-06, "loss": 0.4723, "step": 1738 }, { "epoch": 0.4300197823936696, "grad_norm": 0.8352264957077538, "learning_rate": 4.943456060172046e-06, "loss": 0.4501, "step": 1739 }, { "epoch": 0.43026706231454004, "grad_norm": 0.8400190502171467, "learning_rate": 4.943387302704412e-06, "loss": 0.454, "step": 1740 }, { "epoch": 0.43051434223541046, "grad_norm": 0.8276334232747109, "learning_rate": 4.943318503936364e-06, "loss": 0.4144, "step": 1741 }, { "epoch": 0.4307616221562809, "grad_norm": 0.8302199804463065, "learning_rate": 4.943249663869066e-06, "loss": 0.4686, "step": 1742 }, { "epoch": 0.4310089020771513, "grad_norm": 0.8207197842738865, "learning_rate": 4.94318078250368e-06, "loss": 0.4644, "step": 1743 }, { "epoch": 0.43125618199802174, "grad_norm": 0.834730998310763, "learning_rate": 4.943111859841371e-06, "loss": 0.4695, "step": 1744 }, { "epoch": 0.43150346191889216, "grad_norm": 0.8070569226814367, "learning_rate": 4.943042895883304e-06, "loss": 0.466, "step": 1745 }, { "epoch": 0.4317507418397626, "grad_norm": 0.8163444224681524, "learning_rate": 4.942973890630645e-06, "loss": 0.4782, "step": 1746 }, { "epoch": 0.431998021760633, "grad_norm": 0.83208535669435, "learning_rate": 4.942904844084559e-06, "loss": 0.4534, "step": 1747 }, { "epoch": 0.43224530168150344, "grad_norm": 0.7887636687926843, "learning_rate": 4.942835756246215e-06, "loss": 0.4407, "step": 1748 }, { "epoch": 0.43249258160237386, "grad_norm": 0.8521733444979064, "learning_rate": 4.942766627116779e-06, "loss": 0.459, "step": 1749 }, { "epoch": 0.43273986152324434, "grad_norm": 0.8518317628217601, "learning_rate": 4.942697456697422e-06, "loss": 0.4615, "step": 1750 }, { "epoch": 0.43298714144411476, "grad_norm": 0.9185604966693071, "learning_rate": 4.94262824498931e-06, "loss": 0.4503, "step": 1751 }, { "epoch": 0.4332344213649852, "grad_norm": 0.8103056740307707, "learning_rate": 4.942558991993615e-06, "loss": 0.4554, "step": 1752 }, { "epoch": 0.4334817012858556, "grad_norm": 0.799261239542613, "learning_rate": 4.942489697711508e-06, "loss": 0.4603, "step": 1753 }, { "epoch": 0.43372898120672604, "grad_norm": 0.8425868034525744, "learning_rate": 4.9424203621441585e-06, "loss": 0.4639, "step": 1754 }, { "epoch": 0.43397626112759646, "grad_norm": 0.813736917430641, "learning_rate": 4.9423509852927395e-06, "loss": 0.4504, "step": 1755 }, { "epoch": 0.4342235410484669, "grad_norm": 0.8325032700998303, "learning_rate": 4.942281567158424e-06, "loss": 0.4486, "step": 1756 }, { "epoch": 0.4344708209693373, "grad_norm": 0.8578441243657652, "learning_rate": 4.942212107742384e-06, "loss": 0.4324, "step": 1757 }, { "epoch": 0.43471810089020774, "grad_norm": 0.7950101986986995, "learning_rate": 4.9421426070457946e-06, "loss": 0.455, "step": 1758 }, { "epoch": 0.43496538081107816, "grad_norm": 0.8464725428496769, "learning_rate": 4.94207306506983e-06, "loss": 0.4338, "step": 1759 }, { "epoch": 0.4352126607319486, "grad_norm": 0.8469706639469825, "learning_rate": 4.942003481815666e-06, "loss": 0.4288, "step": 1760 }, { "epoch": 0.435459940652819, "grad_norm": 0.8775115383798611, "learning_rate": 4.94193385728448e-06, "loss": 0.4337, "step": 1761 }, { "epoch": 0.43570722057368944, "grad_norm": 0.7880837358851591, "learning_rate": 4.9418641914774465e-06, "loss": 0.4471, "step": 1762 }, { "epoch": 0.43595450049455986, "grad_norm": 0.8742884553070869, "learning_rate": 4.9417944843957445e-06, "loss": 0.4342, "step": 1763 }, { "epoch": 0.4362017804154303, "grad_norm": 0.8660240978853062, "learning_rate": 4.941724736040552e-06, "loss": 0.4734, "step": 1764 }, { "epoch": 0.4364490603363007, "grad_norm": 0.855246178964046, "learning_rate": 4.941654946413048e-06, "loss": 0.4821, "step": 1765 }, { "epoch": 0.43669634025717113, "grad_norm": 0.8049402509434078, "learning_rate": 4.941585115514412e-06, "loss": 0.4572, "step": 1766 }, { "epoch": 0.43694362017804156, "grad_norm": 0.8102549094132898, "learning_rate": 4.9415152433458245e-06, "loss": 0.4627, "step": 1767 }, { "epoch": 0.437190900098912, "grad_norm": 0.855260779736409, "learning_rate": 4.941445329908466e-06, "loss": 0.4486, "step": 1768 }, { "epoch": 0.4374381800197824, "grad_norm": 0.8398694779387051, "learning_rate": 4.94137537520352e-06, "loss": 0.4443, "step": 1769 }, { "epoch": 0.43768545994065283, "grad_norm": 0.8495857810077256, "learning_rate": 4.941305379232166e-06, "loss": 0.4494, "step": 1770 }, { "epoch": 0.43793273986152326, "grad_norm": 0.8457483281660204, "learning_rate": 4.941235341995589e-06, "loss": 0.4213, "step": 1771 }, { "epoch": 0.4381800197823937, "grad_norm": 0.8416695102723704, "learning_rate": 4.941165263494974e-06, "loss": 0.4329, "step": 1772 }, { "epoch": 0.4384272997032641, "grad_norm": 0.8077854346661958, "learning_rate": 4.9410951437315034e-06, "loss": 0.4563, "step": 1773 }, { "epoch": 0.43867457962413453, "grad_norm": 0.8591862542056117, "learning_rate": 4.941024982706363e-06, "loss": 0.4395, "step": 1774 }, { "epoch": 0.43892185954500496, "grad_norm": 0.8865112531995624, "learning_rate": 4.9409547804207396e-06, "loss": 0.4369, "step": 1775 }, { "epoch": 0.4391691394658754, "grad_norm": 0.79342992597239, "learning_rate": 4.940884536875817e-06, "loss": 0.45, "step": 1776 }, { "epoch": 0.4394164193867458, "grad_norm": 0.7881270666911689, "learning_rate": 4.940814252072787e-06, "loss": 0.4523, "step": 1777 }, { "epoch": 0.43966369930761623, "grad_norm": 0.855046856544146, "learning_rate": 4.9407439260128345e-06, "loss": 0.4397, "step": 1778 }, { "epoch": 0.43991097922848665, "grad_norm": 0.8388877807693309, "learning_rate": 4.940673558697149e-06, "loss": 0.4475, "step": 1779 }, { "epoch": 0.4401582591493571, "grad_norm": 0.8512455561172892, "learning_rate": 4.940603150126919e-06, "loss": 0.4456, "step": 1780 }, { "epoch": 0.4404055390702275, "grad_norm": 0.8861587872050732, "learning_rate": 4.940532700303337e-06, "loss": 0.4676, "step": 1781 }, { "epoch": 0.4406528189910979, "grad_norm": 0.8159176864311103, "learning_rate": 4.940462209227592e-06, "loss": 0.4761, "step": 1782 }, { "epoch": 0.44090009891196835, "grad_norm": 0.8589329700847905, "learning_rate": 4.9403916769008755e-06, "loss": 0.4238, "step": 1783 }, { "epoch": 0.4411473788328388, "grad_norm": 0.8120103864684454, "learning_rate": 4.940321103324379e-06, "loss": 0.4533, "step": 1784 }, { "epoch": 0.4413946587537092, "grad_norm": 0.830021957146338, "learning_rate": 4.940250488499298e-06, "loss": 0.4708, "step": 1785 }, { "epoch": 0.4416419386745796, "grad_norm": 0.8784720779654032, "learning_rate": 4.9401798324268236e-06, "loss": 0.4853, "step": 1786 }, { "epoch": 0.44188921859545005, "grad_norm": 0.8300399974772742, "learning_rate": 4.940109135108152e-06, "loss": 0.4547, "step": 1787 }, { "epoch": 0.4421364985163205, "grad_norm": 0.8155400116963316, "learning_rate": 4.940038396544476e-06, "loss": 0.4464, "step": 1788 }, { "epoch": 0.4423837784371909, "grad_norm": 0.827571249140311, "learning_rate": 4.939967616736994e-06, "loss": 0.4337, "step": 1789 }, { "epoch": 0.4426310583580613, "grad_norm": 0.8167550515040979, "learning_rate": 4.939896795686899e-06, "loss": 0.4486, "step": 1790 }, { "epoch": 0.44287833827893175, "grad_norm": 0.8494832396966039, "learning_rate": 4.939825933395391e-06, "loss": 0.4496, "step": 1791 }, { "epoch": 0.4431256181998022, "grad_norm": 0.8003204538621603, "learning_rate": 4.939755029863667e-06, "loss": 0.4445, "step": 1792 }, { "epoch": 0.4433728981206726, "grad_norm": 0.7824474758817086, "learning_rate": 4.939684085092925e-06, "loss": 0.4465, "step": 1793 }, { "epoch": 0.443620178041543, "grad_norm": 0.8081718945058649, "learning_rate": 4.939613099084365e-06, "loss": 0.4528, "step": 1794 }, { "epoch": 0.44386745796241345, "grad_norm": 0.8495881551915857, "learning_rate": 4.939542071839185e-06, "loss": 0.4403, "step": 1795 }, { "epoch": 0.44411473788328387, "grad_norm": 0.7924639025505061, "learning_rate": 4.939471003358587e-06, "loss": 0.4559, "step": 1796 }, { "epoch": 0.4443620178041543, "grad_norm": 0.8065926677878719, "learning_rate": 4.939399893643773e-06, "loss": 0.4489, "step": 1797 }, { "epoch": 0.4446092977250247, "grad_norm": 0.8231537032809033, "learning_rate": 4.939328742695943e-06, "loss": 0.4511, "step": 1798 }, { "epoch": 0.44485657764589515, "grad_norm": 0.8629947617643108, "learning_rate": 4.939257550516302e-06, "loss": 0.452, "step": 1799 }, { "epoch": 0.44510385756676557, "grad_norm": 0.8452213524607881, "learning_rate": 4.939186317106051e-06, "loss": 0.4724, "step": 1800 }, { "epoch": 0.445351137487636, "grad_norm": 0.8406637491043357, "learning_rate": 4.939115042466397e-06, "loss": 0.435, "step": 1801 }, { "epoch": 0.4455984174085064, "grad_norm": 0.822963252538923, "learning_rate": 4.9390437265985415e-06, "loss": 0.4258, "step": 1802 }, { "epoch": 0.44584569732937684, "grad_norm": 0.7878763520585504, "learning_rate": 4.93897236950369e-06, "loss": 0.4608, "step": 1803 }, { "epoch": 0.44609297725024727, "grad_norm": 0.8349799067133575, "learning_rate": 4.938900971183053e-06, "loss": 0.4327, "step": 1804 }, { "epoch": 0.4463402571711177, "grad_norm": 0.8816160813241095, "learning_rate": 4.9388295316378325e-06, "loss": 0.4639, "step": 1805 }, { "epoch": 0.4465875370919881, "grad_norm": 0.84235020294276, "learning_rate": 4.938758050869238e-06, "loss": 0.4499, "step": 1806 }, { "epoch": 0.44683481701285854, "grad_norm": 0.8242327113382405, "learning_rate": 4.938686528878477e-06, "loss": 0.4735, "step": 1807 }, { "epoch": 0.44708209693372897, "grad_norm": 0.7886103886587141, "learning_rate": 4.93861496566676e-06, "loss": 0.4615, "step": 1808 }, { "epoch": 0.4473293768545994, "grad_norm": 0.8398273563191035, "learning_rate": 4.938543361235295e-06, "loss": 0.4269, "step": 1809 }, { "epoch": 0.4475766567754698, "grad_norm": 0.8573294083629199, "learning_rate": 4.938471715585293e-06, "loss": 0.4333, "step": 1810 }, { "epoch": 0.44782393669634024, "grad_norm": 0.8849773282454866, "learning_rate": 4.938400028717966e-06, "loss": 0.4464, "step": 1811 }, { "epoch": 0.44807121661721067, "grad_norm": 0.8569807993576385, "learning_rate": 4.938328300634524e-06, "loss": 0.4555, "step": 1812 }, { "epoch": 0.4483184965380811, "grad_norm": 0.8038399479751446, "learning_rate": 4.93825653133618e-06, "loss": 0.4432, "step": 1813 }, { "epoch": 0.4485657764589515, "grad_norm": 0.8316422195623451, "learning_rate": 4.938184720824148e-06, "loss": 0.4357, "step": 1814 }, { "epoch": 0.44881305637982194, "grad_norm": 0.7881623847336401, "learning_rate": 4.938112869099641e-06, "loss": 0.4457, "step": 1815 }, { "epoch": 0.44906033630069236, "grad_norm": 0.8095248000011614, "learning_rate": 4.9380409761638725e-06, "loss": 0.4675, "step": 1816 }, { "epoch": 0.4493076162215628, "grad_norm": 0.8142992574456057, "learning_rate": 4.937969042018059e-06, "loss": 0.4382, "step": 1817 }, { "epoch": 0.4495548961424332, "grad_norm": 0.8037598382565191, "learning_rate": 4.937897066663417e-06, "loss": 0.4547, "step": 1818 }, { "epoch": 0.44980217606330364, "grad_norm": 0.8029225397282946, "learning_rate": 4.937825050101162e-06, "loss": 0.4532, "step": 1819 }, { "epoch": 0.45004945598417406, "grad_norm": 0.8525725054243789, "learning_rate": 4.937752992332512e-06, "loss": 0.4452, "step": 1820 }, { "epoch": 0.4502967359050445, "grad_norm": 0.7978513030283886, "learning_rate": 4.937680893358683e-06, "loss": 0.4496, "step": 1821 }, { "epoch": 0.4505440158259149, "grad_norm": 0.8684712638463695, "learning_rate": 4.9376087531808964e-06, "loss": 0.4336, "step": 1822 }, { "epoch": 0.45079129574678534, "grad_norm": 0.8163578329839308, "learning_rate": 4.93753657180037e-06, "loss": 0.4639, "step": 1823 }, { "epoch": 0.45103857566765576, "grad_norm": 0.8225937594696441, "learning_rate": 4.937464349218325e-06, "loss": 0.4566, "step": 1824 }, { "epoch": 0.4512858555885262, "grad_norm": 0.8008255690442442, "learning_rate": 4.93739208543598e-06, "loss": 0.463, "step": 1825 }, { "epoch": 0.4515331355093966, "grad_norm": 0.8360458473751116, "learning_rate": 4.937319780454559e-06, "loss": 0.4221, "step": 1826 }, { "epoch": 0.45178041543026703, "grad_norm": 0.7805314420818854, "learning_rate": 4.937247434275283e-06, "loss": 0.4615, "step": 1827 }, { "epoch": 0.4520276953511375, "grad_norm": 0.8314479860480369, "learning_rate": 4.937175046899375e-06, "loss": 0.4869, "step": 1828 }, { "epoch": 0.45227497527200794, "grad_norm": 0.7954839890830541, "learning_rate": 4.937102618328058e-06, "loss": 0.4717, "step": 1829 }, { "epoch": 0.45252225519287836, "grad_norm": 0.7896791151974906, "learning_rate": 4.937030148562558e-06, "loss": 0.4561, "step": 1830 }, { "epoch": 0.4527695351137488, "grad_norm": 0.8315873837686758, "learning_rate": 4.936957637604097e-06, "loss": 0.4973, "step": 1831 }, { "epoch": 0.4530168150346192, "grad_norm": 0.8302395670775476, "learning_rate": 4.936885085453904e-06, "loss": 0.4115, "step": 1832 }, { "epoch": 0.45326409495548964, "grad_norm": 0.7907392913162058, "learning_rate": 4.936812492113203e-06, "loss": 0.4398, "step": 1833 }, { "epoch": 0.45351137487636006, "grad_norm": 0.7930420093582806, "learning_rate": 4.936739857583222e-06, "loss": 0.4589, "step": 1834 }, { "epoch": 0.4537586547972305, "grad_norm": 0.8307780202557283, "learning_rate": 4.936667181865188e-06, "loss": 0.4349, "step": 1835 }, { "epoch": 0.4540059347181009, "grad_norm": 0.8218166907095654, "learning_rate": 4.93659446496033e-06, "loss": 0.4617, "step": 1836 }, { "epoch": 0.45425321463897134, "grad_norm": 0.7959798297948486, "learning_rate": 4.936521706869876e-06, "loss": 0.4607, "step": 1837 }, { "epoch": 0.45450049455984176, "grad_norm": 0.8398423129692738, "learning_rate": 4.93644890759506e-06, "loss": 0.4252, "step": 1838 }, { "epoch": 0.4547477744807122, "grad_norm": 0.8146261084755446, "learning_rate": 4.936376067137106e-06, "loss": 0.4191, "step": 1839 }, { "epoch": 0.4549950544015826, "grad_norm": 0.8096282195772391, "learning_rate": 4.936303185497251e-06, "loss": 0.4556, "step": 1840 }, { "epoch": 0.45524233432245303, "grad_norm": 0.8226335223421032, "learning_rate": 4.9362302626767236e-06, "loss": 0.4349, "step": 1841 }, { "epoch": 0.45548961424332346, "grad_norm": 0.8387498079572907, "learning_rate": 4.936157298676757e-06, "loss": 0.4613, "step": 1842 }, { "epoch": 0.4557368941641939, "grad_norm": 0.81593835337991, "learning_rate": 4.936084293498585e-06, "loss": 0.4779, "step": 1843 }, { "epoch": 0.4559841740850643, "grad_norm": 0.804173667458378, "learning_rate": 4.936011247143442e-06, "loss": 0.4465, "step": 1844 }, { "epoch": 0.45623145400593473, "grad_norm": 0.8180659963817345, "learning_rate": 4.935938159612562e-06, "loss": 0.4247, "step": 1845 }, { "epoch": 0.45647873392680516, "grad_norm": 0.8245812500907519, "learning_rate": 4.93586503090718e-06, "loss": 0.4857, "step": 1846 }, { "epoch": 0.4567260138476756, "grad_norm": 0.8247624372284065, "learning_rate": 4.9357918610285326e-06, "loss": 0.44, "step": 1847 }, { "epoch": 0.456973293768546, "grad_norm": 0.8117934491093357, "learning_rate": 4.935718649977857e-06, "loss": 0.443, "step": 1848 }, { "epoch": 0.45722057368941643, "grad_norm": 0.831498957769525, "learning_rate": 4.93564539775639e-06, "loss": 0.4285, "step": 1849 }, { "epoch": 0.45746785361028686, "grad_norm": 0.8178845965621625, "learning_rate": 4.9355721043653705e-06, "loss": 0.4579, "step": 1850 }, { "epoch": 0.4577151335311573, "grad_norm": 0.8435823859766596, "learning_rate": 4.935498769806037e-06, "loss": 0.4849, "step": 1851 }, { "epoch": 0.4579624134520277, "grad_norm": 0.8198546493674795, "learning_rate": 4.9354253940796285e-06, "loss": 0.4764, "step": 1852 }, { "epoch": 0.45820969337289813, "grad_norm": 0.8612598869000662, "learning_rate": 4.9353519771873865e-06, "loss": 0.4655, "step": 1853 }, { "epoch": 0.45845697329376855, "grad_norm": 0.8331800596681603, "learning_rate": 4.935278519130551e-06, "loss": 0.4427, "step": 1854 }, { "epoch": 0.458704253214639, "grad_norm": 0.859575706580175, "learning_rate": 4.935205019910363e-06, "loss": 0.4308, "step": 1855 }, { "epoch": 0.4589515331355094, "grad_norm": 0.8304086390526508, "learning_rate": 4.9351314795280665e-06, "loss": 0.4386, "step": 1856 }, { "epoch": 0.45919881305637983, "grad_norm": 0.8526206499245497, "learning_rate": 4.935057897984904e-06, "loss": 0.4171, "step": 1857 }, { "epoch": 0.45944609297725025, "grad_norm": 0.8122129580874308, "learning_rate": 4.934984275282119e-06, "loss": 0.4592, "step": 1858 }, { "epoch": 0.4596933728981207, "grad_norm": 0.8184420237101967, "learning_rate": 4.9349106114209555e-06, "loss": 0.476, "step": 1859 }, { "epoch": 0.4599406528189911, "grad_norm": 0.8276819930577508, "learning_rate": 4.934836906402659e-06, "loss": 0.4389, "step": 1860 }, { "epoch": 0.4601879327398615, "grad_norm": 0.787148630250681, "learning_rate": 4.934763160228476e-06, "loss": 0.4346, "step": 1861 }, { "epoch": 0.46043521266073195, "grad_norm": 0.8128804151684144, "learning_rate": 4.934689372899653e-06, "loss": 0.4728, "step": 1862 }, { "epoch": 0.4606824925816024, "grad_norm": 0.8461977492066648, "learning_rate": 4.934615544417436e-06, "loss": 0.4389, "step": 1863 }, { "epoch": 0.4609297725024728, "grad_norm": 0.843449430804296, "learning_rate": 4.934541674783074e-06, "loss": 0.4457, "step": 1864 }, { "epoch": 0.4611770524233432, "grad_norm": 0.8086082758083908, "learning_rate": 4.934467763997814e-06, "loss": 0.4343, "step": 1865 }, { "epoch": 0.46142433234421365, "grad_norm": 0.8364626491108798, "learning_rate": 4.934393812062907e-06, "loss": 0.4242, "step": 1866 }, { "epoch": 0.4616716122650841, "grad_norm": 0.8250397394229696, "learning_rate": 4.934319818979604e-06, "loss": 0.4468, "step": 1867 }, { "epoch": 0.4619188921859545, "grad_norm": 0.8365693927663361, "learning_rate": 4.9342457847491525e-06, "loss": 0.4374, "step": 1868 }, { "epoch": 0.4621661721068249, "grad_norm": 0.8496261702902699, "learning_rate": 4.934171709372806e-06, "loss": 0.4261, "step": 1869 }, { "epoch": 0.46241345202769535, "grad_norm": 0.8007783961735965, "learning_rate": 4.934097592851817e-06, "loss": 0.4749, "step": 1870 }, { "epoch": 0.4626607319485658, "grad_norm": 0.9167830740082399, "learning_rate": 4.9340234351874375e-06, "loss": 0.4388, "step": 1871 }, { "epoch": 0.4629080118694362, "grad_norm": 0.8811259241380944, "learning_rate": 4.93394923638092e-06, "loss": 0.4256, "step": 1872 }, { "epoch": 0.4631552917903066, "grad_norm": 0.813618018230188, "learning_rate": 4.933874996433521e-06, "loss": 0.4567, "step": 1873 }, { "epoch": 0.46340257171117705, "grad_norm": 0.8132288219977344, "learning_rate": 4.933800715346493e-06, "loss": 0.4449, "step": 1874 }, { "epoch": 0.46364985163204747, "grad_norm": 0.833392110392238, "learning_rate": 4.933726393121092e-06, "loss": 0.4675, "step": 1875 }, { "epoch": 0.4638971315529179, "grad_norm": 0.8353507684223446, "learning_rate": 4.933652029758577e-06, "loss": 0.4734, "step": 1876 }, { "epoch": 0.4641444114737883, "grad_norm": 0.8628430560657625, "learning_rate": 4.933577625260201e-06, "loss": 0.4304, "step": 1877 }, { "epoch": 0.46439169139465875, "grad_norm": 0.8689457252491675, "learning_rate": 4.933503179627224e-06, "loss": 0.449, "step": 1878 }, { "epoch": 0.46463897131552917, "grad_norm": 0.8110050031566923, "learning_rate": 4.933428692860904e-06, "loss": 0.441, "step": 1879 }, { "epoch": 0.4648862512363996, "grad_norm": 0.8292633865002436, "learning_rate": 4.933354164962499e-06, "loss": 0.429, "step": 1880 }, { "epoch": 0.46513353115727, "grad_norm": 0.8725341632231393, "learning_rate": 4.9332795959332715e-06, "loss": 0.4289, "step": 1881 }, { "epoch": 0.46538081107814044, "grad_norm": 0.8500399734842764, "learning_rate": 4.933204985774479e-06, "loss": 0.4512, "step": 1882 }, { "epoch": 0.46562809099901087, "grad_norm": 0.8080510822724074, "learning_rate": 4.933130334487384e-06, "loss": 0.4194, "step": 1883 }, { "epoch": 0.4658753709198813, "grad_norm": 0.8348679766093604, "learning_rate": 4.933055642073247e-06, "loss": 0.4223, "step": 1884 }, { "epoch": 0.4661226508407517, "grad_norm": 0.811940856384884, "learning_rate": 4.932980908533332e-06, "loss": 0.4511, "step": 1885 }, { "epoch": 0.46636993076162214, "grad_norm": 0.8623912087016934, "learning_rate": 4.9329061338689024e-06, "loss": 0.4067, "step": 1886 }, { "epoch": 0.46661721068249257, "grad_norm": 0.8466626050654013, "learning_rate": 4.932831318081222e-06, "loss": 0.4448, "step": 1887 }, { "epoch": 0.466864490603363, "grad_norm": 0.8167851555645209, "learning_rate": 4.932756461171554e-06, "loss": 0.4504, "step": 1888 }, { "epoch": 0.4671117705242334, "grad_norm": 0.8104154270120225, "learning_rate": 4.932681563141164e-06, "loss": 0.4395, "step": 1889 }, { "epoch": 0.46735905044510384, "grad_norm": 0.8655461155693418, "learning_rate": 4.932606623991319e-06, "loss": 0.4591, "step": 1890 }, { "epoch": 0.46760633036597427, "grad_norm": 0.8176989636313634, "learning_rate": 4.932531643723285e-06, "loss": 0.4427, "step": 1891 }, { "epoch": 0.4678536102868447, "grad_norm": 0.7838206162822197, "learning_rate": 4.9324566223383306e-06, "loss": 0.4498, "step": 1892 }, { "epoch": 0.4681008902077151, "grad_norm": 0.8035646415445724, "learning_rate": 4.9323815598377225e-06, "loss": 0.4471, "step": 1893 }, { "epoch": 0.46834817012858554, "grad_norm": 0.8037368620913304, "learning_rate": 4.93230645622273e-06, "loss": 0.4641, "step": 1894 }, { "epoch": 0.46859545004945596, "grad_norm": 0.8025390965922428, "learning_rate": 4.932231311494622e-06, "loss": 0.4373, "step": 1895 }, { "epoch": 0.4688427299703264, "grad_norm": 0.7945979697539853, "learning_rate": 4.932156125654669e-06, "loss": 0.4559, "step": 1896 }, { "epoch": 0.4690900098911968, "grad_norm": 0.8769209411502517, "learning_rate": 4.9320808987041424e-06, "loss": 0.4586, "step": 1897 }, { "epoch": 0.46933728981206724, "grad_norm": 0.8476950420274959, "learning_rate": 4.932005630644314e-06, "loss": 0.4135, "step": 1898 }, { "epoch": 0.46958456973293766, "grad_norm": 0.8244206095276344, "learning_rate": 4.931930321476455e-06, "loss": 0.4446, "step": 1899 }, { "epoch": 0.4698318496538081, "grad_norm": 0.7977893759785066, "learning_rate": 4.931854971201838e-06, "loss": 0.4703, "step": 1900 }, { "epoch": 0.4700791295746785, "grad_norm": 0.8611413119435934, "learning_rate": 4.9317795798217385e-06, "loss": 0.4591, "step": 1901 }, { "epoch": 0.47032640949554894, "grad_norm": 0.8374357250520812, "learning_rate": 4.931704147337428e-06, "loss": 0.4472, "step": 1902 }, { "epoch": 0.47057368941641936, "grad_norm": 0.8410624597911414, "learning_rate": 4.931628673750185e-06, "loss": 0.439, "step": 1903 }, { "epoch": 0.4708209693372898, "grad_norm": 0.8562823214973215, "learning_rate": 4.931553159061283e-06, "loss": 0.4445, "step": 1904 }, { "epoch": 0.4710682492581602, "grad_norm": 0.8224377914499575, "learning_rate": 4.931477603271999e-06, "loss": 0.4306, "step": 1905 }, { "epoch": 0.4713155291790307, "grad_norm": 0.8697597121950194, "learning_rate": 4.93140200638361e-06, "loss": 0.4548, "step": 1906 }, { "epoch": 0.4715628090999011, "grad_norm": 0.824255221165472, "learning_rate": 4.931326368397394e-06, "loss": 0.4951, "step": 1907 }, { "epoch": 0.47181008902077154, "grad_norm": 0.885166598612944, "learning_rate": 4.9312506893146286e-06, "loss": 0.4285, "step": 1908 }, { "epoch": 0.47205736894164196, "grad_norm": 0.8171167921231146, "learning_rate": 4.931174969136594e-06, "loss": 0.446, "step": 1909 }, { "epoch": 0.4723046488625124, "grad_norm": 0.820165159360279, "learning_rate": 4.93109920786457e-06, "loss": 0.4823, "step": 1910 }, { "epoch": 0.4725519287833828, "grad_norm": 0.8153543462857648, "learning_rate": 4.9310234054998375e-06, "loss": 0.4478, "step": 1911 }, { "epoch": 0.47279920870425324, "grad_norm": 0.8367918771121765, "learning_rate": 4.930947562043677e-06, "loss": 0.4695, "step": 1912 }, { "epoch": 0.47304648862512366, "grad_norm": 0.8471649045198084, "learning_rate": 4.930871677497371e-06, "loss": 0.47, "step": 1913 }, { "epoch": 0.4732937685459941, "grad_norm": 0.8076081839098591, "learning_rate": 4.9307957518622006e-06, "loss": 0.4391, "step": 1914 }, { "epoch": 0.4735410484668645, "grad_norm": 0.8264582394780937, "learning_rate": 4.9307197851394514e-06, "loss": 0.4291, "step": 1915 }, { "epoch": 0.47378832838773494, "grad_norm": 0.8178298174303794, "learning_rate": 4.930643777330407e-06, "loss": 0.4387, "step": 1916 }, { "epoch": 0.47403560830860536, "grad_norm": 0.7791823942240508, "learning_rate": 4.930567728436352e-06, "loss": 0.4531, "step": 1917 }, { "epoch": 0.4742828882294758, "grad_norm": 0.8280393676731757, "learning_rate": 4.930491638458571e-06, "loss": 0.4419, "step": 1918 }, { "epoch": 0.4745301681503462, "grad_norm": 0.8241832719290327, "learning_rate": 4.930415507398351e-06, "loss": 0.4381, "step": 1919 }, { "epoch": 0.47477744807121663, "grad_norm": 0.8217261660458774, "learning_rate": 4.930339335256978e-06, "loss": 0.4505, "step": 1920 }, { "epoch": 0.47502472799208706, "grad_norm": 0.8131794869769368, "learning_rate": 4.93026312203574e-06, "loss": 0.4311, "step": 1921 }, { "epoch": 0.4752720079129575, "grad_norm": 0.8411335977862362, "learning_rate": 4.930186867735926e-06, "loss": 0.4783, "step": 1922 }, { "epoch": 0.4755192878338279, "grad_norm": 0.8241214015182279, "learning_rate": 4.930110572358824e-06, "loss": 0.4497, "step": 1923 }, { "epoch": 0.47576656775469833, "grad_norm": 0.839706324810582, "learning_rate": 4.930034235905724e-06, "loss": 0.4802, "step": 1924 }, { "epoch": 0.47601384767556876, "grad_norm": 0.8397548471567146, "learning_rate": 4.929957858377915e-06, "loss": 0.4202, "step": 1925 }, { "epoch": 0.4762611275964392, "grad_norm": 0.8660864925533305, "learning_rate": 4.929881439776691e-06, "loss": 0.4399, "step": 1926 }, { "epoch": 0.4765084075173096, "grad_norm": 0.8347114497369829, "learning_rate": 4.929804980103341e-06, "loss": 0.4631, "step": 1927 }, { "epoch": 0.47675568743818003, "grad_norm": 0.8646650091416341, "learning_rate": 4.929728479359158e-06, "loss": 0.4393, "step": 1928 }, { "epoch": 0.47700296735905046, "grad_norm": 0.8252581921541579, "learning_rate": 4.929651937545436e-06, "loss": 0.4506, "step": 1929 }, { "epoch": 0.4772502472799209, "grad_norm": 0.8094312396500337, "learning_rate": 4.929575354663467e-06, "loss": 0.4942, "step": 1930 }, { "epoch": 0.4774975272007913, "grad_norm": 0.8772495543820467, "learning_rate": 4.929498730714548e-06, "loss": 0.445, "step": 1931 }, { "epoch": 0.47774480712166173, "grad_norm": 0.7923372192092943, "learning_rate": 4.929422065699972e-06, "loss": 0.4862, "step": 1932 }, { "epoch": 0.47799208704253215, "grad_norm": 0.8712668957725559, "learning_rate": 4.929345359621036e-06, "loss": 0.4351, "step": 1933 }, { "epoch": 0.4782393669634026, "grad_norm": 0.8493635338265155, "learning_rate": 4.929268612479036e-06, "loss": 0.4572, "step": 1934 }, { "epoch": 0.478486646884273, "grad_norm": 0.8666548569127566, "learning_rate": 4.929191824275269e-06, "loss": 0.4427, "step": 1935 }, { "epoch": 0.47873392680514343, "grad_norm": 0.9074290287081372, "learning_rate": 4.929114995011034e-06, "loss": 0.4321, "step": 1936 }, { "epoch": 0.47898120672601385, "grad_norm": 0.8352717250490244, "learning_rate": 4.929038124687629e-06, "loss": 0.4348, "step": 1937 }, { "epoch": 0.4792284866468843, "grad_norm": 0.8112065646293543, "learning_rate": 4.9289612133063536e-06, "loss": 0.4658, "step": 1938 }, { "epoch": 0.4794757665677547, "grad_norm": 0.8327013692076111, "learning_rate": 4.928884260868507e-06, "loss": 0.4465, "step": 1939 }, { "epoch": 0.4797230464886251, "grad_norm": 0.8322228019074666, "learning_rate": 4.928807267375391e-06, "loss": 0.439, "step": 1940 }, { "epoch": 0.47997032640949555, "grad_norm": 0.8225399645495624, "learning_rate": 4.928730232828306e-06, "loss": 0.4131, "step": 1941 }, { "epoch": 0.480217606330366, "grad_norm": 0.8287089897738253, "learning_rate": 4.928653157228555e-06, "loss": 0.4513, "step": 1942 }, { "epoch": 0.4804648862512364, "grad_norm": 0.8387018490840795, "learning_rate": 4.928576040577441e-06, "loss": 0.468, "step": 1943 }, { "epoch": 0.4807121661721068, "grad_norm": 0.7983871186620123, "learning_rate": 4.928498882876266e-06, "loss": 0.4606, "step": 1944 }, { "epoch": 0.48095944609297725, "grad_norm": 0.8726955385115273, "learning_rate": 4.928421684126335e-06, "loss": 0.4226, "step": 1945 }, { "epoch": 0.4812067260138477, "grad_norm": 0.8343516175985839, "learning_rate": 4.928344444328954e-06, "loss": 0.4601, "step": 1946 }, { "epoch": 0.4814540059347181, "grad_norm": 0.8262000466383999, "learning_rate": 4.928267163485427e-06, "loss": 0.4742, "step": 1947 }, { "epoch": 0.4817012858555885, "grad_norm": 0.8684322211013671, "learning_rate": 4.928189841597061e-06, "loss": 0.4459, "step": 1948 }, { "epoch": 0.48194856577645895, "grad_norm": 0.8391599834278922, "learning_rate": 4.928112478665163e-06, "loss": 0.4526, "step": 1949 }, { "epoch": 0.4821958456973294, "grad_norm": 0.847620940773091, "learning_rate": 4.92803507469104e-06, "loss": 0.4429, "step": 1950 }, { "epoch": 0.4824431256181998, "grad_norm": 0.8734227149804495, "learning_rate": 4.927957629676001e-06, "loss": 0.4414, "step": 1951 }, { "epoch": 0.4826904055390702, "grad_norm": 0.875124744595111, "learning_rate": 4.927880143621355e-06, "loss": 0.4464, "step": 1952 }, { "epoch": 0.48293768545994065, "grad_norm": 0.872328400694578, "learning_rate": 4.927802616528412e-06, "loss": 0.4484, "step": 1953 }, { "epoch": 0.48318496538081107, "grad_norm": 0.8407440697376363, "learning_rate": 4.927725048398482e-06, "loss": 0.4523, "step": 1954 }, { "epoch": 0.4834322453016815, "grad_norm": 0.7834132267725564, "learning_rate": 4.927647439232876e-06, "loss": 0.4393, "step": 1955 }, { "epoch": 0.4836795252225519, "grad_norm": 0.8234773490129442, "learning_rate": 4.927569789032907e-06, "loss": 0.4595, "step": 1956 }, { "epoch": 0.48392680514342234, "grad_norm": 0.8497947043946262, "learning_rate": 4.927492097799885e-06, "loss": 0.4303, "step": 1957 }, { "epoch": 0.48417408506429277, "grad_norm": 0.8806520611452342, "learning_rate": 4.927414365535126e-06, "loss": 0.4606, "step": 1958 }, { "epoch": 0.4844213649851632, "grad_norm": 0.8113702305265199, "learning_rate": 4.9273365922399416e-06, "loss": 0.445, "step": 1959 }, { "epoch": 0.4846686449060336, "grad_norm": 0.8248851985890626, "learning_rate": 4.927258777915648e-06, "loss": 0.4465, "step": 1960 }, { "epoch": 0.48491592482690404, "grad_norm": 0.8299620493793199, "learning_rate": 4.92718092256356e-06, "loss": 0.4204, "step": 1961 }, { "epoch": 0.48516320474777447, "grad_norm": 0.8052928255301574, "learning_rate": 4.927103026184993e-06, "loss": 0.478, "step": 1962 }, { "epoch": 0.4854104846686449, "grad_norm": 0.8740281640364033, "learning_rate": 4.927025088781265e-06, "loss": 0.426, "step": 1963 }, { "epoch": 0.4856577645895153, "grad_norm": 0.8183836323362705, "learning_rate": 4.926947110353692e-06, "loss": 0.4432, "step": 1964 }, { "epoch": 0.48590504451038574, "grad_norm": 0.8215966525772359, "learning_rate": 4.926869090903593e-06, "loss": 0.4306, "step": 1965 }, { "epoch": 0.48615232443125617, "grad_norm": 0.8491022861836903, "learning_rate": 4.9267910304322865e-06, "loss": 0.464, "step": 1966 }, { "epoch": 0.4863996043521266, "grad_norm": 0.8538882505150086, "learning_rate": 4.926712928941092e-06, "loss": 0.436, "step": 1967 }, { "epoch": 0.486646884272997, "grad_norm": 0.777009034398573, "learning_rate": 4.926634786431329e-06, "loss": 0.4501, "step": 1968 }, { "epoch": 0.48689416419386744, "grad_norm": 0.8005037204433741, "learning_rate": 4.926556602904319e-06, "loss": 0.4343, "step": 1969 }, { "epoch": 0.48714144411473786, "grad_norm": 0.8289955238627059, "learning_rate": 4.9264783783613835e-06, "loss": 0.4614, "step": 1970 }, { "epoch": 0.4873887240356083, "grad_norm": 0.8455414270465915, "learning_rate": 4.926400112803844e-06, "loss": 0.4669, "step": 1971 }, { "epoch": 0.4876360039564787, "grad_norm": 0.7797141965228852, "learning_rate": 4.926321806233024e-06, "loss": 0.49, "step": 1972 }, { "epoch": 0.48788328387734914, "grad_norm": 0.8493989470575196, "learning_rate": 4.926243458650248e-06, "loss": 0.4349, "step": 1973 }, { "epoch": 0.48813056379821956, "grad_norm": 0.8512014778641054, "learning_rate": 4.926165070056839e-06, "loss": 0.4108, "step": 1974 }, { "epoch": 0.48837784371909, "grad_norm": 0.7979992970173828, "learning_rate": 4.926086640454123e-06, "loss": 0.4706, "step": 1975 }, { "epoch": 0.4886251236399604, "grad_norm": 0.8239102783032143, "learning_rate": 4.926008169843424e-06, "loss": 0.4787, "step": 1976 }, { "epoch": 0.48887240356083084, "grad_norm": 0.8016225529555583, "learning_rate": 4.92592965822607e-06, "loss": 0.4547, "step": 1977 }, { "epoch": 0.48911968348170126, "grad_norm": 0.8531911433461715, "learning_rate": 4.925851105603388e-06, "loss": 0.4488, "step": 1978 }, { "epoch": 0.4893669634025717, "grad_norm": 0.8194447366104644, "learning_rate": 4.925772511976705e-06, "loss": 0.4431, "step": 1979 }, { "epoch": 0.4896142433234421, "grad_norm": 0.8296301370115579, "learning_rate": 4.925693877347349e-06, "loss": 0.4461, "step": 1980 }, { "epoch": 0.48986152324431254, "grad_norm": 0.8462798504011503, "learning_rate": 4.925615201716651e-06, "loss": 0.443, "step": 1981 }, { "epoch": 0.49010880316518296, "grad_norm": 0.8426288056577137, "learning_rate": 4.92553648508594e-06, "loss": 0.425, "step": 1982 }, { "epoch": 0.4903560830860534, "grad_norm": 0.829429706126443, "learning_rate": 4.925457727456546e-06, "loss": 0.4508, "step": 1983 }, { "epoch": 0.49060336300692386, "grad_norm": 0.8059370491729095, "learning_rate": 4.9253789288298e-06, "loss": 0.4502, "step": 1984 }, { "epoch": 0.4908506429277943, "grad_norm": 0.7795504551173943, "learning_rate": 4.925300089207035e-06, "loss": 0.4108, "step": 1985 }, { "epoch": 0.4910979228486647, "grad_norm": 0.8145867834841408, "learning_rate": 4.925221208589584e-06, "loss": 0.4514, "step": 1986 }, { "epoch": 0.49134520276953514, "grad_norm": 0.8452473568571481, "learning_rate": 4.925142286978778e-06, "loss": 0.4305, "step": 1987 }, { "epoch": 0.49159248269040556, "grad_norm": 0.8444624739728522, "learning_rate": 4.925063324375953e-06, "loss": 0.4673, "step": 1988 }, { "epoch": 0.491839762611276, "grad_norm": 0.8256672771190708, "learning_rate": 4.9249843207824434e-06, "loss": 0.4642, "step": 1989 }, { "epoch": 0.4920870425321464, "grad_norm": 0.8404370092223706, "learning_rate": 4.924905276199584e-06, "loss": 0.4288, "step": 1990 }, { "epoch": 0.49233432245301684, "grad_norm": 0.7864654632402006, "learning_rate": 4.924826190628711e-06, "loss": 0.4692, "step": 1991 }, { "epoch": 0.49258160237388726, "grad_norm": 0.8434933818377831, "learning_rate": 4.924747064071163e-06, "loss": 0.4158, "step": 1992 }, { "epoch": 0.4928288822947577, "grad_norm": 0.8244874183035269, "learning_rate": 4.924667896528274e-06, "loss": 0.4198, "step": 1993 }, { "epoch": 0.4930761622156281, "grad_norm": 0.8270973921922327, "learning_rate": 4.924588688001385e-06, "loss": 0.4734, "step": 1994 }, { "epoch": 0.49332344213649854, "grad_norm": 0.8058932904957486, "learning_rate": 4.924509438491834e-06, "loss": 0.4381, "step": 1995 }, { "epoch": 0.49357072205736896, "grad_norm": 0.7956261606694742, "learning_rate": 4.924430148000959e-06, "loss": 0.429, "step": 1996 }, { "epoch": 0.4938180019782394, "grad_norm": 0.7939524290994, "learning_rate": 4.924350816530104e-06, "loss": 0.4465, "step": 1997 }, { "epoch": 0.4940652818991098, "grad_norm": 0.7916027057232425, "learning_rate": 4.924271444080606e-06, "loss": 0.4444, "step": 1998 }, { "epoch": 0.49431256181998023, "grad_norm": 0.8173525084366966, "learning_rate": 4.924192030653808e-06, "loss": 0.4663, "step": 1999 }, { "epoch": 0.49455984174085066, "grad_norm": 0.852494236856881, "learning_rate": 4.924112576251054e-06, "loss": 0.454, "step": 2000 }, { "epoch": 0.4948071216617211, "grad_norm": 0.7988637477633536, "learning_rate": 4.924033080873684e-06, "loss": 0.4392, "step": 2001 }, { "epoch": 0.4950544015825915, "grad_norm": 0.8122968158357641, "learning_rate": 4.923953544523044e-06, "loss": 0.4637, "step": 2002 }, { "epoch": 0.49530168150346193, "grad_norm": 0.81761782432353, "learning_rate": 4.923873967200479e-06, "loss": 0.4378, "step": 2003 }, { "epoch": 0.49554896142433236, "grad_norm": 0.8633300475989552, "learning_rate": 4.923794348907331e-06, "loss": 0.4656, "step": 2004 }, { "epoch": 0.4957962413452028, "grad_norm": 0.7944747654872235, "learning_rate": 4.923714689644948e-06, "loss": 0.4484, "step": 2005 }, { "epoch": 0.4960435212660732, "grad_norm": 0.863856436444404, "learning_rate": 4.923634989414676e-06, "loss": 0.4371, "step": 2006 }, { "epoch": 0.49629080118694363, "grad_norm": 0.8659860035555645, "learning_rate": 4.923555248217864e-06, "loss": 0.4283, "step": 2007 }, { "epoch": 0.49653808110781406, "grad_norm": 0.7954268742364704, "learning_rate": 4.923475466055856e-06, "loss": 0.4547, "step": 2008 }, { "epoch": 0.4967853610286845, "grad_norm": 0.8138754857165406, "learning_rate": 4.9233956429300034e-06, "loss": 0.4108, "step": 2009 }, { "epoch": 0.4970326409495549, "grad_norm": 0.8581978670153796, "learning_rate": 4.9233157788416545e-06, "loss": 0.432, "step": 2010 }, { "epoch": 0.49727992087042533, "grad_norm": 0.8241805365555525, "learning_rate": 4.9232358737921585e-06, "loss": 0.4382, "step": 2011 }, { "epoch": 0.49752720079129575, "grad_norm": 0.8065492877310552, "learning_rate": 4.923155927782868e-06, "loss": 0.4384, "step": 2012 }, { "epoch": 0.4977744807121662, "grad_norm": 0.8521852417108808, "learning_rate": 4.923075940815133e-06, "loss": 0.4417, "step": 2013 }, { "epoch": 0.4980217606330366, "grad_norm": 0.8721828748607178, "learning_rate": 4.922995912890306e-06, "loss": 0.41, "step": 2014 }, { "epoch": 0.498269040553907, "grad_norm": 0.8475215975436352, "learning_rate": 4.922915844009739e-06, "loss": 0.4415, "step": 2015 }, { "epoch": 0.49851632047477745, "grad_norm": 0.7821130765064389, "learning_rate": 4.922835734174786e-06, "loss": 0.4429, "step": 2016 }, { "epoch": 0.4987636003956479, "grad_norm": 0.8511117917014502, "learning_rate": 4.922755583386801e-06, "loss": 0.4497, "step": 2017 }, { "epoch": 0.4990108803165183, "grad_norm": 0.831275796708298, "learning_rate": 4.92267539164714e-06, "loss": 0.4578, "step": 2018 }, { "epoch": 0.4992581602373887, "grad_norm": 0.8509135804106028, "learning_rate": 4.922595158957155e-06, "loss": 0.4569, "step": 2019 }, { "epoch": 0.49950544015825915, "grad_norm": 0.8582138029948618, "learning_rate": 4.922514885318206e-06, "loss": 0.4386, "step": 2020 }, { "epoch": 0.4997527200791296, "grad_norm": 0.8182293486915994, "learning_rate": 4.922434570731648e-06, "loss": 0.4499, "step": 2021 }, { "epoch": 0.5, "grad_norm": 0.8777366413472809, "learning_rate": 4.922354215198838e-06, "loss": 0.4371, "step": 2022 }, { "epoch": 0.5002472799208705, "grad_norm": 0.811791173923638, "learning_rate": 4.922273818721136e-06, "loss": 0.4672, "step": 2023 }, { "epoch": 0.5004945598417408, "grad_norm": 0.8274747192480223, "learning_rate": 4.922193381299899e-06, "loss": 0.427, "step": 2024 }, { "epoch": 0.5007418397626113, "grad_norm": 0.8065694090822414, "learning_rate": 4.922112902936489e-06, "loss": 0.4736, "step": 2025 }, { "epoch": 0.5009891196834817, "grad_norm": 0.8128372001404772, "learning_rate": 4.922032383632263e-06, "loss": 0.44, "step": 2026 }, { "epoch": 0.5012363996043522, "grad_norm": 0.8287731557078928, "learning_rate": 4.9219518233885856e-06, "loss": 0.4414, "step": 2027 }, { "epoch": 0.5014836795252225, "grad_norm": 0.8063495368442325, "learning_rate": 4.921871222206817e-06, "loss": 0.4736, "step": 2028 }, { "epoch": 0.501730959446093, "grad_norm": 0.8380024116633067, "learning_rate": 4.921790580088318e-06, "loss": 0.4712, "step": 2029 }, { "epoch": 0.5019782393669634, "grad_norm": 0.8284317158685391, "learning_rate": 4.921709897034454e-06, "loss": 0.4497, "step": 2030 }, { "epoch": 0.5022255192878339, "grad_norm": 0.8187102724304874, "learning_rate": 4.921629173046588e-06, "loss": 0.4551, "step": 2031 }, { "epoch": 0.5024727992087042, "grad_norm": 0.8512236754910734, "learning_rate": 4.921548408126085e-06, "loss": 0.4412, "step": 2032 }, { "epoch": 0.5027200791295747, "grad_norm": 0.7788881223000658, "learning_rate": 4.921467602274308e-06, "loss": 0.4104, "step": 2033 }, { "epoch": 0.5029673590504451, "grad_norm": 0.8141480380678998, "learning_rate": 4.921386755492625e-06, "loss": 0.4474, "step": 2034 }, { "epoch": 0.5032146389713156, "grad_norm": 0.8407140685541208, "learning_rate": 4.921305867782402e-06, "loss": 0.4082, "step": 2035 }, { "epoch": 0.503461918892186, "grad_norm": 0.8720791912431307, "learning_rate": 4.9212249391450065e-06, "loss": 0.4013, "step": 2036 }, { "epoch": 0.5037091988130564, "grad_norm": 0.8183434796093931, "learning_rate": 4.9211439695818065e-06, "loss": 0.4316, "step": 2037 }, { "epoch": 0.5039564787339268, "grad_norm": 0.8449401193598226, "learning_rate": 4.921062959094169e-06, "loss": 0.426, "step": 2038 }, { "epoch": 0.5042037586547973, "grad_norm": 0.8509257751850934, "learning_rate": 4.9209819076834655e-06, "loss": 0.4525, "step": 2039 }, { "epoch": 0.5044510385756676, "grad_norm": 0.8519430084094141, "learning_rate": 4.920900815351065e-06, "loss": 0.4146, "step": 2040 }, { "epoch": 0.5046983184965381, "grad_norm": 0.8456813326281569, "learning_rate": 4.920819682098338e-06, "loss": 0.4324, "step": 2041 }, { "epoch": 0.5049455984174085, "grad_norm": 0.8287711902840623, "learning_rate": 4.920738507926657e-06, "loss": 0.4613, "step": 2042 }, { "epoch": 0.505192878338279, "grad_norm": 0.8257370297562611, "learning_rate": 4.920657292837392e-06, "loss": 0.4234, "step": 2043 }, { "epoch": 0.5054401582591493, "grad_norm": 0.8961393796184989, "learning_rate": 4.9205760368319175e-06, "loss": 0.4434, "step": 2044 }, { "epoch": 0.5056874381800198, "grad_norm": 0.8204890246004772, "learning_rate": 4.920494739911607e-06, "loss": 0.4794, "step": 2045 }, { "epoch": 0.5059347181008902, "grad_norm": 0.8376177288707035, "learning_rate": 4.9204134020778335e-06, "loss": 0.4282, "step": 2046 }, { "epoch": 0.5061819980217607, "grad_norm": 0.834472891929355, "learning_rate": 4.920332023331973e-06, "loss": 0.4327, "step": 2047 }, { "epoch": 0.506429277942631, "grad_norm": 0.8406099377992237, "learning_rate": 4.9202506036754e-06, "loss": 0.4441, "step": 2048 }, { "epoch": 0.5066765578635015, "grad_norm": 0.8617534109839463, "learning_rate": 4.920169143109491e-06, "loss": 0.4324, "step": 2049 }, { "epoch": 0.5069238377843719, "grad_norm": 0.8676859852723221, "learning_rate": 4.920087641635624e-06, "loss": 0.4398, "step": 2050 }, { "epoch": 0.5071711177052424, "grad_norm": 0.8385705486777898, "learning_rate": 4.920006099255176e-06, "loss": 0.45, "step": 2051 }, { "epoch": 0.5074183976261127, "grad_norm": 0.8654084128937142, "learning_rate": 4.919924515969524e-06, "loss": 0.4203, "step": 2052 }, { "epoch": 0.5076656775469832, "grad_norm": 0.8643853671809885, "learning_rate": 4.919842891780049e-06, "loss": 0.4562, "step": 2053 }, { "epoch": 0.5079129574678536, "grad_norm": 0.8071820873638121, "learning_rate": 4.919761226688129e-06, "loss": 0.4174, "step": 2054 }, { "epoch": 0.5081602373887241, "grad_norm": 0.8008245242716596, "learning_rate": 4.9196795206951455e-06, "loss": 0.4124, "step": 2055 }, { "epoch": 0.5084075173095944, "grad_norm": 0.8074972712657118, "learning_rate": 4.919597773802479e-06, "loss": 0.448, "step": 2056 }, { "epoch": 0.5086547972304649, "grad_norm": 0.8302305293316452, "learning_rate": 4.919515986011512e-06, "loss": 0.4729, "step": 2057 }, { "epoch": 0.5089020771513353, "grad_norm": 0.8081365268996791, "learning_rate": 4.919434157323627e-06, "loss": 0.4572, "step": 2058 }, { "epoch": 0.5091493570722058, "grad_norm": 0.8033712684412141, "learning_rate": 4.919352287740205e-06, "loss": 0.4382, "step": 2059 }, { "epoch": 0.5093966369930761, "grad_norm": 0.7587735127229113, "learning_rate": 4.919270377262633e-06, "loss": 0.473, "step": 2060 }, { "epoch": 0.5096439169139466, "grad_norm": 0.8619234301971191, "learning_rate": 4.9191884258922926e-06, "loss": 0.443, "step": 2061 }, { "epoch": 0.509891196834817, "grad_norm": 0.8365792373646538, "learning_rate": 4.919106433630572e-06, "loss": 0.4133, "step": 2062 }, { "epoch": 0.5101384767556875, "grad_norm": 0.7856790139850826, "learning_rate": 4.919024400478854e-06, "loss": 0.4528, "step": 2063 }, { "epoch": 0.5103857566765578, "grad_norm": 0.7858186305207652, "learning_rate": 4.918942326438527e-06, "loss": 0.4014, "step": 2064 }, { "epoch": 0.5106330365974283, "grad_norm": 0.8088878916274455, "learning_rate": 4.918860211510979e-06, "loss": 0.4551, "step": 2065 }, { "epoch": 0.5108803165182987, "grad_norm": 0.7872550776179847, "learning_rate": 4.918778055697596e-06, "loss": 0.4684, "step": 2066 }, { "epoch": 0.5111275964391692, "grad_norm": 0.817077611575408, "learning_rate": 4.918695858999767e-06, "loss": 0.414, "step": 2067 }, { "epoch": 0.5113748763600395, "grad_norm": 0.8378442452576724, "learning_rate": 4.918613621418883e-06, "loss": 0.4576, "step": 2068 }, { "epoch": 0.51162215628091, "grad_norm": 0.8744527942820566, "learning_rate": 4.918531342956333e-06, "loss": 0.4543, "step": 2069 }, { "epoch": 0.5118694362017804, "grad_norm": 0.8732192451807397, "learning_rate": 4.9184490236135075e-06, "loss": 0.472, "step": 2070 }, { "epoch": 0.5121167161226509, "grad_norm": 0.8461949177925873, "learning_rate": 4.9183666633917986e-06, "loss": 0.4555, "step": 2071 }, { "epoch": 0.5123639960435212, "grad_norm": 0.8418875439184351, "learning_rate": 4.918284262292597e-06, "loss": 0.4366, "step": 2072 }, { "epoch": 0.5126112759643917, "grad_norm": 0.82720798639419, "learning_rate": 4.9182018203172986e-06, "loss": 0.4407, "step": 2073 }, { "epoch": 0.5128585558852621, "grad_norm": 0.8025497377538527, "learning_rate": 4.918119337467293e-06, "loss": 0.4483, "step": 2074 }, { "epoch": 0.5131058358061326, "grad_norm": 0.8042377781404518, "learning_rate": 4.918036813743978e-06, "loss": 0.4375, "step": 2075 }, { "epoch": 0.5133531157270029, "grad_norm": 0.8480814582795039, "learning_rate": 4.9179542491487455e-06, "loss": 0.4151, "step": 2076 }, { "epoch": 0.5136003956478734, "grad_norm": 0.8057648300526595, "learning_rate": 4.917871643682993e-06, "loss": 0.433, "step": 2077 }, { "epoch": 0.5138476755687438, "grad_norm": 0.8360351985687441, "learning_rate": 4.917788997348116e-06, "loss": 0.4263, "step": 2078 }, { "epoch": 0.5140949554896143, "grad_norm": 0.7756523740903111, "learning_rate": 4.9177063101455115e-06, "loss": 0.462, "step": 2079 }, { "epoch": 0.5143422354104846, "grad_norm": 0.8654229974106498, "learning_rate": 4.917623582076577e-06, "loss": 0.4391, "step": 2080 }, { "epoch": 0.5145895153313551, "grad_norm": 0.8568313817015912, "learning_rate": 4.917540813142712e-06, "loss": 0.4283, "step": 2081 }, { "epoch": 0.5148367952522255, "grad_norm": 0.8649603479345634, "learning_rate": 4.917458003345314e-06, "loss": 0.4261, "step": 2082 }, { "epoch": 0.515084075173096, "grad_norm": 0.8187465423672693, "learning_rate": 4.9173751526857835e-06, "loss": 0.4441, "step": 2083 }, { "epoch": 0.5153313550939663, "grad_norm": 0.8573292004045983, "learning_rate": 4.9172922611655205e-06, "loss": 0.4337, "step": 2084 }, { "epoch": 0.5155786350148368, "grad_norm": 0.8663104974580745, "learning_rate": 4.917209328785927e-06, "loss": 0.448, "step": 2085 }, { "epoch": 0.5158259149357072, "grad_norm": 0.8473083515505613, "learning_rate": 4.917126355548404e-06, "loss": 0.4226, "step": 2086 }, { "epoch": 0.5160731948565777, "grad_norm": 0.8896880901970612, "learning_rate": 4.9170433414543545e-06, "loss": 0.4093, "step": 2087 }, { "epoch": 0.516320474777448, "grad_norm": 0.8806978447051235, "learning_rate": 4.916960286505181e-06, "loss": 0.4322, "step": 2088 }, { "epoch": 0.5165677546983185, "grad_norm": 0.8451289370207562, "learning_rate": 4.9168771907022885e-06, "loss": 0.4176, "step": 2089 }, { "epoch": 0.5168150346191889, "grad_norm": 0.7887918587386917, "learning_rate": 4.91679405404708e-06, "loss": 0.4333, "step": 2090 }, { "epoch": 0.5170623145400594, "grad_norm": 0.8093101065598383, "learning_rate": 4.916710876540962e-06, "loss": 0.4482, "step": 2091 }, { "epoch": 0.5173095944609297, "grad_norm": 0.8145596529223303, "learning_rate": 4.916627658185339e-06, "loss": 0.4291, "step": 2092 }, { "epoch": 0.5175568743818002, "grad_norm": 0.8464311393382885, "learning_rate": 4.9165443989816195e-06, "loss": 0.4458, "step": 2093 }, { "epoch": 0.5178041543026706, "grad_norm": 0.8442575884890843, "learning_rate": 4.91646109893121e-06, "loss": 0.4598, "step": 2094 }, { "epoch": 0.518051434223541, "grad_norm": 0.8413652586604167, "learning_rate": 4.916377758035519e-06, "loss": 0.4618, "step": 2095 }, { "epoch": 0.5182987141444114, "grad_norm": 0.8364097355333588, "learning_rate": 4.916294376295954e-06, "loss": 0.4465, "step": 2096 }, { "epoch": 0.5185459940652819, "grad_norm": 0.8400100432544643, "learning_rate": 4.916210953713926e-06, "loss": 0.4313, "step": 2097 }, { "epoch": 0.5187932739861523, "grad_norm": 0.8128995366146822, "learning_rate": 4.916127490290843e-06, "loss": 0.459, "step": 2098 }, { "epoch": 0.5190405539070228, "grad_norm": 0.8073646482891128, "learning_rate": 4.916043986028117e-06, "loss": 0.4503, "step": 2099 }, { "epoch": 0.5192878338278932, "grad_norm": 0.8355382070231939, "learning_rate": 4.91596044092716e-06, "loss": 0.4238, "step": 2100 }, { "epoch": 0.5195351137487636, "grad_norm": 0.8290409040475074, "learning_rate": 4.915876854989384e-06, "loss": 0.4331, "step": 2101 }, { "epoch": 0.5197823936696341, "grad_norm": 0.8473558657695541, "learning_rate": 4.915793228216201e-06, "loss": 0.4177, "step": 2102 }, { "epoch": 0.5200296735905044, "grad_norm": 0.8168178012275521, "learning_rate": 4.915709560609025e-06, "loss": 0.4207, "step": 2103 }, { "epoch": 0.5202769535113749, "grad_norm": 0.8349750801854026, "learning_rate": 4.91562585216927e-06, "loss": 0.4387, "step": 2104 }, { "epoch": 0.5205242334322453, "grad_norm": 0.8332513834402295, "learning_rate": 4.9155421028983515e-06, "loss": 0.4475, "step": 2105 }, { "epoch": 0.5207715133531158, "grad_norm": 0.823978542351043, "learning_rate": 4.915458312797684e-06, "loss": 0.4987, "step": 2106 }, { "epoch": 0.5210187932739861, "grad_norm": 0.8171613069077716, "learning_rate": 4.915374481868685e-06, "loss": 0.4533, "step": 2107 }, { "epoch": 0.5212660731948566, "grad_norm": 0.7912376433723926, "learning_rate": 4.915290610112772e-06, "loss": 0.4411, "step": 2108 }, { "epoch": 0.521513353115727, "grad_norm": 0.8223882924185978, "learning_rate": 4.915206697531361e-06, "loss": 0.4469, "step": 2109 }, { "epoch": 0.5217606330365975, "grad_norm": 0.8122382638898773, "learning_rate": 4.91512274412587e-06, "loss": 0.4555, "step": 2110 }, { "epoch": 0.5220079129574678, "grad_norm": 0.8145767118335325, "learning_rate": 4.9150387498977205e-06, "loss": 0.4355, "step": 2111 }, { "epoch": 0.5222551928783383, "grad_norm": 0.8041076958743947, "learning_rate": 4.91495471484833e-06, "loss": 0.436, "step": 2112 }, { "epoch": 0.5225024727992087, "grad_norm": 0.8253205028728051, "learning_rate": 4.91487063897912e-06, "loss": 0.4334, "step": 2113 }, { "epoch": 0.5227497527200792, "grad_norm": 0.8192406762636769, "learning_rate": 4.9147865222915114e-06, "loss": 0.4581, "step": 2114 }, { "epoch": 0.5229970326409495, "grad_norm": 0.8354577329747063, "learning_rate": 4.914702364786926e-06, "loss": 0.4213, "step": 2115 }, { "epoch": 0.52324431256182, "grad_norm": 0.8290837226364314, "learning_rate": 4.914618166466787e-06, "loss": 0.4234, "step": 2116 }, { "epoch": 0.5234915924826904, "grad_norm": 0.827501095420979, "learning_rate": 4.914533927332516e-06, "loss": 0.3891, "step": 2117 }, { "epoch": 0.5237388724035609, "grad_norm": 0.8342828261706431, "learning_rate": 4.91444964738554e-06, "loss": 0.4336, "step": 2118 }, { "epoch": 0.5239861523244312, "grad_norm": 0.7885249793199698, "learning_rate": 4.914365326627279e-06, "loss": 0.4374, "step": 2119 }, { "epoch": 0.5242334322453017, "grad_norm": 0.8226055988032711, "learning_rate": 4.914280965059162e-06, "loss": 0.4655, "step": 2120 }, { "epoch": 0.5244807121661721, "grad_norm": 0.8010022488750518, "learning_rate": 4.914196562682613e-06, "loss": 0.4765, "step": 2121 }, { "epoch": 0.5247279920870426, "grad_norm": 0.8721911721071652, "learning_rate": 4.91411211949906e-06, "loss": 0.4082, "step": 2122 }, { "epoch": 0.5249752720079129, "grad_norm": 0.8054096169771939, "learning_rate": 4.914027635509929e-06, "loss": 0.4274, "step": 2123 }, { "epoch": 0.5252225519287834, "grad_norm": 0.8140381801310196, "learning_rate": 4.913943110716649e-06, "loss": 0.4405, "step": 2124 }, { "epoch": 0.5254698318496538, "grad_norm": 0.8264133414135034, "learning_rate": 4.913858545120648e-06, "loss": 0.4604, "step": 2125 }, { "epoch": 0.5257171117705243, "grad_norm": 0.8296155953337557, "learning_rate": 4.913773938723356e-06, "loss": 0.431, "step": 2126 }, { "epoch": 0.5259643916913946, "grad_norm": 0.8390771475583482, "learning_rate": 4.913689291526203e-06, "loss": 0.4416, "step": 2127 }, { "epoch": 0.5262116716122651, "grad_norm": 0.7978475489974522, "learning_rate": 4.91360460353062e-06, "loss": 0.4449, "step": 2128 }, { "epoch": 0.5264589515331355, "grad_norm": 0.829407786061133, "learning_rate": 4.913519874738038e-06, "loss": 0.4076, "step": 2129 }, { "epoch": 0.526706231454006, "grad_norm": 0.8555564981475666, "learning_rate": 4.913435105149889e-06, "loss": 0.4267, "step": 2130 }, { "epoch": 0.5269535113748763, "grad_norm": 0.7951753815561883, "learning_rate": 4.913350294767606e-06, "loss": 0.4283, "step": 2131 }, { "epoch": 0.5272007912957468, "grad_norm": 0.7800972057420057, "learning_rate": 4.913265443592623e-06, "loss": 0.4148, "step": 2132 }, { "epoch": 0.5274480712166172, "grad_norm": 0.8085138173904091, "learning_rate": 4.913180551626375e-06, "loss": 0.4119, "step": 2133 }, { "epoch": 0.5276953511374877, "grad_norm": 0.8267372632890972, "learning_rate": 4.913095618870295e-06, "loss": 0.4228, "step": 2134 }, { "epoch": 0.527942631058358, "grad_norm": 0.8510446154835541, "learning_rate": 4.913010645325819e-06, "loss": 0.4278, "step": 2135 }, { "epoch": 0.5281899109792285, "grad_norm": 0.7552565986314601, "learning_rate": 4.912925630994384e-06, "loss": 0.4367, "step": 2136 }, { "epoch": 0.5284371909000989, "grad_norm": 0.8004850586370468, "learning_rate": 4.912840575877427e-06, "loss": 0.4401, "step": 2137 }, { "epoch": 0.5286844708209694, "grad_norm": 0.8428019963490445, "learning_rate": 4.912755479976386e-06, "loss": 0.4457, "step": 2138 }, { "epoch": 0.5289317507418397, "grad_norm": 0.7976182228965645, "learning_rate": 4.912670343292698e-06, "loss": 0.453, "step": 2139 }, { "epoch": 0.5291790306627102, "grad_norm": 0.8429254450002306, "learning_rate": 4.912585165827803e-06, "loss": 0.456, "step": 2140 }, { "epoch": 0.5294263105835806, "grad_norm": 0.7952504632214655, "learning_rate": 4.9124999475831406e-06, "loss": 0.455, "step": 2141 }, { "epoch": 0.5296735905044511, "grad_norm": 0.8351255206913245, "learning_rate": 4.912414688560152e-06, "loss": 0.45, "step": 2142 }, { "epoch": 0.5299208704253214, "grad_norm": 0.8073379583341719, "learning_rate": 4.912329388760277e-06, "loss": 0.4188, "step": 2143 }, { "epoch": 0.5301681503461919, "grad_norm": 0.8233470088787288, "learning_rate": 4.912244048184958e-06, "loss": 0.4293, "step": 2144 }, { "epoch": 0.5304154302670623, "grad_norm": 0.8453326881951997, "learning_rate": 4.912158666835638e-06, "loss": 0.417, "step": 2145 }, { "epoch": 0.5306627101879328, "grad_norm": 0.8241068934917218, "learning_rate": 4.912073244713759e-06, "loss": 0.4182, "step": 2146 }, { "epoch": 0.5309099901088031, "grad_norm": 0.8519056160776881, "learning_rate": 4.911987781820766e-06, "loss": 0.4313, "step": 2147 }, { "epoch": 0.5311572700296736, "grad_norm": 0.853659838700209, "learning_rate": 4.911902278158104e-06, "loss": 0.4107, "step": 2148 }, { "epoch": 0.531404549950544, "grad_norm": 0.801594697582263, "learning_rate": 4.911816733727216e-06, "loss": 0.4403, "step": 2149 }, { "epoch": 0.5316518298714145, "grad_norm": 0.8333840414724853, "learning_rate": 4.9117311485295504e-06, "loss": 0.4948, "step": 2150 }, { "epoch": 0.5318991097922848, "grad_norm": 0.8255020361468853, "learning_rate": 4.911645522566553e-06, "loss": 0.4524, "step": 2151 }, { "epoch": 0.5321463897131553, "grad_norm": 0.8213445420795159, "learning_rate": 4.91155985583967e-06, "loss": 0.4122, "step": 2152 }, { "epoch": 0.5323936696340257, "grad_norm": 0.8492299494064399, "learning_rate": 4.911474148350351e-06, "loss": 0.4305, "step": 2153 }, { "epoch": 0.5326409495548962, "grad_norm": 0.7898914841030381, "learning_rate": 4.9113884001000434e-06, "loss": 0.4021, "step": 2154 }, { "epoch": 0.5328882294757665, "grad_norm": 0.854393304558985, "learning_rate": 4.911302611090198e-06, "loss": 0.4462, "step": 2155 }, { "epoch": 0.533135509396637, "grad_norm": 0.8418573865966376, "learning_rate": 4.911216781322264e-06, "loss": 0.4522, "step": 2156 }, { "epoch": 0.5333827893175074, "grad_norm": 0.8668734938595644, "learning_rate": 4.911130910797693e-06, "loss": 0.422, "step": 2157 }, { "epoch": 0.5336300692383779, "grad_norm": 0.8161443060828762, "learning_rate": 4.911044999517936e-06, "loss": 0.4137, "step": 2158 }, { "epoch": 0.5338773491592482, "grad_norm": 0.812891841442536, "learning_rate": 4.910959047484443e-06, "loss": 0.482, "step": 2159 }, { "epoch": 0.5341246290801187, "grad_norm": 0.8152358678019482, "learning_rate": 4.910873054698671e-06, "loss": 0.4771, "step": 2160 }, { "epoch": 0.5343719090009891, "grad_norm": 0.8338420312345085, "learning_rate": 4.91078702116207e-06, "loss": 0.4435, "step": 2161 }, { "epoch": 0.5346191889218596, "grad_norm": 0.8266232547366531, "learning_rate": 4.910700946876096e-06, "loss": 0.4302, "step": 2162 }, { "epoch": 0.5348664688427299, "grad_norm": 0.8521482137328993, "learning_rate": 4.910614831842203e-06, "loss": 0.4587, "step": 2163 }, { "epoch": 0.5351137487636004, "grad_norm": 0.7961612413527702, "learning_rate": 4.910528676061848e-06, "loss": 0.4333, "step": 2164 }, { "epoch": 0.5353610286844708, "grad_norm": 0.7767270958799087, "learning_rate": 4.910442479536486e-06, "loss": 0.4744, "step": 2165 }, { "epoch": 0.5356083086053413, "grad_norm": 0.85859905757044, "learning_rate": 4.910356242267573e-06, "loss": 0.4421, "step": 2166 }, { "epoch": 0.5358555885262116, "grad_norm": 0.8270715526912454, "learning_rate": 4.91026996425657e-06, "loss": 0.4013, "step": 2167 }, { "epoch": 0.5361028684470821, "grad_norm": 0.8315477443532387, "learning_rate": 4.910183645504932e-06, "loss": 0.4191, "step": 2168 }, { "epoch": 0.5363501483679525, "grad_norm": 0.8379944677971474, "learning_rate": 4.91009728601412e-06, "loss": 0.4465, "step": 2169 }, { "epoch": 0.536597428288823, "grad_norm": 0.8729045253998553, "learning_rate": 4.910010885785593e-06, "loss": 0.3901, "step": 2170 }, { "epoch": 0.5368447082096933, "grad_norm": 0.8667862657203399, "learning_rate": 4.909924444820812e-06, "loss": 0.4309, "step": 2171 }, { "epoch": 0.5370919881305638, "grad_norm": 0.8580076116689658, "learning_rate": 4.909837963121236e-06, "loss": 0.459, "step": 2172 }, { "epoch": 0.5373392680514342, "grad_norm": 0.8721818555952473, "learning_rate": 4.90975144068833e-06, "loss": 0.4297, "step": 2173 }, { "epoch": 0.5375865479723047, "grad_norm": 0.8281536856693168, "learning_rate": 4.9096648775235555e-06, "loss": 0.4258, "step": 2174 }, { "epoch": 0.537833827893175, "grad_norm": 0.8542025748193519, "learning_rate": 4.909578273628374e-06, "loss": 0.4518, "step": 2175 }, { "epoch": 0.5380811078140455, "grad_norm": 0.8190991431494749, "learning_rate": 4.909491629004251e-06, "loss": 0.4541, "step": 2176 }, { "epoch": 0.5383283877349159, "grad_norm": 0.8253186688235863, "learning_rate": 4.909404943652649e-06, "loss": 0.4359, "step": 2177 }, { "epoch": 0.5385756676557863, "grad_norm": 0.7976389178773332, "learning_rate": 4.909318217575036e-06, "loss": 0.4315, "step": 2178 }, { "epoch": 0.5388229475766568, "grad_norm": 0.8230735329961125, "learning_rate": 4.909231450772877e-06, "loss": 0.4417, "step": 2179 }, { "epoch": 0.5390702274975272, "grad_norm": 0.8584664253796905, "learning_rate": 4.909144643247637e-06, "loss": 0.4229, "step": 2180 }, { "epoch": 0.5393175074183977, "grad_norm": 0.8369916036027719, "learning_rate": 4.909057795000786e-06, "loss": 0.4209, "step": 2181 }, { "epoch": 0.539564787339268, "grad_norm": 0.8213458816819943, "learning_rate": 4.90897090603379e-06, "loss": 0.4552, "step": 2182 }, { "epoch": 0.5398120672601385, "grad_norm": 0.7906532652889825, "learning_rate": 4.908883976348118e-06, "loss": 0.4441, "step": 2183 }, { "epoch": 0.5400593471810089, "grad_norm": 0.807640026645016, "learning_rate": 4.908797005945239e-06, "loss": 0.4002, "step": 2184 }, { "epoch": 0.5403066271018794, "grad_norm": 0.8040101227089689, "learning_rate": 4.908709994826625e-06, "loss": 0.4424, "step": 2185 }, { "epoch": 0.5405539070227497, "grad_norm": 0.7864043573853555, "learning_rate": 4.9086229429937445e-06, "loss": 0.438, "step": 2186 }, { "epoch": 0.5408011869436202, "grad_norm": 0.8279325371607597, "learning_rate": 4.908535850448071e-06, "loss": 0.4508, "step": 2187 }, { "epoch": 0.5410484668644906, "grad_norm": 0.8115008524086239, "learning_rate": 4.908448717191074e-06, "loss": 0.4323, "step": 2188 }, { "epoch": 0.5412957467853611, "grad_norm": 0.9052085553109384, "learning_rate": 4.9083615432242285e-06, "loss": 0.4108, "step": 2189 }, { "epoch": 0.5415430267062314, "grad_norm": 0.8303476204316234, "learning_rate": 4.908274328549006e-06, "loss": 0.4361, "step": 2190 }, { "epoch": 0.5417903066271019, "grad_norm": 0.7947377340525147, "learning_rate": 4.908187073166883e-06, "loss": 0.4392, "step": 2191 }, { "epoch": 0.5420375865479723, "grad_norm": 0.7966996712846387, "learning_rate": 4.908099777079334e-06, "loss": 0.4436, "step": 2192 }, { "epoch": 0.5422848664688428, "grad_norm": 0.7775724874301819, "learning_rate": 4.908012440287833e-06, "loss": 0.446, "step": 2193 }, { "epoch": 0.5425321463897131, "grad_norm": 0.8067580307740871, "learning_rate": 4.907925062793858e-06, "loss": 0.4667, "step": 2194 }, { "epoch": 0.5427794263105836, "grad_norm": 0.8398863268604441, "learning_rate": 4.907837644598884e-06, "loss": 0.4295, "step": 2195 }, { "epoch": 0.543026706231454, "grad_norm": 0.8404337735535851, "learning_rate": 4.90775018570439e-06, "loss": 0.4308, "step": 2196 }, { "epoch": 0.5432739861523245, "grad_norm": 0.8528144316022201, "learning_rate": 4.907662686111854e-06, "loss": 0.4287, "step": 2197 }, { "epoch": 0.5435212660731948, "grad_norm": 0.7777032861160702, "learning_rate": 4.907575145822755e-06, "loss": 0.4426, "step": 2198 }, { "epoch": 0.5437685459940653, "grad_norm": 0.8135680163087625, "learning_rate": 4.907487564838573e-06, "loss": 0.448, "step": 2199 }, { "epoch": 0.5440158259149357, "grad_norm": 0.800459169825187, "learning_rate": 4.907399943160787e-06, "loss": 0.4259, "step": 2200 }, { "epoch": 0.5442631058358062, "grad_norm": 0.8160972567128584, "learning_rate": 4.9073122807908815e-06, "loss": 0.4278, "step": 2201 }, { "epoch": 0.5445103857566765, "grad_norm": 0.8310785106901327, "learning_rate": 4.907224577730334e-06, "loss": 0.4305, "step": 2202 }, { "epoch": 0.544757665677547, "grad_norm": 0.828287917795897, "learning_rate": 4.907136833980629e-06, "loss": 0.4288, "step": 2203 }, { "epoch": 0.5450049455984174, "grad_norm": 0.8524021235419231, "learning_rate": 4.907049049543249e-06, "loss": 0.4648, "step": 2204 }, { "epoch": 0.5452522255192879, "grad_norm": 0.8054678015571812, "learning_rate": 4.906961224419679e-06, "loss": 0.3964, "step": 2205 }, { "epoch": 0.5454995054401582, "grad_norm": 0.870197504575575, "learning_rate": 4.9068733586114025e-06, "loss": 0.4401, "step": 2206 }, { "epoch": 0.5457467853610287, "grad_norm": 0.7999008358756727, "learning_rate": 4.9067854521199055e-06, "loss": 0.4216, "step": 2207 }, { "epoch": 0.5459940652818991, "grad_norm": 0.7757865860444793, "learning_rate": 4.906697504946672e-06, "loss": 0.4631, "step": 2208 }, { "epoch": 0.5462413452027696, "grad_norm": 0.8179813608668347, "learning_rate": 4.906609517093192e-06, "loss": 0.4291, "step": 2209 }, { "epoch": 0.5464886251236399, "grad_norm": 0.8254017964289843, "learning_rate": 4.906521488560949e-06, "loss": 0.4165, "step": 2210 }, { "epoch": 0.5467359050445104, "grad_norm": 0.8365497028422612, "learning_rate": 4.906433419351433e-06, "loss": 0.414, "step": 2211 }, { "epoch": 0.5469831849653808, "grad_norm": 0.8249052368071237, "learning_rate": 4.906345309466131e-06, "loss": 0.4337, "step": 2212 }, { "epoch": 0.5472304648862513, "grad_norm": 0.793602182328121, "learning_rate": 4.906257158906536e-06, "loss": 0.4529, "step": 2213 }, { "epoch": 0.5474777448071216, "grad_norm": 0.7932116491251698, "learning_rate": 4.9061689676741335e-06, "loss": 0.4417, "step": 2214 }, { "epoch": 0.5477250247279921, "grad_norm": 0.8341760080668459, "learning_rate": 4.906080735770417e-06, "loss": 0.4301, "step": 2215 }, { "epoch": 0.5479723046488625, "grad_norm": 0.8190689937567308, "learning_rate": 4.905992463196877e-06, "loss": 0.4548, "step": 2216 }, { "epoch": 0.548219584569733, "grad_norm": 0.8905987162330603, "learning_rate": 4.9059041499550055e-06, "loss": 0.4423, "step": 2217 }, { "epoch": 0.5484668644906033, "grad_norm": 0.8042704183629057, "learning_rate": 4.905815796046296e-06, "loss": 0.4159, "step": 2218 }, { "epoch": 0.5487141444114738, "grad_norm": 0.7900037057341976, "learning_rate": 4.905727401472241e-06, "loss": 0.4726, "step": 2219 }, { "epoch": 0.5489614243323442, "grad_norm": 0.8418688230703245, "learning_rate": 4.905638966234335e-06, "loss": 0.4332, "step": 2220 }, { "epoch": 0.5492087042532147, "grad_norm": 0.8111111477828057, "learning_rate": 4.905550490334072e-06, "loss": 0.4698, "step": 2221 }, { "epoch": 0.549455984174085, "grad_norm": 0.7790535493069579, "learning_rate": 4.90546197377295e-06, "loss": 0.4388, "step": 2222 }, { "epoch": 0.5497032640949555, "grad_norm": 0.8159173255576104, "learning_rate": 4.905373416552463e-06, "loss": 0.4341, "step": 2223 }, { "epoch": 0.5499505440158259, "grad_norm": 0.8068905887185784, "learning_rate": 4.905284818674107e-06, "loss": 0.4507, "step": 2224 }, { "epoch": 0.5501978239366964, "grad_norm": 0.7975516282689951, "learning_rate": 4.905196180139382e-06, "loss": 0.4275, "step": 2225 }, { "epoch": 0.5504451038575667, "grad_norm": 0.8363033551597234, "learning_rate": 4.905107500949785e-06, "loss": 0.4033, "step": 2226 }, { "epoch": 0.5506923837784372, "grad_norm": 0.8446055816925565, "learning_rate": 4.905018781106815e-06, "loss": 0.4158, "step": 2227 }, { "epoch": 0.5509396636993076, "grad_norm": 0.8059400557615801, "learning_rate": 4.904930020611972e-06, "loss": 0.4892, "step": 2228 }, { "epoch": 0.5511869436201781, "grad_norm": 0.8230403015167589, "learning_rate": 4.904841219466756e-06, "loss": 0.4299, "step": 2229 }, { "epoch": 0.5514342235410484, "grad_norm": 0.8136682923193483, "learning_rate": 4.904752377672668e-06, "loss": 0.4462, "step": 2230 }, { "epoch": 0.5516815034619189, "grad_norm": 0.8024451412346982, "learning_rate": 4.90466349523121e-06, "loss": 0.4318, "step": 2231 }, { "epoch": 0.5519287833827893, "grad_norm": 0.8662454751543324, "learning_rate": 4.904574572143883e-06, "loss": 0.4286, "step": 2232 }, { "epoch": 0.5521760633036598, "grad_norm": 0.7864315924989522, "learning_rate": 4.904485608412193e-06, "loss": 0.4594, "step": 2233 }, { "epoch": 0.5524233432245301, "grad_norm": 0.7968996130135781, "learning_rate": 4.90439660403764e-06, "loss": 0.4429, "step": 2234 }, { "epoch": 0.5526706231454006, "grad_norm": 0.8056598325190589, "learning_rate": 4.904307559021731e-06, "loss": 0.4406, "step": 2235 }, { "epoch": 0.552917903066271, "grad_norm": 0.7968152385905476, "learning_rate": 4.9042184733659716e-06, "loss": 0.4491, "step": 2236 }, { "epoch": 0.5531651829871415, "grad_norm": 0.8240548698911649, "learning_rate": 4.904129347071866e-06, "loss": 0.4467, "step": 2237 }, { "epoch": 0.5534124629080118, "grad_norm": 0.8459774707886466, "learning_rate": 4.904040180140921e-06, "loss": 0.4174, "step": 2238 }, { "epoch": 0.5536597428288823, "grad_norm": 0.7962096726579939, "learning_rate": 4.903950972574644e-06, "loss": 0.4378, "step": 2239 }, { "epoch": 0.5539070227497527, "grad_norm": 0.7956323439925366, "learning_rate": 4.903861724374542e-06, "loss": 0.4728, "step": 2240 }, { "epoch": 0.5541543026706232, "grad_norm": 0.874177788619472, "learning_rate": 4.903772435542126e-06, "loss": 0.4302, "step": 2241 }, { "epoch": 0.5544015825914935, "grad_norm": 0.8428091902882662, "learning_rate": 4.9036831060789025e-06, "loss": 0.4645, "step": 2242 }, { "epoch": 0.554648862512364, "grad_norm": 0.8492857888020867, "learning_rate": 4.903593735986383e-06, "loss": 0.4369, "step": 2243 }, { "epoch": 0.5548961424332344, "grad_norm": 0.8303933191114768, "learning_rate": 4.903504325266077e-06, "loss": 0.4068, "step": 2244 }, { "epoch": 0.5551434223541049, "grad_norm": 0.827774144006841, "learning_rate": 4.903414873919497e-06, "loss": 0.4302, "step": 2245 }, { "epoch": 0.5553907022749752, "grad_norm": 0.8032810656584819, "learning_rate": 4.903325381948154e-06, "loss": 0.4324, "step": 2246 }, { "epoch": 0.5556379821958457, "grad_norm": 0.7855184005897174, "learning_rate": 4.903235849353562e-06, "loss": 0.4413, "step": 2247 }, { "epoch": 0.5558852621167161, "grad_norm": 0.8569371908994023, "learning_rate": 4.903146276137233e-06, "loss": 0.4295, "step": 2248 }, { "epoch": 0.5561325420375866, "grad_norm": 0.8228709188562383, "learning_rate": 4.903056662300682e-06, "loss": 0.4093, "step": 2249 }, { "epoch": 0.5563798219584569, "grad_norm": 0.7978752521898264, "learning_rate": 4.9029670078454225e-06, "loss": 0.4483, "step": 2250 }, { "epoch": 0.5566271018793274, "grad_norm": 0.8386855590201181, "learning_rate": 4.902877312772973e-06, "loss": 0.4367, "step": 2251 }, { "epoch": 0.5568743818001978, "grad_norm": 0.8097659858522324, "learning_rate": 4.902787577084844e-06, "loss": 0.4458, "step": 2252 }, { "epoch": 0.5571216617210683, "grad_norm": 0.7981580393798596, "learning_rate": 4.902697800782558e-06, "loss": 0.4582, "step": 2253 }, { "epoch": 0.5573689416419386, "grad_norm": 0.8063797456708763, "learning_rate": 4.9026079838676295e-06, "loss": 0.4467, "step": 2254 }, { "epoch": 0.5576162215628091, "grad_norm": 0.8263331039140084, "learning_rate": 4.902518126341577e-06, "loss": 0.4623, "step": 2255 }, { "epoch": 0.5578635014836796, "grad_norm": 0.7836176844916037, "learning_rate": 4.90242822820592e-06, "loss": 0.4364, "step": 2256 }, { "epoch": 0.55811078140455, "grad_norm": 0.8055018134426273, "learning_rate": 4.9023382894621775e-06, "loss": 0.4334, "step": 2257 }, { "epoch": 0.5583580613254204, "grad_norm": 0.8561517123386011, "learning_rate": 4.90224831011187e-06, "loss": 0.4232, "step": 2258 }, { "epoch": 0.5586053412462908, "grad_norm": 0.857649910835037, "learning_rate": 4.902158290156518e-06, "loss": 0.4254, "step": 2259 }, { "epoch": 0.5588526211671613, "grad_norm": 0.847810648250041, "learning_rate": 4.902068229597644e-06, "loss": 0.4346, "step": 2260 }, { "epoch": 0.5590999010880316, "grad_norm": 0.8041283790378243, "learning_rate": 4.901978128436769e-06, "loss": 0.4451, "step": 2261 }, { "epoch": 0.5593471810089021, "grad_norm": 0.8463729671090765, "learning_rate": 4.901887986675418e-06, "loss": 0.4629, "step": 2262 }, { "epoch": 0.5595944609297725, "grad_norm": 0.8255668378936192, "learning_rate": 4.901797804315112e-06, "loss": 0.4526, "step": 2263 }, { "epoch": 0.559841740850643, "grad_norm": 0.8397646315208941, "learning_rate": 4.901707581357377e-06, "loss": 0.4475, "step": 2264 }, { "epoch": 0.5600890207715133, "grad_norm": 0.8508504303490839, "learning_rate": 4.901617317803738e-06, "loss": 0.4254, "step": 2265 }, { "epoch": 0.5603363006923838, "grad_norm": 0.7740850838820026, "learning_rate": 4.9015270136557204e-06, "loss": 0.4467, "step": 2266 }, { "epoch": 0.5605835806132542, "grad_norm": 0.8227529902841786, "learning_rate": 4.9014366689148504e-06, "loss": 0.4195, "step": 2267 }, { "epoch": 0.5608308605341247, "grad_norm": 0.777112849292443, "learning_rate": 4.9013462835826564e-06, "loss": 0.436, "step": 2268 }, { "epoch": 0.561078140454995, "grad_norm": 0.802035885633998, "learning_rate": 4.901255857660664e-06, "loss": 0.4443, "step": 2269 }, { "epoch": 0.5613254203758655, "grad_norm": 0.7937144984071973, "learning_rate": 4.9011653911504035e-06, "loss": 0.4623, "step": 2270 }, { "epoch": 0.5615727002967359, "grad_norm": 0.8332439563357789, "learning_rate": 4.901074884053403e-06, "loss": 0.4373, "step": 2271 }, { "epoch": 0.5618199802176064, "grad_norm": 0.826393197627037, "learning_rate": 4.900984336371192e-06, "loss": 0.4567, "step": 2272 }, { "epoch": 0.5620672601384767, "grad_norm": 0.8725228788732688, "learning_rate": 4.900893748105303e-06, "loss": 0.4052, "step": 2273 }, { "epoch": 0.5623145400593472, "grad_norm": 0.8350692058768774, "learning_rate": 4.900803119257265e-06, "loss": 0.4229, "step": 2274 }, { "epoch": 0.5625618199802176, "grad_norm": 0.7833697423172672, "learning_rate": 4.900712449828611e-06, "loss": 0.4712, "step": 2275 }, { "epoch": 0.5628090999010881, "grad_norm": 0.8084407515407549, "learning_rate": 4.9006217398208735e-06, "loss": 0.4767, "step": 2276 }, { "epoch": 0.5630563798219584, "grad_norm": 0.8163996352400188, "learning_rate": 4.900530989235586e-06, "loss": 0.4593, "step": 2277 }, { "epoch": 0.5633036597428289, "grad_norm": 0.8359981175249533, "learning_rate": 4.9004401980742814e-06, "loss": 0.4633, "step": 2278 }, { "epoch": 0.5635509396636993, "grad_norm": 0.8165262581141532, "learning_rate": 4.900349366338495e-06, "loss": 0.4334, "step": 2279 }, { "epoch": 0.5637982195845698, "grad_norm": 0.8640529713261024, "learning_rate": 4.900258494029763e-06, "loss": 0.4489, "step": 2280 }, { "epoch": 0.5640454995054401, "grad_norm": 0.8400644684388208, "learning_rate": 4.90016758114962e-06, "loss": 0.4168, "step": 2281 }, { "epoch": 0.5642927794263106, "grad_norm": 0.8594985484661135, "learning_rate": 4.9000766276996025e-06, "loss": 0.443, "step": 2282 }, { "epoch": 0.564540059347181, "grad_norm": 0.7941467731546799, "learning_rate": 4.8999856336812495e-06, "loss": 0.4597, "step": 2283 }, { "epoch": 0.5647873392680515, "grad_norm": 0.8334239105274157, "learning_rate": 4.899894599096098e-06, "loss": 0.4527, "step": 2284 }, { "epoch": 0.5650346191889218, "grad_norm": 0.8680664900329874, "learning_rate": 4.899803523945688e-06, "loss": 0.4288, "step": 2285 }, { "epoch": 0.5652818991097923, "grad_norm": 0.827260333304532, "learning_rate": 4.899712408231556e-06, "loss": 0.4289, "step": 2286 }, { "epoch": 0.5655291790306627, "grad_norm": 0.8183857986951192, "learning_rate": 4.899621251955245e-06, "loss": 0.4149, "step": 2287 }, { "epoch": 0.5657764589515332, "grad_norm": 0.8184921245944378, "learning_rate": 4.899530055118295e-06, "loss": 0.4581, "step": 2288 }, { "epoch": 0.5660237388724035, "grad_norm": 0.846124403399315, "learning_rate": 4.899438817722248e-06, "loss": 0.4104, "step": 2289 }, { "epoch": 0.566271018793274, "grad_norm": 0.8560378103575694, "learning_rate": 4.899347539768644e-06, "loss": 0.4358, "step": 2290 }, { "epoch": 0.5665182987141444, "grad_norm": 0.8072714154604986, "learning_rate": 4.899256221259028e-06, "loss": 0.4087, "step": 2291 }, { "epoch": 0.5667655786350149, "grad_norm": 0.7711814123606188, "learning_rate": 4.899164862194943e-06, "loss": 0.4623, "step": 2292 }, { "epoch": 0.5670128585558852, "grad_norm": 0.7892717211222391, "learning_rate": 4.899073462577933e-06, "loss": 0.4566, "step": 2293 }, { "epoch": 0.5672601384767557, "grad_norm": 0.8024640398701117, "learning_rate": 4.898982022409543e-06, "loss": 0.4082, "step": 2294 }, { "epoch": 0.5675074183976261, "grad_norm": 0.8267656493786144, "learning_rate": 4.898890541691319e-06, "loss": 0.4373, "step": 2295 }, { "epoch": 0.5677546983184966, "grad_norm": 0.804452429351204, "learning_rate": 4.898799020424806e-06, "loss": 0.4303, "step": 2296 }, { "epoch": 0.5680019782393669, "grad_norm": 0.8341753209395127, "learning_rate": 4.8987074586115535e-06, "loss": 0.4246, "step": 2297 }, { "epoch": 0.5682492581602374, "grad_norm": 0.8332330741798474, "learning_rate": 4.898615856253107e-06, "loss": 0.4177, "step": 2298 }, { "epoch": 0.5684965380811078, "grad_norm": 0.7813581360985322, "learning_rate": 4.898524213351015e-06, "loss": 0.4176, "step": 2299 }, { "epoch": 0.5687438180019783, "grad_norm": 0.822524089394135, "learning_rate": 4.898432529906827e-06, "loss": 0.465, "step": 2300 }, { "epoch": 0.5689910979228486, "grad_norm": 0.9011958348262217, "learning_rate": 4.8983408059220935e-06, "loss": 0.4204, "step": 2301 }, { "epoch": 0.5692383778437191, "grad_norm": 0.8306695477074517, "learning_rate": 4.898249041398363e-06, "loss": 0.4838, "step": 2302 }, { "epoch": 0.5694856577645895, "grad_norm": 0.8522958342818597, "learning_rate": 4.898157236337189e-06, "loss": 0.4461, "step": 2303 }, { "epoch": 0.56973293768546, "grad_norm": 0.8436724215518849, "learning_rate": 4.898065390740121e-06, "loss": 0.4555, "step": 2304 }, { "epoch": 0.5699802176063303, "grad_norm": 0.7847752288452432, "learning_rate": 4.8979735046087126e-06, "loss": 0.4805, "step": 2305 }, { "epoch": 0.5702274975272008, "grad_norm": 0.8250677875140624, "learning_rate": 4.897881577944517e-06, "loss": 0.4447, "step": 2306 }, { "epoch": 0.5704747774480712, "grad_norm": 0.8081462932632263, "learning_rate": 4.897789610749088e-06, "loss": 0.4424, "step": 2307 }, { "epoch": 0.5707220573689417, "grad_norm": 0.8776056253920698, "learning_rate": 4.89769760302398e-06, "loss": 0.4045, "step": 2308 }, { "epoch": 0.570969337289812, "grad_norm": 0.8319100971376697, "learning_rate": 4.897605554770747e-06, "loss": 0.4583, "step": 2309 }, { "epoch": 0.5712166172106825, "grad_norm": 0.8083704554589096, "learning_rate": 4.897513465990947e-06, "loss": 0.4305, "step": 2310 }, { "epoch": 0.5714638971315529, "grad_norm": 0.8477139545852911, "learning_rate": 4.897421336686136e-06, "loss": 0.4139, "step": 2311 }, { "epoch": 0.5717111770524234, "grad_norm": 0.7994312192421875, "learning_rate": 4.8973291668578705e-06, "loss": 0.4224, "step": 2312 }, { "epoch": 0.5719584569732937, "grad_norm": 0.834166390095418, "learning_rate": 4.897236956507708e-06, "loss": 0.441, "step": 2313 }, { "epoch": 0.5722057368941642, "grad_norm": 0.791463316947777, "learning_rate": 4.897144705637209e-06, "loss": 0.4207, "step": 2314 }, { "epoch": 0.5724530168150346, "grad_norm": 0.8337894517778417, "learning_rate": 4.897052414247931e-06, "loss": 0.432, "step": 2315 }, { "epoch": 0.5727002967359051, "grad_norm": 0.8463594355939713, "learning_rate": 4.8969600823414344e-06, "loss": 0.4503, "step": 2316 }, { "epoch": 0.5729475766567754, "grad_norm": 0.8018986013541408, "learning_rate": 4.896867709919281e-06, "loss": 0.4249, "step": 2317 }, { "epoch": 0.5731948565776459, "grad_norm": 0.7905890182712009, "learning_rate": 4.896775296983031e-06, "loss": 0.4289, "step": 2318 }, { "epoch": 0.5734421364985163, "grad_norm": 0.8617968858880003, "learning_rate": 4.896682843534247e-06, "loss": 0.4388, "step": 2319 }, { "epoch": 0.5736894164193868, "grad_norm": 0.7938863568430938, "learning_rate": 4.896590349574492e-06, "loss": 0.4278, "step": 2320 }, { "epoch": 0.5739366963402571, "grad_norm": 0.8029197121608271, "learning_rate": 4.8964978151053275e-06, "loss": 0.4108, "step": 2321 }, { "epoch": 0.5741839762611276, "grad_norm": 0.8299560729574813, "learning_rate": 4.89640524012832e-06, "loss": 0.4709, "step": 2322 }, { "epoch": 0.574431256181998, "grad_norm": 0.796430638715101, "learning_rate": 4.8963126246450335e-06, "loss": 0.4285, "step": 2323 }, { "epoch": 0.5746785361028685, "grad_norm": 0.8555593774864729, "learning_rate": 4.8962199686570335e-06, "loss": 0.4079, "step": 2324 }, { "epoch": 0.5749258160237388, "grad_norm": 0.803728746348809, "learning_rate": 4.896127272165886e-06, "loss": 0.4427, "step": 2325 }, { "epoch": 0.5751730959446093, "grad_norm": 0.8023320061091146, "learning_rate": 4.896034535173158e-06, "loss": 0.426, "step": 2326 }, { "epoch": 0.5754203758654797, "grad_norm": 0.7665915360838667, "learning_rate": 4.895941757680415e-06, "loss": 0.4767, "step": 2327 }, { "epoch": 0.5756676557863502, "grad_norm": 0.8435315407885349, "learning_rate": 4.8958489396892286e-06, "loss": 0.4291, "step": 2328 }, { "epoch": 0.5759149357072205, "grad_norm": 0.804335725986931, "learning_rate": 4.895756081201166e-06, "loss": 0.4368, "step": 2329 }, { "epoch": 0.576162215628091, "grad_norm": 0.8100890310943418, "learning_rate": 4.895663182217797e-06, "loss": 0.4465, "step": 2330 }, { "epoch": 0.5764094955489614, "grad_norm": 0.8408293669759955, "learning_rate": 4.895570242740692e-06, "loss": 0.4538, "step": 2331 }, { "epoch": 0.5766567754698319, "grad_norm": 0.8239576081794222, "learning_rate": 4.895477262771422e-06, "loss": 0.4191, "step": 2332 }, { "epoch": 0.5769040553907022, "grad_norm": 0.8557709570378603, "learning_rate": 4.895384242311557e-06, "loss": 0.4175, "step": 2333 }, { "epoch": 0.5771513353115727, "grad_norm": 0.8297806420249763, "learning_rate": 4.895291181362673e-06, "loss": 0.4594, "step": 2334 }, { "epoch": 0.5773986152324432, "grad_norm": 0.8394220393681534, "learning_rate": 4.895198079926339e-06, "loss": 0.433, "step": 2335 }, { "epoch": 0.5776458951533135, "grad_norm": 0.8445647987604912, "learning_rate": 4.895104938004131e-06, "loss": 0.4482, "step": 2336 }, { "epoch": 0.577893175074184, "grad_norm": 0.764743578386273, "learning_rate": 4.895011755597622e-06, "loss": 0.4393, "step": 2337 }, { "epoch": 0.5781404549950544, "grad_norm": 0.7726672754820177, "learning_rate": 4.894918532708388e-06, "loss": 0.4574, "step": 2338 }, { "epoch": 0.5783877349159249, "grad_norm": 0.8421556128246851, "learning_rate": 4.894825269338005e-06, "loss": 0.4334, "step": 2339 }, { "epoch": 0.5786350148367952, "grad_norm": 0.8046307352593715, "learning_rate": 4.894731965488049e-06, "loss": 0.46, "step": 2340 }, { "epoch": 0.5788822947576657, "grad_norm": 0.8135881552965002, "learning_rate": 4.894638621160097e-06, "loss": 0.4574, "step": 2341 }, { "epoch": 0.5791295746785361, "grad_norm": 0.8889421866638874, "learning_rate": 4.894545236355728e-06, "loss": 0.4498, "step": 2342 }, { "epoch": 0.5793768545994066, "grad_norm": 0.801204639738931, "learning_rate": 4.894451811076518e-06, "loss": 0.4134, "step": 2343 }, { "epoch": 0.579624134520277, "grad_norm": 0.7606419385924601, "learning_rate": 4.894358345324047e-06, "loss": 0.4414, "step": 2344 }, { "epoch": 0.5798714144411474, "grad_norm": 0.7991068318546964, "learning_rate": 4.894264839099897e-06, "loss": 0.4416, "step": 2345 }, { "epoch": 0.5801186943620178, "grad_norm": 0.7771589957974486, "learning_rate": 4.894171292405646e-06, "loss": 0.4342, "step": 2346 }, { "epoch": 0.5803659742828883, "grad_norm": 0.7968997920942941, "learning_rate": 4.894077705242877e-06, "loss": 0.4354, "step": 2347 }, { "epoch": 0.5806132542037586, "grad_norm": 0.8548778972922664, "learning_rate": 4.8939840776131695e-06, "loss": 0.4334, "step": 2348 }, { "epoch": 0.5808605341246291, "grad_norm": 0.7816682107099382, "learning_rate": 4.893890409518108e-06, "loss": 0.4431, "step": 2349 }, { "epoch": 0.5811078140454995, "grad_norm": 0.799351712968776, "learning_rate": 4.893796700959277e-06, "loss": 0.4222, "step": 2350 }, { "epoch": 0.58135509396637, "grad_norm": 0.8482320335522718, "learning_rate": 4.893702951938257e-06, "loss": 0.4419, "step": 2351 }, { "epoch": 0.5816023738872403, "grad_norm": 0.8284555077453484, "learning_rate": 4.8936091624566355e-06, "loss": 0.4476, "step": 2352 }, { "epoch": 0.5818496538081108, "grad_norm": 0.8275396827906788, "learning_rate": 4.893515332515996e-06, "loss": 0.4343, "step": 2353 }, { "epoch": 0.5820969337289812, "grad_norm": 0.7981461681609753, "learning_rate": 4.893421462117926e-06, "loss": 0.4716, "step": 2354 }, { "epoch": 0.5823442136498517, "grad_norm": 0.8157820886340437, "learning_rate": 4.893327551264011e-06, "loss": 0.4425, "step": 2355 }, { "epoch": 0.582591493570722, "grad_norm": 0.8458746953030688, "learning_rate": 4.893233599955839e-06, "loss": 0.4378, "step": 2356 }, { "epoch": 0.5828387734915925, "grad_norm": 0.7723732034558244, "learning_rate": 4.8931396081949975e-06, "loss": 0.4288, "step": 2357 }, { "epoch": 0.5830860534124629, "grad_norm": 0.8096170301395595, "learning_rate": 4.893045575983076e-06, "loss": 0.4779, "step": 2358 }, { "epoch": 0.5833333333333334, "grad_norm": 0.9007402286717402, "learning_rate": 4.892951503321664e-06, "loss": 0.437, "step": 2359 }, { "epoch": 0.5835806132542037, "grad_norm": 0.8482481245802722, "learning_rate": 4.89285739021235e-06, "loss": 0.4514, "step": 2360 }, { "epoch": 0.5838278931750742, "grad_norm": 0.8062299158701528, "learning_rate": 4.8927632366567275e-06, "loss": 0.4478, "step": 2361 }, { "epoch": 0.5840751730959446, "grad_norm": 0.8283318915862347, "learning_rate": 4.892669042656385e-06, "loss": 0.4219, "step": 2362 }, { "epoch": 0.5843224530168151, "grad_norm": 0.8426695019394065, "learning_rate": 4.892574808212917e-06, "loss": 0.4448, "step": 2363 }, { "epoch": 0.5845697329376854, "grad_norm": 0.8086019012718995, "learning_rate": 4.892480533327915e-06, "loss": 0.4214, "step": 2364 }, { "epoch": 0.5848170128585559, "grad_norm": 0.7901079758203985, "learning_rate": 4.892386218002973e-06, "loss": 0.4535, "step": 2365 }, { "epoch": 0.5850642927794263, "grad_norm": 0.8167317156898983, "learning_rate": 4.892291862239684e-06, "loss": 0.4087, "step": 2366 }, { "epoch": 0.5853115727002968, "grad_norm": 0.8526609048941977, "learning_rate": 4.892197466039646e-06, "loss": 0.4308, "step": 2367 }, { "epoch": 0.5855588526211671, "grad_norm": 0.8253932828103235, "learning_rate": 4.8921030294044515e-06, "loss": 0.403, "step": 2368 }, { "epoch": 0.5858061325420376, "grad_norm": 0.8005452158190326, "learning_rate": 4.892008552335697e-06, "loss": 0.4069, "step": 2369 }, { "epoch": 0.586053412462908, "grad_norm": 0.7694913501113785, "learning_rate": 4.891914034834982e-06, "loss": 0.4201, "step": 2370 }, { "epoch": 0.5863006923837785, "grad_norm": 0.8099442709125199, "learning_rate": 4.891819476903902e-06, "loss": 0.4575, "step": 2371 }, { "epoch": 0.5865479723046488, "grad_norm": 0.8306582160233137, "learning_rate": 4.891724878544054e-06, "loss": 0.4259, "step": 2372 }, { "epoch": 0.5867952522255193, "grad_norm": 0.7982546667998933, "learning_rate": 4.891630239757041e-06, "loss": 0.4317, "step": 2373 }, { "epoch": 0.5870425321463897, "grad_norm": 0.8258893090255622, "learning_rate": 4.891535560544459e-06, "loss": 0.4472, "step": 2374 }, { "epoch": 0.5872898120672602, "grad_norm": 0.7941013746518771, "learning_rate": 4.89144084090791e-06, "loss": 0.422, "step": 2375 }, { "epoch": 0.5875370919881305, "grad_norm": 0.8618283605659312, "learning_rate": 4.891346080848995e-06, "loss": 0.4099, "step": 2376 }, { "epoch": 0.587784371909001, "grad_norm": 0.8359174158930102, "learning_rate": 4.891251280369316e-06, "loss": 0.4186, "step": 2377 }, { "epoch": 0.5880316518298714, "grad_norm": 0.8476514457524892, "learning_rate": 4.891156439470473e-06, "loss": 0.4319, "step": 2378 }, { "epoch": 0.5882789317507419, "grad_norm": 0.8448744222305574, "learning_rate": 4.891061558154073e-06, "loss": 0.4568, "step": 2379 }, { "epoch": 0.5885262116716122, "grad_norm": 0.843451311072349, "learning_rate": 4.890966636421717e-06, "loss": 0.4319, "step": 2380 }, { "epoch": 0.5887734915924827, "grad_norm": 0.8339217168520412, "learning_rate": 4.890871674275011e-06, "loss": 0.4351, "step": 2381 }, { "epoch": 0.5890207715133531, "grad_norm": 0.833107783986654, "learning_rate": 4.890776671715558e-06, "loss": 0.4035, "step": 2382 }, { "epoch": 0.5892680514342236, "grad_norm": 0.8137168641583187, "learning_rate": 4.890681628744966e-06, "loss": 0.4259, "step": 2383 }, { "epoch": 0.5895153313550939, "grad_norm": 0.805892766745161, "learning_rate": 4.890586545364841e-06, "loss": 0.4333, "step": 2384 }, { "epoch": 0.5897626112759644, "grad_norm": 0.8375188065735549, "learning_rate": 4.890491421576788e-06, "loss": 0.4335, "step": 2385 }, { "epoch": 0.5900098911968348, "grad_norm": 0.8230336151396773, "learning_rate": 4.8903962573824185e-06, "loss": 0.4406, "step": 2386 }, { "epoch": 0.5902571711177053, "grad_norm": 0.8155143896059736, "learning_rate": 4.890301052783339e-06, "loss": 0.4545, "step": 2387 }, { "epoch": 0.5905044510385756, "grad_norm": 0.7717233653578202, "learning_rate": 4.890205807781159e-06, "loss": 0.4469, "step": 2388 }, { "epoch": 0.5907517309594461, "grad_norm": 0.8009676088638478, "learning_rate": 4.8901105223774885e-06, "loss": 0.4335, "step": 2389 }, { "epoch": 0.5909990108803165, "grad_norm": 0.8379245612231933, "learning_rate": 4.890015196573938e-06, "loss": 0.4205, "step": 2390 }, { "epoch": 0.591246290801187, "grad_norm": 0.8086859042796692, "learning_rate": 4.889919830372118e-06, "loss": 0.4363, "step": 2391 }, { "epoch": 0.5914935707220573, "grad_norm": 0.8130939334847568, "learning_rate": 4.889824423773642e-06, "loss": 0.4109, "step": 2392 }, { "epoch": 0.5917408506429278, "grad_norm": 0.8476467595351704, "learning_rate": 4.8897289767801225e-06, "loss": 0.4031, "step": 2393 }, { "epoch": 0.5919881305637982, "grad_norm": 0.8461748435957103, "learning_rate": 4.889633489393173e-06, "loss": 0.4091, "step": 2394 }, { "epoch": 0.5922354104846687, "grad_norm": 0.8376353137944713, "learning_rate": 4.889537961614405e-06, "loss": 0.4146, "step": 2395 }, { "epoch": 0.592482690405539, "grad_norm": 0.8239443055595109, "learning_rate": 4.889442393445435e-06, "loss": 0.4447, "step": 2396 }, { "epoch": 0.5927299703264095, "grad_norm": 0.849161150005281, "learning_rate": 4.88934678488788e-06, "loss": 0.4547, "step": 2397 }, { "epoch": 0.5929772502472799, "grad_norm": 0.790329735581731, "learning_rate": 4.889251135943353e-06, "loss": 0.4401, "step": 2398 }, { "epoch": 0.5932245301681504, "grad_norm": 0.8193562709687785, "learning_rate": 4.889155446613473e-06, "loss": 0.4093, "step": 2399 }, { "epoch": 0.5934718100890207, "grad_norm": 0.8170696936647929, "learning_rate": 4.889059716899857e-06, "loss": 0.4233, "step": 2400 }, { "epoch": 0.5937190900098912, "grad_norm": 0.8168596273728271, "learning_rate": 4.888963946804122e-06, "loss": 0.3982, "step": 2401 }, { "epoch": 0.5939663699307616, "grad_norm": 0.8114324383935596, "learning_rate": 4.888868136327888e-06, "loss": 0.4336, "step": 2402 }, { "epoch": 0.594213649851632, "grad_norm": 0.814161224922398, "learning_rate": 4.888772285472773e-06, "loss": 0.4445, "step": 2403 }, { "epoch": 0.5944609297725024, "grad_norm": 0.8203037927462447, "learning_rate": 4.888676394240399e-06, "loss": 0.4143, "step": 2404 }, { "epoch": 0.5947082096933729, "grad_norm": 0.7833477571195605, "learning_rate": 4.888580462632386e-06, "loss": 0.4369, "step": 2405 }, { "epoch": 0.5949554896142433, "grad_norm": 0.8367330917074817, "learning_rate": 4.888484490650355e-06, "loss": 0.423, "step": 2406 }, { "epoch": 0.5952027695351138, "grad_norm": 0.7996837665018429, "learning_rate": 4.888388478295929e-06, "loss": 0.4215, "step": 2407 }, { "epoch": 0.5954500494559841, "grad_norm": 0.8099673447111903, "learning_rate": 4.888292425570731e-06, "loss": 0.4565, "step": 2408 }, { "epoch": 0.5956973293768546, "grad_norm": 0.8315706071687368, "learning_rate": 4.888196332476385e-06, "loss": 0.4251, "step": 2409 }, { "epoch": 0.595944609297725, "grad_norm": 0.8641851324877825, "learning_rate": 4.8881001990145125e-06, "loss": 0.431, "step": 2410 }, { "epoch": 0.5961918892185954, "grad_norm": 0.8151888626416508, "learning_rate": 4.888004025186742e-06, "loss": 0.4106, "step": 2411 }, { "epoch": 0.5964391691394659, "grad_norm": 0.8143057678231387, "learning_rate": 4.887907810994697e-06, "loss": 0.434, "step": 2412 }, { "epoch": 0.5966864490603363, "grad_norm": 0.8169851007290792, "learning_rate": 4.887811556440004e-06, "loss": 0.4379, "step": 2413 }, { "epoch": 0.5969337289812068, "grad_norm": 0.8156004947234917, "learning_rate": 4.887715261524291e-06, "loss": 0.4556, "step": 2414 }, { "epoch": 0.5971810089020771, "grad_norm": 0.8226055472128943, "learning_rate": 4.887618926249185e-06, "loss": 0.4323, "step": 2415 }, { "epoch": 0.5974282888229476, "grad_norm": 0.8130042612264833, "learning_rate": 4.887522550616314e-06, "loss": 0.4472, "step": 2416 }, { "epoch": 0.597675568743818, "grad_norm": 0.7757441744985806, "learning_rate": 4.887426134627308e-06, "loss": 0.4422, "step": 2417 }, { "epoch": 0.5979228486646885, "grad_norm": 0.8293693320678572, "learning_rate": 4.887329678283795e-06, "loss": 0.4411, "step": 2418 }, { "epoch": 0.5981701285855588, "grad_norm": 0.7909863669793495, "learning_rate": 4.887233181587407e-06, "loss": 0.4363, "step": 2419 }, { "epoch": 0.5984174085064293, "grad_norm": 0.8034735828656278, "learning_rate": 4.887136644539775e-06, "loss": 0.4383, "step": 2420 }, { "epoch": 0.5986646884272997, "grad_norm": 0.8340913456941164, "learning_rate": 4.887040067142529e-06, "loss": 0.4121, "step": 2421 }, { "epoch": 0.5989119683481702, "grad_norm": 0.7963542690809786, "learning_rate": 4.886943449397304e-06, "loss": 0.4293, "step": 2422 }, { "epoch": 0.5991592482690405, "grad_norm": 0.8308598116178003, "learning_rate": 4.886846791305732e-06, "loss": 0.4524, "step": 2423 }, { "epoch": 0.599406528189911, "grad_norm": 0.8014680489986464, "learning_rate": 4.886750092869446e-06, "loss": 0.4267, "step": 2424 }, { "epoch": 0.5996538081107814, "grad_norm": 0.7930535832840587, "learning_rate": 4.88665335409008e-06, "loss": 0.4369, "step": 2425 }, { "epoch": 0.5999010880316519, "grad_norm": 0.8039444096609584, "learning_rate": 4.886556574969273e-06, "loss": 0.4326, "step": 2426 }, { "epoch": 0.6001483679525222, "grad_norm": 0.8152125898259113, "learning_rate": 4.886459755508657e-06, "loss": 0.395, "step": 2427 }, { "epoch": 0.6003956478733927, "grad_norm": 0.7958138668006676, "learning_rate": 4.88636289570987e-06, "loss": 0.4426, "step": 2428 }, { "epoch": 0.6006429277942631, "grad_norm": 0.8406325006773193, "learning_rate": 4.886265995574548e-06, "loss": 0.4242, "step": 2429 }, { "epoch": 0.6008902077151336, "grad_norm": 0.7785418865541055, "learning_rate": 4.886169055104331e-06, "loss": 0.4458, "step": 2430 }, { "epoch": 0.6011374876360039, "grad_norm": 0.819082154131098, "learning_rate": 4.886072074300855e-06, "loss": 0.4547, "step": 2431 }, { "epoch": 0.6013847675568744, "grad_norm": 0.8524435533885798, "learning_rate": 4.885975053165762e-06, "loss": 0.4217, "step": 2432 }, { "epoch": 0.6016320474777448, "grad_norm": 0.7912817382969713, "learning_rate": 4.88587799170069e-06, "loss": 0.4079, "step": 2433 }, { "epoch": 0.6018793273986153, "grad_norm": 0.7786233064046465, "learning_rate": 4.88578088990728e-06, "loss": 0.4147, "step": 2434 }, { "epoch": 0.6021266073194856, "grad_norm": 0.7967905199446587, "learning_rate": 4.885683747787174e-06, "loss": 0.4346, "step": 2435 }, { "epoch": 0.6023738872403561, "grad_norm": 0.8044827879750851, "learning_rate": 4.885586565342014e-06, "loss": 0.4604, "step": 2436 }, { "epoch": 0.6026211671612265, "grad_norm": 0.8160589521379803, "learning_rate": 4.885489342573441e-06, "loss": 0.4083, "step": 2437 }, { "epoch": 0.602868447082097, "grad_norm": 0.8213002853671916, "learning_rate": 4.885392079483101e-06, "loss": 0.4524, "step": 2438 }, { "epoch": 0.6031157270029673, "grad_norm": 0.8522852301398287, "learning_rate": 4.885294776072636e-06, "loss": 0.4345, "step": 2439 }, { "epoch": 0.6033630069238378, "grad_norm": 0.859578041404694, "learning_rate": 4.88519743234369e-06, "loss": 0.437, "step": 2440 }, { "epoch": 0.6036102868447082, "grad_norm": 0.9126217387901533, "learning_rate": 4.885100048297911e-06, "loss": 0.4386, "step": 2441 }, { "epoch": 0.6038575667655787, "grad_norm": 0.8240466430659698, "learning_rate": 4.8850026239369435e-06, "loss": 0.4156, "step": 2442 }, { "epoch": 0.604104846686449, "grad_norm": 0.8498653504755754, "learning_rate": 4.884905159262435e-06, "loss": 0.4311, "step": 2443 }, { "epoch": 0.6043521266073195, "grad_norm": 0.8118568331305821, "learning_rate": 4.884807654276031e-06, "loss": 0.4453, "step": 2444 }, { "epoch": 0.6045994065281899, "grad_norm": 0.8184288967709394, "learning_rate": 4.884710108979383e-06, "loss": 0.4434, "step": 2445 }, { "epoch": 0.6048466864490604, "grad_norm": 0.8282720476073526, "learning_rate": 4.884612523374137e-06, "loss": 0.466, "step": 2446 }, { "epoch": 0.6050939663699307, "grad_norm": 0.8750263587818898, "learning_rate": 4.8845148974619435e-06, "loss": 0.4173, "step": 2447 }, { "epoch": 0.6053412462908012, "grad_norm": 0.8459312708646205, "learning_rate": 4.884417231244452e-06, "loss": 0.4133, "step": 2448 }, { "epoch": 0.6055885262116716, "grad_norm": 0.8712529102031519, "learning_rate": 4.8843195247233145e-06, "loss": 0.43, "step": 2449 }, { "epoch": 0.6058358061325421, "grad_norm": 0.8252571385372829, "learning_rate": 4.884221777900182e-06, "loss": 0.4319, "step": 2450 }, { "epoch": 0.6060830860534124, "grad_norm": 0.7656825993441855, "learning_rate": 4.884123990776706e-06, "loss": 0.4252, "step": 2451 }, { "epoch": 0.6063303659742829, "grad_norm": 0.8633963817614291, "learning_rate": 4.88402616335454e-06, "loss": 0.4558, "step": 2452 }, { "epoch": 0.6065776458951533, "grad_norm": 0.8335583680412979, "learning_rate": 4.883928295635338e-06, "loss": 0.4372, "step": 2453 }, { "epoch": 0.6068249258160238, "grad_norm": 0.8826702767603091, "learning_rate": 4.883830387620754e-06, "loss": 0.426, "step": 2454 }, { "epoch": 0.6070722057368941, "grad_norm": 0.8559725178211727, "learning_rate": 4.8837324393124425e-06, "loss": 0.4267, "step": 2455 }, { "epoch": 0.6073194856577646, "grad_norm": 0.7989825274835176, "learning_rate": 4.8836344507120595e-06, "loss": 0.4341, "step": 2456 }, { "epoch": 0.607566765578635, "grad_norm": 0.8038816796422794, "learning_rate": 4.883536421821261e-06, "loss": 0.4224, "step": 2457 }, { "epoch": 0.6078140454995055, "grad_norm": 0.8681157087578908, "learning_rate": 4.883438352641704e-06, "loss": 0.4334, "step": 2458 }, { "epoch": 0.6080613254203758, "grad_norm": 0.799734995402737, "learning_rate": 4.883340243175047e-06, "loss": 0.4226, "step": 2459 }, { "epoch": 0.6083086053412463, "grad_norm": 0.8547585200505449, "learning_rate": 4.883242093422947e-06, "loss": 0.461, "step": 2460 }, { "epoch": 0.6085558852621167, "grad_norm": 0.8419176540033854, "learning_rate": 4.883143903387063e-06, "loss": 0.4581, "step": 2461 }, { "epoch": 0.6088031651829872, "grad_norm": 0.8190904513189141, "learning_rate": 4.8830456730690565e-06, "loss": 0.4149, "step": 2462 }, { "epoch": 0.6090504451038575, "grad_norm": 0.8075726467540836, "learning_rate": 4.882947402470586e-06, "loss": 0.4328, "step": 2463 }, { "epoch": 0.609297725024728, "grad_norm": 0.8180651312891082, "learning_rate": 4.882849091593314e-06, "loss": 0.4176, "step": 2464 }, { "epoch": 0.6095450049455984, "grad_norm": 0.8270321732282798, "learning_rate": 4.882750740438902e-06, "loss": 0.4385, "step": 2465 }, { "epoch": 0.6097922848664689, "grad_norm": 0.813487741003457, "learning_rate": 4.8826523490090104e-06, "loss": 0.4152, "step": 2466 }, { "epoch": 0.6100395647873392, "grad_norm": 0.8395319099633974, "learning_rate": 4.882553917305305e-06, "loss": 0.3927, "step": 2467 }, { "epoch": 0.6102868447082097, "grad_norm": 0.8595231311190774, "learning_rate": 4.882455445329448e-06, "loss": 0.4379, "step": 2468 }, { "epoch": 0.6105341246290801, "grad_norm": 0.8683672743262101, "learning_rate": 4.8823569330831045e-06, "loss": 0.4235, "step": 2469 }, { "epoch": 0.6107814045499506, "grad_norm": 0.8659235169816588, "learning_rate": 4.882258380567939e-06, "loss": 0.3991, "step": 2470 }, { "epoch": 0.6110286844708209, "grad_norm": 0.8208473928771434, "learning_rate": 4.882159787785618e-06, "loss": 0.4522, "step": 2471 }, { "epoch": 0.6112759643916914, "grad_norm": 0.8733025812767434, "learning_rate": 4.882061154737809e-06, "loss": 0.4098, "step": 2472 }, { "epoch": 0.6115232443125618, "grad_norm": 0.8931585574147225, "learning_rate": 4.881962481426176e-06, "loss": 0.4389, "step": 2473 }, { "epoch": 0.6117705242334323, "grad_norm": 0.834574300801114, "learning_rate": 4.88186376785239e-06, "loss": 0.4195, "step": 2474 }, { "epoch": 0.6120178041543026, "grad_norm": 0.8370196298648357, "learning_rate": 4.881765014018118e-06, "loss": 0.4347, "step": 2475 }, { "epoch": 0.6122650840751731, "grad_norm": 0.8634205008935408, "learning_rate": 4.88166621992503e-06, "loss": 0.4259, "step": 2476 }, { "epoch": 0.6125123639960435, "grad_norm": 0.8360755499657381, "learning_rate": 4.881567385574795e-06, "loss": 0.4374, "step": 2477 }, { "epoch": 0.612759643916914, "grad_norm": 0.8426066875433872, "learning_rate": 4.8814685109690846e-06, "loss": 0.4193, "step": 2478 }, { "epoch": 0.6130069238377843, "grad_norm": 0.87903730703909, "learning_rate": 4.8813695961095694e-06, "loss": 0.4389, "step": 2479 }, { "epoch": 0.6132542037586548, "grad_norm": 0.8943708464365521, "learning_rate": 4.881270640997921e-06, "loss": 0.4317, "step": 2480 }, { "epoch": 0.6135014836795252, "grad_norm": 0.8261054283912324, "learning_rate": 4.881171645635814e-06, "loss": 0.4407, "step": 2481 }, { "epoch": 0.6137487636003957, "grad_norm": 0.8064405085915288, "learning_rate": 4.88107261002492e-06, "loss": 0.4425, "step": 2482 }, { "epoch": 0.613996043521266, "grad_norm": 0.784802011195429, "learning_rate": 4.880973534166912e-06, "loss": 0.4488, "step": 2483 }, { "epoch": 0.6142433234421365, "grad_norm": 0.8176221625872651, "learning_rate": 4.880874418063467e-06, "loss": 0.4129, "step": 2484 }, { "epoch": 0.6144906033630069, "grad_norm": 0.8343618719243155, "learning_rate": 4.880775261716259e-06, "loss": 0.3911, "step": 2485 }, { "epoch": 0.6147378832838774, "grad_norm": 0.8644375812951496, "learning_rate": 4.880676065126965e-06, "loss": 0.4122, "step": 2486 }, { "epoch": 0.6149851632047477, "grad_norm": 0.7800771831644979, "learning_rate": 4.88057682829726e-06, "loss": 0.4497, "step": 2487 }, { "epoch": 0.6152324431256182, "grad_norm": 0.850924422669018, "learning_rate": 4.880477551228823e-06, "loss": 0.4184, "step": 2488 }, { "epoch": 0.6154797230464887, "grad_norm": 0.8561928941850189, "learning_rate": 4.880378233923332e-06, "loss": 0.4311, "step": 2489 }, { "epoch": 0.615727002967359, "grad_norm": 0.8350502975323144, "learning_rate": 4.880278876382465e-06, "loss": 0.4259, "step": 2490 }, { "epoch": 0.6159742828882295, "grad_norm": 0.7847878516103233, "learning_rate": 4.8801794786079e-06, "loss": 0.4432, "step": 2491 }, { "epoch": 0.6162215628090999, "grad_norm": 0.7967723156172767, "learning_rate": 4.880080040601322e-06, "loss": 0.4076, "step": 2492 }, { "epoch": 0.6164688427299704, "grad_norm": 0.7983503377091417, "learning_rate": 4.879980562364406e-06, "loss": 0.4051, "step": 2493 }, { "epoch": 0.6167161226508407, "grad_norm": 0.8022288887944488, "learning_rate": 4.879881043898838e-06, "loss": 0.4443, "step": 2494 }, { "epoch": 0.6169634025717112, "grad_norm": 0.8080184803379807, "learning_rate": 4.8797814852062965e-06, "loss": 0.4544, "step": 2495 }, { "epoch": 0.6172106824925816, "grad_norm": 0.8432338832511567, "learning_rate": 4.879681886288467e-06, "loss": 0.4269, "step": 2496 }, { "epoch": 0.6174579624134521, "grad_norm": 0.8274845517353767, "learning_rate": 4.8795822471470326e-06, "loss": 0.4548, "step": 2497 }, { "epoch": 0.6177052423343224, "grad_norm": 0.8422453088573836, "learning_rate": 4.879482567783675e-06, "loss": 0.4357, "step": 2498 }, { "epoch": 0.6179525222551929, "grad_norm": 0.8313181938884414, "learning_rate": 4.8793828482000834e-06, "loss": 0.4106, "step": 2499 }, { "epoch": 0.6181998021760633, "grad_norm": 0.7956229961490899, "learning_rate": 4.87928308839794e-06, "loss": 0.44, "step": 2500 }, { "epoch": 0.6184470820969338, "grad_norm": 0.8021572150909114, "learning_rate": 4.879183288378932e-06, "loss": 0.4505, "step": 2501 }, { "epoch": 0.6186943620178041, "grad_norm": 0.870409141916946, "learning_rate": 4.879083448144747e-06, "loss": 0.4065, "step": 2502 }, { "epoch": 0.6189416419386746, "grad_norm": 0.8154769479555604, "learning_rate": 4.878983567697071e-06, "loss": 0.4026, "step": 2503 }, { "epoch": 0.619188921859545, "grad_norm": 0.7614555739597262, "learning_rate": 4.8788836470375935e-06, "loss": 0.433, "step": 2504 }, { "epoch": 0.6194362017804155, "grad_norm": 0.8216234222194428, "learning_rate": 4.878783686168004e-06, "loss": 0.4329, "step": 2505 }, { "epoch": 0.6196834817012858, "grad_norm": 0.847598350182186, "learning_rate": 4.878683685089991e-06, "loss": 0.4035, "step": 2506 }, { "epoch": 0.6199307616221563, "grad_norm": 0.8281932559908807, "learning_rate": 4.878583643805244e-06, "loss": 0.4334, "step": 2507 }, { "epoch": 0.6201780415430267, "grad_norm": 0.8013841803051386, "learning_rate": 4.878483562315456e-06, "loss": 0.4092, "step": 2508 }, { "epoch": 0.6204253214638972, "grad_norm": 0.7827821396675764, "learning_rate": 4.878383440622318e-06, "loss": 0.4216, "step": 2509 }, { "epoch": 0.6206726013847675, "grad_norm": 0.811958550694751, "learning_rate": 4.878283278727522e-06, "loss": 0.4272, "step": 2510 }, { "epoch": 0.620919881305638, "grad_norm": 0.8295542833041976, "learning_rate": 4.878183076632761e-06, "loss": 0.4248, "step": 2511 }, { "epoch": 0.6211671612265084, "grad_norm": 0.7848392977501718, "learning_rate": 4.878082834339729e-06, "loss": 0.4474, "step": 2512 }, { "epoch": 0.6214144411473789, "grad_norm": 0.7896984047945792, "learning_rate": 4.87798255185012e-06, "loss": 0.4375, "step": 2513 }, { "epoch": 0.6216617210682492, "grad_norm": 0.786506360369637, "learning_rate": 4.87788222916563e-06, "loss": 0.4779, "step": 2514 }, { "epoch": 0.6219090009891197, "grad_norm": 0.7869355956930445, "learning_rate": 4.877781866287953e-06, "loss": 0.4449, "step": 2515 }, { "epoch": 0.6221562809099901, "grad_norm": 0.7693278810302654, "learning_rate": 4.877681463218787e-06, "loss": 0.4643, "step": 2516 }, { "epoch": 0.6224035608308606, "grad_norm": 0.8335221592746881, "learning_rate": 4.877581019959829e-06, "loss": 0.4018, "step": 2517 }, { "epoch": 0.6226508407517309, "grad_norm": 0.8045163991097942, "learning_rate": 4.877480536512777e-06, "loss": 0.4178, "step": 2518 }, { "epoch": 0.6228981206726014, "grad_norm": 0.7830727482060992, "learning_rate": 4.877380012879328e-06, "loss": 0.448, "step": 2519 }, { "epoch": 0.6231454005934718, "grad_norm": 0.7963686363614251, "learning_rate": 4.877279449061182e-06, "loss": 0.4804, "step": 2520 }, { "epoch": 0.6233926805143423, "grad_norm": 0.7780549531823838, "learning_rate": 4.8771788450600384e-06, "loss": 0.4472, "step": 2521 }, { "epoch": 0.6236399604352126, "grad_norm": 0.7994742514234306, "learning_rate": 4.877078200877599e-06, "loss": 0.4585, "step": 2522 }, { "epoch": 0.6238872403560831, "grad_norm": 0.7852106883065006, "learning_rate": 4.876977516515564e-06, "loss": 0.4613, "step": 2523 }, { "epoch": 0.6241345202769535, "grad_norm": 0.7876985072103233, "learning_rate": 4.876876791975635e-06, "loss": 0.4222, "step": 2524 }, { "epoch": 0.624381800197824, "grad_norm": 0.7671611530115617, "learning_rate": 4.876776027259516e-06, "loss": 0.451, "step": 2525 }, { "epoch": 0.6246290801186943, "grad_norm": 0.7946021739256844, "learning_rate": 4.876675222368907e-06, "loss": 0.446, "step": 2526 }, { "epoch": 0.6248763600395648, "grad_norm": 0.8459845780214671, "learning_rate": 4.876574377305516e-06, "loss": 0.4532, "step": 2527 }, { "epoch": 0.6251236399604352, "grad_norm": 0.8008984159682268, "learning_rate": 4.876473492071045e-06, "loss": 0.4374, "step": 2528 }, { "epoch": 0.6253709198813057, "grad_norm": 0.7682503991359931, "learning_rate": 4.876372566667199e-06, "loss": 0.4116, "step": 2529 }, { "epoch": 0.625618199802176, "grad_norm": 0.8258691494063493, "learning_rate": 4.876271601095686e-06, "loss": 0.42, "step": 2530 }, { "epoch": 0.6258654797230465, "grad_norm": 0.7927208100659665, "learning_rate": 4.876170595358211e-06, "loss": 0.434, "step": 2531 }, { "epoch": 0.6261127596439169, "grad_norm": 0.8137626732834563, "learning_rate": 4.8760695494564815e-06, "loss": 0.4102, "step": 2532 }, { "epoch": 0.6263600395647874, "grad_norm": 0.779955387753274, "learning_rate": 4.875968463392206e-06, "loss": 0.4557, "step": 2533 }, { "epoch": 0.6266073194856577, "grad_norm": 0.8225478650613147, "learning_rate": 4.875867337167093e-06, "loss": 0.4651, "step": 2534 }, { "epoch": 0.6268545994065282, "grad_norm": 0.7874547156708713, "learning_rate": 4.875766170782852e-06, "loss": 0.4218, "step": 2535 }, { "epoch": 0.6271018793273986, "grad_norm": 0.8484394458254143, "learning_rate": 4.875664964241191e-06, "loss": 0.4554, "step": 2536 }, { "epoch": 0.6273491592482691, "grad_norm": 0.8084521995284009, "learning_rate": 4.875563717543824e-06, "loss": 0.4319, "step": 2537 }, { "epoch": 0.6275964391691394, "grad_norm": 0.7910596133893639, "learning_rate": 4.87546243069246e-06, "loss": 0.4598, "step": 2538 }, { "epoch": 0.6278437190900099, "grad_norm": 0.796909735607753, "learning_rate": 4.875361103688812e-06, "loss": 0.4447, "step": 2539 }, { "epoch": 0.6280909990108803, "grad_norm": 0.7894299591362924, "learning_rate": 4.875259736534593e-06, "loss": 0.4143, "step": 2540 }, { "epoch": 0.6283382789317508, "grad_norm": 0.8215464045091878, "learning_rate": 4.8751583292315156e-06, "loss": 0.4337, "step": 2541 }, { "epoch": 0.6285855588526211, "grad_norm": 0.8097485457219312, "learning_rate": 4.875056881781294e-06, "loss": 0.423, "step": 2542 }, { "epoch": 0.6288328387734916, "grad_norm": 0.832716101729499, "learning_rate": 4.874955394185643e-06, "loss": 0.4168, "step": 2543 }, { "epoch": 0.629080118694362, "grad_norm": 0.778723112231423, "learning_rate": 4.874853866446279e-06, "loss": 0.4576, "step": 2544 }, { "epoch": 0.6293273986152325, "grad_norm": 0.8635182250262682, "learning_rate": 4.874752298564916e-06, "loss": 0.4457, "step": 2545 }, { "epoch": 0.6295746785361028, "grad_norm": 0.7917765753480467, "learning_rate": 4.874650690543273e-06, "loss": 0.4059, "step": 2546 }, { "epoch": 0.6298219584569733, "grad_norm": 0.8248753697058309, "learning_rate": 4.874549042383066e-06, "loss": 0.3954, "step": 2547 }, { "epoch": 0.6300692383778437, "grad_norm": 0.8478879231826125, "learning_rate": 4.8744473540860136e-06, "loss": 0.4136, "step": 2548 }, { "epoch": 0.6303165182987142, "grad_norm": 0.8212763456739502, "learning_rate": 4.874345625653836e-06, "loss": 0.4684, "step": 2549 }, { "epoch": 0.6305637982195845, "grad_norm": 0.757161231064773, "learning_rate": 4.874243857088251e-06, "loss": 0.4436, "step": 2550 }, { "epoch": 0.630811078140455, "grad_norm": 0.872176501099029, "learning_rate": 4.874142048390978e-06, "loss": 0.4187, "step": 2551 }, { "epoch": 0.6310583580613254, "grad_norm": 0.8151427391928595, "learning_rate": 4.87404019956374e-06, "loss": 0.4146, "step": 2552 }, { "epoch": 0.6313056379821959, "grad_norm": 0.8257722842621575, "learning_rate": 4.873938310608258e-06, "loss": 0.4088, "step": 2553 }, { "epoch": 0.6315529179030662, "grad_norm": 0.824778895269139, "learning_rate": 4.8738363815262535e-06, "loss": 0.4103, "step": 2554 }, { "epoch": 0.6318001978239367, "grad_norm": 0.8206243653518269, "learning_rate": 4.8737344123194495e-06, "loss": 0.443, "step": 2555 }, { "epoch": 0.6320474777448071, "grad_norm": 0.833260027971966, "learning_rate": 4.873632402989571e-06, "loss": 0.4403, "step": 2556 }, { "epoch": 0.6322947576656776, "grad_norm": 0.8392388817195789, "learning_rate": 4.87353035353834e-06, "loss": 0.4167, "step": 2557 }, { "epoch": 0.6325420375865479, "grad_norm": 0.8045955688235943, "learning_rate": 4.873428263967483e-06, "loss": 0.4345, "step": 2558 }, { "epoch": 0.6327893175074184, "grad_norm": 0.8073482847741306, "learning_rate": 4.873326134278725e-06, "loss": 0.4465, "step": 2559 }, { "epoch": 0.6330365974282888, "grad_norm": 0.8417721947718284, "learning_rate": 4.873223964473792e-06, "loss": 0.437, "step": 2560 }, { "epoch": 0.6332838773491593, "grad_norm": 0.835567113423516, "learning_rate": 4.873121754554413e-06, "loss": 0.4601, "step": 2561 }, { "epoch": 0.6335311572700296, "grad_norm": 0.7979539900749805, "learning_rate": 4.873019504522313e-06, "loss": 0.4195, "step": 2562 }, { "epoch": 0.6337784371909001, "grad_norm": 0.7912474454955465, "learning_rate": 4.872917214379221e-06, "loss": 0.4487, "step": 2563 }, { "epoch": 0.6340257171117705, "grad_norm": 0.8307214918215886, "learning_rate": 4.872814884126867e-06, "loss": 0.4173, "step": 2564 }, { "epoch": 0.634272997032641, "grad_norm": 0.8101781344145439, "learning_rate": 4.87271251376698e-06, "loss": 0.4501, "step": 2565 }, { "epoch": 0.6345202769535113, "grad_norm": 0.8204980030353368, "learning_rate": 4.872610103301289e-06, "loss": 0.4757, "step": 2566 }, { "epoch": 0.6347675568743818, "grad_norm": 0.8005074209067652, "learning_rate": 4.872507652731529e-06, "loss": 0.4078, "step": 2567 }, { "epoch": 0.6350148367952523, "grad_norm": 0.7997515924249456, "learning_rate": 4.872405162059428e-06, "loss": 0.45, "step": 2568 }, { "epoch": 0.6352621167161226, "grad_norm": 0.8137264810694476, "learning_rate": 4.87230263128672e-06, "loss": 0.4241, "step": 2569 }, { "epoch": 0.6355093966369931, "grad_norm": 0.824910213201859, "learning_rate": 4.872200060415136e-06, "loss": 0.4292, "step": 2570 }, { "epoch": 0.6357566765578635, "grad_norm": 0.8033275021348851, "learning_rate": 4.872097449446413e-06, "loss": 0.4323, "step": 2571 }, { "epoch": 0.636003956478734, "grad_norm": 0.7746126846914729, "learning_rate": 4.871994798382284e-06, "loss": 0.4206, "step": 2572 }, { "epoch": 0.6362512363996043, "grad_norm": 0.8549318207086319, "learning_rate": 4.871892107224483e-06, "loss": 0.4053, "step": 2573 }, { "epoch": 0.6364985163204748, "grad_norm": 0.7938662480945529, "learning_rate": 4.8717893759747475e-06, "loss": 0.4344, "step": 2574 }, { "epoch": 0.6367457962413452, "grad_norm": 0.7915283947882275, "learning_rate": 4.8716866046348135e-06, "loss": 0.4422, "step": 2575 }, { "epoch": 0.6369930761622157, "grad_norm": 0.7688073874395939, "learning_rate": 4.871583793206417e-06, "loss": 0.4506, "step": 2576 }, { "epoch": 0.637240356083086, "grad_norm": 0.7912905811315122, "learning_rate": 4.871480941691297e-06, "loss": 0.4166, "step": 2577 }, { "epoch": 0.6374876360039565, "grad_norm": 0.8120497456748971, "learning_rate": 4.871378050091191e-06, "loss": 0.4181, "step": 2578 }, { "epoch": 0.6377349159248269, "grad_norm": 0.8028994592647819, "learning_rate": 4.871275118407839e-06, "loss": 0.4311, "step": 2579 }, { "epoch": 0.6379821958456974, "grad_norm": 0.7834446209323545, "learning_rate": 4.871172146642981e-06, "loss": 0.4279, "step": 2580 }, { "epoch": 0.6382294757665677, "grad_norm": 0.8348542999908406, "learning_rate": 4.871069134798357e-06, "loss": 0.4329, "step": 2581 }, { "epoch": 0.6384767556874382, "grad_norm": 0.8272271677793834, "learning_rate": 4.8709660828757084e-06, "loss": 0.4182, "step": 2582 }, { "epoch": 0.6387240356083086, "grad_norm": 0.8689124353130055, "learning_rate": 4.8708629908767765e-06, "loss": 0.4006, "step": 2583 }, { "epoch": 0.6389713155291791, "grad_norm": 0.7859560070726559, "learning_rate": 4.870759858803306e-06, "loss": 0.4099, "step": 2584 }, { "epoch": 0.6392185954500494, "grad_norm": 0.8065301022490496, "learning_rate": 4.870656686657037e-06, "loss": 0.4429, "step": 2585 }, { "epoch": 0.6394658753709199, "grad_norm": 0.8046008098865215, "learning_rate": 4.870553474439715e-06, "loss": 0.4355, "step": 2586 }, { "epoch": 0.6397131552917903, "grad_norm": 0.8173022893337369, "learning_rate": 4.870450222153086e-06, "loss": 0.4541, "step": 2587 }, { "epoch": 0.6399604352126608, "grad_norm": 0.8241623751017143, "learning_rate": 4.870346929798893e-06, "loss": 0.4322, "step": 2588 }, { "epoch": 0.6402077151335311, "grad_norm": 0.7625457826822414, "learning_rate": 4.870243597378882e-06, "loss": 0.4425, "step": 2589 }, { "epoch": 0.6404549950544016, "grad_norm": 0.8650909181528662, "learning_rate": 4.870140224894801e-06, "loss": 0.4208, "step": 2590 }, { "epoch": 0.640702274975272, "grad_norm": 0.807605893764015, "learning_rate": 4.870036812348397e-06, "loss": 0.4301, "step": 2591 }, { "epoch": 0.6409495548961425, "grad_norm": 0.8261404636614745, "learning_rate": 4.8699333597414166e-06, "loss": 0.4278, "step": 2592 }, { "epoch": 0.6411968348170128, "grad_norm": 0.8284238869122891, "learning_rate": 4.869829867075611e-06, "loss": 0.3984, "step": 2593 }, { "epoch": 0.6414441147378833, "grad_norm": 0.7975722051769433, "learning_rate": 4.869726334352727e-06, "loss": 0.4709, "step": 2594 }, { "epoch": 0.6416913946587537, "grad_norm": 0.8507481794439908, "learning_rate": 4.869622761574516e-06, "loss": 0.4238, "step": 2595 }, { "epoch": 0.6419386745796242, "grad_norm": 0.8131973370387947, "learning_rate": 4.869519148742728e-06, "loss": 0.4163, "step": 2596 }, { "epoch": 0.6421859545004945, "grad_norm": 0.7971351475313032, "learning_rate": 4.8694154958591145e-06, "loss": 0.4265, "step": 2597 }, { "epoch": 0.642433234421365, "grad_norm": 0.8116345108123881, "learning_rate": 4.869311802925428e-06, "loss": 0.4123, "step": 2598 }, { "epoch": 0.6426805143422354, "grad_norm": 0.8095032265641844, "learning_rate": 4.8692080699434205e-06, "loss": 0.4268, "step": 2599 }, { "epoch": 0.6429277942631059, "grad_norm": 0.8217190775852606, "learning_rate": 4.869104296914847e-06, "loss": 0.4485, "step": 2600 }, { "epoch": 0.6431750741839762, "grad_norm": 0.8025278787335687, "learning_rate": 4.869000483841459e-06, "loss": 0.4606, "step": 2601 }, { "epoch": 0.6434223541048467, "grad_norm": 0.8116294256653767, "learning_rate": 4.868896630725014e-06, "loss": 0.4358, "step": 2602 }, { "epoch": 0.6436696340257171, "grad_norm": 0.7967259432878298, "learning_rate": 4.868792737567266e-06, "loss": 0.4422, "step": 2603 }, { "epoch": 0.6439169139465876, "grad_norm": 0.8050752940748637, "learning_rate": 4.86868880436997e-06, "loss": 0.4417, "step": 2604 }, { "epoch": 0.6441641938674579, "grad_norm": 0.7917551367908968, "learning_rate": 4.868584831134885e-06, "loss": 0.4185, "step": 2605 }, { "epoch": 0.6444114737883284, "grad_norm": 0.8045620985739201, "learning_rate": 4.868480817863766e-06, "loss": 0.4031, "step": 2606 }, { "epoch": 0.6446587537091988, "grad_norm": 0.7708976802059638, "learning_rate": 4.868376764558374e-06, "loss": 0.4827, "step": 2607 }, { "epoch": 0.6449060336300693, "grad_norm": 0.8344243360154323, "learning_rate": 4.868272671220465e-06, "loss": 0.4184, "step": 2608 }, { "epoch": 0.6451533135509396, "grad_norm": 0.7928287190522306, "learning_rate": 4.868168537851801e-06, "loss": 0.4405, "step": 2609 }, { "epoch": 0.6454005934718101, "grad_norm": 0.8301762127855165, "learning_rate": 4.868064364454141e-06, "loss": 0.4264, "step": 2610 }, { "epoch": 0.6456478733926805, "grad_norm": 0.8234622479401585, "learning_rate": 4.867960151029245e-06, "loss": 0.4101, "step": 2611 }, { "epoch": 0.645895153313551, "grad_norm": 0.8042270905776066, "learning_rate": 4.867855897578876e-06, "loss": 0.4088, "step": 2612 }, { "epoch": 0.6461424332344213, "grad_norm": 0.8118602734915629, "learning_rate": 4.867751604104795e-06, "loss": 0.4279, "step": 2613 }, { "epoch": 0.6463897131552918, "grad_norm": 0.8157638003872915, "learning_rate": 4.8676472706087655e-06, "loss": 0.3937, "step": 2614 }, { "epoch": 0.6466369930761622, "grad_norm": 0.7960341855478246, "learning_rate": 4.867542897092551e-06, "loss": 0.4683, "step": 2615 }, { "epoch": 0.6468842729970327, "grad_norm": 0.8650800961392032, "learning_rate": 4.867438483557916e-06, "loss": 0.41, "step": 2616 }, { "epoch": 0.647131552917903, "grad_norm": 0.7994797297627116, "learning_rate": 4.867334030006624e-06, "loss": 0.3952, "step": 2617 }, { "epoch": 0.6473788328387735, "grad_norm": 0.8182129634502129, "learning_rate": 4.867229536440442e-06, "loss": 0.4538, "step": 2618 }, { "epoch": 0.6476261127596439, "grad_norm": 0.8291856382801164, "learning_rate": 4.867125002861136e-06, "loss": 0.421, "step": 2619 }, { "epoch": 0.6478733926805144, "grad_norm": 0.8143024931629848, "learning_rate": 4.867020429270473e-06, "loss": 0.4306, "step": 2620 }, { "epoch": 0.6481206726013847, "grad_norm": 0.7831998758499393, "learning_rate": 4.866915815670221e-06, "loss": 0.4269, "step": 2621 }, { "epoch": 0.6483679525222552, "grad_norm": 0.7830711376483871, "learning_rate": 4.866811162062146e-06, "loss": 0.4514, "step": 2622 }, { "epoch": 0.6486152324431256, "grad_norm": 0.8088024343741351, "learning_rate": 4.86670646844802e-06, "loss": 0.4059, "step": 2623 }, { "epoch": 0.6488625123639961, "grad_norm": 0.8646016295169404, "learning_rate": 4.86660173482961e-06, "loss": 0.4262, "step": 2624 }, { "epoch": 0.6491097922848664, "grad_norm": 0.832596400950109, "learning_rate": 4.866496961208689e-06, "loss": 0.4289, "step": 2625 }, { "epoch": 0.6493570722057369, "grad_norm": 0.8067755589491356, "learning_rate": 4.866392147587026e-06, "loss": 0.4026, "step": 2626 }, { "epoch": 0.6496043521266073, "grad_norm": 0.7575527345016138, "learning_rate": 4.8662872939663925e-06, "loss": 0.4915, "step": 2627 }, { "epoch": 0.6498516320474778, "grad_norm": 0.882603184161495, "learning_rate": 4.866182400348562e-06, "loss": 0.4028, "step": 2628 }, { "epoch": 0.6500989119683481, "grad_norm": 0.820325274503156, "learning_rate": 4.866077466735307e-06, "loss": 0.4334, "step": 2629 }, { "epoch": 0.6503461918892186, "grad_norm": 0.8311177188830288, "learning_rate": 4.8659724931284014e-06, "loss": 0.4505, "step": 2630 }, { "epoch": 0.650593471810089, "grad_norm": 0.7836144268998563, "learning_rate": 4.865867479529619e-06, "loss": 0.4185, "step": 2631 }, { "epoch": 0.6508407517309595, "grad_norm": 0.769144550632546, "learning_rate": 4.865762425940735e-06, "loss": 0.4447, "step": 2632 }, { "epoch": 0.6510880316518298, "grad_norm": 0.7983531419796737, "learning_rate": 4.865657332363526e-06, "loss": 0.4451, "step": 2633 }, { "epoch": 0.6513353115727003, "grad_norm": 0.8263434045298292, "learning_rate": 4.865552198799767e-06, "loss": 0.4073, "step": 2634 }, { "epoch": 0.6515825914935707, "grad_norm": 0.8110069368531261, "learning_rate": 4.865447025251237e-06, "loss": 0.4525, "step": 2635 }, { "epoch": 0.6518298714144412, "grad_norm": 0.8143820165970546, "learning_rate": 4.86534181171971e-06, "loss": 0.412, "step": 2636 }, { "epoch": 0.6520771513353115, "grad_norm": 0.8320001375672842, "learning_rate": 4.865236558206969e-06, "loss": 0.4605, "step": 2637 }, { "epoch": 0.652324431256182, "grad_norm": 0.8097042928776802, "learning_rate": 4.865131264714791e-06, "loss": 0.3915, "step": 2638 }, { "epoch": 0.6525717111770524, "grad_norm": 0.7994068536215166, "learning_rate": 4.865025931244955e-06, "loss": 0.4416, "step": 2639 }, { "epoch": 0.6528189910979229, "grad_norm": 0.833191717549721, "learning_rate": 4.864920557799243e-06, "loss": 0.3927, "step": 2640 }, { "epoch": 0.6530662710187932, "grad_norm": 0.8287763555318937, "learning_rate": 4.864815144379435e-06, "loss": 0.4285, "step": 2641 }, { "epoch": 0.6533135509396637, "grad_norm": 0.8376260262533985, "learning_rate": 4.864709690987313e-06, "loss": 0.4058, "step": 2642 }, { "epoch": 0.6535608308605341, "grad_norm": 0.8012222473334901, "learning_rate": 4.8646041976246595e-06, "loss": 0.4342, "step": 2643 }, { "epoch": 0.6538081107814046, "grad_norm": 0.7915340668570944, "learning_rate": 4.864498664293258e-06, "loss": 0.4375, "step": 2644 }, { "epoch": 0.654055390702275, "grad_norm": 0.7914492576635661, "learning_rate": 4.864393090994892e-06, "loss": 0.4365, "step": 2645 }, { "epoch": 0.6543026706231454, "grad_norm": 0.7693345141545859, "learning_rate": 4.864287477731346e-06, "loss": 0.4217, "step": 2646 }, { "epoch": 0.6545499505440159, "grad_norm": 0.7878375532840123, "learning_rate": 4.8641818245044065e-06, "loss": 0.4313, "step": 2647 }, { "epoch": 0.6547972304648862, "grad_norm": 0.776760393600863, "learning_rate": 4.8640761313158565e-06, "loss": 0.4121, "step": 2648 }, { "epoch": 0.6550445103857567, "grad_norm": 0.8010628469676324, "learning_rate": 4.8639703981674854e-06, "loss": 0.4049, "step": 2649 }, { "epoch": 0.6552917903066271, "grad_norm": 0.8374209078452766, "learning_rate": 4.863864625061079e-06, "loss": 0.3779, "step": 2650 }, { "epoch": 0.6555390702274976, "grad_norm": 0.8174278279572348, "learning_rate": 4.8637588119984245e-06, "loss": 0.42, "step": 2651 }, { "epoch": 0.655786350148368, "grad_norm": 0.7978953213607124, "learning_rate": 4.863652958981312e-06, "loss": 0.4014, "step": 2652 }, { "epoch": 0.6560336300692384, "grad_norm": 0.839369529972231, "learning_rate": 4.863547066011529e-06, "loss": 0.4557, "step": 2653 }, { "epoch": 0.6562809099901088, "grad_norm": 0.8486133636067605, "learning_rate": 4.863441133090867e-06, "loss": 0.4262, "step": 2654 }, { "epoch": 0.6565281899109793, "grad_norm": 0.8001265604884367, "learning_rate": 4.863335160221116e-06, "loss": 0.3965, "step": 2655 }, { "epoch": 0.6567754698318496, "grad_norm": 0.7686223929805192, "learning_rate": 4.863229147404067e-06, "loss": 0.4275, "step": 2656 }, { "epoch": 0.6570227497527201, "grad_norm": 0.7772075862282518, "learning_rate": 4.863123094641513e-06, "loss": 0.4033, "step": 2657 }, { "epoch": 0.6572700296735905, "grad_norm": 0.7775090647355424, "learning_rate": 4.8630170019352455e-06, "loss": 0.4308, "step": 2658 }, { "epoch": 0.657517309594461, "grad_norm": 0.7985056923918036, "learning_rate": 4.862910869287058e-06, "loss": 0.4574, "step": 2659 }, { "epoch": 0.6577645895153313, "grad_norm": 0.8176547204705846, "learning_rate": 4.862804696698743e-06, "loss": 0.4177, "step": 2660 }, { "epoch": 0.6580118694362018, "grad_norm": 0.7913133572047072, "learning_rate": 4.8626984841720985e-06, "loss": 0.4181, "step": 2661 }, { "epoch": 0.6582591493570722, "grad_norm": 0.8017262292735567, "learning_rate": 4.862592231708917e-06, "loss": 0.4138, "step": 2662 }, { "epoch": 0.6585064292779427, "grad_norm": 0.823785805842316, "learning_rate": 4.862485939310996e-06, "loss": 0.4178, "step": 2663 }, { "epoch": 0.658753709198813, "grad_norm": 0.8011536361874997, "learning_rate": 4.862379606980131e-06, "loss": 0.42, "step": 2664 }, { "epoch": 0.6590009891196835, "grad_norm": 0.8131762554088784, "learning_rate": 4.86227323471812e-06, "loss": 0.4171, "step": 2665 }, { "epoch": 0.6592482690405539, "grad_norm": 0.7743146594982807, "learning_rate": 4.86216682252676e-06, "loss": 0.4358, "step": 2666 }, { "epoch": 0.6594955489614244, "grad_norm": 0.7984111657102294, "learning_rate": 4.862060370407852e-06, "loss": 0.4085, "step": 2667 }, { "epoch": 0.6597428288822947, "grad_norm": 0.8081662281215509, "learning_rate": 4.861953878363193e-06, "loss": 0.406, "step": 2668 }, { "epoch": 0.6599901088031652, "grad_norm": 0.847834361380235, "learning_rate": 4.8618473463945846e-06, "loss": 0.4487, "step": 2669 }, { "epoch": 0.6602373887240356, "grad_norm": 0.8062903347818331, "learning_rate": 4.861740774503827e-06, "loss": 0.4064, "step": 2670 }, { "epoch": 0.6604846686449061, "grad_norm": 0.8131245152077919, "learning_rate": 4.861634162692721e-06, "loss": 0.4103, "step": 2671 }, { "epoch": 0.6607319485657764, "grad_norm": 0.780375927043985, "learning_rate": 4.86152751096307e-06, "loss": 0.4342, "step": 2672 }, { "epoch": 0.6609792284866469, "grad_norm": 0.8053037494452797, "learning_rate": 4.861420819316674e-06, "loss": 0.4044, "step": 2673 }, { "epoch": 0.6612265084075173, "grad_norm": 0.7860517182947623, "learning_rate": 4.861314087755339e-06, "loss": 0.4325, "step": 2674 }, { "epoch": 0.6614737883283878, "grad_norm": 0.8359112628884461, "learning_rate": 4.8612073162808685e-06, "loss": 0.4046, "step": 2675 }, { "epoch": 0.6617210682492581, "grad_norm": 0.8207892956870797, "learning_rate": 4.861100504895067e-06, "loss": 0.4135, "step": 2676 }, { "epoch": 0.6619683481701286, "grad_norm": 0.825628094557653, "learning_rate": 4.86099365359974e-06, "loss": 0.4151, "step": 2677 }, { "epoch": 0.662215628090999, "grad_norm": 0.8562140165251803, "learning_rate": 4.860886762396694e-06, "loss": 0.4417, "step": 2678 }, { "epoch": 0.6624629080118695, "grad_norm": 0.8366896031458023, "learning_rate": 4.860779831287735e-06, "loss": 0.3959, "step": 2679 }, { "epoch": 0.6627101879327398, "grad_norm": 0.862785681802581, "learning_rate": 4.86067286027467e-06, "loss": 0.4187, "step": 2680 }, { "epoch": 0.6629574678536103, "grad_norm": 0.7714723304346427, "learning_rate": 4.860565849359309e-06, "loss": 0.4396, "step": 2681 }, { "epoch": 0.6632047477744807, "grad_norm": 0.8142081920122305, "learning_rate": 4.860458798543459e-06, "loss": 0.428, "step": 2682 }, { "epoch": 0.6634520276953512, "grad_norm": 0.7798010043722234, "learning_rate": 4.8603517078289305e-06, "loss": 0.4388, "step": 2683 }, { "epoch": 0.6636993076162215, "grad_norm": 0.7755345393059091, "learning_rate": 4.860244577217533e-06, "loss": 0.4107, "step": 2684 }, { "epoch": 0.663946587537092, "grad_norm": 0.7674983481753626, "learning_rate": 4.860137406711079e-06, "loss": 0.4143, "step": 2685 }, { "epoch": 0.6641938674579624, "grad_norm": 0.8273272184248035, "learning_rate": 4.860030196311377e-06, "loss": 0.414, "step": 2686 }, { "epoch": 0.6644411473788329, "grad_norm": 0.817916038266538, "learning_rate": 4.859922946020241e-06, "loss": 0.4047, "step": 2687 }, { "epoch": 0.6646884272997032, "grad_norm": 0.8207357627703306, "learning_rate": 4.8598156558394835e-06, "loss": 0.4294, "step": 2688 }, { "epoch": 0.6649357072205737, "grad_norm": 0.8194821245448161, "learning_rate": 4.859708325770919e-06, "loss": 0.4013, "step": 2689 }, { "epoch": 0.6651829871414441, "grad_norm": 0.7698555674508966, "learning_rate": 4.859600955816361e-06, "loss": 0.4349, "step": 2690 }, { "epoch": 0.6654302670623146, "grad_norm": 0.776934367789398, "learning_rate": 4.859493545977624e-06, "loss": 0.4098, "step": 2691 }, { "epoch": 0.6656775469831849, "grad_norm": 0.8173710162756613, "learning_rate": 4.859386096256523e-06, "loss": 0.3915, "step": 2692 }, { "epoch": 0.6659248269040554, "grad_norm": 0.8276330948906291, "learning_rate": 4.859278606654876e-06, "loss": 0.4198, "step": 2693 }, { "epoch": 0.6661721068249258, "grad_norm": 0.775277988894492, "learning_rate": 4.859171077174498e-06, "loss": 0.4151, "step": 2694 }, { "epoch": 0.6664193867457963, "grad_norm": 0.8367562396642206, "learning_rate": 4.8590635078172086e-06, "loss": 0.4578, "step": 2695 }, { "epoch": 0.6666666666666666, "grad_norm": 0.8159711563660788, "learning_rate": 4.858955898584824e-06, "loss": 0.4159, "step": 2696 }, { "epoch": 0.6669139465875371, "grad_norm": 0.8408357991825046, "learning_rate": 4.858848249479165e-06, "loss": 0.4011, "step": 2697 }, { "epoch": 0.6671612265084075, "grad_norm": 0.841572463582256, "learning_rate": 4.858740560502049e-06, "loss": 0.4123, "step": 2698 }, { "epoch": 0.667408506429278, "grad_norm": 0.8179256974131708, "learning_rate": 4.8586328316552974e-06, "loss": 0.439, "step": 2699 }, { "epoch": 0.6676557863501483, "grad_norm": 0.8055382933055434, "learning_rate": 4.858525062940732e-06, "loss": 0.4276, "step": 2700 }, { "epoch": 0.6679030662710188, "grad_norm": 0.8132449133301869, "learning_rate": 4.858417254360173e-06, "loss": 0.4247, "step": 2701 }, { "epoch": 0.6681503461918892, "grad_norm": 0.8017582573504225, "learning_rate": 4.858309405915443e-06, "loss": 0.4138, "step": 2702 }, { "epoch": 0.6683976261127597, "grad_norm": 0.8102328296173832, "learning_rate": 4.858201517608366e-06, "loss": 0.4615, "step": 2703 }, { "epoch": 0.66864490603363, "grad_norm": 0.8565741417342391, "learning_rate": 4.858093589440765e-06, "loss": 0.4398, "step": 2704 }, { "epoch": 0.6688921859545005, "grad_norm": 0.8203380198987199, "learning_rate": 4.8579856214144635e-06, "loss": 0.4477, "step": 2705 }, { "epoch": 0.6691394658753709, "grad_norm": 0.8154367679562924, "learning_rate": 4.8578776135312876e-06, "loss": 0.4182, "step": 2706 }, { "epoch": 0.6693867457962414, "grad_norm": 0.796586375252954, "learning_rate": 4.8577695657930625e-06, "loss": 0.417, "step": 2707 }, { "epoch": 0.6696340257171117, "grad_norm": 0.7857952936500736, "learning_rate": 4.857661478201614e-06, "loss": 0.425, "step": 2708 }, { "epoch": 0.6698813056379822, "grad_norm": 0.7839475383973936, "learning_rate": 4.85755335075877e-06, "loss": 0.4323, "step": 2709 }, { "epoch": 0.6701285855588526, "grad_norm": 0.8317686188823157, "learning_rate": 4.857445183466357e-06, "loss": 0.4038, "step": 2710 }, { "epoch": 0.670375865479723, "grad_norm": 0.7735299641172824, "learning_rate": 4.857336976326205e-06, "loss": 0.4349, "step": 2711 }, { "epoch": 0.6706231454005934, "grad_norm": 0.7993198548331951, "learning_rate": 4.857228729340142e-06, "loss": 0.4505, "step": 2712 }, { "epoch": 0.6708704253214639, "grad_norm": 0.8005486544012455, "learning_rate": 4.8571204425099976e-06, "loss": 0.4316, "step": 2713 }, { "epoch": 0.6711177052423343, "grad_norm": 0.8235107641996137, "learning_rate": 4.857012115837602e-06, "loss": 0.414, "step": 2714 }, { "epoch": 0.6713649851632048, "grad_norm": 0.7544707697511732, "learning_rate": 4.856903749324787e-06, "loss": 0.4061, "step": 2715 }, { "epoch": 0.6716122650840751, "grad_norm": 0.837063786540837, "learning_rate": 4.856795342973385e-06, "loss": 0.4236, "step": 2716 }, { "epoch": 0.6718595450049456, "grad_norm": 0.7794131174029756, "learning_rate": 4.856686896785226e-06, "loss": 0.4053, "step": 2717 }, { "epoch": 0.672106824925816, "grad_norm": 0.8167949776353572, "learning_rate": 4.856578410762145e-06, "loss": 0.3939, "step": 2718 }, { "epoch": 0.6723541048466865, "grad_norm": 0.8159754290439838, "learning_rate": 4.856469884905974e-06, "loss": 0.4235, "step": 2719 }, { "epoch": 0.6726013847675568, "grad_norm": 0.7811653270798669, "learning_rate": 4.8563613192185495e-06, "loss": 0.3975, "step": 2720 }, { "epoch": 0.6728486646884273, "grad_norm": 0.7749258733290558, "learning_rate": 4.856252713701706e-06, "loss": 0.4407, "step": 2721 }, { "epoch": 0.6730959446092978, "grad_norm": 0.8416011342065808, "learning_rate": 4.856144068357279e-06, "loss": 0.4084, "step": 2722 }, { "epoch": 0.6733432245301681, "grad_norm": 0.7965495607636626, "learning_rate": 4.8560353831871035e-06, "loss": 0.4325, "step": 2723 }, { "epoch": 0.6735905044510386, "grad_norm": 0.8046182312914216, "learning_rate": 4.855926658193019e-06, "loss": 0.4498, "step": 2724 }, { "epoch": 0.673837784371909, "grad_norm": 0.7897180049006142, "learning_rate": 4.855817893376862e-06, "loss": 0.4226, "step": 2725 }, { "epoch": 0.6740850642927795, "grad_norm": 0.7673796995879293, "learning_rate": 4.85570908874047e-06, "loss": 0.4749, "step": 2726 }, { "epoch": 0.6743323442136498, "grad_norm": 0.8066313586664126, "learning_rate": 4.855600244285684e-06, "loss": 0.4353, "step": 2727 }, { "epoch": 0.6745796241345203, "grad_norm": 0.8043440775563024, "learning_rate": 4.855491360014343e-06, "loss": 0.4377, "step": 2728 }, { "epoch": 0.6748269040553907, "grad_norm": 0.8052759257323537, "learning_rate": 4.855382435928287e-06, "loss": 0.4242, "step": 2729 }, { "epoch": 0.6750741839762612, "grad_norm": 0.816064941591361, "learning_rate": 4.855273472029358e-06, "loss": 0.4073, "step": 2730 }, { "epoch": 0.6753214638971315, "grad_norm": 0.7606831743034189, "learning_rate": 4.855164468319398e-06, "loss": 0.4127, "step": 2731 }, { "epoch": 0.675568743818002, "grad_norm": 0.8075604491375517, "learning_rate": 4.855055424800249e-06, "loss": 0.4494, "step": 2732 }, { "epoch": 0.6758160237388724, "grad_norm": 0.7534279728737382, "learning_rate": 4.854946341473753e-06, "loss": 0.4519, "step": 2733 }, { "epoch": 0.6760633036597429, "grad_norm": 0.8069270952474595, "learning_rate": 4.8548372183417556e-06, "loss": 0.4065, "step": 2734 }, { "epoch": 0.6763105835806132, "grad_norm": 0.8506776561935846, "learning_rate": 4.854728055406101e-06, "loss": 0.4306, "step": 2735 }, { "epoch": 0.6765578635014837, "grad_norm": 0.8035438540666233, "learning_rate": 4.854618852668632e-06, "loss": 0.4606, "step": 2736 }, { "epoch": 0.6768051434223541, "grad_norm": 0.7934236069117082, "learning_rate": 4.854509610131198e-06, "loss": 0.4424, "step": 2737 }, { "epoch": 0.6770524233432246, "grad_norm": 0.7654027889805268, "learning_rate": 4.854400327795644e-06, "loss": 0.4361, "step": 2738 }, { "epoch": 0.6772997032640949, "grad_norm": 0.8258240340169412, "learning_rate": 4.854291005663816e-06, "loss": 0.4033, "step": 2739 }, { "epoch": 0.6775469831849654, "grad_norm": 0.8126929867632539, "learning_rate": 4.854181643737564e-06, "loss": 0.432, "step": 2740 }, { "epoch": 0.6777942631058358, "grad_norm": 0.8059082743497984, "learning_rate": 4.854072242018734e-06, "loss": 0.422, "step": 2741 }, { "epoch": 0.6780415430267063, "grad_norm": 0.8181609925538584, "learning_rate": 4.853962800509179e-06, "loss": 0.4423, "step": 2742 }, { "epoch": 0.6782888229475766, "grad_norm": 0.7901271764796842, "learning_rate": 4.853853319210745e-06, "loss": 0.4251, "step": 2743 }, { "epoch": 0.6785361028684471, "grad_norm": 0.8030211456123973, "learning_rate": 4.853743798125285e-06, "loss": 0.4312, "step": 2744 }, { "epoch": 0.6787833827893175, "grad_norm": 0.7965447474952801, "learning_rate": 4.8536342372546494e-06, "loss": 0.4439, "step": 2745 }, { "epoch": 0.679030662710188, "grad_norm": 0.7792761987365541, "learning_rate": 4.85352463660069e-06, "loss": 0.4361, "step": 2746 }, { "epoch": 0.6792779426310583, "grad_norm": 0.8066811438330524, "learning_rate": 4.853414996165258e-06, "loss": 0.4504, "step": 2747 }, { "epoch": 0.6795252225519288, "grad_norm": 0.8388942890539145, "learning_rate": 4.85330531595021e-06, "loss": 0.4208, "step": 2748 }, { "epoch": 0.6797725024727992, "grad_norm": 0.8154470508323961, "learning_rate": 4.853195595957398e-06, "loss": 0.4192, "step": 2749 }, { "epoch": 0.6800197823936697, "grad_norm": 0.7756267102531477, "learning_rate": 4.853085836188676e-06, "loss": 0.3995, "step": 2750 }, { "epoch": 0.68026706231454, "grad_norm": 0.8401001254872341, "learning_rate": 4.852976036645899e-06, "loss": 0.4341, "step": 2751 }, { "epoch": 0.6805143422354105, "grad_norm": 0.8190568217830237, "learning_rate": 4.852866197330925e-06, "loss": 0.432, "step": 2752 }, { "epoch": 0.6807616221562809, "grad_norm": 0.776414872949731, "learning_rate": 4.852756318245609e-06, "loss": 0.412, "step": 2753 }, { "epoch": 0.6810089020771514, "grad_norm": 0.7750759553622607, "learning_rate": 4.852646399391808e-06, "loss": 0.4521, "step": 2754 }, { "epoch": 0.6812561819980217, "grad_norm": 0.8000941607465487, "learning_rate": 4.8525364407713825e-06, "loss": 0.4095, "step": 2755 }, { "epoch": 0.6815034619188922, "grad_norm": 0.8196128109500314, "learning_rate": 4.852426442386188e-06, "loss": 0.441, "step": 2756 }, { "epoch": 0.6817507418397626, "grad_norm": 0.8147986557860766, "learning_rate": 4.852316404238085e-06, "loss": 0.4571, "step": 2757 }, { "epoch": 0.6819980217606331, "grad_norm": 0.8017639356034257, "learning_rate": 4.8522063263289336e-06, "loss": 0.4231, "step": 2758 }, { "epoch": 0.6822453016815034, "grad_norm": 0.8257170171004764, "learning_rate": 4.8520962086605945e-06, "loss": 0.442, "step": 2759 }, { "epoch": 0.6824925816023739, "grad_norm": 0.8078217424320431, "learning_rate": 4.8519860512349295e-06, "loss": 0.4357, "step": 2760 }, { "epoch": 0.6827398615232443, "grad_norm": 0.8127981123442076, "learning_rate": 4.851875854053799e-06, "loss": 0.4018, "step": 2761 }, { "epoch": 0.6829871414441148, "grad_norm": 0.808121246312467, "learning_rate": 4.8517656171190665e-06, "loss": 0.4395, "step": 2762 }, { "epoch": 0.6832344213649851, "grad_norm": 0.8616502396837019, "learning_rate": 4.8516553404325965e-06, "loss": 0.421, "step": 2763 }, { "epoch": 0.6834817012858556, "grad_norm": 0.8211179328198835, "learning_rate": 4.851545023996252e-06, "loss": 0.4151, "step": 2764 }, { "epoch": 0.683728981206726, "grad_norm": 0.7971065636543179, "learning_rate": 4.851434667811896e-06, "loss": 0.3961, "step": 2765 }, { "epoch": 0.6839762611275965, "grad_norm": 0.7959678571700568, "learning_rate": 4.851324271881397e-06, "loss": 0.4542, "step": 2766 }, { "epoch": 0.6842235410484668, "grad_norm": 0.8297470002867344, "learning_rate": 4.8512138362066185e-06, "loss": 0.3831, "step": 2767 }, { "epoch": 0.6844708209693373, "grad_norm": 0.8012939597110195, "learning_rate": 4.851103360789428e-06, "loss": 0.4234, "step": 2768 }, { "epoch": 0.6847181008902077, "grad_norm": 0.8087329469986386, "learning_rate": 4.850992845631694e-06, "loss": 0.4243, "step": 2769 }, { "epoch": 0.6849653808110782, "grad_norm": 0.8236396320864248, "learning_rate": 4.850882290735283e-06, "loss": 0.4246, "step": 2770 }, { "epoch": 0.6852126607319485, "grad_norm": 0.8167830829538719, "learning_rate": 4.850771696102066e-06, "loss": 0.4284, "step": 2771 }, { "epoch": 0.685459940652819, "grad_norm": 0.7654752845074516, "learning_rate": 4.850661061733909e-06, "loss": 0.4589, "step": 2772 }, { "epoch": 0.6857072205736894, "grad_norm": 0.8547620176700507, "learning_rate": 4.850550387632683e-06, "loss": 0.4125, "step": 2773 }, { "epoch": 0.6859545004945599, "grad_norm": 0.7917980650045635, "learning_rate": 4.85043967380026e-06, "loss": 0.4348, "step": 2774 }, { "epoch": 0.6862017804154302, "grad_norm": 0.8345478477879663, "learning_rate": 4.850328920238512e-06, "loss": 0.4178, "step": 2775 }, { "epoch": 0.6864490603363007, "grad_norm": 0.8332777404049719, "learning_rate": 4.8502181269493084e-06, "loss": 0.4122, "step": 2776 }, { "epoch": 0.6866963402571711, "grad_norm": 0.7759039805494313, "learning_rate": 4.850107293934524e-06, "loss": 0.4256, "step": 2777 }, { "epoch": 0.6869436201780416, "grad_norm": 0.8252611335287267, "learning_rate": 4.849996421196031e-06, "loss": 0.416, "step": 2778 }, { "epoch": 0.6871909000989119, "grad_norm": 0.788591873666151, "learning_rate": 4.849885508735704e-06, "loss": 0.4711, "step": 2779 }, { "epoch": 0.6874381800197824, "grad_norm": 0.8189554759316249, "learning_rate": 4.849774556555419e-06, "loss": 0.4125, "step": 2780 }, { "epoch": 0.6876854599406528, "grad_norm": 0.7836784844389106, "learning_rate": 4.849663564657049e-06, "loss": 0.4231, "step": 2781 }, { "epoch": 0.6879327398615233, "grad_norm": 0.8058795307766686, "learning_rate": 4.849552533042472e-06, "loss": 0.4194, "step": 2782 }, { "epoch": 0.6881800197823936, "grad_norm": 0.7907983381026115, "learning_rate": 4.8494414617135635e-06, "loss": 0.4243, "step": 2783 }, { "epoch": 0.6884272997032641, "grad_norm": 0.8183733832559971, "learning_rate": 4.8493303506722025e-06, "loss": 0.4179, "step": 2784 }, { "epoch": 0.6886745796241345, "grad_norm": 0.8188930609183032, "learning_rate": 4.849219199920266e-06, "loss": 0.4405, "step": 2785 }, { "epoch": 0.688921859545005, "grad_norm": 0.8305368966836002, "learning_rate": 4.849108009459632e-06, "loss": 0.4278, "step": 2786 }, { "epoch": 0.6891691394658753, "grad_norm": 0.8366002879385498, "learning_rate": 4.8489967792921806e-06, "loss": 0.4391, "step": 2787 }, { "epoch": 0.6894164193867458, "grad_norm": 0.7871906369909654, "learning_rate": 4.848885509419793e-06, "loss": 0.4691, "step": 2788 }, { "epoch": 0.6896636993076162, "grad_norm": 0.8251697333494993, "learning_rate": 4.848774199844348e-06, "loss": 0.4225, "step": 2789 }, { "epoch": 0.6899109792284867, "grad_norm": 0.8012321325556169, "learning_rate": 4.848662850567729e-06, "loss": 0.3791, "step": 2790 }, { "epoch": 0.690158259149357, "grad_norm": 0.7895786794853111, "learning_rate": 4.848551461591817e-06, "loss": 0.4197, "step": 2791 }, { "epoch": 0.6904055390702275, "grad_norm": 0.8122224358345084, "learning_rate": 4.848440032918496e-06, "loss": 0.4122, "step": 2792 }, { "epoch": 0.6906528189910979, "grad_norm": 0.8011124583088066, "learning_rate": 4.848328564549648e-06, "loss": 0.3933, "step": 2793 }, { "epoch": 0.6909000989119684, "grad_norm": 0.8069567267692628, "learning_rate": 4.848217056487158e-06, "loss": 0.4225, "step": 2794 }, { "epoch": 0.6911473788328387, "grad_norm": 0.8025647116960141, "learning_rate": 4.84810550873291e-06, "loss": 0.4056, "step": 2795 }, { "epoch": 0.6913946587537092, "grad_norm": 0.7795941271097871, "learning_rate": 4.84799392128879e-06, "loss": 0.4069, "step": 2796 }, { "epoch": 0.6916419386745796, "grad_norm": 0.8206549077084073, "learning_rate": 4.847882294156684e-06, "loss": 0.4144, "step": 2797 }, { "epoch": 0.69188921859545, "grad_norm": 0.7993709593343162, "learning_rate": 4.847770627338479e-06, "loss": 0.4189, "step": 2798 }, { "epoch": 0.6921364985163204, "grad_norm": 0.7983736260715664, "learning_rate": 4.847658920836063e-06, "loss": 0.4471, "step": 2799 }, { "epoch": 0.6923837784371909, "grad_norm": 0.8042491425514566, "learning_rate": 4.847547174651325e-06, "loss": 0.4155, "step": 2800 }, { "epoch": 0.6926310583580614, "grad_norm": 0.8234388367328737, "learning_rate": 4.84743538878615e-06, "loss": 0.4272, "step": 2801 }, { "epoch": 0.6928783382789317, "grad_norm": 0.7744695018229808, "learning_rate": 4.847323563242431e-06, "loss": 0.4224, "step": 2802 }, { "epoch": 0.6931256181998022, "grad_norm": 0.7872893346801324, "learning_rate": 4.847211698022058e-06, "loss": 0.4265, "step": 2803 }, { "epoch": 0.6933728981206726, "grad_norm": 0.8187757340572908, "learning_rate": 4.84709979312692e-06, "loss": 0.44, "step": 2804 }, { "epoch": 0.6936201780415431, "grad_norm": 0.801267739135154, "learning_rate": 4.84698784855891e-06, "loss": 0.4532, "step": 2805 }, { "epoch": 0.6938674579624134, "grad_norm": 0.8573956823059902, "learning_rate": 4.84687586431992e-06, "loss": 0.4569, "step": 2806 }, { "epoch": 0.6941147378832839, "grad_norm": 0.810081502663985, "learning_rate": 4.846763840411842e-06, "loss": 0.4453, "step": 2807 }, { "epoch": 0.6943620178041543, "grad_norm": 0.7970434492165938, "learning_rate": 4.8466517768365705e-06, "loss": 0.4149, "step": 2808 }, { "epoch": 0.6946092977250248, "grad_norm": 0.7767778121773069, "learning_rate": 4.846539673595999e-06, "loss": 0.4063, "step": 2809 }, { "epoch": 0.6948565776458951, "grad_norm": 0.8376570930059907, "learning_rate": 4.846427530692023e-06, "loss": 0.4213, "step": 2810 }, { "epoch": 0.6951038575667656, "grad_norm": 0.8110481108261645, "learning_rate": 4.846315348126538e-06, "loss": 0.4211, "step": 2811 }, { "epoch": 0.695351137487636, "grad_norm": 0.8193719014283674, "learning_rate": 4.84620312590144e-06, "loss": 0.4341, "step": 2812 }, { "epoch": 0.6955984174085065, "grad_norm": 0.7941431266613678, "learning_rate": 4.846090864018625e-06, "loss": 0.4064, "step": 2813 }, { "epoch": 0.6958456973293768, "grad_norm": 0.7931169995062347, "learning_rate": 4.845978562479993e-06, "loss": 0.4086, "step": 2814 }, { "epoch": 0.6960929772502473, "grad_norm": 0.831336166924252, "learning_rate": 4.84586622128744e-06, "loss": 0.4216, "step": 2815 }, { "epoch": 0.6963402571711177, "grad_norm": 0.7731150224481598, "learning_rate": 4.845753840442865e-06, "loss": 0.4164, "step": 2816 }, { "epoch": 0.6965875370919882, "grad_norm": 0.7920674915282415, "learning_rate": 4.845641419948168e-06, "loss": 0.4356, "step": 2817 }, { "epoch": 0.6968348170128585, "grad_norm": 0.875976281556326, "learning_rate": 4.84552895980525e-06, "loss": 0.4334, "step": 2818 }, { "epoch": 0.697082096933729, "grad_norm": 0.8001556368006945, "learning_rate": 4.845416460016011e-06, "loss": 0.4052, "step": 2819 }, { "epoch": 0.6973293768545994, "grad_norm": 0.7875811128771384, "learning_rate": 4.845303920582353e-06, "loss": 0.4402, "step": 2820 }, { "epoch": 0.6975766567754699, "grad_norm": 0.8171665238112196, "learning_rate": 4.845191341506178e-06, "loss": 0.4224, "step": 2821 }, { "epoch": 0.6978239366963402, "grad_norm": 0.7908125499885357, "learning_rate": 4.845078722789388e-06, "loss": 0.4352, "step": 2822 }, { "epoch": 0.6980712166172107, "grad_norm": 0.823467984833555, "learning_rate": 4.844966064433889e-06, "loss": 0.4124, "step": 2823 }, { "epoch": 0.6983184965380811, "grad_norm": 0.7508391242666943, "learning_rate": 4.844853366441583e-06, "loss": 0.4066, "step": 2824 }, { "epoch": 0.6985657764589516, "grad_norm": 0.7815866665699481, "learning_rate": 4.844740628814376e-06, "loss": 0.4236, "step": 2825 }, { "epoch": 0.6988130563798219, "grad_norm": 0.797117432450336, "learning_rate": 4.8446278515541735e-06, "loss": 0.3852, "step": 2826 }, { "epoch": 0.6990603363006924, "grad_norm": 0.7708383132040468, "learning_rate": 4.844515034662882e-06, "loss": 0.4233, "step": 2827 }, { "epoch": 0.6993076162215628, "grad_norm": 0.8234459455209941, "learning_rate": 4.844402178142408e-06, "loss": 0.4306, "step": 2828 }, { "epoch": 0.6995548961424333, "grad_norm": 0.7891526183943096, "learning_rate": 4.844289281994659e-06, "loss": 0.43, "step": 2829 }, { "epoch": 0.6998021760633036, "grad_norm": 0.8269880587344088, "learning_rate": 4.844176346221543e-06, "loss": 0.382, "step": 2830 }, { "epoch": 0.7000494559841741, "grad_norm": 0.7979649115794843, "learning_rate": 4.844063370824969e-06, "loss": 0.4233, "step": 2831 }, { "epoch": 0.7002967359050445, "grad_norm": 0.7802581744052987, "learning_rate": 4.843950355806848e-06, "loss": 0.4148, "step": 2832 }, { "epoch": 0.700544015825915, "grad_norm": 0.7770547238028475, "learning_rate": 4.84383730116909e-06, "loss": 0.4468, "step": 2833 }, { "epoch": 0.7007912957467853, "grad_norm": 0.79419809474251, "learning_rate": 4.843724206913604e-06, "loss": 0.4328, "step": 2834 }, { "epoch": 0.7010385756676558, "grad_norm": 0.8038231205976583, "learning_rate": 4.843611073042303e-06, "loss": 0.4266, "step": 2835 }, { "epoch": 0.7012858555885262, "grad_norm": 0.7842622977893237, "learning_rate": 4.843497899557099e-06, "loss": 0.4553, "step": 2836 }, { "epoch": 0.7015331355093967, "grad_norm": 0.8707104909836916, "learning_rate": 4.843384686459906e-06, "loss": 0.4047, "step": 2837 }, { "epoch": 0.701780415430267, "grad_norm": 0.8075300737210989, "learning_rate": 4.843271433752635e-06, "loss": 0.3966, "step": 2838 }, { "epoch": 0.7020276953511375, "grad_norm": 0.7771082005270675, "learning_rate": 4.843158141437204e-06, "loss": 0.4482, "step": 2839 }, { "epoch": 0.7022749752720079, "grad_norm": 0.826855389864659, "learning_rate": 4.843044809515525e-06, "loss": 0.4077, "step": 2840 }, { "epoch": 0.7025222551928784, "grad_norm": 0.8162412200875937, "learning_rate": 4.842931437989515e-06, "loss": 0.4583, "step": 2841 }, { "epoch": 0.7027695351137487, "grad_norm": 0.8050089615685367, "learning_rate": 4.84281802686109e-06, "loss": 0.3962, "step": 2842 }, { "epoch": 0.7030168150346192, "grad_norm": 0.8163750044094787, "learning_rate": 4.8427045761321675e-06, "loss": 0.4327, "step": 2843 }, { "epoch": 0.7032640949554896, "grad_norm": 0.8563912067654783, "learning_rate": 4.842591085804664e-06, "loss": 0.4566, "step": 2844 }, { "epoch": 0.7035113748763601, "grad_norm": 0.8223661537798799, "learning_rate": 4.842477555880498e-06, "loss": 0.4242, "step": 2845 }, { "epoch": 0.7037586547972304, "grad_norm": 0.7730639792659905, "learning_rate": 4.84236398636159e-06, "loss": 0.4461, "step": 2846 }, { "epoch": 0.7040059347181009, "grad_norm": 0.7769243044865499, "learning_rate": 4.842250377249858e-06, "loss": 0.4306, "step": 2847 }, { "epoch": 0.7042532146389713, "grad_norm": 0.8167018892846426, "learning_rate": 4.842136728547223e-06, "loss": 0.4237, "step": 2848 }, { "epoch": 0.7045004945598418, "grad_norm": 0.8385873076201484, "learning_rate": 4.842023040255606e-06, "loss": 0.41, "step": 2849 }, { "epoch": 0.7047477744807121, "grad_norm": 0.794449770554456, "learning_rate": 4.841909312376928e-06, "loss": 0.3948, "step": 2850 }, { "epoch": 0.7049950544015826, "grad_norm": 0.8085818080942666, "learning_rate": 4.841795544913112e-06, "loss": 0.4333, "step": 2851 }, { "epoch": 0.705242334322453, "grad_norm": 0.8347190989084778, "learning_rate": 4.841681737866082e-06, "loss": 0.4061, "step": 2852 }, { "epoch": 0.7054896142433235, "grad_norm": 0.7808124598357045, "learning_rate": 4.84156789123776e-06, "loss": 0.4412, "step": 2853 }, { "epoch": 0.7057368941641938, "grad_norm": 0.8393493189297654, "learning_rate": 4.841454005030071e-06, "loss": 0.415, "step": 2854 }, { "epoch": 0.7059841740850643, "grad_norm": 0.786373432859627, "learning_rate": 4.84134007924494e-06, "loss": 0.4337, "step": 2855 }, { "epoch": 0.7062314540059347, "grad_norm": 0.7791712896571225, "learning_rate": 4.841226113884292e-06, "loss": 0.4308, "step": 2856 }, { "epoch": 0.7064787339268052, "grad_norm": 0.7758379505569559, "learning_rate": 4.841112108950055e-06, "loss": 0.4105, "step": 2857 }, { "epoch": 0.7067260138476755, "grad_norm": 0.7336906709851186, "learning_rate": 4.840998064444154e-06, "loss": 0.4304, "step": 2858 }, { "epoch": 0.706973293768546, "grad_norm": 0.7709631481004076, "learning_rate": 4.840883980368518e-06, "loss": 0.4417, "step": 2859 }, { "epoch": 0.7072205736894164, "grad_norm": 0.861736396380686, "learning_rate": 4.840769856725076e-06, "loss": 0.426, "step": 2860 }, { "epoch": 0.7074678536102869, "grad_norm": 0.8644517295452442, "learning_rate": 4.840655693515754e-06, "loss": 0.3996, "step": 2861 }, { "epoch": 0.7077151335311572, "grad_norm": 0.7940413003498069, "learning_rate": 4.840541490742485e-06, "loss": 0.4244, "step": 2862 }, { "epoch": 0.7079624134520277, "grad_norm": 0.8098229584210634, "learning_rate": 4.840427248407199e-06, "loss": 0.4284, "step": 2863 }, { "epoch": 0.7082096933728981, "grad_norm": 0.8133450773470225, "learning_rate": 4.840312966511825e-06, "loss": 0.4395, "step": 2864 }, { "epoch": 0.7084569732937686, "grad_norm": 0.7919331528428261, "learning_rate": 4.840198645058296e-06, "loss": 0.447, "step": 2865 }, { "epoch": 0.7087042532146389, "grad_norm": 0.7965762781992587, "learning_rate": 4.840084284048544e-06, "loss": 0.4315, "step": 2866 }, { "epoch": 0.7089515331355094, "grad_norm": 0.8215063582230092, "learning_rate": 4.839969883484502e-06, "loss": 0.4236, "step": 2867 }, { "epoch": 0.7091988130563798, "grad_norm": 0.8198839600468166, "learning_rate": 4.8398554433681056e-06, "loss": 0.4358, "step": 2868 }, { "epoch": 0.7094460929772503, "grad_norm": 0.7736256143569525, "learning_rate": 4.839740963701286e-06, "loss": 0.4558, "step": 2869 }, { "epoch": 0.7096933728981206, "grad_norm": 0.7829134148515651, "learning_rate": 4.83962644448598e-06, "loss": 0.4205, "step": 2870 }, { "epoch": 0.7099406528189911, "grad_norm": 0.8252334612510036, "learning_rate": 4.839511885724123e-06, "loss": 0.4337, "step": 2871 }, { "epoch": 0.7101879327398615, "grad_norm": 0.8022554245927136, "learning_rate": 4.839397287417652e-06, "loss": 0.457, "step": 2872 }, { "epoch": 0.710435212660732, "grad_norm": 0.8192708918207695, "learning_rate": 4.8392826495685036e-06, "loss": 0.4167, "step": 2873 }, { "epoch": 0.7106824925816023, "grad_norm": 0.803077397303975, "learning_rate": 4.839167972178615e-06, "loss": 0.4098, "step": 2874 }, { "epoch": 0.7109297725024728, "grad_norm": 0.8468082968841901, "learning_rate": 4.839053255249925e-06, "loss": 0.4242, "step": 2875 }, { "epoch": 0.7111770524233432, "grad_norm": 0.8148986206105447, "learning_rate": 4.838938498784373e-06, "loss": 0.4286, "step": 2876 }, { "epoch": 0.7114243323442137, "grad_norm": 0.8170745630142042, "learning_rate": 4.838823702783898e-06, "loss": 0.423, "step": 2877 }, { "epoch": 0.7116716122650841, "grad_norm": 0.7996079185522241, "learning_rate": 4.838708867250441e-06, "loss": 0.4333, "step": 2878 }, { "epoch": 0.7119188921859545, "grad_norm": 0.7439191783301492, "learning_rate": 4.838593992185942e-06, "loss": 0.4235, "step": 2879 }, { "epoch": 0.712166172106825, "grad_norm": 0.7948869447365353, "learning_rate": 4.838479077592345e-06, "loss": 0.4185, "step": 2880 }, { "epoch": 0.7124134520276953, "grad_norm": 0.7963172123490325, "learning_rate": 4.83836412347159e-06, "loss": 0.4368, "step": 2881 }, { "epoch": 0.7126607319485658, "grad_norm": 0.7621389475987713, "learning_rate": 4.838249129825622e-06, "loss": 0.4544, "step": 2882 }, { "epoch": 0.7129080118694362, "grad_norm": 0.8008573013469324, "learning_rate": 4.838134096656383e-06, "loss": 0.4303, "step": 2883 }, { "epoch": 0.7131552917903067, "grad_norm": 0.7845189586215126, "learning_rate": 4.838019023965818e-06, "loss": 0.4131, "step": 2884 }, { "epoch": 0.713402571711177, "grad_norm": 0.8049398377356689, "learning_rate": 4.837903911755872e-06, "loss": 0.4288, "step": 2885 }, { "epoch": 0.7136498516320475, "grad_norm": 0.804018737817397, "learning_rate": 4.837788760028491e-06, "loss": 0.422, "step": 2886 }, { "epoch": 0.7138971315529179, "grad_norm": 0.8148238252599712, "learning_rate": 4.8376735687856215e-06, "loss": 0.4527, "step": 2887 }, { "epoch": 0.7141444114737884, "grad_norm": 0.8173379766329714, "learning_rate": 4.837558338029211e-06, "loss": 0.4449, "step": 2888 }, { "epoch": 0.7143916913946587, "grad_norm": 0.8174244199346332, "learning_rate": 4.837443067761206e-06, "loss": 0.4345, "step": 2889 }, { "epoch": 0.7146389713155292, "grad_norm": 0.7358219327556542, "learning_rate": 4.837327757983556e-06, "loss": 0.4379, "step": 2890 }, { "epoch": 0.7148862512363996, "grad_norm": 0.7666685418017337, "learning_rate": 4.837212408698209e-06, "loss": 0.4546, "step": 2891 }, { "epoch": 0.7151335311572701, "grad_norm": 0.7880217194687349, "learning_rate": 4.837097019907116e-06, "loss": 0.3937, "step": 2892 }, { "epoch": 0.7153808110781404, "grad_norm": 0.7914607948486829, "learning_rate": 4.836981591612226e-06, "loss": 0.4528, "step": 2893 }, { "epoch": 0.7156280909990109, "grad_norm": 0.8349074820163436, "learning_rate": 4.836866123815492e-06, "loss": 0.4242, "step": 2894 }, { "epoch": 0.7158753709198813, "grad_norm": 0.7726278159771962, "learning_rate": 4.836750616518864e-06, "loss": 0.4623, "step": 2895 }, { "epoch": 0.7161226508407518, "grad_norm": 0.8124863076626552, "learning_rate": 4.836635069724295e-06, "loss": 0.3905, "step": 2896 }, { "epoch": 0.7163699307616221, "grad_norm": 0.7841970322379082, "learning_rate": 4.836519483433738e-06, "loss": 0.42, "step": 2897 }, { "epoch": 0.7166172106824926, "grad_norm": 0.8169770200971669, "learning_rate": 4.8364038576491465e-06, "loss": 0.4281, "step": 2898 }, { "epoch": 0.716864490603363, "grad_norm": 0.7934550788486586, "learning_rate": 4.836288192372476e-06, "loss": 0.4258, "step": 2899 }, { "epoch": 0.7171117705242335, "grad_norm": 0.7811046579681694, "learning_rate": 4.8361724876056804e-06, "loss": 0.4334, "step": 2900 }, { "epoch": 0.7173590504451038, "grad_norm": 0.8288956689415943, "learning_rate": 4.836056743350717e-06, "loss": 0.4114, "step": 2901 }, { "epoch": 0.7176063303659743, "grad_norm": 0.8100905415131514, "learning_rate": 4.83594095960954e-06, "loss": 0.4193, "step": 2902 }, { "epoch": 0.7178536102868447, "grad_norm": 0.8349181394617355, "learning_rate": 4.835825136384107e-06, "loss": 0.4089, "step": 2903 }, { "epoch": 0.7181008902077152, "grad_norm": 0.8189708968152819, "learning_rate": 4.835709273676377e-06, "loss": 0.3892, "step": 2904 }, { "epoch": 0.7183481701285855, "grad_norm": 0.8103038676070768, "learning_rate": 4.835593371488308e-06, "loss": 0.4453, "step": 2905 }, { "epoch": 0.718595450049456, "grad_norm": 0.8690922003528119, "learning_rate": 4.835477429821859e-06, "loss": 0.4004, "step": 2906 }, { "epoch": 0.7188427299703264, "grad_norm": 0.8919261323122275, "learning_rate": 4.835361448678989e-06, "loss": 0.3887, "step": 2907 }, { "epoch": 0.7190900098911969, "grad_norm": 0.8034616323291977, "learning_rate": 4.835245428061659e-06, "loss": 0.4287, "step": 2908 }, { "epoch": 0.7193372898120672, "grad_norm": 0.8147872678673408, "learning_rate": 4.8351293679718305e-06, "loss": 0.4226, "step": 2909 }, { "epoch": 0.7195845697329377, "grad_norm": 0.819089910267115, "learning_rate": 4.835013268411465e-06, "loss": 0.4631, "step": 2910 }, { "epoch": 0.7198318496538081, "grad_norm": 0.7978691069618967, "learning_rate": 4.8348971293825245e-06, "loss": 0.4237, "step": 2911 }, { "epoch": 0.7200791295746786, "grad_norm": 0.8174621419121835, "learning_rate": 4.834780950886973e-06, "loss": 0.4376, "step": 2912 }, { "epoch": 0.7203264094955489, "grad_norm": 0.8309583432333282, "learning_rate": 4.834664732926773e-06, "loss": 0.4429, "step": 2913 }, { "epoch": 0.7205736894164194, "grad_norm": 0.8000154938904277, "learning_rate": 4.8345484755038895e-06, "loss": 0.4185, "step": 2914 }, { "epoch": 0.7208209693372898, "grad_norm": 0.7947941622328786, "learning_rate": 4.834432178620288e-06, "loss": 0.414, "step": 2915 }, { "epoch": 0.7210682492581603, "grad_norm": 0.7923969199565712, "learning_rate": 4.834315842277934e-06, "loss": 0.4357, "step": 2916 }, { "epoch": 0.7213155291790306, "grad_norm": 0.7960039620189656, "learning_rate": 4.834199466478793e-06, "loss": 0.438, "step": 2917 }, { "epoch": 0.7215628090999011, "grad_norm": 0.8361359088575793, "learning_rate": 4.8340830512248335e-06, "loss": 0.3967, "step": 2918 }, { "epoch": 0.7218100890207715, "grad_norm": 0.8108901771349343, "learning_rate": 4.833966596518023e-06, "loss": 0.4363, "step": 2919 }, { "epoch": 0.722057368941642, "grad_norm": 0.8549440580224905, "learning_rate": 4.833850102360329e-06, "loss": 0.41, "step": 2920 }, { "epoch": 0.7223046488625123, "grad_norm": 0.8003186776319219, "learning_rate": 4.833733568753721e-06, "loss": 0.4216, "step": 2921 }, { "epoch": 0.7225519287833828, "grad_norm": 0.8035872812352234, "learning_rate": 4.83361699570017e-06, "loss": 0.4182, "step": 2922 }, { "epoch": 0.7227992087042532, "grad_norm": 0.8297400236558385, "learning_rate": 4.8335003832016444e-06, "loss": 0.4316, "step": 2923 }, { "epoch": 0.7230464886251237, "grad_norm": 0.8790681235417003, "learning_rate": 4.833383731260118e-06, "loss": 0.4063, "step": 2924 }, { "epoch": 0.723293768545994, "grad_norm": 0.8268063083338992, "learning_rate": 4.833267039877559e-06, "loss": 0.3856, "step": 2925 }, { "epoch": 0.7235410484668645, "grad_norm": 0.8406419166486392, "learning_rate": 4.833150309055942e-06, "loss": 0.4236, "step": 2926 }, { "epoch": 0.7237883283877349, "grad_norm": 0.7926519696834897, "learning_rate": 4.833033538797241e-06, "loss": 0.4174, "step": 2927 }, { "epoch": 0.7240356083086054, "grad_norm": 0.8187942841023771, "learning_rate": 4.832916729103427e-06, "loss": 0.4387, "step": 2928 }, { "epoch": 0.7242828882294757, "grad_norm": 0.8221501951469953, "learning_rate": 4.832799879976476e-06, "loss": 0.3999, "step": 2929 }, { "epoch": 0.7245301681503462, "grad_norm": 0.7934800047804927, "learning_rate": 4.832682991418364e-06, "loss": 0.4058, "step": 2930 }, { "epoch": 0.7247774480712166, "grad_norm": 0.8330724261819362, "learning_rate": 4.832566063431066e-06, "loss": 0.4321, "step": 2931 }, { "epoch": 0.7250247279920871, "grad_norm": 0.8228536121668583, "learning_rate": 4.832449096016557e-06, "loss": 0.4173, "step": 2932 }, { "epoch": 0.7252720079129574, "grad_norm": 0.837175853076657, "learning_rate": 4.8323320891768166e-06, "loss": 0.4395, "step": 2933 }, { "epoch": 0.7255192878338279, "grad_norm": 0.8578406462579389, "learning_rate": 4.83221504291382e-06, "loss": 0.4025, "step": 2934 }, { "epoch": 0.7257665677546983, "grad_norm": 0.8276438543678113, "learning_rate": 4.832097957229548e-06, "loss": 0.4128, "step": 2935 }, { "epoch": 0.7260138476755688, "grad_norm": 0.7945420798891535, "learning_rate": 4.831980832125978e-06, "loss": 0.4318, "step": 2936 }, { "epoch": 0.7262611275964391, "grad_norm": 0.8624450431948255, "learning_rate": 4.8318636676050906e-06, "loss": 0.4077, "step": 2937 }, { "epoch": 0.7265084075173096, "grad_norm": 0.7943945249796411, "learning_rate": 4.831746463668866e-06, "loss": 0.4147, "step": 2938 }, { "epoch": 0.72675568743818, "grad_norm": 0.7865076359278911, "learning_rate": 4.831629220319285e-06, "loss": 0.4437, "step": 2939 }, { "epoch": 0.7270029673590505, "grad_norm": 0.8245008586056316, "learning_rate": 4.83151193755833e-06, "loss": 0.4172, "step": 2940 }, { "epoch": 0.7272502472799208, "grad_norm": 0.8682372634570225, "learning_rate": 4.831394615387983e-06, "loss": 0.423, "step": 2941 }, { "epoch": 0.7274975272007913, "grad_norm": 0.8425489215333106, "learning_rate": 4.831277253810227e-06, "loss": 0.4299, "step": 2942 }, { "epoch": 0.7277448071216617, "grad_norm": 0.8057793479253231, "learning_rate": 4.831159852827046e-06, "loss": 0.4122, "step": 2943 }, { "epoch": 0.7279920870425322, "grad_norm": 0.7887003730131898, "learning_rate": 4.831042412440424e-06, "loss": 0.4345, "step": 2944 }, { "epoch": 0.7282393669634025, "grad_norm": 0.8003641551533328, "learning_rate": 4.8309249326523475e-06, "loss": 0.4256, "step": 2945 }, { "epoch": 0.728486646884273, "grad_norm": 0.8050096172563999, "learning_rate": 4.8308074134648e-06, "loss": 0.4363, "step": 2946 }, { "epoch": 0.7287339268051434, "grad_norm": 0.8501449867311948, "learning_rate": 4.83068985487977e-06, "loss": 0.4167, "step": 2947 }, { "epoch": 0.7289812067260139, "grad_norm": 0.7875128051256901, "learning_rate": 4.830572256899243e-06, "loss": 0.4462, "step": 2948 }, { "epoch": 0.7292284866468842, "grad_norm": 0.8374013219223678, "learning_rate": 4.830454619525207e-06, "loss": 0.4518, "step": 2949 }, { "epoch": 0.7294757665677547, "grad_norm": 0.817008859188431, "learning_rate": 4.830336942759651e-06, "loss": 0.3903, "step": 2950 }, { "epoch": 0.7297230464886251, "grad_norm": 0.8014171810642367, "learning_rate": 4.830219226604565e-06, "loss": 0.4285, "step": 2951 }, { "epoch": 0.7299703264094956, "grad_norm": 0.7856834039638679, "learning_rate": 4.830101471061936e-06, "loss": 0.3934, "step": 2952 }, { "epoch": 0.7302176063303659, "grad_norm": 0.7748087866010082, "learning_rate": 4.829983676133758e-06, "loss": 0.4545, "step": 2953 }, { "epoch": 0.7304648862512364, "grad_norm": 0.7989513159267758, "learning_rate": 4.829865841822019e-06, "loss": 0.4253, "step": 2954 }, { "epoch": 0.7307121661721068, "grad_norm": 0.7764358392610177, "learning_rate": 4.829747968128712e-06, "loss": 0.4249, "step": 2955 }, { "epoch": 0.7309594460929772, "grad_norm": 0.8279849821092882, "learning_rate": 4.829630055055829e-06, "loss": 0.4512, "step": 2956 }, { "epoch": 0.7312067260138477, "grad_norm": 0.8260366275350098, "learning_rate": 4.8295121026053644e-06, "loss": 0.4295, "step": 2957 }, { "epoch": 0.7314540059347181, "grad_norm": 0.8117838736807556, "learning_rate": 4.82939411077931e-06, "loss": 0.4171, "step": 2958 }, { "epoch": 0.7317012858555886, "grad_norm": 0.7806874920384149, "learning_rate": 4.829276079579662e-06, "loss": 0.4076, "step": 2959 }, { "epoch": 0.731948565776459, "grad_norm": 0.8156644012373938, "learning_rate": 4.829158009008414e-06, "loss": 0.4367, "step": 2960 }, { "epoch": 0.7321958456973294, "grad_norm": 0.8361228747154713, "learning_rate": 4.829039899067563e-06, "loss": 0.4495, "step": 2961 }, { "epoch": 0.7324431256181998, "grad_norm": 0.7970591862132284, "learning_rate": 4.828921749759104e-06, "loss": 0.4057, "step": 2962 }, { "epoch": 0.7326904055390703, "grad_norm": 0.799202037507185, "learning_rate": 4.828803561085034e-06, "loss": 0.4048, "step": 2963 }, { "epoch": 0.7329376854599406, "grad_norm": 0.7759952798464693, "learning_rate": 4.8286853330473535e-06, "loss": 0.4005, "step": 2964 }, { "epoch": 0.7331849653808111, "grad_norm": 0.8369606917142974, "learning_rate": 4.828567065648057e-06, "loss": 0.4266, "step": 2965 }, { "epoch": 0.7334322453016815, "grad_norm": 0.8127543973780628, "learning_rate": 4.828448758889147e-06, "loss": 0.3879, "step": 2966 }, { "epoch": 0.733679525222552, "grad_norm": 0.834168778505788, "learning_rate": 4.828330412772622e-06, "loss": 0.4255, "step": 2967 }, { "epoch": 0.7339268051434223, "grad_norm": 0.7851856770790845, "learning_rate": 4.828212027300481e-06, "loss": 0.4608, "step": 2968 }, { "epoch": 0.7341740850642928, "grad_norm": 0.7632714078688472, "learning_rate": 4.828093602474727e-06, "loss": 0.4416, "step": 2969 }, { "epoch": 0.7344213649851632, "grad_norm": 0.8347666891173107, "learning_rate": 4.827975138297361e-06, "loss": 0.4122, "step": 2970 }, { "epoch": 0.7346686449060337, "grad_norm": 0.8067829929634867, "learning_rate": 4.827856634770385e-06, "loss": 0.4411, "step": 2971 }, { "epoch": 0.734915924826904, "grad_norm": 0.8017276159688024, "learning_rate": 4.8277380918958015e-06, "loss": 0.3973, "step": 2972 }, { "epoch": 0.7351632047477745, "grad_norm": 0.812076660288236, "learning_rate": 4.827619509675616e-06, "loss": 0.4197, "step": 2973 }, { "epoch": 0.7354104846686449, "grad_norm": 0.7788714236767956, "learning_rate": 4.827500888111833e-06, "loss": 0.4337, "step": 2974 }, { "epoch": 0.7356577645895154, "grad_norm": 0.8090103487550155, "learning_rate": 4.8273822272064555e-06, "loss": 0.419, "step": 2975 }, { "epoch": 0.7359050445103857, "grad_norm": 0.8265966587257293, "learning_rate": 4.8272635269614895e-06, "loss": 0.4082, "step": 2976 }, { "epoch": 0.7361523244312562, "grad_norm": 0.7846445990321484, "learning_rate": 4.827144787378944e-06, "loss": 0.437, "step": 2977 }, { "epoch": 0.7363996043521266, "grad_norm": 0.8312527404300895, "learning_rate": 4.827026008460823e-06, "loss": 0.4016, "step": 2978 }, { "epoch": 0.7366468842729971, "grad_norm": 0.7910678678346987, "learning_rate": 4.826907190209136e-06, "loss": 0.427, "step": 2979 }, { "epoch": 0.7368941641938674, "grad_norm": 0.8081223109051165, "learning_rate": 4.82678833262589e-06, "loss": 0.3933, "step": 2980 }, { "epoch": 0.7371414441147379, "grad_norm": 0.8047433161446556, "learning_rate": 4.826669435713096e-06, "loss": 0.4079, "step": 2981 }, { "epoch": 0.7373887240356083, "grad_norm": 0.7999842935873025, "learning_rate": 4.826550499472761e-06, "loss": 0.4615, "step": 2982 }, { "epoch": 0.7376360039564788, "grad_norm": 0.792716091172535, "learning_rate": 4.826431523906898e-06, "loss": 0.391, "step": 2983 }, { "epoch": 0.7378832838773491, "grad_norm": 0.8303626433497433, "learning_rate": 4.826312509017517e-06, "loss": 0.404, "step": 2984 }, { "epoch": 0.7381305637982196, "grad_norm": 0.8374450240264267, "learning_rate": 4.826193454806629e-06, "loss": 0.4143, "step": 2985 }, { "epoch": 0.73837784371909, "grad_norm": 0.7785793171820266, "learning_rate": 4.826074361276247e-06, "loss": 0.4179, "step": 2986 }, { "epoch": 0.7386251236399605, "grad_norm": 0.7920577899878737, "learning_rate": 4.825955228428385e-06, "loss": 0.4276, "step": 2987 }, { "epoch": 0.7388724035608308, "grad_norm": 0.8200588077966792, "learning_rate": 4.825836056265055e-06, "loss": 0.4317, "step": 2988 }, { "epoch": 0.7391196834817013, "grad_norm": 0.8116664468428142, "learning_rate": 4.8257168447882725e-06, "loss": 0.4042, "step": 2989 }, { "epoch": 0.7393669634025717, "grad_norm": 0.776120292987068, "learning_rate": 4.825597594000052e-06, "loss": 0.4418, "step": 2990 }, { "epoch": 0.7396142433234422, "grad_norm": 0.8081847545003181, "learning_rate": 4.825478303902409e-06, "loss": 0.412, "step": 2991 }, { "epoch": 0.7398615232443125, "grad_norm": 0.7950133188013835, "learning_rate": 4.825358974497361e-06, "loss": 0.4246, "step": 2992 }, { "epoch": 0.740108803165183, "grad_norm": 0.7993263391124805, "learning_rate": 4.825239605786924e-06, "loss": 0.4061, "step": 2993 }, { "epoch": 0.7403560830860534, "grad_norm": 0.7997981436722923, "learning_rate": 4.825120197773114e-06, "loss": 0.4563, "step": 2994 }, { "epoch": 0.7406033630069239, "grad_norm": 0.7705130115086049, "learning_rate": 4.825000750457953e-06, "loss": 0.4198, "step": 2995 }, { "epoch": 0.7408506429277942, "grad_norm": 0.8002552475075952, "learning_rate": 4.824881263843458e-06, "loss": 0.4432, "step": 2996 }, { "epoch": 0.7410979228486647, "grad_norm": 0.8018310738296338, "learning_rate": 4.824761737931649e-06, "loss": 0.4402, "step": 2997 }, { "epoch": 0.7413452027695351, "grad_norm": 0.8593903578168341, "learning_rate": 4.8246421727245465e-06, "loss": 0.4054, "step": 2998 }, { "epoch": 0.7415924826904056, "grad_norm": 0.7889330438300733, "learning_rate": 4.8245225682241705e-06, "loss": 0.4422, "step": 2999 }, { "epoch": 0.7418397626112759, "grad_norm": 0.7938169220464205, "learning_rate": 4.824402924432543e-06, "loss": 0.441, "step": 3000 }, { "epoch": 0.7420870425321464, "grad_norm": 0.7844611533247873, "learning_rate": 4.8242832413516874e-06, "loss": 0.4336, "step": 3001 }, { "epoch": 0.7423343224530168, "grad_norm": 0.7749679668683314, "learning_rate": 4.824163518983627e-06, "loss": 0.4142, "step": 3002 }, { "epoch": 0.7425816023738873, "grad_norm": 0.8197901469393682, "learning_rate": 4.824043757330384e-06, "loss": 0.4245, "step": 3003 }, { "epoch": 0.7428288822947576, "grad_norm": 0.8261013183178937, "learning_rate": 4.823923956393982e-06, "loss": 0.4037, "step": 3004 }, { "epoch": 0.7430761622156281, "grad_norm": 0.7558923203936677, "learning_rate": 4.8238041161764475e-06, "loss": 0.4175, "step": 3005 }, { "epoch": 0.7433234421364985, "grad_norm": 0.8320092925247135, "learning_rate": 4.823684236679807e-06, "loss": 0.4256, "step": 3006 }, { "epoch": 0.743570722057369, "grad_norm": 0.8217439102346995, "learning_rate": 4.823564317906085e-06, "loss": 0.427, "step": 3007 }, { "epoch": 0.7438180019782393, "grad_norm": 0.8199819751186989, "learning_rate": 4.823444359857308e-06, "loss": 0.382, "step": 3008 }, { "epoch": 0.7440652818991098, "grad_norm": 0.817959773474465, "learning_rate": 4.823324362535506e-06, "loss": 0.4329, "step": 3009 }, { "epoch": 0.7443125618199802, "grad_norm": 0.8187584290387997, "learning_rate": 4.823204325942706e-06, "loss": 0.4485, "step": 3010 }, { "epoch": 0.7445598417408507, "grad_norm": 0.7898625944514868, "learning_rate": 4.823084250080937e-06, "loss": 0.4069, "step": 3011 }, { "epoch": 0.744807121661721, "grad_norm": 0.7940276326242832, "learning_rate": 4.822964134952229e-06, "loss": 0.4197, "step": 3012 }, { "epoch": 0.7450544015825915, "grad_norm": 0.7927876642320121, "learning_rate": 4.822843980558611e-06, "loss": 0.4299, "step": 3013 }, { "epoch": 0.7453016815034619, "grad_norm": 0.7994978248822859, "learning_rate": 4.8227237869021165e-06, "loss": 0.4234, "step": 3014 }, { "epoch": 0.7455489614243324, "grad_norm": 0.7543327971185407, "learning_rate": 4.822603553984775e-06, "loss": 0.4427, "step": 3015 }, { "epoch": 0.7457962413452027, "grad_norm": 0.8259426315239943, "learning_rate": 4.822483281808619e-06, "loss": 0.4323, "step": 3016 }, { "epoch": 0.7460435212660732, "grad_norm": 0.7824668111245527, "learning_rate": 4.822362970375682e-06, "loss": 0.4243, "step": 3017 }, { "epoch": 0.7462908011869436, "grad_norm": 0.8090132078040282, "learning_rate": 4.822242619687997e-06, "loss": 0.4681, "step": 3018 }, { "epoch": 0.746538081107814, "grad_norm": 0.7841620643031427, "learning_rate": 4.8221222297476e-06, "loss": 0.4296, "step": 3019 }, { "epoch": 0.7467853610286844, "grad_norm": 0.7995190592508252, "learning_rate": 4.822001800556523e-06, "loss": 0.4018, "step": 3020 }, { "epoch": 0.7470326409495549, "grad_norm": 0.7860214365572894, "learning_rate": 4.821881332116804e-06, "loss": 0.4409, "step": 3021 }, { "epoch": 0.7472799208704253, "grad_norm": 0.8040174870471135, "learning_rate": 4.8217608244304794e-06, "loss": 0.4187, "step": 3022 }, { "epoch": 0.7475272007912958, "grad_norm": 0.8042489812626473, "learning_rate": 4.821640277499584e-06, "loss": 0.4127, "step": 3023 }, { "epoch": 0.7477744807121661, "grad_norm": 0.834905466029092, "learning_rate": 4.8215196913261575e-06, "loss": 0.4198, "step": 3024 }, { "epoch": 0.7480217606330366, "grad_norm": 0.7667416755370126, "learning_rate": 4.821399065912237e-06, "loss": 0.4247, "step": 3025 }, { "epoch": 0.748269040553907, "grad_norm": 0.7910830801831669, "learning_rate": 4.821278401259861e-06, "loss": 0.4077, "step": 3026 }, { "epoch": 0.7485163204747775, "grad_norm": 0.7382810269836959, "learning_rate": 4.8211576973710714e-06, "loss": 0.4525, "step": 3027 }, { "epoch": 0.7487636003956478, "grad_norm": 0.8121105617204082, "learning_rate": 4.8210369542479055e-06, "loss": 0.4052, "step": 3028 }, { "epoch": 0.7490108803165183, "grad_norm": 0.8058548126440102, "learning_rate": 4.820916171892407e-06, "loss": 0.4351, "step": 3029 }, { "epoch": 0.7492581602373887, "grad_norm": 0.8577264933262061, "learning_rate": 4.820795350306615e-06, "loss": 0.3981, "step": 3030 }, { "epoch": 0.7495054401582592, "grad_norm": 0.7453444539845234, "learning_rate": 4.820674489492573e-06, "loss": 0.4223, "step": 3031 }, { "epoch": 0.7497527200791295, "grad_norm": 0.8466748330823123, "learning_rate": 4.820553589452323e-06, "loss": 0.3995, "step": 3032 }, { "epoch": 0.75, "grad_norm": 0.7790821202863654, "learning_rate": 4.820432650187911e-06, "loss": 0.404, "step": 3033 }, { "epoch": 0.7502472799208705, "grad_norm": 0.8153973710234346, "learning_rate": 4.820311671701379e-06, "loss": 0.4276, "step": 3034 }, { "epoch": 0.7504945598417408, "grad_norm": 0.7753722323128266, "learning_rate": 4.8201906539947715e-06, "loss": 0.4306, "step": 3035 }, { "epoch": 0.7507418397626113, "grad_norm": 0.7744896340611823, "learning_rate": 4.8200695970701356e-06, "loss": 0.4356, "step": 3036 }, { "epoch": 0.7509891196834817, "grad_norm": 0.7690189042676627, "learning_rate": 4.8199485009295166e-06, "loss": 0.44, "step": 3037 }, { "epoch": 0.7512363996043522, "grad_norm": 0.8033947179211088, "learning_rate": 4.819827365574963e-06, "loss": 0.4139, "step": 3038 }, { "epoch": 0.7514836795252225, "grad_norm": 0.7576932851777864, "learning_rate": 4.819706191008519e-06, "loss": 0.4081, "step": 3039 }, { "epoch": 0.751730959446093, "grad_norm": 0.7967928320460952, "learning_rate": 4.819584977232236e-06, "loss": 0.4257, "step": 3040 }, { "epoch": 0.7519782393669634, "grad_norm": 0.8392913645000412, "learning_rate": 4.8194637242481615e-06, "loss": 0.4197, "step": 3041 }, { "epoch": 0.7522255192878339, "grad_norm": 0.7730924432116476, "learning_rate": 4.819342432058345e-06, "loss": 0.4321, "step": 3042 }, { "epoch": 0.7524727992087042, "grad_norm": 0.8068109563132863, "learning_rate": 4.819221100664836e-06, "loss": 0.3994, "step": 3043 }, { "epoch": 0.7527200791295747, "grad_norm": 0.8260660558085546, "learning_rate": 4.819099730069688e-06, "loss": 0.4048, "step": 3044 }, { "epoch": 0.7529673590504451, "grad_norm": 0.770330180817139, "learning_rate": 4.8189783202749495e-06, "loss": 0.4362, "step": 3045 }, { "epoch": 0.7532146389713156, "grad_norm": 0.831087174442937, "learning_rate": 4.818856871282674e-06, "loss": 0.402, "step": 3046 }, { "epoch": 0.753461918892186, "grad_norm": 0.8055486831247699, "learning_rate": 4.818735383094915e-06, "loss": 0.4169, "step": 3047 }, { "epoch": 0.7537091988130564, "grad_norm": 0.8349348167930836, "learning_rate": 4.818613855713725e-06, "loss": 0.4063, "step": 3048 }, { "epoch": 0.7539564787339268, "grad_norm": 0.7832541815851745, "learning_rate": 4.818492289141159e-06, "loss": 0.42, "step": 3049 }, { "epoch": 0.7542037586547973, "grad_norm": 0.8058628554987075, "learning_rate": 4.818370683379271e-06, "loss": 0.4038, "step": 3050 }, { "epoch": 0.7544510385756676, "grad_norm": 0.7869318129819252, "learning_rate": 4.818249038430117e-06, "loss": 0.4022, "step": 3051 }, { "epoch": 0.7546983184965381, "grad_norm": 0.7429522741807416, "learning_rate": 4.818127354295752e-06, "loss": 0.433, "step": 3052 }, { "epoch": 0.7549455984174085, "grad_norm": 0.8058306975517127, "learning_rate": 4.818005630978235e-06, "loss": 0.4429, "step": 3053 }, { "epoch": 0.755192878338279, "grad_norm": 0.8114294962955277, "learning_rate": 4.817883868479622e-06, "loss": 0.4374, "step": 3054 }, { "epoch": 0.7554401582591493, "grad_norm": 0.7666783664854765, "learning_rate": 4.817762066801971e-06, "loss": 0.4114, "step": 3055 }, { "epoch": 0.7556874381800198, "grad_norm": 0.8336654948850036, "learning_rate": 4.817640225947341e-06, "loss": 0.3976, "step": 3056 }, { "epoch": 0.7559347181008902, "grad_norm": 0.8036876409711966, "learning_rate": 4.817518345917792e-06, "loss": 0.4335, "step": 3057 }, { "epoch": 0.7561819980217607, "grad_norm": 0.8720285437742171, "learning_rate": 4.817396426715384e-06, "loss": 0.4038, "step": 3058 }, { "epoch": 0.756429277942631, "grad_norm": 0.8182607660403057, "learning_rate": 4.8172744683421765e-06, "loss": 0.427, "step": 3059 }, { "epoch": 0.7566765578635015, "grad_norm": 0.8027583929885022, "learning_rate": 4.8171524708002335e-06, "loss": 0.4123, "step": 3060 }, { "epoch": 0.7569238377843719, "grad_norm": 0.8232173834519446, "learning_rate": 4.817030434091615e-06, "loss": 0.413, "step": 3061 }, { "epoch": 0.7571711177052424, "grad_norm": 0.805678272221054, "learning_rate": 4.816908358218384e-06, "loss": 0.4007, "step": 3062 }, { "epoch": 0.7574183976261127, "grad_norm": 0.7726560827200997, "learning_rate": 4.8167862431826054e-06, "loss": 0.3941, "step": 3063 }, { "epoch": 0.7576656775469832, "grad_norm": 0.8068589530095359, "learning_rate": 4.816664088986342e-06, "loss": 0.4074, "step": 3064 }, { "epoch": 0.7579129574678536, "grad_norm": 0.8193830699266273, "learning_rate": 4.816541895631659e-06, "loss": 0.4048, "step": 3065 }, { "epoch": 0.7581602373887241, "grad_norm": 0.802439000408714, "learning_rate": 4.816419663120621e-06, "loss": 0.4228, "step": 3066 }, { "epoch": 0.7584075173095944, "grad_norm": 0.7831520315560794, "learning_rate": 4.816297391455296e-06, "loss": 0.4416, "step": 3067 }, { "epoch": 0.7586547972304649, "grad_norm": 0.8013772553913207, "learning_rate": 4.816175080637748e-06, "loss": 0.4063, "step": 3068 }, { "epoch": 0.7589020771513353, "grad_norm": 0.8069423659312832, "learning_rate": 4.816052730670047e-06, "loss": 0.4101, "step": 3069 }, { "epoch": 0.7591493570722058, "grad_norm": 0.8042760358148239, "learning_rate": 4.815930341554259e-06, "loss": 0.437, "step": 3070 }, { "epoch": 0.7593966369930761, "grad_norm": 0.8528508748773457, "learning_rate": 4.815807913292454e-06, "loss": 0.4203, "step": 3071 }, { "epoch": 0.7596439169139466, "grad_norm": 0.7788038832278472, "learning_rate": 4.815685445886702e-06, "loss": 0.4122, "step": 3072 }, { "epoch": 0.759891196834817, "grad_norm": 0.7895557689997091, "learning_rate": 4.815562939339072e-06, "loss": 0.439, "step": 3073 }, { "epoch": 0.7601384767556875, "grad_norm": 0.7665727269321336, "learning_rate": 4.815440393651635e-06, "loss": 0.4272, "step": 3074 }, { "epoch": 0.7603857566765578, "grad_norm": 0.8193909964159897, "learning_rate": 4.815317808826462e-06, "loss": 0.3975, "step": 3075 }, { "epoch": 0.7606330365974283, "grad_norm": 0.7904789376121926, "learning_rate": 4.815195184865625e-06, "loss": 0.4269, "step": 3076 }, { "epoch": 0.7608803165182987, "grad_norm": 0.8179670797014685, "learning_rate": 4.815072521771197e-06, "loss": 0.3968, "step": 3077 }, { "epoch": 0.7611275964391692, "grad_norm": 0.7733754206107383, "learning_rate": 4.814949819545252e-06, "loss": 0.4223, "step": 3078 }, { "epoch": 0.7613748763600395, "grad_norm": 0.7996203606510405, "learning_rate": 4.8148270781898635e-06, "loss": 0.4292, "step": 3079 }, { "epoch": 0.76162215628091, "grad_norm": 0.8190133067708448, "learning_rate": 4.814704297707105e-06, "loss": 0.4145, "step": 3080 }, { "epoch": 0.7618694362017804, "grad_norm": 0.7937567763174175, "learning_rate": 4.814581478099054e-06, "loss": 0.4103, "step": 3081 }, { "epoch": 0.7621167161226509, "grad_norm": 0.7713599014763641, "learning_rate": 4.814458619367785e-06, "loss": 0.4263, "step": 3082 }, { "epoch": 0.7623639960435212, "grad_norm": 0.819891250654944, "learning_rate": 4.814335721515376e-06, "loss": 0.4097, "step": 3083 }, { "epoch": 0.7626112759643917, "grad_norm": 0.7885235017605721, "learning_rate": 4.814212784543902e-06, "loss": 0.449, "step": 3084 }, { "epoch": 0.7628585558852621, "grad_norm": 0.7893477319871918, "learning_rate": 4.814089808455444e-06, "loss": 0.4257, "step": 3085 }, { "epoch": 0.7631058358061326, "grad_norm": 0.7928417601407314, "learning_rate": 4.813966793252079e-06, "loss": 0.4282, "step": 3086 }, { "epoch": 0.7633531157270029, "grad_norm": 0.7930496478110192, "learning_rate": 4.813843738935886e-06, "loss": 0.4129, "step": 3087 }, { "epoch": 0.7636003956478734, "grad_norm": 0.7492915045783554, "learning_rate": 4.813720645508946e-06, "loss": 0.424, "step": 3088 }, { "epoch": 0.7638476755687438, "grad_norm": 0.7939923318098281, "learning_rate": 4.8135975129733385e-06, "loss": 0.4203, "step": 3089 }, { "epoch": 0.7640949554896143, "grad_norm": 0.8054567000276287, "learning_rate": 4.813474341331145e-06, "loss": 0.4082, "step": 3090 }, { "epoch": 0.7643422354104846, "grad_norm": 0.7615495246999505, "learning_rate": 4.813351130584448e-06, "loss": 0.4363, "step": 3091 }, { "epoch": 0.7645895153313551, "grad_norm": 0.7487931230093278, "learning_rate": 4.813227880735331e-06, "loss": 0.4208, "step": 3092 }, { "epoch": 0.7648367952522255, "grad_norm": 0.7989302908264841, "learning_rate": 4.8131045917858754e-06, "loss": 0.4568, "step": 3093 }, { "epoch": 0.765084075173096, "grad_norm": 0.8150902813166511, "learning_rate": 4.812981263738165e-06, "loss": 0.4299, "step": 3094 }, { "epoch": 0.7653313550939663, "grad_norm": 0.795813383219662, "learning_rate": 4.8128578965942875e-06, "loss": 0.444, "step": 3095 }, { "epoch": 0.7655786350148368, "grad_norm": 0.768998047077691, "learning_rate": 4.812734490356326e-06, "loss": 0.411, "step": 3096 }, { "epoch": 0.7658259149357072, "grad_norm": 0.7876009500354682, "learning_rate": 4.812611045026365e-06, "loss": 0.4065, "step": 3097 }, { "epoch": 0.7660731948565777, "grad_norm": 0.8228181027521226, "learning_rate": 4.812487560606493e-06, "loss": 0.4257, "step": 3098 }, { "epoch": 0.766320474777448, "grad_norm": 0.7899654098672674, "learning_rate": 4.812364037098798e-06, "loss": 0.4281, "step": 3099 }, { "epoch": 0.7665677546983185, "grad_norm": 0.8263770103922008, "learning_rate": 4.812240474505366e-06, "loss": 0.4172, "step": 3100 }, { "epoch": 0.7668150346191889, "grad_norm": 0.8223565750218941, "learning_rate": 4.812116872828285e-06, "loss": 0.4094, "step": 3101 }, { "epoch": 0.7670623145400594, "grad_norm": 0.7882256242313526, "learning_rate": 4.811993232069647e-06, "loss": 0.4405, "step": 3102 }, { "epoch": 0.7673095944609297, "grad_norm": 0.7673647038001873, "learning_rate": 4.81186955223154e-06, "loss": 0.4043, "step": 3103 }, { "epoch": 0.7675568743818002, "grad_norm": 0.7748167909851514, "learning_rate": 4.811745833316056e-06, "loss": 0.4149, "step": 3104 }, { "epoch": 0.7678041543026706, "grad_norm": 0.7804500490847732, "learning_rate": 4.811622075325284e-06, "loss": 0.4149, "step": 3105 }, { "epoch": 0.768051434223541, "grad_norm": 0.7692586391443403, "learning_rate": 4.811498278261318e-06, "loss": 0.4366, "step": 3106 }, { "epoch": 0.7682987141444114, "grad_norm": 0.7842598306966564, "learning_rate": 4.811374442126248e-06, "loss": 0.4212, "step": 3107 }, { "epoch": 0.7685459940652819, "grad_norm": 0.7708815469931595, "learning_rate": 4.8112505669221695e-06, "loss": 0.4464, "step": 3108 }, { "epoch": 0.7687932739861523, "grad_norm": 0.8253602424248783, "learning_rate": 4.811126652651177e-06, "loss": 0.4068, "step": 3109 }, { "epoch": 0.7690405539070228, "grad_norm": 0.8309488561255076, "learning_rate": 4.811002699315362e-06, "loss": 0.4139, "step": 3110 }, { "epoch": 0.7692878338278932, "grad_norm": 0.8085658208306157, "learning_rate": 4.810878706916823e-06, "loss": 0.4377, "step": 3111 }, { "epoch": 0.7695351137487636, "grad_norm": 0.7741149699878886, "learning_rate": 4.8107546754576525e-06, "loss": 0.4274, "step": 3112 }, { "epoch": 0.7697823936696341, "grad_norm": 0.7663408982316708, "learning_rate": 4.81063060493995e-06, "loss": 0.4153, "step": 3113 }, { "epoch": 0.7700296735905044, "grad_norm": 0.8112801558552337, "learning_rate": 4.81050649536581e-06, "loss": 0.4095, "step": 3114 }, { "epoch": 0.7702769535113749, "grad_norm": 0.775543508476325, "learning_rate": 4.810382346737333e-06, "loss": 0.4495, "step": 3115 }, { "epoch": 0.7705242334322453, "grad_norm": 0.7901145505532202, "learning_rate": 4.8102581590566156e-06, "loss": 0.4322, "step": 3116 }, { "epoch": 0.7707715133531158, "grad_norm": 0.7821623933520957, "learning_rate": 4.810133932325758e-06, "loss": 0.4069, "step": 3117 }, { "epoch": 0.7710187932739861, "grad_norm": 0.8265570200176376, "learning_rate": 4.810009666546858e-06, "loss": 0.402, "step": 3118 }, { "epoch": 0.7712660731948566, "grad_norm": 0.8154505625826904, "learning_rate": 4.8098853617220186e-06, "loss": 0.4606, "step": 3119 }, { "epoch": 0.771513353115727, "grad_norm": 0.8235703743873171, "learning_rate": 4.8097610178533396e-06, "loss": 0.4021, "step": 3120 }, { "epoch": 0.7717606330365975, "grad_norm": 0.7884753518627691, "learning_rate": 4.809636634942923e-06, "loss": 0.4118, "step": 3121 }, { "epoch": 0.7720079129574678, "grad_norm": 0.8084593721515126, "learning_rate": 4.809512212992872e-06, "loss": 0.431, "step": 3122 }, { "epoch": 0.7722551928783383, "grad_norm": 0.779666053910998, "learning_rate": 4.809387752005288e-06, "loss": 0.4187, "step": 3123 }, { "epoch": 0.7725024727992087, "grad_norm": 0.82077016879112, "learning_rate": 4.809263251982276e-06, "loss": 0.4131, "step": 3124 }, { "epoch": 0.7727497527200792, "grad_norm": 0.8032424551504816, "learning_rate": 4.80913871292594e-06, "loss": 0.3919, "step": 3125 }, { "epoch": 0.7729970326409495, "grad_norm": 0.7832784986154248, "learning_rate": 4.8090141348383854e-06, "loss": 0.4286, "step": 3126 }, { "epoch": 0.77324431256182, "grad_norm": 0.8535658999730766, "learning_rate": 4.808889517721718e-06, "loss": 0.4152, "step": 3127 }, { "epoch": 0.7734915924826904, "grad_norm": 0.8068467344043826, "learning_rate": 4.808764861578043e-06, "loss": 0.4368, "step": 3128 }, { "epoch": 0.7737388724035609, "grad_norm": 0.8411797207814421, "learning_rate": 4.808640166409469e-06, "loss": 0.4149, "step": 3129 }, { "epoch": 0.7739861523244312, "grad_norm": 0.797745138292174, "learning_rate": 4.808515432218102e-06, "loss": 0.4046, "step": 3130 }, { "epoch": 0.7742334322453017, "grad_norm": 0.7810552927361032, "learning_rate": 4.808390659006053e-06, "loss": 0.4298, "step": 3131 }, { "epoch": 0.7744807121661721, "grad_norm": 0.7977546221024441, "learning_rate": 4.808265846775429e-06, "loss": 0.4287, "step": 3132 }, { "epoch": 0.7747279920870426, "grad_norm": 0.7827566413934264, "learning_rate": 4.8081409955283405e-06, "loss": 0.4265, "step": 3133 }, { "epoch": 0.7749752720079129, "grad_norm": 0.7900617509765616, "learning_rate": 4.808016105266897e-06, "loss": 0.4154, "step": 3134 }, { "epoch": 0.7752225519287834, "grad_norm": 0.7925920557238082, "learning_rate": 4.80789117599321e-06, "loss": 0.4282, "step": 3135 }, { "epoch": 0.7754698318496538, "grad_norm": 0.8133811417479949, "learning_rate": 4.807766207709392e-06, "loss": 0.4471, "step": 3136 }, { "epoch": 0.7757171117705243, "grad_norm": 0.7828796782998949, "learning_rate": 4.807641200417554e-06, "loss": 0.4132, "step": 3137 }, { "epoch": 0.7759643916913946, "grad_norm": 0.8190662126335538, "learning_rate": 4.807516154119809e-06, "loss": 0.4192, "step": 3138 }, { "epoch": 0.7762116716122651, "grad_norm": 0.7894618621921312, "learning_rate": 4.807391068818272e-06, "loss": 0.4287, "step": 3139 }, { "epoch": 0.7764589515331355, "grad_norm": 0.7990613399601455, "learning_rate": 4.807265944515056e-06, "loss": 0.4185, "step": 3140 }, { "epoch": 0.776706231454006, "grad_norm": 0.7940117259476284, "learning_rate": 4.807140781212277e-06, "loss": 0.4296, "step": 3141 }, { "epoch": 0.7769535113748763, "grad_norm": 0.8090210021535224, "learning_rate": 4.80701557891205e-06, "loss": 0.4166, "step": 3142 }, { "epoch": 0.7772007912957468, "grad_norm": 0.798970265827542, "learning_rate": 4.806890337616491e-06, "loss": 0.4122, "step": 3143 }, { "epoch": 0.7774480712166172, "grad_norm": 0.8108410945027389, "learning_rate": 4.806765057327718e-06, "loss": 0.4216, "step": 3144 }, { "epoch": 0.7776953511374877, "grad_norm": 0.7695340798568441, "learning_rate": 4.806639738047847e-06, "loss": 0.4478, "step": 3145 }, { "epoch": 0.777942631058358, "grad_norm": 0.8425189435774728, "learning_rate": 4.806514379778998e-06, "loss": 0.3915, "step": 3146 }, { "epoch": 0.7781899109792285, "grad_norm": 0.7909871752665145, "learning_rate": 4.806388982523289e-06, "loss": 0.4145, "step": 3147 }, { "epoch": 0.7784371909000989, "grad_norm": 0.8395535395280559, "learning_rate": 4.806263546282839e-06, "loss": 0.4229, "step": 3148 }, { "epoch": 0.7786844708209694, "grad_norm": 0.7946048118578783, "learning_rate": 4.806138071059769e-06, "loss": 0.416, "step": 3149 }, { "epoch": 0.7789317507418397, "grad_norm": 0.7696555887999623, "learning_rate": 4.806012556856201e-06, "loss": 0.3952, "step": 3150 }, { "epoch": 0.7791790306627102, "grad_norm": 0.8071199984857714, "learning_rate": 4.805887003674255e-06, "loss": 0.415, "step": 3151 }, { "epoch": 0.7794263105835806, "grad_norm": 0.7989656420229575, "learning_rate": 4.805761411516054e-06, "loss": 0.403, "step": 3152 }, { "epoch": 0.7796735905044511, "grad_norm": 0.8035327928385132, "learning_rate": 4.805635780383719e-06, "loss": 0.387, "step": 3153 }, { "epoch": 0.7799208704253214, "grad_norm": 0.8139828219701931, "learning_rate": 4.805510110279376e-06, "loss": 0.4157, "step": 3154 }, { "epoch": 0.7801681503461919, "grad_norm": 0.8595116770693756, "learning_rate": 4.805384401205147e-06, "loss": 0.42, "step": 3155 }, { "epoch": 0.7804154302670623, "grad_norm": 0.7638842723959627, "learning_rate": 4.80525865316316e-06, "loss": 0.4054, "step": 3156 }, { "epoch": 0.7806627101879328, "grad_norm": 0.7788059644498618, "learning_rate": 4.805132866155538e-06, "loss": 0.4065, "step": 3157 }, { "epoch": 0.7809099901088031, "grad_norm": 0.7917946328097877, "learning_rate": 4.805007040184407e-06, "loss": 0.4034, "step": 3158 }, { "epoch": 0.7811572700296736, "grad_norm": 0.794606043110883, "learning_rate": 4.804881175251895e-06, "loss": 0.4459, "step": 3159 }, { "epoch": 0.781404549950544, "grad_norm": 0.7855478903283287, "learning_rate": 4.804755271360129e-06, "loss": 0.4043, "step": 3160 }, { "epoch": 0.7816518298714145, "grad_norm": 0.8003629127364914, "learning_rate": 4.804629328511238e-06, "loss": 0.4148, "step": 3161 }, { "epoch": 0.7818991097922848, "grad_norm": 0.7983155545329887, "learning_rate": 4.804503346707349e-06, "loss": 0.4337, "step": 3162 }, { "epoch": 0.7821463897131553, "grad_norm": 0.8021511440132186, "learning_rate": 4.804377325950593e-06, "loss": 0.409, "step": 3163 }, { "epoch": 0.7823936696340257, "grad_norm": 0.760133073100139, "learning_rate": 4.804251266243099e-06, "loss": 0.423, "step": 3164 }, { "epoch": 0.7826409495548962, "grad_norm": 0.7848135629022405, "learning_rate": 4.8041251675869996e-06, "loss": 0.4158, "step": 3165 }, { "epoch": 0.7828882294757665, "grad_norm": 0.7925730082390904, "learning_rate": 4.803999029984423e-06, "loss": 0.4211, "step": 3166 }, { "epoch": 0.783135509396637, "grad_norm": 0.7764546739610668, "learning_rate": 4.803872853437506e-06, "loss": 0.4444, "step": 3167 }, { "epoch": 0.7833827893175074, "grad_norm": 0.8499268393363937, "learning_rate": 4.803746637948377e-06, "loss": 0.4121, "step": 3168 }, { "epoch": 0.7836300692383779, "grad_norm": 0.796267303570014, "learning_rate": 4.803620383519171e-06, "loss": 0.4579, "step": 3169 }, { "epoch": 0.7838773491592482, "grad_norm": 0.8586483960136988, "learning_rate": 4.803494090152022e-06, "loss": 0.4167, "step": 3170 }, { "epoch": 0.7841246290801187, "grad_norm": 0.7846703688927068, "learning_rate": 4.803367757849065e-06, "loss": 0.4256, "step": 3171 }, { "epoch": 0.7843719090009891, "grad_norm": 0.8152792507339273, "learning_rate": 4.803241386612436e-06, "loss": 0.3683, "step": 3172 }, { "epoch": 0.7846191889218596, "grad_norm": 0.7998726293610626, "learning_rate": 4.8031149764442695e-06, "loss": 0.4073, "step": 3173 }, { "epoch": 0.7848664688427299, "grad_norm": 0.8425199760576926, "learning_rate": 4.802988527346703e-06, "loss": 0.4096, "step": 3174 }, { "epoch": 0.7851137487636004, "grad_norm": 0.8087625745415404, "learning_rate": 4.802862039321875e-06, "loss": 0.4172, "step": 3175 }, { "epoch": 0.7853610286844708, "grad_norm": 0.8103640303916235, "learning_rate": 4.802735512371922e-06, "loss": 0.4016, "step": 3176 }, { "epoch": 0.7856083086053413, "grad_norm": 0.7885395510106586, "learning_rate": 4.8026089464989825e-06, "loss": 0.4076, "step": 3177 }, { "epoch": 0.7858555885262116, "grad_norm": 0.8642951165136358, "learning_rate": 4.802482341705197e-06, "loss": 0.4139, "step": 3178 }, { "epoch": 0.7861028684470821, "grad_norm": 0.7819223306835482, "learning_rate": 4.8023556979927045e-06, "loss": 0.4341, "step": 3179 }, { "epoch": 0.7863501483679525, "grad_norm": 0.8193735193209689, "learning_rate": 4.802229015363646e-06, "loss": 0.4271, "step": 3180 }, { "epoch": 0.786597428288823, "grad_norm": 0.7692047832916445, "learning_rate": 4.802102293820162e-06, "loss": 0.4266, "step": 3181 }, { "epoch": 0.7868447082096933, "grad_norm": 0.7580998926928701, "learning_rate": 4.801975533364397e-06, "loss": 0.4232, "step": 3182 }, { "epoch": 0.7870919881305638, "grad_norm": 0.7952151355924447, "learning_rate": 4.801848733998491e-06, "loss": 0.4287, "step": 3183 }, { "epoch": 0.7873392680514342, "grad_norm": 0.768566684447632, "learning_rate": 4.801721895724588e-06, "loss": 0.4352, "step": 3184 }, { "epoch": 0.7875865479723047, "grad_norm": 0.8392183142924099, "learning_rate": 4.801595018544834e-06, "loss": 0.4161, "step": 3185 }, { "epoch": 0.787833827893175, "grad_norm": 0.8229971702026877, "learning_rate": 4.80146810246137e-06, "loss": 0.4035, "step": 3186 }, { "epoch": 0.7880811078140455, "grad_norm": 0.7669879130914552, "learning_rate": 4.801341147476343e-06, "loss": 0.3952, "step": 3187 }, { "epoch": 0.7883283877349159, "grad_norm": 0.7891804474395242, "learning_rate": 4.801214153591899e-06, "loss": 0.3975, "step": 3188 }, { "epoch": 0.7885756676557863, "grad_norm": 0.7986833755586805, "learning_rate": 4.801087120810185e-06, "loss": 0.413, "step": 3189 }, { "epoch": 0.7888229475766568, "grad_norm": 0.7952880723142738, "learning_rate": 4.800960049133347e-06, "loss": 0.4372, "step": 3190 }, { "epoch": 0.7890702274975272, "grad_norm": 0.7757455316227299, "learning_rate": 4.800832938563534e-06, "loss": 0.4249, "step": 3191 }, { "epoch": 0.7893175074183977, "grad_norm": 0.7949907790570365, "learning_rate": 4.800705789102894e-06, "loss": 0.431, "step": 3192 }, { "epoch": 0.789564787339268, "grad_norm": 0.7868569621757431, "learning_rate": 4.800578600753577e-06, "loss": 0.424, "step": 3193 }, { "epoch": 0.7898120672601385, "grad_norm": 0.7864424676820765, "learning_rate": 4.800451373517732e-06, "loss": 0.3849, "step": 3194 }, { "epoch": 0.7900593471810089, "grad_norm": 0.784595550816719, "learning_rate": 4.800324107397509e-06, "loss": 0.4098, "step": 3195 }, { "epoch": 0.7903066271018794, "grad_norm": 0.830043984168889, "learning_rate": 4.800196802395061e-06, "loss": 0.3888, "step": 3196 }, { "epoch": 0.7905539070227497, "grad_norm": 0.790118097460834, "learning_rate": 4.800069458512538e-06, "loss": 0.4102, "step": 3197 }, { "epoch": 0.7908011869436202, "grad_norm": 0.8227085580927859, "learning_rate": 4.799942075752093e-06, "loss": 0.4375, "step": 3198 }, { "epoch": 0.7910484668644906, "grad_norm": 0.7982196753808344, "learning_rate": 4.799814654115879e-06, "loss": 0.4529, "step": 3199 }, { "epoch": 0.7912957467853611, "grad_norm": 0.7866905634175901, "learning_rate": 4.799687193606052e-06, "loss": 0.4251, "step": 3200 }, { "epoch": 0.7915430267062314, "grad_norm": 0.7685080791035871, "learning_rate": 4.799559694224763e-06, "loss": 0.4215, "step": 3201 }, { "epoch": 0.7917903066271019, "grad_norm": 0.7599904496578714, "learning_rate": 4.799432155974168e-06, "loss": 0.4219, "step": 3202 }, { "epoch": 0.7920375865479723, "grad_norm": 0.8263878111249411, "learning_rate": 4.799304578856425e-06, "loss": 0.4235, "step": 3203 }, { "epoch": 0.7922848664688428, "grad_norm": 0.7741646735638096, "learning_rate": 4.799176962873689e-06, "loss": 0.4094, "step": 3204 }, { "epoch": 0.7925321463897131, "grad_norm": 0.7842075482726589, "learning_rate": 4.799049308028116e-06, "loss": 0.4265, "step": 3205 }, { "epoch": 0.7927794263105836, "grad_norm": 0.8011953412570326, "learning_rate": 4.7989216143218655e-06, "loss": 0.4101, "step": 3206 }, { "epoch": 0.793026706231454, "grad_norm": 0.8305619346107188, "learning_rate": 4.798793881757095e-06, "loss": 0.4166, "step": 3207 }, { "epoch": 0.7932739861523245, "grad_norm": 0.8065231582621645, "learning_rate": 4.798666110335963e-06, "loss": 0.4233, "step": 3208 }, { "epoch": 0.7935212660731948, "grad_norm": 0.7874595373516532, "learning_rate": 4.798538300060631e-06, "loss": 0.4178, "step": 3209 }, { "epoch": 0.7937685459940653, "grad_norm": 0.7907591698271821, "learning_rate": 4.798410450933258e-06, "loss": 0.4226, "step": 3210 }, { "epoch": 0.7940158259149357, "grad_norm": 0.7625097668467142, "learning_rate": 4.798282562956005e-06, "loss": 0.415, "step": 3211 }, { "epoch": 0.7942631058358062, "grad_norm": 0.7561667754981604, "learning_rate": 4.798154636131033e-06, "loss": 0.4355, "step": 3212 }, { "epoch": 0.7945103857566765, "grad_norm": 0.7809583776002582, "learning_rate": 4.7980266704605064e-06, "loss": 0.4113, "step": 3213 }, { "epoch": 0.794757665677547, "grad_norm": 0.784211514845161, "learning_rate": 4.797898665946587e-06, "loss": 0.4288, "step": 3214 }, { "epoch": 0.7950049455984174, "grad_norm": 0.7780178015749081, "learning_rate": 4.797770622591439e-06, "loss": 0.4187, "step": 3215 }, { "epoch": 0.7952522255192879, "grad_norm": 0.8068492708723259, "learning_rate": 4.797642540397226e-06, "loss": 0.4221, "step": 3216 }, { "epoch": 0.7954995054401582, "grad_norm": 0.8164724169733689, "learning_rate": 4.797514419366112e-06, "loss": 0.4121, "step": 3217 }, { "epoch": 0.7957467853610287, "grad_norm": 0.7890501236318611, "learning_rate": 4.7973862595002655e-06, "loss": 0.4102, "step": 3218 }, { "epoch": 0.7959940652818991, "grad_norm": 0.8065399895842733, "learning_rate": 4.79725806080185e-06, "loss": 0.4579, "step": 3219 }, { "epoch": 0.7962413452027696, "grad_norm": 0.7813246830312838, "learning_rate": 4.797129823273035e-06, "loss": 0.4026, "step": 3220 }, { "epoch": 0.7964886251236399, "grad_norm": 0.7719196802952721, "learning_rate": 4.797001546915985e-06, "loss": 0.4604, "step": 3221 }, { "epoch": 0.7967359050445104, "grad_norm": 0.7951454051368005, "learning_rate": 4.796873231732871e-06, "loss": 0.4071, "step": 3222 }, { "epoch": 0.7969831849653808, "grad_norm": 0.8307970839902238, "learning_rate": 4.796744877725861e-06, "loss": 0.4101, "step": 3223 }, { "epoch": 0.7972304648862513, "grad_norm": 0.7701120556585122, "learning_rate": 4.796616484897123e-06, "loss": 0.4492, "step": 3224 }, { "epoch": 0.7974777448071216, "grad_norm": 0.7773449035476312, "learning_rate": 4.79648805324883e-06, "loss": 0.378, "step": 3225 }, { "epoch": 0.7977250247279921, "grad_norm": 0.8036758045319646, "learning_rate": 4.796359582783151e-06, "loss": 0.4119, "step": 3226 }, { "epoch": 0.7979723046488625, "grad_norm": 0.8288205702589608, "learning_rate": 4.796231073502258e-06, "loss": 0.4325, "step": 3227 }, { "epoch": 0.798219584569733, "grad_norm": 0.8388175569444808, "learning_rate": 4.796102525408323e-06, "loss": 0.3907, "step": 3228 }, { "epoch": 0.7984668644906033, "grad_norm": 0.7803655660453329, "learning_rate": 4.795973938503518e-06, "loss": 0.3959, "step": 3229 }, { "epoch": 0.7987141444114738, "grad_norm": 0.7853139273322179, "learning_rate": 4.79584531279002e-06, "loss": 0.3897, "step": 3230 }, { "epoch": 0.7989614243323442, "grad_norm": 0.7871116216692523, "learning_rate": 4.7957166482699985e-06, "loss": 0.4291, "step": 3231 }, { "epoch": 0.7992087042532147, "grad_norm": 0.7671489638405405, "learning_rate": 4.795587944945631e-06, "loss": 0.4324, "step": 3232 }, { "epoch": 0.799455984174085, "grad_norm": 0.8063052507791727, "learning_rate": 4.795459202819093e-06, "loss": 0.3814, "step": 3233 }, { "epoch": 0.7997032640949555, "grad_norm": 0.7942943583781706, "learning_rate": 4.795330421892559e-06, "loss": 0.4134, "step": 3234 }, { "epoch": 0.7999505440158259, "grad_norm": 0.8014136850289755, "learning_rate": 4.795201602168208e-06, "loss": 0.4275, "step": 3235 }, { "epoch": 0.8001978239366964, "grad_norm": 0.793107629545374, "learning_rate": 4.795072743648216e-06, "loss": 0.4141, "step": 3236 }, { "epoch": 0.8004451038575667, "grad_norm": 0.8333140544980959, "learning_rate": 4.794943846334761e-06, "loss": 0.4016, "step": 3237 }, { "epoch": 0.8006923837784372, "grad_norm": 0.7937306545181533, "learning_rate": 4.7948149102300214e-06, "loss": 0.3894, "step": 3238 }, { "epoch": 0.8009396636993076, "grad_norm": 0.7752640899581088, "learning_rate": 4.794685935336178e-06, "loss": 0.4351, "step": 3239 }, { "epoch": 0.8011869436201781, "grad_norm": 0.792963683311261, "learning_rate": 4.79455692165541e-06, "loss": 0.4454, "step": 3240 }, { "epoch": 0.8014342235410484, "grad_norm": 0.8299219831294848, "learning_rate": 4.794427869189898e-06, "loss": 0.3952, "step": 3241 }, { "epoch": 0.8016815034619189, "grad_norm": 0.8157943140762436, "learning_rate": 4.7942987779418245e-06, "loss": 0.4332, "step": 3242 }, { "epoch": 0.8019287833827893, "grad_norm": 0.7986078907851436, "learning_rate": 4.79416964791337e-06, "loss": 0.4322, "step": 3243 }, { "epoch": 0.8021760633036598, "grad_norm": 0.786261870460221, "learning_rate": 4.794040479106718e-06, "loss": 0.4235, "step": 3244 }, { "epoch": 0.8024233432245301, "grad_norm": 0.8283195848120665, "learning_rate": 4.7939112715240515e-06, "loss": 0.4319, "step": 3245 }, { "epoch": 0.8026706231454006, "grad_norm": 0.8287242798951121, "learning_rate": 4.793782025167555e-06, "loss": 0.4509, "step": 3246 }, { "epoch": 0.802917903066271, "grad_norm": 0.7817295347972962, "learning_rate": 4.793652740039412e-06, "loss": 0.405, "step": 3247 }, { "epoch": 0.8031651829871415, "grad_norm": 0.7838023242726265, "learning_rate": 4.79352341614181e-06, "loss": 0.4224, "step": 3248 }, { "epoch": 0.8034124629080118, "grad_norm": 0.7761306457370105, "learning_rate": 4.793394053476932e-06, "loss": 0.4359, "step": 3249 }, { "epoch": 0.8036597428288823, "grad_norm": 0.8218425977051614, "learning_rate": 4.793264652046967e-06, "loss": 0.4233, "step": 3250 }, { "epoch": 0.8039070227497527, "grad_norm": 0.7921841515228916, "learning_rate": 4.7931352118541e-06, "loss": 0.4177, "step": 3251 }, { "epoch": 0.8041543026706232, "grad_norm": 0.8773579262315243, "learning_rate": 4.793005732900522e-06, "loss": 0.3918, "step": 3252 }, { "epoch": 0.8044015825914935, "grad_norm": 0.829400566392459, "learning_rate": 4.792876215188419e-06, "loss": 0.4072, "step": 3253 }, { "epoch": 0.804648862512364, "grad_norm": 0.7823791901433861, "learning_rate": 4.792746658719982e-06, "loss": 0.4101, "step": 3254 }, { "epoch": 0.8048961424332344, "grad_norm": 0.7881281107043909, "learning_rate": 4.792617063497399e-06, "loss": 0.4145, "step": 3255 }, { "epoch": 0.8051434223541049, "grad_norm": 0.8476540235058267, "learning_rate": 4.792487429522862e-06, "loss": 0.4036, "step": 3256 }, { "epoch": 0.8053907022749752, "grad_norm": 0.8152889870566801, "learning_rate": 4.792357756798561e-06, "loss": 0.418, "step": 3257 }, { "epoch": 0.8056379821958457, "grad_norm": 0.8076974577039581, "learning_rate": 4.79222804532669e-06, "loss": 0.4021, "step": 3258 }, { "epoch": 0.8058852621167161, "grad_norm": 0.8058805425863745, "learning_rate": 4.792098295109439e-06, "loss": 0.3917, "step": 3259 }, { "epoch": 0.8061325420375866, "grad_norm": 0.8234389705832926, "learning_rate": 4.791968506149003e-06, "loss": 0.4173, "step": 3260 }, { "epoch": 0.8063798219584569, "grad_norm": 0.7886416606971659, "learning_rate": 4.791838678447574e-06, "loss": 0.405, "step": 3261 }, { "epoch": 0.8066271018793274, "grad_norm": 0.779637253269339, "learning_rate": 4.7917088120073484e-06, "loss": 0.4245, "step": 3262 }, { "epoch": 0.8068743818001978, "grad_norm": 0.8095012054220153, "learning_rate": 4.79157890683052e-06, "loss": 0.4503, "step": 3263 }, { "epoch": 0.8071216617210683, "grad_norm": 0.8194829000253359, "learning_rate": 4.791448962919285e-06, "loss": 0.4007, "step": 3264 }, { "epoch": 0.8073689416419386, "grad_norm": 0.8104089346079402, "learning_rate": 4.7913189802758405e-06, "loss": 0.4139, "step": 3265 }, { "epoch": 0.8076162215628091, "grad_norm": 0.8149788810165006, "learning_rate": 4.791188958902382e-06, "loss": 0.4001, "step": 3266 }, { "epoch": 0.8078635014836796, "grad_norm": 0.7880461660692727, "learning_rate": 4.791058898801109e-06, "loss": 0.4242, "step": 3267 }, { "epoch": 0.80811078140455, "grad_norm": 0.8440145345829424, "learning_rate": 4.790928799974219e-06, "loss": 0.4047, "step": 3268 }, { "epoch": 0.8083580613254204, "grad_norm": 0.8081825088885769, "learning_rate": 4.790798662423911e-06, "loss": 0.4055, "step": 3269 }, { "epoch": 0.8086053412462908, "grad_norm": 0.7968073833564706, "learning_rate": 4.790668486152385e-06, "loss": 0.4071, "step": 3270 }, { "epoch": 0.8088526211671613, "grad_norm": 0.7841070913783371, "learning_rate": 4.790538271161841e-06, "loss": 0.4171, "step": 3271 }, { "epoch": 0.8090999010880316, "grad_norm": 0.8286577863722515, "learning_rate": 4.79040801745448e-06, "loss": 0.3833, "step": 3272 }, { "epoch": 0.8093471810089021, "grad_norm": 0.7644216257502693, "learning_rate": 4.790277725032504e-06, "loss": 0.4089, "step": 3273 }, { "epoch": 0.8095944609297725, "grad_norm": 0.8298216917114288, "learning_rate": 4.790147393898116e-06, "loss": 0.4184, "step": 3274 }, { "epoch": 0.809841740850643, "grad_norm": 0.8373160134004612, "learning_rate": 4.790017024053517e-06, "loss": 0.4097, "step": 3275 }, { "epoch": 0.8100890207715133, "grad_norm": 0.8003961462949286, "learning_rate": 4.789886615500912e-06, "loss": 0.4208, "step": 3276 }, { "epoch": 0.8103363006923838, "grad_norm": 0.8084367466040833, "learning_rate": 4.789756168242506e-06, "loss": 0.4008, "step": 3277 }, { "epoch": 0.8105835806132542, "grad_norm": 0.8328646094691385, "learning_rate": 4.789625682280503e-06, "loss": 0.3932, "step": 3278 }, { "epoch": 0.8108308605341247, "grad_norm": 0.817103508185311, "learning_rate": 4.789495157617108e-06, "loss": 0.4241, "step": 3279 }, { "epoch": 0.811078140454995, "grad_norm": 0.7878968437266687, "learning_rate": 4.789364594254529e-06, "loss": 0.4245, "step": 3280 }, { "epoch": 0.8113254203758655, "grad_norm": 0.7723691927519781, "learning_rate": 4.78923399219497e-06, "loss": 0.3915, "step": 3281 }, { "epoch": 0.8115727002967359, "grad_norm": 0.8168370329750326, "learning_rate": 4.789103351440641e-06, "loss": 0.3639, "step": 3282 }, { "epoch": 0.8118199802176064, "grad_norm": 0.7968216032656887, "learning_rate": 4.788972671993751e-06, "loss": 0.3938, "step": 3283 }, { "epoch": 0.8120672601384767, "grad_norm": 0.8185040720148407, "learning_rate": 4.788841953856506e-06, "loss": 0.4247, "step": 3284 }, { "epoch": 0.8123145400593472, "grad_norm": 0.7781045150914397, "learning_rate": 4.788711197031118e-06, "loss": 0.4036, "step": 3285 }, { "epoch": 0.8125618199802176, "grad_norm": 0.7785731429716326, "learning_rate": 4.788580401519794e-06, "loss": 0.3986, "step": 3286 }, { "epoch": 0.8128090999010881, "grad_norm": 0.8212032505997505, "learning_rate": 4.7884495673247496e-06, "loss": 0.4266, "step": 3287 }, { "epoch": 0.8130563798219584, "grad_norm": 0.7484700477722402, "learning_rate": 4.788318694448192e-06, "loss": 0.4596, "step": 3288 }, { "epoch": 0.8133036597428289, "grad_norm": 0.8209584736189361, "learning_rate": 4.788187782892336e-06, "loss": 0.3874, "step": 3289 }, { "epoch": 0.8135509396636993, "grad_norm": 0.7842763948585224, "learning_rate": 4.788056832659392e-06, "loss": 0.4281, "step": 3290 }, { "epoch": 0.8137982195845698, "grad_norm": 0.809014432481598, "learning_rate": 4.787925843751576e-06, "loss": 0.4021, "step": 3291 }, { "epoch": 0.8140454995054401, "grad_norm": 0.8093191327442567, "learning_rate": 4.787794816171101e-06, "loss": 0.4046, "step": 3292 }, { "epoch": 0.8142927794263106, "grad_norm": 0.7877046692252317, "learning_rate": 4.7876637499201815e-06, "loss": 0.4486, "step": 3293 }, { "epoch": 0.814540059347181, "grad_norm": 0.798840334202066, "learning_rate": 4.787532645001033e-06, "loss": 0.4032, "step": 3294 }, { "epoch": 0.8147873392680515, "grad_norm": 0.8255728729719902, "learning_rate": 4.787401501415871e-06, "loss": 0.452, "step": 3295 }, { "epoch": 0.8150346191889218, "grad_norm": 0.797243236400735, "learning_rate": 4.787270319166913e-06, "loss": 0.4275, "step": 3296 }, { "epoch": 0.8152818991097923, "grad_norm": 0.8349074546118905, "learning_rate": 4.787139098256377e-06, "loss": 0.3907, "step": 3297 }, { "epoch": 0.8155291790306627, "grad_norm": 0.8110622701045463, "learning_rate": 4.7870078386864795e-06, "loss": 0.4357, "step": 3298 }, { "epoch": 0.8157764589515332, "grad_norm": 0.7921550904844632, "learning_rate": 4.78687654045944e-06, "loss": 0.4277, "step": 3299 }, { "epoch": 0.8160237388724035, "grad_norm": 0.7754244489726932, "learning_rate": 4.7867452035774774e-06, "loss": 0.412, "step": 3300 }, { "epoch": 0.816271018793274, "grad_norm": 0.791617530083558, "learning_rate": 4.786613828042813e-06, "loss": 0.4174, "step": 3301 }, { "epoch": 0.8165182987141444, "grad_norm": 0.7944566773450151, "learning_rate": 4.786482413857666e-06, "loss": 0.4104, "step": 3302 }, { "epoch": 0.8167655786350149, "grad_norm": 0.8172864231650154, "learning_rate": 4.786350961024257e-06, "loss": 0.4232, "step": 3303 }, { "epoch": 0.8170128585558852, "grad_norm": 0.844980815358175, "learning_rate": 4.78621946954481e-06, "loss": 0.3849, "step": 3304 }, { "epoch": 0.8172601384767557, "grad_norm": 0.7757666993402713, "learning_rate": 4.786087939421547e-06, "loss": 0.4008, "step": 3305 }, { "epoch": 0.8175074183976261, "grad_norm": 0.794111476583098, "learning_rate": 4.7859563706566914e-06, "loss": 0.3986, "step": 3306 }, { "epoch": 0.8177546983184966, "grad_norm": 0.7773637704984339, "learning_rate": 4.785824763252466e-06, "loss": 0.3883, "step": 3307 }, { "epoch": 0.8180019782393669, "grad_norm": 0.798335262514005, "learning_rate": 4.785693117211095e-06, "loss": 0.4097, "step": 3308 }, { "epoch": 0.8182492581602374, "grad_norm": 0.8023022414258852, "learning_rate": 4.785561432534806e-06, "loss": 0.3993, "step": 3309 }, { "epoch": 0.8184965380811078, "grad_norm": 0.8301219030490221, "learning_rate": 4.7854297092258216e-06, "loss": 0.3828, "step": 3310 }, { "epoch": 0.8187438180019783, "grad_norm": 0.8153453055334471, "learning_rate": 4.785297947286372e-06, "loss": 0.4305, "step": 3311 }, { "epoch": 0.8189910979228486, "grad_norm": 0.8111562590093313, "learning_rate": 4.785166146718681e-06, "loss": 0.4078, "step": 3312 }, { "epoch": 0.8192383778437191, "grad_norm": 0.7826670595098245, "learning_rate": 4.785034307524979e-06, "loss": 0.4281, "step": 3313 }, { "epoch": 0.8194856577645895, "grad_norm": 0.7793094315853807, "learning_rate": 4.784902429707493e-06, "loss": 0.4373, "step": 3314 }, { "epoch": 0.81973293768546, "grad_norm": 0.7994195562682976, "learning_rate": 4.784770513268452e-06, "loss": 0.4028, "step": 3315 }, { "epoch": 0.8199802176063303, "grad_norm": 0.777986973844296, "learning_rate": 4.784638558210086e-06, "loss": 0.4135, "step": 3316 }, { "epoch": 0.8202274975272008, "grad_norm": 0.8011858291824131, "learning_rate": 4.784506564534627e-06, "loss": 0.4556, "step": 3317 }, { "epoch": 0.8204747774480712, "grad_norm": 0.7695970936651433, "learning_rate": 4.784374532244304e-06, "loss": 0.4264, "step": 3318 }, { "epoch": 0.8207220573689417, "grad_norm": 0.7854179279542661, "learning_rate": 4.78424246134135e-06, "loss": 0.3941, "step": 3319 }, { "epoch": 0.820969337289812, "grad_norm": 0.8145456782664094, "learning_rate": 4.784110351827996e-06, "loss": 0.4303, "step": 3320 }, { "epoch": 0.8212166172106825, "grad_norm": 0.7778866148610745, "learning_rate": 4.783978203706476e-06, "loss": 0.3849, "step": 3321 }, { "epoch": 0.8214638971315529, "grad_norm": 0.8002029621999994, "learning_rate": 4.783846016979024e-06, "loss": 0.3992, "step": 3322 }, { "epoch": 0.8217111770524234, "grad_norm": 0.8349311151236508, "learning_rate": 4.7837137916478745e-06, "loss": 0.3903, "step": 3323 }, { "epoch": 0.8219584569732937, "grad_norm": 0.7790180389747006, "learning_rate": 4.783581527715261e-06, "loss": 0.3822, "step": 3324 }, { "epoch": 0.8222057368941642, "grad_norm": 0.7629026266310897, "learning_rate": 4.783449225183421e-06, "loss": 0.4195, "step": 3325 }, { "epoch": 0.8224530168150346, "grad_norm": 0.7649237556248503, "learning_rate": 4.783316884054589e-06, "loss": 0.4185, "step": 3326 }, { "epoch": 0.8227002967359051, "grad_norm": 0.7809836127526398, "learning_rate": 4.7831845043310034e-06, "loss": 0.4325, "step": 3327 }, { "epoch": 0.8229475766567754, "grad_norm": 0.771282433024842, "learning_rate": 4.783052086014901e-06, "loss": 0.4005, "step": 3328 }, { "epoch": 0.8231948565776459, "grad_norm": 0.7837387184470643, "learning_rate": 4.7829196291085205e-06, "loss": 0.4038, "step": 3329 }, { "epoch": 0.8234421364985163, "grad_norm": 0.7763923077993041, "learning_rate": 4.7827871336141006e-06, "loss": 0.4172, "step": 3330 }, { "epoch": 0.8236894164193868, "grad_norm": 0.7888187786186903, "learning_rate": 4.782654599533881e-06, "loss": 0.4086, "step": 3331 }, { "epoch": 0.8239366963402571, "grad_norm": 0.7964113323354067, "learning_rate": 4.7825220268701015e-06, "loss": 0.3942, "step": 3332 }, { "epoch": 0.8241839762611276, "grad_norm": 0.7949089727526033, "learning_rate": 4.782389415625003e-06, "loss": 0.427, "step": 3333 }, { "epoch": 0.824431256181998, "grad_norm": 0.7898222003899379, "learning_rate": 4.782256765800828e-06, "loss": 0.4198, "step": 3334 }, { "epoch": 0.8246785361028685, "grad_norm": 0.7713468303220107, "learning_rate": 4.782124077399818e-06, "loss": 0.4082, "step": 3335 }, { "epoch": 0.8249258160237388, "grad_norm": 0.760638496140234, "learning_rate": 4.7819913504242156e-06, "loss": 0.4271, "step": 3336 }, { "epoch": 0.8251730959446093, "grad_norm": 0.7949387100448536, "learning_rate": 4.7818585848762645e-06, "loss": 0.4114, "step": 3337 }, { "epoch": 0.8254203758654797, "grad_norm": 0.7580505449504422, "learning_rate": 4.781725780758208e-06, "loss": 0.4131, "step": 3338 }, { "epoch": 0.8256676557863502, "grad_norm": 0.8045167857797156, "learning_rate": 4.781592938072292e-06, "loss": 0.4223, "step": 3339 }, { "epoch": 0.8259149357072205, "grad_norm": 0.8159002530056874, "learning_rate": 4.781460056820763e-06, "loss": 0.4332, "step": 3340 }, { "epoch": 0.826162215628091, "grad_norm": 0.7873685673757427, "learning_rate": 4.781327137005865e-06, "loss": 0.4053, "step": 3341 }, { "epoch": 0.8264094955489614, "grad_norm": 0.8123786085064364, "learning_rate": 4.781194178629844e-06, "loss": 0.417, "step": 3342 }, { "epoch": 0.8266567754698319, "grad_norm": 0.819766015202432, "learning_rate": 4.781061181694949e-06, "loss": 0.4162, "step": 3343 }, { "epoch": 0.8269040553907022, "grad_norm": 0.7979483243492991, "learning_rate": 4.78092814620343e-06, "loss": 0.4317, "step": 3344 }, { "epoch": 0.8271513353115727, "grad_norm": 0.7688666283808752, "learning_rate": 4.780795072157532e-06, "loss": 0.43, "step": 3345 }, { "epoch": 0.8273986152324432, "grad_norm": 0.7990950578586927, "learning_rate": 4.780661959559506e-06, "loss": 0.372, "step": 3346 }, { "epoch": 0.8276458951533135, "grad_norm": 0.7721501590793813, "learning_rate": 4.780528808411602e-06, "loss": 0.4185, "step": 3347 }, { "epoch": 0.827893175074184, "grad_norm": 0.7854619913809469, "learning_rate": 4.780395618716071e-06, "loss": 0.3988, "step": 3348 }, { "epoch": 0.8281404549950544, "grad_norm": 0.8290833887661643, "learning_rate": 4.7802623904751626e-06, "loss": 0.4358, "step": 3349 }, { "epoch": 0.8283877349159249, "grad_norm": 0.8158026726230498, "learning_rate": 4.780129123691131e-06, "loss": 0.3959, "step": 3350 }, { "epoch": 0.8286350148367952, "grad_norm": 0.8003195625474808, "learning_rate": 4.779995818366227e-06, "loss": 0.4162, "step": 3351 }, { "epoch": 0.8288822947576657, "grad_norm": 0.7900036436217165, "learning_rate": 4.779862474502705e-06, "loss": 0.4293, "step": 3352 }, { "epoch": 0.8291295746785361, "grad_norm": 0.8004447888167544, "learning_rate": 4.779729092102818e-06, "loss": 0.4007, "step": 3353 }, { "epoch": 0.8293768545994066, "grad_norm": 0.7964423636908463, "learning_rate": 4.779595671168822e-06, "loss": 0.3976, "step": 3354 }, { "epoch": 0.829624134520277, "grad_norm": 0.8067022591755529, "learning_rate": 4.779462211702971e-06, "loss": 0.4154, "step": 3355 }, { "epoch": 0.8298714144411474, "grad_norm": 0.8050964956297638, "learning_rate": 4.77932871370752e-06, "loss": 0.417, "step": 3356 }, { "epoch": 0.8301186943620178, "grad_norm": 0.7859285185827618, "learning_rate": 4.779195177184728e-06, "loss": 0.4388, "step": 3357 }, { "epoch": 0.8303659742828883, "grad_norm": 0.8209308533484606, "learning_rate": 4.779061602136851e-06, "loss": 0.4343, "step": 3358 }, { "epoch": 0.8306132542037586, "grad_norm": 0.7901126945376796, "learning_rate": 4.778927988566146e-06, "loss": 0.4181, "step": 3359 }, { "epoch": 0.8308605341246291, "grad_norm": 0.8069670725815871, "learning_rate": 4.778794336474873e-06, "loss": 0.3819, "step": 3360 }, { "epoch": 0.8311078140454995, "grad_norm": 0.7769685258141426, "learning_rate": 4.778660645865288e-06, "loss": 0.4084, "step": 3361 }, { "epoch": 0.83135509396637, "grad_norm": 0.8246160503291277, "learning_rate": 4.7785269167396545e-06, "loss": 0.3947, "step": 3362 }, { "epoch": 0.8316023738872403, "grad_norm": 0.8136590431488328, "learning_rate": 4.778393149100231e-06, "loss": 0.4101, "step": 3363 }, { "epoch": 0.8318496538081108, "grad_norm": 0.7911097863165703, "learning_rate": 4.778259342949279e-06, "loss": 0.4493, "step": 3364 }, { "epoch": 0.8320969337289812, "grad_norm": 0.812883523661831, "learning_rate": 4.77812549828906e-06, "loss": 0.3993, "step": 3365 }, { "epoch": 0.8323442136498517, "grad_norm": 0.8022234638234113, "learning_rate": 4.777991615121837e-06, "loss": 0.4131, "step": 3366 }, { "epoch": 0.832591493570722, "grad_norm": 0.7795760679299366, "learning_rate": 4.777857693449871e-06, "loss": 0.4185, "step": 3367 }, { "epoch": 0.8328387734915925, "grad_norm": 0.7837567396640641, "learning_rate": 4.777723733275429e-06, "loss": 0.4314, "step": 3368 }, { "epoch": 0.8330860534124629, "grad_norm": 0.7892606037338461, "learning_rate": 4.7775897346007726e-06, "loss": 0.4224, "step": 3369 }, { "epoch": 0.8333333333333334, "grad_norm": 0.7831937189039634, "learning_rate": 4.7774556974281685e-06, "loss": 0.4399, "step": 3370 }, { "epoch": 0.8335806132542037, "grad_norm": 0.8124136973914873, "learning_rate": 4.77732162175988e-06, "loss": 0.4537, "step": 3371 }, { "epoch": 0.8338278931750742, "grad_norm": 0.7723322289863942, "learning_rate": 4.777187507598177e-06, "loss": 0.388, "step": 3372 }, { "epoch": 0.8340751730959446, "grad_norm": 0.7743046452175267, "learning_rate": 4.777053354945322e-06, "loss": 0.416, "step": 3373 }, { "epoch": 0.8343224530168151, "grad_norm": 0.8135790787271157, "learning_rate": 4.776919163803587e-06, "loss": 0.4058, "step": 3374 }, { "epoch": 0.8345697329376854, "grad_norm": 0.7661956419487184, "learning_rate": 4.776784934175237e-06, "loss": 0.4246, "step": 3375 }, { "epoch": 0.8348170128585559, "grad_norm": 0.80368496419414, "learning_rate": 4.7766506660625414e-06, "loss": 0.4271, "step": 3376 }, { "epoch": 0.8350642927794263, "grad_norm": 0.7714493561021236, "learning_rate": 4.776516359467771e-06, "loss": 0.4371, "step": 3377 }, { "epoch": 0.8353115727002968, "grad_norm": 0.8100023760100212, "learning_rate": 4.776382014393195e-06, "loss": 0.4019, "step": 3378 }, { "epoch": 0.8355588526211671, "grad_norm": 0.7884436871457682, "learning_rate": 4.776247630841085e-06, "loss": 0.3934, "step": 3379 }, { "epoch": 0.8358061325420376, "grad_norm": 0.7671160651595207, "learning_rate": 4.776113208813712e-06, "loss": 0.4291, "step": 3380 }, { "epoch": 0.836053412462908, "grad_norm": 0.7938160957249958, "learning_rate": 4.775978748313348e-06, "loss": 0.4187, "step": 3381 }, { "epoch": 0.8363006923837785, "grad_norm": 0.8224792228789037, "learning_rate": 4.775844249342265e-06, "loss": 0.4066, "step": 3382 }, { "epoch": 0.8365479723046488, "grad_norm": 0.7905005579082747, "learning_rate": 4.775709711902738e-06, "loss": 0.4017, "step": 3383 }, { "epoch": 0.8367952522255193, "grad_norm": 0.8057947131158326, "learning_rate": 4.7755751359970405e-06, "loss": 0.4054, "step": 3384 }, { "epoch": 0.8370425321463897, "grad_norm": 0.8175423715679635, "learning_rate": 4.775440521627447e-06, "loss": 0.409, "step": 3385 }, { "epoch": 0.8372898120672602, "grad_norm": 0.8008681519700376, "learning_rate": 4.7753058687962325e-06, "loss": 0.4221, "step": 3386 }, { "epoch": 0.8375370919881305, "grad_norm": 0.7850063473346038, "learning_rate": 4.775171177505674e-06, "loss": 0.377, "step": 3387 }, { "epoch": 0.837784371909001, "grad_norm": 0.7947371747318924, "learning_rate": 4.775036447758048e-06, "loss": 0.4143, "step": 3388 }, { "epoch": 0.8380316518298714, "grad_norm": 0.7655744610178552, "learning_rate": 4.774901679555631e-06, "loss": 0.3918, "step": 3389 }, { "epoch": 0.8382789317507419, "grad_norm": 0.7762446116793785, "learning_rate": 4.774766872900702e-06, "loss": 0.4381, "step": 3390 }, { "epoch": 0.8385262116716122, "grad_norm": 0.8303410835002127, "learning_rate": 4.7746320277955395e-06, "loss": 0.3758, "step": 3391 }, { "epoch": 0.8387734915924827, "grad_norm": 0.7985702070353878, "learning_rate": 4.774497144242421e-06, "loss": 0.3818, "step": 3392 }, { "epoch": 0.8390207715133531, "grad_norm": 0.7920132547874397, "learning_rate": 4.774362222243629e-06, "loss": 0.4347, "step": 3393 }, { "epoch": 0.8392680514342236, "grad_norm": 0.8030043793173209, "learning_rate": 4.774227261801442e-06, "loss": 0.3946, "step": 3394 }, { "epoch": 0.8395153313550939, "grad_norm": 0.747732048647203, "learning_rate": 4.774092262918143e-06, "loss": 0.4537, "step": 3395 }, { "epoch": 0.8397626112759644, "grad_norm": 0.840523303115213, "learning_rate": 4.773957225596013e-06, "loss": 0.4157, "step": 3396 }, { "epoch": 0.8400098911968348, "grad_norm": 0.8110229327341179, "learning_rate": 4.773822149837334e-06, "loss": 0.4228, "step": 3397 }, { "epoch": 0.8402571711177053, "grad_norm": 0.7683317822796379, "learning_rate": 4.77368703564439e-06, "loss": 0.4239, "step": 3398 }, { "epoch": 0.8405044510385756, "grad_norm": 0.782689216594726, "learning_rate": 4.7735518830194635e-06, "loss": 0.4216, "step": 3399 }, { "epoch": 0.8407517309594461, "grad_norm": 0.7907944083692497, "learning_rate": 4.773416691964842e-06, "loss": 0.3986, "step": 3400 }, { "epoch": 0.8409990108803165, "grad_norm": 0.7655934114807923, "learning_rate": 4.7732814624828075e-06, "loss": 0.4165, "step": 3401 }, { "epoch": 0.841246290801187, "grad_norm": 0.8110535411816441, "learning_rate": 4.773146194575647e-06, "loss": 0.4225, "step": 3402 }, { "epoch": 0.8414935707220573, "grad_norm": 0.8100327921657837, "learning_rate": 4.773010888245647e-06, "loss": 0.42, "step": 3403 }, { "epoch": 0.8417408506429278, "grad_norm": 0.7530456461140547, "learning_rate": 4.772875543495094e-06, "loss": 0.4248, "step": 3404 }, { "epoch": 0.8419881305637982, "grad_norm": 0.7879760448848543, "learning_rate": 4.772740160326276e-06, "loss": 0.4314, "step": 3405 }, { "epoch": 0.8422354104846687, "grad_norm": 0.8606818943078248, "learning_rate": 4.772604738741482e-06, "loss": 0.3978, "step": 3406 }, { "epoch": 0.842482690405539, "grad_norm": 0.7549716602552334, "learning_rate": 4.7724692787430006e-06, "loss": 0.4289, "step": 3407 }, { "epoch": 0.8427299703264095, "grad_norm": 0.7633266980017795, "learning_rate": 4.772333780333121e-06, "loss": 0.4274, "step": 3408 }, { "epoch": 0.8429772502472799, "grad_norm": 0.765061521639252, "learning_rate": 4.772198243514135e-06, "loss": 0.41, "step": 3409 }, { "epoch": 0.8432245301681504, "grad_norm": 0.7783131637239651, "learning_rate": 4.772062668288332e-06, "loss": 0.4222, "step": 3410 }, { "epoch": 0.8434718100890207, "grad_norm": 0.7613563968237722, "learning_rate": 4.771927054658003e-06, "loss": 0.4069, "step": 3411 }, { "epoch": 0.8437190900098912, "grad_norm": 0.8018125862329029, "learning_rate": 4.771791402625442e-06, "loss": 0.411, "step": 3412 }, { "epoch": 0.8439663699307616, "grad_norm": 0.8350909510173182, "learning_rate": 4.771655712192942e-06, "loss": 0.4001, "step": 3413 }, { "epoch": 0.844213649851632, "grad_norm": 0.7891030802950871, "learning_rate": 4.771519983362795e-06, "loss": 0.4467, "step": 3414 }, { "epoch": 0.8444609297725024, "grad_norm": 0.8525872477343289, "learning_rate": 4.771384216137297e-06, "loss": 0.3848, "step": 3415 }, { "epoch": 0.8447082096933729, "grad_norm": 0.8166630385029069, "learning_rate": 4.771248410518742e-06, "loss": 0.4117, "step": 3416 }, { "epoch": 0.8449554896142433, "grad_norm": 0.8127217435899223, "learning_rate": 4.771112566509424e-06, "loss": 0.4449, "step": 3417 }, { "epoch": 0.8452027695351138, "grad_norm": 0.7989077116900508, "learning_rate": 4.770976684111643e-06, "loss": 0.4309, "step": 3418 }, { "epoch": 0.8454500494559841, "grad_norm": 0.7742207773656186, "learning_rate": 4.770840763327691e-06, "loss": 0.4169, "step": 3419 }, { "epoch": 0.8456973293768546, "grad_norm": 0.8009671807975369, "learning_rate": 4.770704804159869e-06, "loss": 0.4272, "step": 3420 }, { "epoch": 0.845944609297725, "grad_norm": 0.7762396091246215, "learning_rate": 4.770568806610474e-06, "loss": 0.4071, "step": 3421 }, { "epoch": 0.8461918892185954, "grad_norm": 0.79048982739237, "learning_rate": 4.770432770681804e-06, "loss": 0.4142, "step": 3422 }, { "epoch": 0.8464391691394659, "grad_norm": 0.7925764780479834, "learning_rate": 4.7702966963761595e-06, "loss": 0.4406, "step": 3423 }, { "epoch": 0.8466864490603363, "grad_norm": 0.8229734761276529, "learning_rate": 4.770160583695841e-06, "loss": 0.4051, "step": 3424 }, { "epoch": 0.8469337289812068, "grad_norm": 0.7812549831681974, "learning_rate": 4.7700244326431485e-06, "loss": 0.426, "step": 3425 }, { "epoch": 0.8471810089020771, "grad_norm": 0.8153456675383441, "learning_rate": 4.769888243220382e-06, "loss": 0.4306, "step": 3426 }, { "epoch": 0.8474282888229476, "grad_norm": 0.8228319181988596, "learning_rate": 4.769752015429846e-06, "loss": 0.4103, "step": 3427 }, { "epoch": 0.847675568743818, "grad_norm": 0.8350865864963404, "learning_rate": 4.769615749273842e-06, "loss": 0.4163, "step": 3428 }, { "epoch": 0.8479228486646885, "grad_norm": 0.8015176047552672, "learning_rate": 4.769479444754672e-06, "loss": 0.4199, "step": 3429 }, { "epoch": 0.8481701285855588, "grad_norm": 0.8301892125593503, "learning_rate": 4.769343101874643e-06, "loss": 0.4164, "step": 3430 }, { "epoch": 0.8484174085064293, "grad_norm": 0.8073898224130004, "learning_rate": 4.769206720636056e-06, "loss": 0.4482, "step": 3431 }, { "epoch": 0.8486646884272997, "grad_norm": 0.8309199793371376, "learning_rate": 4.769070301041219e-06, "loss": 0.4566, "step": 3432 }, { "epoch": 0.8489119683481702, "grad_norm": 0.7678874274807607, "learning_rate": 4.768933843092436e-06, "loss": 0.3946, "step": 3433 }, { "epoch": 0.8491592482690405, "grad_norm": 0.7549429356875449, "learning_rate": 4.768797346792015e-06, "loss": 0.4287, "step": 3434 }, { "epoch": 0.849406528189911, "grad_norm": 0.7876208123929652, "learning_rate": 4.768660812142263e-06, "loss": 0.405, "step": 3435 }, { "epoch": 0.8496538081107814, "grad_norm": 0.8291311437122808, "learning_rate": 4.768524239145487e-06, "loss": 0.4087, "step": 3436 }, { "epoch": 0.8499010880316519, "grad_norm": 0.8268178012129144, "learning_rate": 4.768387627803996e-06, "loss": 0.4097, "step": 3437 }, { "epoch": 0.8501483679525222, "grad_norm": 0.7851962802571755, "learning_rate": 4.7682509781200995e-06, "loss": 0.435, "step": 3438 }, { "epoch": 0.8503956478733927, "grad_norm": 0.8224586447080902, "learning_rate": 4.768114290096106e-06, "loss": 0.407, "step": 3439 }, { "epoch": 0.8506429277942631, "grad_norm": 0.8156576914563988, "learning_rate": 4.7679775637343275e-06, "loss": 0.4067, "step": 3440 }, { "epoch": 0.8508902077151336, "grad_norm": 0.8416785505706572, "learning_rate": 4.767840799037074e-06, "loss": 0.4297, "step": 3441 }, { "epoch": 0.8511374876360039, "grad_norm": 0.7581910072086763, "learning_rate": 4.767703996006658e-06, "loss": 0.439, "step": 3442 }, { "epoch": 0.8513847675568744, "grad_norm": 0.8036041084178673, "learning_rate": 4.767567154645392e-06, "loss": 0.3876, "step": 3443 }, { "epoch": 0.8516320474777448, "grad_norm": 0.8077582774150418, "learning_rate": 4.767430274955587e-06, "loss": 0.4529, "step": 3444 }, { "epoch": 0.8518793273986153, "grad_norm": 0.7940995416916322, "learning_rate": 4.767293356939559e-06, "loss": 0.4045, "step": 3445 }, { "epoch": 0.8521266073194856, "grad_norm": 0.8380267975480605, "learning_rate": 4.7671564005996215e-06, "loss": 0.4236, "step": 3446 }, { "epoch": 0.8523738872403561, "grad_norm": 0.7782724418448519, "learning_rate": 4.767019405938089e-06, "loss": 0.4095, "step": 3447 }, { "epoch": 0.8526211671612265, "grad_norm": 0.8634191317576759, "learning_rate": 4.766882372957278e-06, "loss": 0.4273, "step": 3448 }, { "epoch": 0.852868447082097, "grad_norm": 0.7886128298243266, "learning_rate": 4.7667453016595044e-06, "loss": 0.4336, "step": 3449 }, { "epoch": 0.8531157270029673, "grad_norm": 0.7839418502660845, "learning_rate": 4.766608192047084e-06, "loss": 0.4042, "step": 3450 }, { "epoch": 0.8533630069238378, "grad_norm": 0.779315107663836, "learning_rate": 4.766471044122337e-06, "loss": 0.4116, "step": 3451 }, { "epoch": 0.8536102868447082, "grad_norm": 0.8130332426670765, "learning_rate": 4.766333857887579e-06, "loss": 0.3986, "step": 3452 }, { "epoch": 0.8538575667655787, "grad_norm": 0.8316154784681559, "learning_rate": 4.7661966333451305e-06, "loss": 0.3984, "step": 3453 }, { "epoch": 0.854104846686449, "grad_norm": 0.7697149493976957, "learning_rate": 4.766059370497309e-06, "loss": 0.4037, "step": 3454 }, { "epoch": 0.8543521266073195, "grad_norm": 0.8222479951816859, "learning_rate": 4.765922069346437e-06, "loss": 0.3778, "step": 3455 }, { "epoch": 0.8545994065281899, "grad_norm": 0.8444888274019681, "learning_rate": 4.765784729894834e-06, "loss": 0.3792, "step": 3456 }, { "epoch": 0.8548466864490604, "grad_norm": 0.8094354195563492, "learning_rate": 4.765647352144822e-06, "loss": 0.4236, "step": 3457 }, { "epoch": 0.8550939663699307, "grad_norm": 0.7945699779942299, "learning_rate": 4.7655099360987225e-06, "loss": 0.419, "step": 3458 }, { "epoch": 0.8553412462908012, "grad_norm": 0.7623631949030496, "learning_rate": 4.765372481758859e-06, "loss": 0.4047, "step": 3459 }, { "epoch": 0.8555885262116716, "grad_norm": 0.8006217233235627, "learning_rate": 4.7652349891275525e-06, "loss": 0.4199, "step": 3460 }, { "epoch": 0.8558358061325421, "grad_norm": 0.8071519894620478, "learning_rate": 4.765097458207131e-06, "loss": 0.429, "step": 3461 }, { "epoch": 0.8560830860534124, "grad_norm": 0.7998455842155523, "learning_rate": 4.764959888999917e-06, "loss": 0.3849, "step": 3462 }, { "epoch": 0.8563303659742829, "grad_norm": 0.7739611268657531, "learning_rate": 4.7648222815082345e-06, "loss": 0.4204, "step": 3463 }, { "epoch": 0.8565776458951533, "grad_norm": 0.7927935553548467, "learning_rate": 4.764684635734412e-06, "loss": 0.4075, "step": 3464 }, { "epoch": 0.8568249258160238, "grad_norm": 0.7803766875608863, "learning_rate": 4.764546951680775e-06, "loss": 0.4338, "step": 3465 }, { "epoch": 0.8570722057368941, "grad_norm": 0.8016805038029136, "learning_rate": 4.76440922934965e-06, "loss": 0.4044, "step": 3466 }, { "epoch": 0.8573194856577646, "grad_norm": 0.8185021142595483, "learning_rate": 4.764271468743367e-06, "loss": 0.4072, "step": 3467 }, { "epoch": 0.857566765578635, "grad_norm": 0.7925774571128128, "learning_rate": 4.764133669864253e-06, "loss": 0.4161, "step": 3468 }, { "epoch": 0.8578140454995055, "grad_norm": 0.8062989314810408, "learning_rate": 4.763995832714636e-06, "loss": 0.3727, "step": 3469 }, { "epoch": 0.8580613254203758, "grad_norm": 0.7957403676082958, "learning_rate": 4.763857957296849e-06, "loss": 0.4329, "step": 3470 }, { "epoch": 0.8583086053412463, "grad_norm": 0.8384575802088189, "learning_rate": 4.7637200436132194e-06, "loss": 0.4043, "step": 3471 }, { "epoch": 0.8585558852621167, "grad_norm": 0.807113219782392, "learning_rate": 4.76358209166608e-06, "loss": 0.4014, "step": 3472 }, { "epoch": 0.8588031651829872, "grad_norm": 0.8043008375712767, "learning_rate": 4.7634441014577635e-06, "loss": 0.4005, "step": 3473 }, { "epoch": 0.8590504451038575, "grad_norm": 0.7842916597723525, "learning_rate": 4.763306072990601e-06, "loss": 0.391, "step": 3474 }, { "epoch": 0.859297725024728, "grad_norm": 0.7986848229547799, "learning_rate": 4.763168006266925e-06, "loss": 0.4219, "step": 3475 }, { "epoch": 0.8595450049455984, "grad_norm": 0.8198764103686891, "learning_rate": 4.76302990128907e-06, "loss": 0.3762, "step": 3476 }, { "epoch": 0.8597922848664689, "grad_norm": 0.7972917241817145, "learning_rate": 4.76289175805937e-06, "loss": 0.4477, "step": 3477 }, { "epoch": 0.8600395647873392, "grad_norm": 0.8004509954487877, "learning_rate": 4.762753576580161e-06, "loss": 0.4219, "step": 3478 }, { "epoch": 0.8602868447082097, "grad_norm": 0.7815966556815089, "learning_rate": 4.762615356853779e-06, "loss": 0.4269, "step": 3479 }, { "epoch": 0.8605341246290801, "grad_norm": 0.8140688940745657, "learning_rate": 4.762477098882558e-06, "loss": 0.4224, "step": 3480 }, { "epoch": 0.8607814045499506, "grad_norm": 0.8085670395416449, "learning_rate": 4.762338802668838e-06, "loss": 0.4033, "step": 3481 }, { "epoch": 0.8610286844708209, "grad_norm": 0.8114398196934406, "learning_rate": 4.762200468214953e-06, "loss": 0.405, "step": 3482 }, { "epoch": 0.8612759643916914, "grad_norm": 0.7957454762289632, "learning_rate": 4.7620620955232435e-06, "loss": 0.3963, "step": 3483 }, { "epoch": 0.8615232443125618, "grad_norm": 0.7716297802639464, "learning_rate": 4.7619236845960495e-06, "loss": 0.3725, "step": 3484 }, { "epoch": 0.8617705242334323, "grad_norm": 0.7498275856769105, "learning_rate": 4.7617852354357085e-06, "loss": 0.408, "step": 3485 }, { "epoch": 0.8620178041543026, "grad_norm": 0.8209720763151387, "learning_rate": 4.761646748044561e-06, "loss": 0.4222, "step": 3486 }, { "epoch": 0.8622650840751731, "grad_norm": 0.7811869145474315, "learning_rate": 4.761508222424948e-06, "loss": 0.4029, "step": 3487 }, { "epoch": 0.8625123639960435, "grad_norm": 0.774668109393603, "learning_rate": 4.761369658579213e-06, "loss": 0.4089, "step": 3488 }, { "epoch": 0.862759643916914, "grad_norm": 0.774508807733018, "learning_rate": 4.761231056509694e-06, "loss": 0.4087, "step": 3489 }, { "epoch": 0.8630069238377843, "grad_norm": 0.8043812063644213, "learning_rate": 4.761092416218737e-06, "loss": 0.4133, "step": 3490 }, { "epoch": 0.8632542037586548, "grad_norm": 0.8291349458889816, "learning_rate": 4.760953737708685e-06, "loss": 0.4119, "step": 3491 }, { "epoch": 0.8635014836795252, "grad_norm": 0.7920356279520397, "learning_rate": 4.7608150209818815e-06, "loss": 0.3771, "step": 3492 }, { "epoch": 0.8637487636003957, "grad_norm": 0.8072523281275508, "learning_rate": 4.760676266040671e-06, "loss": 0.4068, "step": 3493 }, { "epoch": 0.863996043521266, "grad_norm": 0.7899951360565894, "learning_rate": 4.7605374728874e-06, "loss": 0.4221, "step": 3494 }, { "epoch": 0.8642433234421365, "grad_norm": 0.7947865396993798, "learning_rate": 4.760398641524413e-06, "loss": 0.4459, "step": 3495 }, { "epoch": 0.8644906033630069, "grad_norm": 0.7709649897446718, "learning_rate": 4.760259771954058e-06, "loss": 0.4485, "step": 3496 }, { "epoch": 0.8647378832838774, "grad_norm": 0.8107187352574584, "learning_rate": 4.7601208641786814e-06, "loss": 0.4296, "step": 3497 }, { "epoch": 0.8649851632047477, "grad_norm": 0.808173993566204, "learning_rate": 4.759981918200632e-06, "loss": 0.3832, "step": 3498 }, { "epoch": 0.8652324431256182, "grad_norm": 0.7733226232262745, "learning_rate": 4.7598429340222565e-06, "loss": 0.4412, "step": 3499 }, { "epoch": 0.8654797230464887, "grad_norm": 0.7802454421928897, "learning_rate": 4.7597039116459065e-06, "loss": 0.44, "step": 3500 }, { "epoch": 0.865727002967359, "grad_norm": 0.7551425262142368, "learning_rate": 4.75956485107393e-06, "loss": 0.4164, "step": 3501 }, { "epoch": 0.8659742828882295, "grad_norm": 0.7607363103302663, "learning_rate": 4.75942575230868e-06, "loss": 0.4278, "step": 3502 }, { "epoch": 0.8662215628090999, "grad_norm": 0.7928219599691498, "learning_rate": 4.759286615352504e-06, "loss": 0.4044, "step": 3503 }, { "epoch": 0.8664688427299704, "grad_norm": 0.7746463131564298, "learning_rate": 4.759147440207758e-06, "loss": 0.4119, "step": 3504 }, { "epoch": 0.8667161226508407, "grad_norm": 0.8030529000110455, "learning_rate": 4.7590082268767906e-06, "loss": 0.3837, "step": 3505 }, { "epoch": 0.8669634025717112, "grad_norm": 0.8373758297950461, "learning_rate": 4.758868975361958e-06, "loss": 0.4134, "step": 3506 }, { "epoch": 0.8672106824925816, "grad_norm": 0.847300972466465, "learning_rate": 4.758729685665612e-06, "loss": 0.4137, "step": 3507 }, { "epoch": 0.8674579624134521, "grad_norm": 0.822308602045918, "learning_rate": 4.758590357790107e-06, "loss": 0.4266, "step": 3508 }, { "epoch": 0.8677052423343224, "grad_norm": 0.7751883119985707, "learning_rate": 4.7584509917378e-06, "loss": 0.4068, "step": 3509 }, { "epoch": 0.8679525222551929, "grad_norm": 0.7472605460681306, "learning_rate": 4.758311587511044e-06, "loss": 0.4192, "step": 3510 }, { "epoch": 0.8681998021760633, "grad_norm": 0.7846163021154071, "learning_rate": 4.758172145112198e-06, "loss": 0.4236, "step": 3511 }, { "epoch": 0.8684470820969338, "grad_norm": 0.8040444889492215, "learning_rate": 4.758032664543617e-06, "loss": 0.4068, "step": 3512 }, { "epoch": 0.8686943620178041, "grad_norm": 0.7854285165758984, "learning_rate": 4.757893145807659e-06, "loss": 0.4087, "step": 3513 }, { "epoch": 0.8689416419386746, "grad_norm": 0.8123417665178312, "learning_rate": 4.757753588906684e-06, "loss": 0.3667, "step": 3514 }, { "epoch": 0.869188921859545, "grad_norm": 0.783236901262894, "learning_rate": 4.757613993843048e-06, "loss": 0.4303, "step": 3515 }, { "epoch": 0.8694362017804155, "grad_norm": 0.7909692214395833, "learning_rate": 4.757474360619113e-06, "loss": 0.4018, "step": 3516 }, { "epoch": 0.8696834817012858, "grad_norm": 0.8481627332283962, "learning_rate": 4.757334689237239e-06, "loss": 0.4067, "step": 3517 }, { "epoch": 0.8699307616221563, "grad_norm": 0.7615068125391466, "learning_rate": 4.757194979699784e-06, "loss": 0.4368, "step": 3518 }, { "epoch": 0.8701780415430267, "grad_norm": 0.801705773275248, "learning_rate": 4.757055232009113e-06, "loss": 0.4375, "step": 3519 }, { "epoch": 0.8704253214638972, "grad_norm": 0.8221944799768854, "learning_rate": 4.756915446167587e-06, "loss": 0.389, "step": 3520 }, { "epoch": 0.8706726013847675, "grad_norm": 0.7862370813701264, "learning_rate": 4.756775622177568e-06, "loss": 0.42, "step": 3521 }, { "epoch": 0.870919881305638, "grad_norm": 0.7597195820726511, "learning_rate": 4.756635760041421e-06, "loss": 0.3909, "step": 3522 }, { "epoch": 0.8711671612265084, "grad_norm": 0.8076111401656502, "learning_rate": 4.7564958597615085e-06, "loss": 0.4135, "step": 3523 }, { "epoch": 0.8714144411473789, "grad_norm": 0.8192089416161316, "learning_rate": 4.756355921340197e-06, "loss": 0.3929, "step": 3524 }, { "epoch": 0.8716617210682492, "grad_norm": 0.8545678851827884, "learning_rate": 4.7562159447798485e-06, "loss": 0.3968, "step": 3525 }, { "epoch": 0.8719090009891197, "grad_norm": 0.7846539307702833, "learning_rate": 4.756075930082833e-06, "loss": 0.4232, "step": 3526 }, { "epoch": 0.8721562809099901, "grad_norm": 0.7645948649778886, "learning_rate": 4.755935877251515e-06, "loss": 0.3992, "step": 3527 }, { "epoch": 0.8724035608308606, "grad_norm": 0.8118160536750945, "learning_rate": 4.755795786288262e-06, "loss": 0.3972, "step": 3528 }, { "epoch": 0.8726508407517309, "grad_norm": 0.8161177141362831, "learning_rate": 4.7556556571954414e-06, "loss": 0.4113, "step": 3529 }, { "epoch": 0.8728981206726014, "grad_norm": 0.7550227042035801, "learning_rate": 4.755515489975424e-06, "loss": 0.4113, "step": 3530 }, { "epoch": 0.8731454005934718, "grad_norm": 0.7841549066488641, "learning_rate": 4.755375284630577e-06, "loss": 0.3964, "step": 3531 }, { "epoch": 0.8733926805143423, "grad_norm": 0.7504063160687925, "learning_rate": 4.75523504116327e-06, "loss": 0.4069, "step": 3532 }, { "epoch": 0.8736399604352126, "grad_norm": 0.8184342628628202, "learning_rate": 4.755094759575875e-06, "loss": 0.4015, "step": 3533 }, { "epoch": 0.8738872403560831, "grad_norm": 0.7998457424375706, "learning_rate": 4.754954439870763e-06, "loss": 0.3947, "step": 3534 }, { "epoch": 0.8741345202769535, "grad_norm": 0.7918492983576808, "learning_rate": 4.754814082050305e-06, "loss": 0.4025, "step": 3535 }, { "epoch": 0.874381800197824, "grad_norm": 0.826262336173255, "learning_rate": 4.7546736861168745e-06, "loss": 0.4033, "step": 3536 }, { "epoch": 0.8746290801186943, "grad_norm": 0.8139016489751498, "learning_rate": 4.754533252072843e-06, "loss": 0.4127, "step": 3537 }, { "epoch": 0.8748763600395648, "grad_norm": 0.8212185358376062, "learning_rate": 4.754392779920585e-06, "loss": 0.4218, "step": 3538 }, { "epoch": 0.8751236399604352, "grad_norm": 0.8123701589959741, "learning_rate": 4.754252269662476e-06, "loss": 0.4212, "step": 3539 }, { "epoch": 0.8753709198813057, "grad_norm": 0.7360744756469361, "learning_rate": 4.754111721300889e-06, "loss": 0.4146, "step": 3540 }, { "epoch": 0.875618199802176, "grad_norm": 0.7851546187922345, "learning_rate": 4.753971134838202e-06, "loss": 0.4179, "step": 3541 }, { "epoch": 0.8758654797230465, "grad_norm": 0.7721531761045248, "learning_rate": 4.753830510276789e-06, "loss": 0.4435, "step": 3542 }, { "epoch": 0.8761127596439169, "grad_norm": 0.7785287323560217, "learning_rate": 4.7536898476190295e-06, "loss": 0.4226, "step": 3543 }, { "epoch": 0.8763600395647874, "grad_norm": 0.7967610882789702, "learning_rate": 4.753549146867299e-06, "loss": 0.4254, "step": 3544 }, { "epoch": 0.8766073194856577, "grad_norm": 0.8096712743989584, "learning_rate": 4.753408408023976e-06, "loss": 0.3831, "step": 3545 }, { "epoch": 0.8768545994065282, "grad_norm": 0.7636869739295397, "learning_rate": 4.75326763109144e-06, "loss": 0.427, "step": 3546 }, { "epoch": 0.8771018793273986, "grad_norm": 0.771914882644073, "learning_rate": 4.753126816072071e-06, "loss": 0.3803, "step": 3547 }, { "epoch": 0.8773491592482691, "grad_norm": 0.7653991534397886, "learning_rate": 4.752985962968247e-06, "loss": 0.4219, "step": 3548 }, { "epoch": 0.8775964391691394, "grad_norm": 0.8189550150303511, "learning_rate": 4.752845071782352e-06, "loss": 0.4325, "step": 3549 }, { "epoch": 0.8778437190900099, "grad_norm": 0.7692889761933356, "learning_rate": 4.752704142516765e-06, "loss": 0.3801, "step": 3550 }, { "epoch": 0.8780909990108803, "grad_norm": 0.7996162297522486, "learning_rate": 4.7525631751738696e-06, "loss": 0.4072, "step": 3551 }, { "epoch": 0.8783382789317508, "grad_norm": 0.7928390591889003, "learning_rate": 4.752422169756048e-06, "loss": 0.4187, "step": 3552 }, { "epoch": 0.8785855588526211, "grad_norm": 0.8121066718850152, "learning_rate": 4.7522811262656835e-06, "loss": 0.4002, "step": 3553 }, { "epoch": 0.8788328387734916, "grad_norm": 0.7825370933836572, "learning_rate": 4.752140044705161e-06, "loss": 0.4406, "step": 3554 }, { "epoch": 0.879080118694362, "grad_norm": 0.7621264478238917, "learning_rate": 4.751998925076863e-06, "loss": 0.417, "step": 3555 }, { "epoch": 0.8793273986152325, "grad_norm": 0.7962830855667626, "learning_rate": 4.7518577673831765e-06, "loss": 0.4318, "step": 3556 }, { "epoch": 0.8795746785361028, "grad_norm": 0.7761619234912889, "learning_rate": 4.7517165716264866e-06, "loss": 0.4136, "step": 3557 }, { "epoch": 0.8798219584569733, "grad_norm": 0.8059167848034989, "learning_rate": 4.751575337809183e-06, "loss": 0.4084, "step": 3558 }, { "epoch": 0.8800692383778437, "grad_norm": 0.8124913029375966, "learning_rate": 4.751434065933648e-06, "loss": 0.4045, "step": 3559 }, { "epoch": 0.8803165182987142, "grad_norm": 0.7814128265870901, "learning_rate": 4.751292756002273e-06, "loss": 0.4258, "step": 3560 }, { "epoch": 0.8805637982195845, "grad_norm": 0.8039331824474487, "learning_rate": 4.751151408017445e-06, "loss": 0.4056, "step": 3561 }, { "epoch": 0.880811078140455, "grad_norm": 0.8269124609448832, "learning_rate": 4.751010021981555e-06, "loss": 0.4022, "step": 3562 }, { "epoch": 0.8810583580613254, "grad_norm": 0.8059296222218977, "learning_rate": 4.75086859789699e-06, "loss": 0.4137, "step": 3563 }, { "epoch": 0.8813056379821959, "grad_norm": 0.8257234417066797, "learning_rate": 4.750727135766143e-06, "loss": 0.4017, "step": 3564 }, { "epoch": 0.8815529179030662, "grad_norm": 0.7868621973148561, "learning_rate": 4.750585635591404e-06, "loss": 0.4026, "step": 3565 }, { "epoch": 0.8818001978239367, "grad_norm": 0.8049536206026485, "learning_rate": 4.750444097375165e-06, "loss": 0.3992, "step": 3566 }, { "epoch": 0.8820474777448071, "grad_norm": 0.8126870559722053, "learning_rate": 4.750302521119819e-06, "loss": 0.4046, "step": 3567 }, { "epoch": 0.8822947576656776, "grad_norm": 0.8647264312706785, "learning_rate": 4.750160906827758e-06, "loss": 0.4248, "step": 3568 }, { "epoch": 0.8825420375865479, "grad_norm": 0.742871129693659, "learning_rate": 4.750019254501376e-06, "loss": 0.4345, "step": 3569 }, { "epoch": 0.8827893175074184, "grad_norm": 0.8191164799952924, "learning_rate": 4.749877564143067e-06, "loss": 0.424, "step": 3570 }, { "epoch": 0.8830365974282888, "grad_norm": 0.781549768862716, "learning_rate": 4.749735835755227e-06, "loss": 0.4298, "step": 3571 }, { "epoch": 0.8832838773491593, "grad_norm": 0.8053711023623178, "learning_rate": 4.749594069340252e-06, "loss": 0.413, "step": 3572 }, { "epoch": 0.8835311572700296, "grad_norm": 0.8262873093017418, "learning_rate": 4.749452264900536e-06, "loss": 0.427, "step": 3573 }, { "epoch": 0.8837784371909001, "grad_norm": 0.7651153848178928, "learning_rate": 4.749310422438478e-06, "loss": 0.4328, "step": 3574 }, { "epoch": 0.8840257171117705, "grad_norm": 0.821565389817644, "learning_rate": 4.749168541956475e-06, "loss": 0.4025, "step": 3575 }, { "epoch": 0.884272997032641, "grad_norm": 0.8771003427825715, "learning_rate": 4.749026623456925e-06, "loss": 0.3983, "step": 3576 }, { "epoch": 0.8845202769535113, "grad_norm": 0.8193490682302352, "learning_rate": 4.748884666942226e-06, "loss": 0.4175, "step": 3577 }, { "epoch": 0.8847675568743818, "grad_norm": 0.7881409124419267, "learning_rate": 4.748742672414779e-06, "loss": 0.4199, "step": 3578 }, { "epoch": 0.8850148367952523, "grad_norm": 0.7939165086785582, "learning_rate": 4.748600639876983e-06, "loss": 0.4059, "step": 3579 }, { "epoch": 0.8852621167161226, "grad_norm": 0.7968599992051688, "learning_rate": 4.748458569331239e-06, "loss": 0.4462, "step": 3580 }, { "epoch": 0.8855093966369931, "grad_norm": 0.8478289232046818, "learning_rate": 4.7483164607799495e-06, "loss": 0.3866, "step": 3581 }, { "epoch": 0.8857566765578635, "grad_norm": 0.7865185184922773, "learning_rate": 4.748174314225515e-06, "loss": 0.415, "step": 3582 }, { "epoch": 0.886003956478734, "grad_norm": 0.7813174862734311, "learning_rate": 4.748032129670339e-06, "loss": 0.4436, "step": 3583 }, { "epoch": 0.8862512363996043, "grad_norm": 0.8084377607109554, "learning_rate": 4.747889907116826e-06, "loss": 0.3953, "step": 3584 }, { "epoch": 0.8864985163204748, "grad_norm": 0.8036795735133182, "learning_rate": 4.747747646567378e-06, "loss": 0.3895, "step": 3585 }, { "epoch": 0.8867457962413452, "grad_norm": 0.8098549722269822, "learning_rate": 4.747605348024399e-06, "loss": 0.4254, "step": 3586 }, { "epoch": 0.8869930761622157, "grad_norm": 0.8081490604273397, "learning_rate": 4.747463011490297e-06, "loss": 0.4039, "step": 3587 }, { "epoch": 0.887240356083086, "grad_norm": 0.7912104338630691, "learning_rate": 4.747320636967476e-06, "loss": 0.4045, "step": 3588 }, { "epoch": 0.8874876360039565, "grad_norm": 0.8185827574341613, "learning_rate": 4.747178224458343e-06, "loss": 0.3962, "step": 3589 }, { "epoch": 0.8877349159248269, "grad_norm": 0.804633825051263, "learning_rate": 4.7470357739653055e-06, "loss": 0.4151, "step": 3590 }, { "epoch": 0.8879821958456974, "grad_norm": 0.8270157295952916, "learning_rate": 4.746893285490771e-06, "loss": 0.3745, "step": 3591 }, { "epoch": 0.8882294757665677, "grad_norm": 0.794707377298943, "learning_rate": 4.746750759037148e-06, "loss": 0.4317, "step": 3592 }, { "epoch": 0.8884767556874382, "grad_norm": 0.815353758945955, "learning_rate": 4.746608194606845e-06, "loss": 0.4009, "step": 3593 }, { "epoch": 0.8887240356083086, "grad_norm": 0.7851323106201907, "learning_rate": 4.746465592202273e-06, "loss": 0.3853, "step": 3594 }, { "epoch": 0.8889713155291791, "grad_norm": 0.7726796238264945, "learning_rate": 4.7463229518258424e-06, "loss": 0.4068, "step": 3595 }, { "epoch": 0.8892185954500494, "grad_norm": 0.842200959923491, "learning_rate": 4.746180273479963e-06, "loss": 0.4096, "step": 3596 }, { "epoch": 0.8894658753709199, "grad_norm": 0.7998155688698845, "learning_rate": 4.746037557167047e-06, "loss": 0.4087, "step": 3597 }, { "epoch": 0.8897131552917903, "grad_norm": 0.7937182610227635, "learning_rate": 4.745894802889507e-06, "loss": 0.422, "step": 3598 }, { "epoch": 0.8899604352126608, "grad_norm": 0.8111751844441354, "learning_rate": 4.745752010649755e-06, "loss": 0.39, "step": 3599 }, { "epoch": 0.8902077151335311, "grad_norm": 0.7714646667870872, "learning_rate": 4.745609180450207e-06, "loss": 0.4192, "step": 3600 }, { "epoch": 0.8904549950544016, "grad_norm": 0.7554803232211826, "learning_rate": 4.745466312293275e-06, "loss": 0.4306, "step": 3601 }, { "epoch": 0.890702274975272, "grad_norm": 0.7757947210978946, "learning_rate": 4.745323406181375e-06, "loss": 0.4142, "step": 3602 }, { "epoch": 0.8909495548961425, "grad_norm": 0.760204249748166, "learning_rate": 4.7451804621169214e-06, "loss": 0.4446, "step": 3603 }, { "epoch": 0.8911968348170128, "grad_norm": 0.7870209440781624, "learning_rate": 4.745037480102332e-06, "loss": 0.3776, "step": 3604 }, { "epoch": 0.8914441147378833, "grad_norm": 0.7948033100309609, "learning_rate": 4.744894460140021e-06, "loss": 0.4145, "step": 3605 }, { "epoch": 0.8916913946587537, "grad_norm": 0.8094052552150242, "learning_rate": 4.7447514022324085e-06, "loss": 0.3901, "step": 3606 }, { "epoch": 0.8919386745796242, "grad_norm": 0.8210557157284226, "learning_rate": 4.744608306381912e-06, "loss": 0.444, "step": 3607 }, { "epoch": 0.8921859545004945, "grad_norm": 0.8307590466318364, "learning_rate": 4.744465172590949e-06, "loss": 0.3834, "step": 3608 }, { "epoch": 0.892433234421365, "grad_norm": 0.7843833688120964, "learning_rate": 4.7443220008619405e-06, "loss": 0.4132, "step": 3609 }, { "epoch": 0.8926805143422354, "grad_norm": 0.7944958514405386, "learning_rate": 4.744178791197305e-06, "loss": 0.4175, "step": 3610 }, { "epoch": 0.8929277942631059, "grad_norm": 0.7998369942157381, "learning_rate": 4.744035543599464e-06, "loss": 0.3737, "step": 3611 }, { "epoch": 0.8931750741839762, "grad_norm": 0.8009239130803604, "learning_rate": 4.74389225807084e-06, "loss": 0.4203, "step": 3612 }, { "epoch": 0.8934223541048467, "grad_norm": 0.800511982201152, "learning_rate": 4.743748934613853e-06, "loss": 0.4466, "step": 3613 }, { "epoch": 0.8936696340257171, "grad_norm": 0.7686053384089964, "learning_rate": 4.743605573230926e-06, "loss": 0.4101, "step": 3614 }, { "epoch": 0.8939169139465876, "grad_norm": 0.8060434016909004, "learning_rate": 4.7434621739244826e-06, "loss": 0.4232, "step": 3615 }, { "epoch": 0.8941641938674579, "grad_norm": 0.8230882644339693, "learning_rate": 4.7433187366969465e-06, "loss": 0.4046, "step": 3616 }, { "epoch": 0.8944114737883284, "grad_norm": 0.7989957008420914, "learning_rate": 4.743175261550743e-06, "loss": 0.4167, "step": 3617 }, { "epoch": 0.8946587537091988, "grad_norm": 0.8394401197348738, "learning_rate": 4.7430317484882956e-06, "loss": 0.4009, "step": 3618 }, { "epoch": 0.8949060336300693, "grad_norm": 0.8094052684678341, "learning_rate": 4.7428881975120325e-06, "loss": 0.3933, "step": 3619 }, { "epoch": 0.8951533135509396, "grad_norm": 0.7851229819463934, "learning_rate": 4.742744608624377e-06, "loss": 0.4113, "step": 3620 }, { "epoch": 0.8954005934718101, "grad_norm": 0.7662309230132146, "learning_rate": 4.742600981827759e-06, "loss": 0.4065, "step": 3621 }, { "epoch": 0.8956478733926805, "grad_norm": 0.7710743555606467, "learning_rate": 4.7424573171246045e-06, "loss": 0.4206, "step": 3622 }, { "epoch": 0.895895153313551, "grad_norm": 0.8099130000859085, "learning_rate": 4.742313614517342e-06, "loss": 0.3821, "step": 3623 }, { "epoch": 0.8961424332344213, "grad_norm": 0.8223717726070423, "learning_rate": 4.7421698740084024e-06, "loss": 0.4351, "step": 3624 }, { "epoch": 0.8963897131552918, "grad_norm": 0.785125594303034, "learning_rate": 4.742026095600213e-06, "loss": 0.3946, "step": 3625 }, { "epoch": 0.8966369930761622, "grad_norm": 0.8069026278651512, "learning_rate": 4.741882279295204e-06, "loss": 0.3847, "step": 3626 }, { "epoch": 0.8968842729970327, "grad_norm": 0.8105176440474819, "learning_rate": 4.7417384250958085e-06, "loss": 0.4036, "step": 3627 }, { "epoch": 0.897131552917903, "grad_norm": 0.8156431875624227, "learning_rate": 4.741594533004455e-06, "loss": 0.3943, "step": 3628 }, { "epoch": 0.8973788328387735, "grad_norm": 0.8325389032978646, "learning_rate": 4.74145060302358e-06, "loss": 0.3985, "step": 3629 }, { "epoch": 0.8976261127596439, "grad_norm": 0.7812029183112011, "learning_rate": 4.741306635155613e-06, "loss": 0.4174, "step": 3630 }, { "epoch": 0.8978733926805144, "grad_norm": 0.7443379316555324, "learning_rate": 4.741162629402987e-06, "loss": 0.4244, "step": 3631 }, { "epoch": 0.8981206726013847, "grad_norm": 0.7918379844224285, "learning_rate": 4.741018585768139e-06, "loss": 0.4298, "step": 3632 }, { "epoch": 0.8983679525222552, "grad_norm": 0.8155608902187095, "learning_rate": 4.740874504253501e-06, "loss": 0.4136, "step": 3633 }, { "epoch": 0.8986152324431256, "grad_norm": 0.798431437148323, "learning_rate": 4.740730384861511e-06, "loss": 0.4152, "step": 3634 }, { "epoch": 0.8988625123639961, "grad_norm": 0.7817921688685995, "learning_rate": 4.740586227594602e-06, "loss": 0.4322, "step": 3635 }, { "epoch": 0.8991097922848664, "grad_norm": 0.801265782510891, "learning_rate": 4.740442032455213e-06, "loss": 0.3742, "step": 3636 }, { "epoch": 0.8993570722057369, "grad_norm": 0.774678482565866, "learning_rate": 4.740297799445781e-06, "loss": 0.4431, "step": 3637 }, { "epoch": 0.8996043521266073, "grad_norm": 0.7968222612747385, "learning_rate": 4.740153528568743e-06, "loss": 0.4176, "step": 3638 }, { "epoch": 0.8998516320474778, "grad_norm": 0.7887370276926967, "learning_rate": 4.740009219826538e-06, "loss": 0.4488, "step": 3639 }, { "epoch": 0.9000989119683481, "grad_norm": 0.8051668166188332, "learning_rate": 4.739864873221607e-06, "loss": 0.4105, "step": 3640 }, { "epoch": 0.9003461918892186, "grad_norm": 0.7819691265369665, "learning_rate": 4.739720488756387e-06, "loss": 0.4241, "step": 3641 }, { "epoch": 0.900593471810089, "grad_norm": 0.834960143043667, "learning_rate": 4.73957606643332e-06, "loss": 0.4472, "step": 3642 }, { "epoch": 0.9008407517309595, "grad_norm": 0.8080501371876072, "learning_rate": 4.739431606254847e-06, "loss": 0.3909, "step": 3643 }, { "epoch": 0.9010880316518298, "grad_norm": 0.7919301864894568, "learning_rate": 4.73928710822341e-06, "loss": 0.412, "step": 3644 }, { "epoch": 0.9013353115727003, "grad_norm": 0.7911907598771045, "learning_rate": 4.739142572341451e-06, "loss": 0.437, "step": 3645 }, { "epoch": 0.9015825914935707, "grad_norm": 0.7705392923487695, "learning_rate": 4.738997998611413e-06, "loss": 0.4042, "step": 3646 }, { "epoch": 0.9018298714144412, "grad_norm": 0.8233674027397053, "learning_rate": 4.7388533870357415e-06, "loss": 0.414, "step": 3647 }, { "epoch": 0.9020771513353115, "grad_norm": 0.7503969461053938, "learning_rate": 4.738708737616879e-06, "loss": 0.4066, "step": 3648 }, { "epoch": 0.902324431256182, "grad_norm": 0.8049770365300944, "learning_rate": 4.73856405035727e-06, "loss": 0.41, "step": 3649 }, { "epoch": 0.9025717111770524, "grad_norm": 0.8109425037455092, "learning_rate": 4.7384193252593606e-06, "loss": 0.4445, "step": 3650 }, { "epoch": 0.9028189910979229, "grad_norm": 0.8369589101352245, "learning_rate": 4.7382745623255985e-06, "loss": 0.4313, "step": 3651 }, { "epoch": 0.9030662710187932, "grad_norm": 0.8046568527922425, "learning_rate": 4.73812976155843e-06, "loss": 0.3856, "step": 3652 }, { "epoch": 0.9033135509396637, "grad_norm": 0.8099061520890892, "learning_rate": 4.737984922960301e-06, "loss": 0.4034, "step": 3653 }, { "epoch": 0.9035608308605341, "grad_norm": 0.7989778433300206, "learning_rate": 4.737840046533662e-06, "loss": 0.3918, "step": 3654 }, { "epoch": 0.9038081107814046, "grad_norm": 0.7793440943753789, "learning_rate": 4.737695132280961e-06, "loss": 0.3995, "step": 3655 }, { "epoch": 0.904055390702275, "grad_norm": 0.7676663113860338, "learning_rate": 4.737550180204646e-06, "loss": 0.3792, "step": 3656 }, { "epoch": 0.9043026706231454, "grad_norm": 0.8045089888506259, "learning_rate": 4.737405190307169e-06, "loss": 0.4119, "step": 3657 }, { "epoch": 0.9045499505440159, "grad_norm": 0.7690497853722972, "learning_rate": 4.7372601625909805e-06, "loss": 0.4096, "step": 3658 }, { "epoch": 0.9047972304648862, "grad_norm": 0.8071716454958764, "learning_rate": 4.737115097058532e-06, "loss": 0.4048, "step": 3659 }, { "epoch": 0.9050445103857567, "grad_norm": 0.7895776592417868, "learning_rate": 4.736969993712275e-06, "loss": 0.4167, "step": 3660 }, { "epoch": 0.9052917903066271, "grad_norm": 0.8240625463753163, "learning_rate": 4.736824852554661e-06, "loss": 0.403, "step": 3661 }, { "epoch": 0.9055390702274976, "grad_norm": 0.8195618957323566, "learning_rate": 4.736679673588146e-06, "loss": 0.3815, "step": 3662 }, { "epoch": 0.905786350148368, "grad_norm": 0.7834351939345308, "learning_rate": 4.736534456815182e-06, "loss": 0.4219, "step": 3663 }, { "epoch": 0.9060336300692384, "grad_norm": 0.8324618159473429, "learning_rate": 4.736389202238224e-06, "loss": 0.3964, "step": 3664 }, { "epoch": 0.9062809099901088, "grad_norm": 0.8600961054014756, "learning_rate": 4.736243909859727e-06, "loss": 0.3904, "step": 3665 }, { "epoch": 0.9065281899109793, "grad_norm": 0.7577065431947011, "learning_rate": 4.736098579682148e-06, "loss": 0.4157, "step": 3666 }, { "epoch": 0.9067754698318496, "grad_norm": 0.7866848892631093, "learning_rate": 4.735953211707942e-06, "loss": 0.403, "step": 3667 }, { "epoch": 0.9070227497527201, "grad_norm": 0.7505834429164107, "learning_rate": 4.735807805939568e-06, "loss": 0.4294, "step": 3668 }, { "epoch": 0.9072700296735905, "grad_norm": 0.8346896722270837, "learning_rate": 4.735662362379482e-06, "loss": 0.4088, "step": 3669 }, { "epoch": 0.907517309594461, "grad_norm": 0.8556942291684705, "learning_rate": 4.735516881030143e-06, "loss": 0.3931, "step": 3670 }, { "epoch": 0.9077645895153313, "grad_norm": 0.8109171826553419, "learning_rate": 4.7353713618940104e-06, "loss": 0.3865, "step": 3671 }, { "epoch": 0.9080118694362018, "grad_norm": 0.7873661354765638, "learning_rate": 4.735225804973543e-06, "loss": 0.3986, "step": 3672 }, { "epoch": 0.9082591493570722, "grad_norm": 0.8094256714029084, "learning_rate": 4.735080210271202e-06, "loss": 0.4157, "step": 3673 }, { "epoch": 0.9085064292779427, "grad_norm": 0.8159543106898893, "learning_rate": 4.734934577789449e-06, "loss": 0.3597, "step": 3674 }, { "epoch": 0.908753709198813, "grad_norm": 0.826502911376005, "learning_rate": 4.734788907530744e-06, "loss": 0.3858, "step": 3675 }, { "epoch": 0.9090009891196835, "grad_norm": 0.7757107363293632, "learning_rate": 4.734643199497551e-06, "loss": 0.4146, "step": 3676 }, { "epoch": 0.9092482690405539, "grad_norm": 0.7897304584611788, "learning_rate": 4.73449745369233e-06, "loss": 0.3961, "step": 3677 }, { "epoch": 0.9094955489614244, "grad_norm": 0.8082471594665909, "learning_rate": 4.734351670117548e-06, "loss": 0.3993, "step": 3678 }, { "epoch": 0.9097428288822947, "grad_norm": 0.8032744698939259, "learning_rate": 4.734205848775667e-06, "loss": 0.4244, "step": 3679 }, { "epoch": 0.9099901088031652, "grad_norm": 0.7602920123766033, "learning_rate": 4.734059989669153e-06, "loss": 0.3965, "step": 3680 }, { "epoch": 0.9102373887240356, "grad_norm": 0.8009664738803186, "learning_rate": 4.73391409280047e-06, "loss": 0.4085, "step": 3681 }, { "epoch": 0.9104846686449061, "grad_norm": 0.8213732647282298, "learning_rate": 4.733768158172086e-06, "loss": 0.3911, "step": 3682 }, { "epoch": 0.9107319485657764, "grad_norm": 0.7848207476521984, "learning_rate": 4.733622185786466e-06, "loss": 0.4079, "step": 3683 }, { "epoch": 0.9109792284866469, "grad_norm": 0.7988798539247747, "learning_rate": 4.733476175646079e-06, "loss": 0.4248, "step": 3684 }, { "epoch": 0.9112265084075173, "grad_norm": 0.791433923608785, "learning_rate": 4.733330127753391e-06, "loss": 0.374, "step": 3685 }, { "epoch": 0.9114737883283878, "grad_norm": 0.8508840280452902, "learning_rate": 4.733184042110872e-06, "loss": 0.3851, "step": 3686 }, { "epoch": 0.9117210682492581, "grad_norm": 0.7563054793404609, "learning_rate": 4.733037918720991e-06, "loss": 0.4036, "step": 3687 }, { "epoch": 0.9119683481701286, "grad_norm": 0.7806311523663997, "learning_rate": 4.732891757586217e-06, "loss": 0.4269, "step": 3688 }, { "epoch": 0.912215628090999, "grad_norm": 0.8016731889784404, "learning_rate": 4.732745558709022e-06, "loss": 0.4159, "step": 3689 }, { "epoch": 0.9124629080118695, "grad_norm": 0.841535918475194, "learning_rate": 4.732599322091878e-06, "loss": 0.3978, "step": 3690 }, { "epoch": 0.9127101879327398, "grad_norm": 0.8294101182941553, "learning_rate": 4.732453047737254e-06, "loss": 0.3808, "step": 3691 }, { "epoch": 0.9129574678536103, "grad_norm": 0.7925075807432463, "learning_rate": 4.7323067356476236e-06, "loss": 0.39, "step": 3692 }, { "epoch": 0.9132047477744807, "grad_norm": 0.8122022329751005, "learning_rate": 4.7321603858254615e-06, "loss": 0.3956, "step": 3693 }, { "epoch": 0.9134520276953512, "grad_norm": 0.8872703411191297, "learning_rate": 4.732013998273239e-06, "loss": 0.3825, "step": 3694 }, { "epoch": 0.9136993076162215, "grad_norm": 0.8172203043153907, "learning_rate": 4.7318675729934325e-06, "loss": 0.4327, "step": 3695 }, { "epoch": 0.913946587537092, "grad_norm": 0.7698700210872599, "learning_rate": 4.731721109988516e-06, "loss": 0.4249, "step": 3696 }, { "epoch": 0.9141938674579624, "grad_norm": 0.7909208463040441, "learning_rate": 4.731574609260965e-06, "loss": 0.413, "step": 3697 }, { "epoch": 0.9144411473788329, "grad_norm": 0.85111940345262, "learning_rate": 4.7314280708132555e-06, "loss": 0.3766, "step": 3698 }, { "epoch": 0.9146884272997032, "grad_norm": 0.8267173832504716, "learning_rate": 4.731281494647866e-06, "loss": 0.4056, "step": 3699 }, { "epoch": 0.9149357072205737, "grad_norm": 0.781799158173567, "learning_rate": 4.731134880767273e-06, "loss": 0.3793, "step": 3700 }, { "epoch": 0.9151829871414441, "grad_norm": 0.7796258192015334, "learning_rate": 4.730988229173955e-06, "loss": 0.4309, "step": 3701 }, { "epoch": 0.9154302670623146, "grad_norm": 0.8493662394700314, "learning_rate": 4.7308415398703896e-06, "loss": 0.4401, "step": 3702 }, { "epoch": 0.9156775469831849, "grad_norm": 0.8252101322373908, "learning_rate": 4.730694812859058e-06, "loss": 0.425, "step": 3703 }, { "epoch": 0.9159248269040554, "grad_norm": 0.8436892482893468, "learning_rate": 4.73054804814244e-06, "loss": 0.4001, "step": 3704 }, { "epoch": 0.9161721068249258, "grad_norm": 0.7990937103631333, "learning_rate": 4.730401245723015e-06, "loss": 0.4528, "step": 3705 }, { "epoch": 0.9164193867457963, "grad_norm": 0.7426409943836156, "learning_rate": 4.730254405603266e-06, "loss": 0.4139, "step": 3706 }, { "epoch": 0.9166666666666666, "grad_norm": 0.8458020418541371, "learning_rate": 4.730107527785675e-06, "loss": 0.3864, "step": 3707 }, { "epoch": 0.9169139465875371, "grad_norm": 0.7763715601421739, "learning_rate": 4.729960612272724e-06, "loss": 0.413, "step": 3708 }, { "epoch": 0.9171612265084075, "grad_norm": 0.772693666164887, "learning_rate": 4.729813659066895e-06, "loss": 0.3964, "step": 3709 }, { "epoch": 0.917408506429278, "grad_norm": 0.7765688668068649, "learning_rate": 4.729666668170675e-06, "loss": 0.3879, "step": 3710 }, { "epoch": 0.9176557863501483, "grad_norm": 0.7526284361207995, "learning_rate": 4.729519639586546e-06, "loss": 0.4087, "step": 3711 }, { "epoch": 0.9179030662710188, "grad_norm": 0.7880056084680264, "learning_rate": 4.729372573316994e-06, "loss": 0.3952, "step": 3712 }, { "epoch": 0.9181503461918892, "grad_norm": 0.8119548569197702, "learning_rate": 4.729225469364506e-06, "loss": 0.4106, "step": 3713 }, { "epoch": 0.9183976261127597, "grad_norm": 0.8080727821692407, "learning_rate": 4.729078327731566e-06, "loss": 0.4168, "step": 3714 }, { "epoch": 0.91864490603363, "grad_norm": 0.7656106325715241, "learning_rate": 4.728931148420663e-06, "loss": 0.4209, "step": 3715 }, { "epoch": 0.9188921859545005, "grad_norm": 0.7482334472892916, "learning_rate": 4.728783931434285e-06, "loss": 0.427, "step": 3716 }, { "epoch": 0.9191394658753709, "grad_norm": 0.7750948068078986, "learning_rate": 4.7286366767749195e-06, "loss": 0.3864, "step": 3717 }, { "epoch": 0.9193867457962414, "grad_norm": 0.802420863654844, "learning_rate": 4.728489384445055e-06, "loss": 0.4016, "step": 3718 }, { "epoch": 0.9196340257171117, "grad_norm": 0.7601304461080065, "learning_rate": 4.728342054447183e-06, "loss": 0.4446, "step": 3719 }, { "epoch": 0.9198813056379822, "grad_norm": 0.8089882839309193, "learning_rate": 4.728194686783792e-06, "loss": 0.3926, "step": 3720 }, { "epoch": 0.9201285855588526, "grad_norm": 0.7875812610990902, "learning_rate": 4.728047281457374e-06, "loss": 0.3865, "step": 3721 }, { "epoch": 0.920375865479723, "grad_norm": 0.8304273323171134, "learning_rate": 4.7278998384704215e-06, "loss": 0.3886, "step": 3722 }, { "epoch": 0.9206231454005934, "grad_norm": 0.7793749974809633, "learning_rate": 4.727752357825424e-06, "loss": 0.3991, "step": 3723 }, { "epoch": 0.9208704253214639, "grad_norm": 0.7655198670497095, "learning_rate": 4.7276048395248755e-06, "loss": 0.4016, "step": 3724 }, { "epoch": 0.9211177052423343, "grad_norm": 0.7785444053497739, "learning_rate": 4.7274572835712706e-06, "loss": 0.3837, "step": 3725 }, { "epoch": 0.9213649851632048, "grad_norm": 0.794178493200149, "learning_rate": 4.727309689967103e-06, "loss": 0.4141, "step": 3726 }, { "epoch": 0.9216122650840751, "grad_norm": 0.7616756186660109, "learning_rate": 4.727162058714867e-06, "loss": 0.4016, "step": 3727 }, { "epoch": 0.9218595450049456, "grad_norm": 0.827382828719098, "learning_rate": 4.7270143898170575e-06, "loss": 0.3829, "step": 3728 }, { "epoch": 0.922106824925816, "grad_norm": 0.7964010549622547, "learning_rate": 4.7268666832761725e-06, "loss": 0.3913, "step": 3729 }, { "epoch": 0.9223541048466865, "grad_norm": 0.8188786377099656, "learning_rate": 4.726718939094706e-06, "loss": 0.4117, "step": 3730 }, { "epoch": 0.9226013847675568, "grad_norm": 0.7849413309879384, "learning_rate": 4.726571157275157e-06, "loss": 0.4084, "step": 3731 }, { "epoch": 0.9228486646884273, "grad_norm": 0.8072618169447672, "learning_rate": 4.726423337820023e-06, "loss": 0.4058, "step": 3732 }, { "epoch": 0.9230959446092978, "grad_norm": 0.807105005048506, "learning_rate": 4.726275480731803e-06, "loss": 0.3882, "step": 3733 }, { "epoch": 0.9233432245301681, "grad_norm": 0.760185353289157, "learning_rate": 4.726127586012996e-06, "loss": 0.4132, "step": 3734 }, { "epoch": 0.9235905044510386, "grad_norm": 0.7754454391486514, "learning_rate": 4.7259796536661016e-06, "loss": 0.426, "step": 3735 }, { "epoch": 0.923837784371909, "grad_norm": 0.8142785355549925, "learning_rate": 4.725831683693621e-06, "loss": 0.398, "step": 3736 }, { "epoch": 0.9240850642927795, "grad_norm": 0.7681071514435952, "learning_rate": 4.725683676098054e-06, "loss": 0.4131, "step": 3737 }, { "epoch": 0.9243323442136498, "grad_norm": 0.7718039403892808, "learning_rate": 4.725535630881904e-06, "loss": 0.428, "step": 3738 }, { "epoch": 0.9245796241345203, "grad_norm": 0.7862011212454346, "learning_rate": 4.725387548047672e-06, "loss": 0.3932, "step": 3739 }, { "epoch": 0.9248269040553907, "grad_norm": 0.7927179649242823, "learning_rate": 4.725239427597862e-06, "loss": 0.4271, "step": 3740 }, { "epoch": 0.9250741839762612, "grad_norm": 0.7860521643345544, "learning_rate": 4.725091269534976e-06, "loss": 0.3935, "step": 3741 }, { "epoch": 0.9253214638971315, "grad_norm": 0.796238835215501, "learning_rate": 4.724943073861521e-06, "loss": 0.3948, "step": 3742 }, { "epoch": 0.925568743818002, "grad_norm": 0.8406638282243356, "learning_rate": 4.724794840580001e-06, "loss": 0.4006, "step": 3743 }, { "epoch": 0.9258160237388724, "grad_norm": 0.7855740670031731, "learning_rate": 4.724646569692919e-06, "loss": 0.3985, "step": 3744 }, { "epoch": 0.9260633036597429, "grad_norm": 0.7799292341163916, "learning_rate": 4.7244982612027845e-06, "loss": 0.3917, "step": 3745 }, { "epoch": 0.9263105835806132, "grad_norm": 0.7735208824748577, "learning_rate": 4.724349915112103e-06, "loss": 0.4268, "step": 3746 }, { "epoch": 0.9265578635014837, "grad_norm": 0.7861442577149689, "learning_rate": 4.724201531423383e-06, "loss": 0.4117, "step": 3747 }, { "epoch": 0.9268051434223541, "grad_norm": 0.7621689766137228, "learning_rate": 4.72405311013913e-06, "loss": 0.4052, "step": 3748 }, { "epoch": 0.9270524233432246, "grad_norm": 0.7738986753047935, "learning_rate": 4.723904651261855e-06, "loss": 0.4308, "step": 3749 }, { "epoch": 0.9272997032640949, "grad_norm": 0.7464914050650943, "learning_rate": 4.723756154794068e-06, "loss": 0.4125, "step": 3750 }, { "epoch": 0.9275469831849654, "grad_norm": 0.7875071720493665, "learning_rate": 4.7236076207382765e-06, "loss": 0.4171, "step": 3751 }, { "epoch": 0.9277942631058358, "grad_norm": 0.7695819617858256, "learning_rate": 4.7234590490969935e-06, "loss": 0.4251, "step": 3752 }, { "epoch": 0.9280415430267063, "grad_norm": 0.7860254683196816, "learning_rate": 4.723310439872729e-06, "loss": 0.4174, "step": 3753 }, { "epoch": 0.9282888229475766, "grad_norm": 0.799371539348264, "learning_rate": 4.723161793067995e-06, "loss": 0.4048, "step": 3754 }, { "epoch": 0.9285361028684471, "grad_norm": 0.766005328064519, "learning_rate": 4.723013108685306e-06, "loss": 0.3992, "step": 3755 }, { "epoch": 0.9287833827893175, "grad_norm": 0.8184769776996572, "learning_rate": 4.722864386727171e-06, "loss": 0.4098, "step": 3756 }, { "epoch": 0.929030662710188, "grad_norm": 0.767670477728639, "learning_rate": 4.722715627196109e-06, "loss": 0.376, "step": 3757 }, { "epoch": 0.9292779426310583, "grad_norm": 0.7691881581821964, "learning_rate": 4.72256683009463e-06, "loss": 0.4209, "step": 3758 }, { "epoch": 0.9295252225519288, "grad_norm": 0.779597647151623, "learning_rate": 4.722417995425252e-06, "loss": 0.3974, "step": 3759 }, { "epoch": 0.9297725024727992, "grad_norm": 0.7958084407546698, "learning_rate": 4.72226912319049e-06, "loss": 0.3913, "step": 3760 }, { "epoch": 0.9300197823936697, "grad_norm": 0.7578575157603611, "learning_rate": 4.722120213392859e-06, "loss": 0.4177, "step": 3761 }, { "epoch": 0.93026706231454, "grad_norm": 0.7738447839180078, "learning_rate": 4.721971266034878e-06, "loss": 0.4515, "step": 3762 }, { "epoch": 0.9305143422354105, "grad_norm": 0.7907234951401526, "learning_rate": 4.721822281119064e-06, "loss": 0.4325, "step": 3763 }, { "epoch": 0.9307616221562809, "grad_norm": 0.7756955998159536, "learning_rate": 4.721673258647934e-06, "loss": 0.4332, "step": 3764 }, { "epoch": 0.9310089020771514, "grad_norm": 0.782257930528881, "learning_rate": 4.721524198624009e-06, "loss": 0.4034, "step": 3765 }, { "epoch": 0.9312561819980217, "grad_norm": 0.7801610811208322, "learning_rate": 4.721375101049807e-06, "loss": 0.3815, "step": 3766 }, { "epoch": 0.9315034619188922, "grad_norm": 0.8097011083585558, "learning_rate": 4.721225965927848e-06, "loss": 0.4462, "step": 3767 }, { "epoch": 0.9317507418397626, "grad_norm": 0.7898701624455864, "learning_rate": 4.721076793260655e-06, "loss": 0.4028, "step": 3768 }, { "epoch": 0.9319980217606331, "grad_norm": 0.7551579097684481, "learning_rate": 4.720927583050747e-06, "loss": 0.4243, "step": 3769 }, { "epoch": 0.9322453016815034, "grad_norm": 0.7764822151923534, "learning_rate": 4.720778335300647e-06, "loss": 0.4082, "step": 3770 }, { "epoch": 0.9324925816023739, "grad_norm": 0.7862920635864419, "learning_rate": 4.720629050012879e-06, "loss": 0.3879, "step": 3771 }, { "epoch": 0.9327398615232443, "grad_norm": 0.8223007795021665, "learning_rate": 4.720479727189964e-06, "loss": 0.4085, "step": 3772 }, { "epoch": 0.9329871414441148, "grad_norm": 0.7707108563497688, "learning_rate": 4.720330366834427e-06, "loss": 0.4027, "step": 3773 }, { "epoch": 0.9332344213649851, "grad_norm": 0.8109320443845112, "learning_rate": 4.7201809689487935e-06, "loss": 0.3899, "step": 3774 }, { "epoch": 0.9334817012858556, "grad_norm": 0.7964599002381397, "learning_rate": 4.720031533535589e-06, "loss": 0.4535, "step": 3775 }, { "epoch": 0.933728981206726, "grad_norm": 0.7915463325437578, "learning_rate": 4.719882060597336e-06, "loss": 0.4162, "step": 3776 }, { "epoch": 0.9339762611275965, "grad_norm": 0.7698383537920458, "learning_rate": 4.719732550136565e-06, "loss": 0.4373, "step": 3777 }, { "epoch": 0.9342235410484668, "grad_norm": 0.7705957905234673, "learning_rate": 4.719583002155801e-06, "loss": 0.4066, "step": 3778 }, { "epoch": 0.9344708209693373, "grad_norm": 0.7787562181312311, "learning_rate": 4.719433416657573e-06, "loss": 0.4164, "step": 3779 }, { "epoch": 0.9347181008902077, "grad_norm": 0.7859748027749637, "learning_rate": 4.719283793644409e-06, "loss": 0.4083, "step": 3780 }, { "epoch": 0.9349653808110782, "grad_norm": 0.8513132306964396, "learning_rate": 4.719134133118838e-06, "loss": 0.3746, "step": 3781 }, { "epoch": 0.9352126607319485, "grad_norm": 0.8106609584332799, "learning_rate": 4.718984435083389e-06, "loss": 0.411, "step": 3782 }, { "epoch": 0.935459940652819, "grad_norm": 0.7840411155887811, "learning_rate": 4.718834699540593e-06, "loss": 0.3892, "step": 3783 }, { "epoch": 0.9357072205736894, "grad_norm": 0.7987354153840486, "learning_rate": 4.718684926492982e-06, "loss": 0.4125, "step": 3784 }, { "epoch": 0.9359545004945599, "grad_norm": 0.7769870805002546, "learning_rate": 4.718535115943085e-06, "loss": 0.4186, "step": 3785 }, { "epoch": 0.9362017804154302, "grad_norm": 0.7953525655481078, "learning_rate": 4.718385267893437e-06, "loss": 0.4571, "step": 3786 }, { "epoch": 0.9364490603363007, "grad_norm": 0.8257447321469814, "learning_rate": 4.718235382346569e-06, "loss": 0.4258, "step": 3787 }, { "epoch": 0.9366963402571711, "grad_norm": 0.7979742103452806, "learning_rate": 4.718085459305015e-06, "loss": 0.4325, "step": 3788 }, { "epoch": 0.9369436201780416, "grad_norm": 0.7755001210635785, "learning_rate": 4.717935498771311e-06, "loss": 0.3987, "step": 3789 }, { "epoch": 0.9371909000989119, "grad_norm": 0.7855397941016596, "learning_rate": 4.717785500747988e-06, "loss": 0.4179, "step": 3790 }, { "epoch": 0.9374381800197824, "grad_norm": 0.7659770262453232, "learning_rate": 4.717635465237584e-06, "loss": 0.3917, "step": 3791 }, { "epoch": 0.9376854599406528, "grad_norm": 0.7815948203349444, "learning_rate": 4.717485392242636e-06, "loss": 0.4193, "step": 3792 }, { "epoch": 0.9379327398615233, "grad_norm": 0.7930513949333287, "learning_rate": 4.717335281765677e-06, "loss": 0.3904, "step": 3793 }, { "epoch": 0.9381800197823936, "grad_norm": 0.8129070197480318, "learning_rate": 4.717185133809248e-06, "loss": 0.4164, "step": 3794 }, { "epoch": 0.9384272997032641, "grad_norm": 0.7668091089072321, "learning_rate": 4.7170349483758845e-06, "loss": 0.4051, "step": 3795 }, { "epoch": 0.9386745796241345, "grad_norm": 0.7910228910052575, "learning_rate": 4.716884725468127e-06, "loss": 0.4083, "step": 3796 }, { "epoch": 0.938921859545005, "grad_norm": 0.7732700837398885, "learning_rate": 4.716734465088513e-06, "loss": 0.4034, "step": 3797 }, { "epoch": 0.9391691394658753, "grad_norm": 0.774749652475244, "learning_rate": 4.716584167239584e-06, "loss": 0.4185, "step": 3798 }, { "epoch": 0.9394164193867458, "grad_norm": 0.8554633807185527, "learning_rate": 4.716433831923879e-06, "loss": 0.391, "step": 3799 }, { "epoch": 0.9396636993076162, "grad_norm": 0.8449891799779463, "learning_rate": 4.716283459143939e-06, "loss": 0.3926, "step": 3800 }, { "epoch": 0.9399109792284867, "grad_norm": 0.8158798360289666, "learning_rate": 4.716133048902307e-06, "loss": 0.4356, "step": 3801 }, { "epoch": 0.940158259149357, "grad_norm": 0.7615640183067676, "learning_rate": 4.715982601201525e-06, "loss": 0.4172, "step": 3802 }, { "epoch": 0.9404055390702275, "grad_norm": 0.7871218583450545, "learning_rate": 4.715832116044135e-06, "loss": 0.3677, "step": 3803 }, { "epoch": 0.9406528189910979, "grad_norm": 0.7582932974852993, "learning_rate": 4.715681593432683e-06, "loss": 0.4602, "step": 3804 }, { "epoch": 0.9409000989119684, "grad_norm": 0.7746130525975381, "learning_rate": 4.71553103336971e-06, "loss": 0.3856, "step": 3805 }, { "epoch": 0.9411473788328387, "grad_norm": 0.7636577412053083, "learning_rate": 4.715380435857763e-06, "loss": 0.4131, "step": 3806 }, { "epoch": 0.9413946587537092, "grad_norm": 0.7811739159391169, "learning_rate": 4.715229800899388e-06, "loss": 0.3884, "step": 3807 }, { "epoch": 0.9416419386745796, "grad_norm": 0.8188404252385233, "learning_rate": 4.715079128497129e-06, "loss": 0.3946, "step": 3808 }, { "epoch": 0.94188921859545, "grad_norm": 0.7875551448948243, "learning_rate": 4.714928418653535e-06, "loss": 0.3932, "step": 3809 }, { "epoch": 0.9421364985163204, "grad_norm": 0.8046139238238394, "learning_rate": 4.714777671371152e-06, "loss": 0.4221, "step": 3810 }, { "epoch": 0.9423837784371909, "grad_norm": 0.8110204164182576, "learning_rate": 4.71462688665253e-06, "loss": 0.4177, "step": 3811 }, { "epoch": 0.9426310583580614, "grad_norm": 0.7878491388246204, "learning_rate": 4.714476064500215e-06, "loss": 0.39, "step": 3812 }, { "epoch": 0.9428783382789317, "grad_norm": 0.7495863308390222, "learning_rate": 4.714325204916758e-06, "loss": 0.421, "step": 3813 }, { "epoch": 0.9431256181998022, "grad_norm": 0.77849672486737, "learning_rate": 4.714174307904709e-06, "loss": 0.4247, "step": 3814 }, { "epoch": 0.9433728981206726, "grad_norm": 0.802560196408335, "learning_rate": 4.714023373466618e-06, "loss": 0.408, "step": 3815 }, { "epoch": 0.9436201780415431, "grad_norm": 0.7786351448112381, "learning_rate": 4.713872401605036e-06, "loss": 0.3885, "step": 3816 }, { "epoch": 0.9438674579624134, "grad_norm": 0.8164733266440941, "learning_rate": 4.713721392322515e-06, "loss": 0.4153, "step": 3817 }, { "epoch": 0.9441147378832839, "grad_norm": 0.7955726912893689, "learning_rate": 4.713570345621609e-06, "loss": 0.4145, "step": 3818 }, { "epoch": 0.9443620178041543, "grad_norm": 0.7689706793710702, "learning_rate": 4.71341926150487e-06, "loss": 0.3869, "step": 3819 }, { "epoch": 0.9446092977250248, "grad_norm": 0.804572440311293, "learning_rate": 4.713268139974851e-06, "loss": 0.4042, "step": 3820 }, { "epoch": 0.9448565776458951, "grad_norm": 0.8035662350608253, "learning_rate": 4.713116981034107e-06, "loss": 0.4172, "step": 3821 }, { "epoch": 0.9451038575667656, "grad_norm": 0.7746997086586535, "learning_rate": 4.712965784685194e-06, "loss": 0.3954, "step": 3822 }, { "epoch": 0.945351137487636, "grad_norm": 0.7831748839896366, "learning_rate": 4.712814550930667e-06, "loss": 0.4129, "step": 3823 }, { "epoch": 0.9455984174085065, "grad_norm": 0.7809792520188084, "learning_rate": 4.712663279773081e-06, "loss": 0.4347, "step": 3824 }, { "epoch": 0.9458456973293768, "grad_norm": 0.7673768265462751, "learning_rate": 4.7125119712149944e-06, "loss": 0.4052, "step": 3825 }, { "epoch": 0.9460929772502473, "grad_norm": 0.7798452737328526, "learning_rate": 4.712360625258965e-06, "loss": 0.4024, "step": 3826 }, { "epoch": 0.9463402571711177, "grad_norm": 0.7778031688861111, "learning_rate": 4.7122092419075496e-06, "loss": 0.3775, "step": 3827 }, { "epoch": 0.9465875370919882, "grad_norm": 0.7806188818311592, "learning_rate": 4.712057821163308e-06, "loss": 0.4155, "step": 3828 }, { "epoch": 0.9468348170128585, "grad_norm": 0.7931029006594754, "learning_rate": 4.7119063630288e-06, "loss": 0.4178, "step": 3829 }, { "epoch": 0.947082096933729, "grad_norm": 0.7694503362483404, "learning_rate": 4.711754867506585e-06, "loss": 0.4008, "step": 3830 }, { "epoch": 0.9473293768545994, "grad_norm": 0.8129310646415457, "learning_rate": 4.711603334599224e-06, "loss": 0.3733, "step": 3831 }, { "epoch": 0.9475766567754699, "grad_norm": 0.7761005503274951, "learning_rate": 4.711451764309278e-06, "loss": 0.4384, "step": 3832 }, { "epoch": 0.9478239366963402, "grad_norm": 0.7912406175514075, "learning_rate": 4.711300156639309e-06, "loss": 0.403, "step": 3833 }, { "epoch": 0.9480712166172107, "grad_norm": 0.7841375432382586, "learning_rate": 4.7111485115918795e-06, "loss": 0.4205, "step": 3834 }, { "epoch": 0.9483184965380811, "grad_norm": 0.7876792036708451, "learning_rate": 4.710996829169554e-06, "loss": 0.3802, "step": 3835 }, { "epoch": 0.9485657764589516, "grad_norm": 0.7883706920212202, "learning_rate": 4.710845109374895e-06, "loss": 0.3764, "step": 3836 }, { "epoch": 0.9488130563798219, "grad_norm": 0.7713104412410063, "learning_rate": 4.710693352210468e-06, "loss": 0.4153, "step": 3837 }, { "epoch": 0.9490603363006924, "grad_norm": 0.774838074480863, "learning_rate": 4.7105415576788375e-06, "loss": 0.4099, "step": 3838 }, { "epoch": 0.9493076162215628, "grad_norm": 0.7482537027491292, "learning_rate": 4.710389725782568e-06, "loss": 0.4218, "step": 3839 }, { "epoch": 0.9495548961424333, "grad_norm": 0.7904541367962616, "learning_rate": 4.710237856524229e-06, "loss": 0.4037, "step": 3840 }, { "epoch": 0.9498021760633036, "grad_norm": 0.7702306085739898, "learning_rate": 4.710085949906385e-06, "loss": 0.4299, "step": 3841 }, { "epoch": 0.9500494559841741, "grad_norm": 0.7976548908474161, "learning_rate": 4.709934005931605e-06, "loss": 0.3735, "step": 3842 }, { "epoch": 0.9502967359050445, "grad_norm": 0.8362496119917581, "learning_rate": 4.709782024602456e-06, "loss": 0.4022, "step": 3843 }, { "epoch": 0.950544015825915, "grad_norm": 0.7829890013758223, "learning_rate": 4.709630005921508e-06, "loss": 0.4207, "step": 3844 }, { "epoch": 0.9507912957467853, "grad_norm": 0.7769273804916441, "learning_rate": 4.709477949891331e-06, "loss": 0.4209, "step": 3845 }, { "epoch": 0.9510385756676558, "grad_norm": 0.7589464323885547, "learning_rate": 4.709325856514494e-06, "loss": 0.4101, "step": 3846 }, { "epoch": 0.9512858555885262, "grad_norm": 0.8020651971148175, "learning_rate": 4.709173725793567e-06, "loss": 0.4404, "step": 3847 }, { "epoch": 0.9515331355093967, "grad_norm": 0.7785190052402172, "learning_rate": 4.709021557731125e-06, "loss": 0.3814, "step": 3848 }, { "epoch": 0.951780415430267, "grad_norm": 0.7755037590030682, "learning_rate": 4.708869352329736e-06, "loss": 0.3985, "step": 3849 }, { "epoch": 0.9520276953511375, "grad_norm": 0.7900519608011191, "learning_rate": 4.708717109591976e-06, "loss": 0.4362, "step": 3850 }, { "epoch": 0.9522749752720079, "grad_norm": 0.8073740775342424, "learning_rate": 4.708564829520416e-06, "loss": 0.4269, "step": 3851 }, { "epoch": 0.9525222551928784, "grad_norm": 0.8233059438001666, "learning_rate": 4.708412512117631e-06, "loss": 0.4044, "step": 3852 }, { "epoch": 0.9527695351137487, "grad_norm": 0.8118457866395735, "learning_rate": 4.708260157386196e-06, "loss": 0.4115, "step": 3853 }, { "epoch": 0.9530168150346192, "grad_norm": 0.7583537392743612, "learning_rate": 4.708107765328685e-06, "loss": 0.416, "step": 3854 }, { "epoch": 0.9532640949554896, "grad_norm": 0.8297248524334135, "learning_rate": 4.707955335947675e-06, "loss": 0.3898, "step": 3855 }, { "epoch": 0.9535113748763601, "grad_norm": 0.7870425199589566, "learning_rate": 4.707802869245742e-06, "loss": 0.4179, "step": 3856 }, { "epoch": 0.9537586547972304, "grad_norm": 0.8316754519208652, "learning_rate": 4.707650365225463e-06, "loss": 0.4201, "step": 3857 }, { "epoch": 0.9540059347181009, "grad_norm": 0.7792225618981867, "learning_rate": 4.7074978238894164e-06, "loss": 0.41, "step": 3858 }, { "epoch": 0.9542532146389713, "grad_norm": 0.7799396685116958, "learning_rate": 4.70734524524018e-06, "loss": 0.4066, "step": 3859 }, { "epoch": 0.9545004945598418, "grad_norm": 0.8053702110699514, "learning_rate": 4.707192629280334e-06, "loss": 0.3941, "step": 3860 }, { "epoch": 0.9547477744807121, "grad_norm": 0.7618664972873861, "learning_rate": 4.707039976012457e-06, "loss": 0.4125, "step": 3861 }, { "epoch": 0.9549950544015826, "grad_norm": 0.7931884558229111, "learning_rate": 4.706887285439128e-06, "loss": 0.3761, "step": 3862 }, { "epoch": 0.955242334322453, "grad_norm": 0.7728504886433268, "learning_rate": 4.70673455756293e-06, "loss": 0.3919, "step": 3863 }, { "epoch": 0.9554896142433235, "grad_norm": 0.7955438947171283, "learning_rate": 4.7065817923864435e-06, "loss": 0.3957, "step": 3864 }, { "epoch": 0.9557368941641938, "grad_norm": 0.7809682843371737, "learning_rate": 4.7064289899122515e-06, "loss": 0.3659, "step": 3865 }, { "epoch": 0.9559841740850643, "grad_norm": 0.801135077053223, "learning_rate": 4.706276150142936e-06, "loss": 0.4134, "step": 3866 }, { "epoch": 0.9562314540059347, "grad_norm": 0.769903315991128, "learning_rate": 4.706123273081081e-06, "loss": 0.4145, "step": 3867 }, { "epoch": 0.9564787339268052, "grad_norm": 0.7714718747779309, "learning_rate": 4.7059703587292706e-06, "loss": 0.4187, "step": 3868 }, { "epoch": 0.9567260138476755, "grad_norm": 0.7771131713870824, "learning_rate": 4.705817407090089e-06, "loss": 0.4276, "step": 3869 }, { "epoch": 0.956973293768546, "grad_norm": 0.7812494248399661, "learning_rate": 4.705664418166122e-06, "loss": 0.4195, "step": 3870 }, { "epoch": 0.9572205736894164, "grad_norm": 0.7870184851971324, "learning_rate": 4.705511391959955e-06, "loss": 0.3779, "step": 3871 }, { "epoch": 0.9574678536102869, "grad_norm": 0.7744463770512644, "learning_rate": 4.7053583284741745e-06, "loss": 0.4138, "step": 3872 }, { "epoch": 0.9577151335311572, "grad_norm": 0.7958653995952897, "learning_rate": 4.7052052277113695e-06, "loss": 0.3956, "step": 3873 }, { "epoch": 0.9579624134520277, "grad_norm": 0.7816603192193171, "learning_rate": 4.705052089674125e-06, "loss": 0.4006, "step": 3874 }, { "epoch": 0.9582096933728981, "grad_norm": 0.7918038182911267, "learning_rate": 4.704898914365032e-06, "loss": 0.4128, "step": 3875 }, { "epoch": 0.9584569732937686, "grad_norm": 0.8150385320639588, "learning_rate": 4.704745701786678e-06, "loss": 0.4069, "step": 3876 }, { "epoch": 0.9587042532146389, "grad_norm": 0.7920833803794489, "learning_rate": 4.704592451941654e-06, "loss": 0.4187, "step": 3877 }, { "epoch": 0.9589515331355094, "grad_norm": 0.7923085557647959, "learning_rate": 4.704439164832549e-06, "loss": 0.4362, "step": 3878 }, { "epoch": 0.9591988130563798, "grad_norm": 0.7660991000196078, "learning_rate": 4.704285840461955e-06, "loss": 0.4174, "step": 3879 }, { "epoch": 0.9594460929772503, "grad_norm": 0.8128783497936076, "learning_rate": 4.704132478832464e-06, "loss": 0.3888, "step": 3880 }, { "epoch": 0.9596933728981206, "grad_norm": 0.7698482384519877, "learning_rate": 4.703979079946667e-06, "loss": 0.4169, "step": 3881 }, { "epoch": 0.9599406528189911, "grad_norm": 0.7892348762474982, "learning_rate": 4.703825643807157e-06, "loss": 0.411, "step": 3882 }, { "epoch": 0.9601879327398615, "grad_norm": 0.7861064933964294, "learning_rate": 4.703672170416529e-06, "loss": 0.4015, "step": 3883 }, { "epoch": 0.960435212660732, "grad_norm": 0.8174752026252636, "learning_rate": 4.703518659777376e-06, "loss": 0.3917, "step": 3884 }, { "epoch": 0.9606824925816023, "grad_norm": 0.7732844681830707, "learning_rate": 4.703365111892293e-06, "loss": 0.3775, "step": 3885 }, { "epoch": 0.9609297725024728, "grad_norm": 0.805848268421217, "learning_rate": 4.703211526763875e-06, "loss": 0.3776, "step": 3886 }, { "epoch": 0.9611770524233432, "grad_norm": 0.7655914125589361, "learning_rate": 4.703057904394719e-06, "loss": 0.3853, "step": 3887 }, { "epoch": 0.9614243323442137, "grad_norm": 0.7744602016571512, "learning_rate": 4.7029042447874205e-06, "loss": 0.4206, "step": 3888 }, { "epoch": 0.9616716122650841, "grad_norm": 0.7876431280098052, "learning_rate": 4.702750547944577e-06, "loss": 0.434, "step": 3889 }, { "epoch": 0.9619188921859545, "grad_norm": 0.7701420784704784, "learning_rate": 4.702596813868787e-06, "loss": 0.4142, "step": 3890 }, { "epoch": 0.962166172106825, "grad_norm": 0.7876056794108254, "learning_rate": 4.70244304256265e-06, "loss": 0.392, "step": 3891 }, { "epoch": 0.9624134520276953, "grad_norm": 0.7944473649588513, "learning_rate": 4.702289234028763e-06, "loss": 0.411, "step": 3892 }, { "epoch": 0.9626607319485658, "grad_norm": 0.7746154516350491, "learning_rate": 4.702135388269727e-06, "loss": 0.3754, "step": 3893 }, { "epoch": 0.9629080118694362, "grad_norm": 0.7747445417607648, "learning_rate": 4.701981505288142e-06, "loss": 0.4035, "step": 3894 }, { "epoch": 0.9631552917903067, "grad_norm": 0.7628514933888112, "learning_rate": 4.70182758508661e-06, "loss": 0.4276, "step": 3895 }, { "epoch": 0.963402571711177, "grad_norm": 0.7697009449996881, "learning_rate": 4.701673627667732e-06, "loss": 0.4199, "step": 3896 }, { "epoch": 0.9636498516320475, "grad_norm": 0.8210442405395625, "learning_rate": 4.70151963303411e-06, "loss": 0.429, "step": 3897 }, { "epoch": 0.9638971315529179, "grad_norm": 0.7867985724002428, "learning_rate": 4.7013656011883476e-06, "loss": 0.433, "step": 3898 }, { "epoch": 0.9641444114737884, "grad_norm": 0.7943173211816381, "learning_rate": 4.7012115321330484e-06, "loss": 0.4057, "step": 3899 }, { "epoch": 0.9643916913946587, "grad_norm": 0.8182944927299638, "learning_rate": 4.701057425870816e-06, "loss": 0.3819, "step": 3900 }, { "epoch": 0.9646389713155292, "grad_norm": 0.7479471012595492, "learning_rate": 4.700903282404256e-06, "loss": 0.4035, "step": 3901 }, { "epoch": 0.9648862512363996, "grad_norm": 0.7812074899937719, "learning_rate": 4.700749101735973e-06, "loss": 0.4126, "step": 3902 }, { "epoch": 0.9651335311572701, "grad_norm": 0.8278036061767997, "learning_rate": 4.7005948838685735e-06, "loss": 0.3914, "step": 3903 }, { "epoch": 0.9653808110781404, "grad_norm": 0.8387896149443221, "learning_rate": 4.700440628804665e-06, "loss": 0.3896, "step": 3904 }, { "epoch": 0.9656280909990109, "grad_norm": 0.7947700311503184, "learning_rate": 4.700286336546854e-06, "loss": 0.4004, "step": 3905 }, { "epoch": 0.9658753709198813, "grad_norm": 0.7794144464386743, "learning_rate": 4.700132007097748e-06, "loss": 0.3994, "step": 3906 }, { "epoch": 0.9661226508407518, "grad_norm": 0.8099820963076695, "learning_rate": 4.699977640459958e-06, "loss": 0.4252, "step": 3907 }, { "epoch": 0.9663699307616221, "grad_norm": 0.7791668503349546, "learning_rate": 4.699823236636091e-06, "loss": 0.4112, "step": 3908 }, { "epoch": 0.9666172106824926, "grad_norm": 0.7799461943658222, "learning_rate": 4.6996687956287564e-06, "loss": 0.3938, "step": 3909 }, { "epoch": 0.966864490603363, "grad_norm": 0.7954451636878649, "learning_rate": 4.699514317440567e-06, "loss": 0.4116, "step": 3910 }, { "epoch": 0.9671117705242335, "grad_norm": 0.7551128256490328, "learning_rate": 4.699359802074131e-06, "loss": 0.4095, "step": 3911 }, { "epoch": 0.9673590504451038, "grad_norm": 0.7816752155120708, "learning_rate": 4.6992052495320635e-06, "loss": 0.3919, "step": 3912 }, { "epoch": 0.9676063303659743, "grad_norm": 0.7748996073968227, "learning_rate": 4.699050659816975e-06, "loss": 0.4034, "step": 3913 }, { "epoch": 0.9678536102868447, "grad_norm": 0.7713815417049966, "learning_rate": 4.698896032931478e-06, "loss": 0.3945, "step": 3914 }, { "epoch": 0.9681008902077152, "grad_norm": 0.7706764167647603, "learning_rate": 4.698741368878187e-06, "loss": 0.4321, "step": 3915 }, { "epoch": 0.9683481701285855, "grad_norm": 0.8023547706545407, "learning_rate": 4.698586667659717e-06, "loss": 0.4382, "step": 3916 }, { "epoch": 0.968595450049456, "grad_norm": 0.8233471376751998, "learning_rate": 4.698431929278681e-06, "loss": 0.397, "step": 3917 }, { "epoch": 0.9688427299703264, "grad_norm": 0.7778633351955079, "learning_rate": 4.698277153737697e-06, "loss": 0.4199, "step": 3918 }, { "epoch": 0.9690900098911969, "grad_norm": 0.7703112244491745, "learning_rate": 4.698122341039379e-06, "loss": 0.4158, "step": 3919 }, { "epoch": 0.9693372898120672, "grad_norm": 0.7727244260981386, "learning_rate": 4.697967491186345e-06, "loss": 0.4101, "step": 3920 }, { "epoch": 0.9695845697329377, "grad_norm": 0.8320384841071571, "learning_rate": 4.697812604181211e-06, "loss": 0.395, "step": 3921 }, { "epoch": 0.9698318496538081, "grad_norm": 0.7941589609812207, "learning_rate": 4.697657680026597e-06, "loss": 0.3636, "step": 3922 }, { "epoch": 0.9700791295746786, "grad_norm": 0.8215449942248417, "learning_rate": 4.69750271872512e-06, "loss": 0.383, "step": 3923 }, { "epoch": 0.9703264094955489, "grad_norm": 0.7842158778990416, "learning_rate": 4.697347720279401e-06, "loss": 0.3928, "step": 3924 }, { "epoch": 0.9705736894164194, "grad_norm": 0.7975302553095485, "learning_rate": 4.697192684692058e-06, "loss": 0.4068, "step": 3925 }, { "epoch": 0.9708209693372898, "grad_norm": 0.7778323064622887, "learning_rate": 4.697037611965713e-06, "loss": 0.4463, "step": 3926 }, { "epoch": 0.9710682492581603, "grad_norm": 0.7775846948755031, "learning_rate": 4.696882502102987e-06, "loss": 0.4284, "step": 3927 }, { "epoch": 0.9713155291790306, "grad_norm": 0.7746054597343937, "learning_rate": 4.6967273551065005e-06, "loss": 0.395, "step": 3928 }, { "epoch": 0.9715628090999011, "grad_norm": 0.8127997502657887, "learning_rate": 4.696572170978877e-06, "loss": 0.3914, "step": 3929 }, { "epoch": 0.9718100890207715, "grad_norm": 0.777611998632522, "learning_rate": 4.69641694972274e-06, "loss": 0.4205, "step": 3930 }, { "epoch": 0.972057368941642, "grad_norm": 0.7780004765147993, "learning_rate": 4.6962616913407125e-06, "loss": 0.433, "step": 3931 }, { "epoch": 0.9723046488625123, "grad_norm": 0.8241222014341494, "learning_rate": 4.6961063958354195e-06, "loss": 0.3979, "step": 3932 }, { "epoch": 0.9725519287833828, "grad_norm": 0.8089490443001208, "learning_rate": 4.695951063209485e-06, "loss": 0.3662, "step": 3933 }, { "epoch": 0.9727992087042532, "grad_norm": 0.7923141861732071, "learning_rate": 4.695795693465536e-06, "loss": 0.3904, "step": 3934 }, { "epoch": 0.9730464886251237, "grad_norm": 0.829573517979138, "learning_rate": 4.695640286606196e-06, "loss": 0.413, "step": 3935 }, { "epoch": 0.973293768545994, "grad_norm": 0.839797270153677, "learning_rate": 4.695484842634094e-06, "loss": 0.3953, "step": 3936 }, { "epoch": 0.9735410484668645, "grad_norm": 0.7857025307651592, "learning_rate": 4.695329361551858e-06, "loss": 0.3986, "step": 3937 }, { "epoch": 0.9737883283877349, "grad_norm": 0.7638491096993291, "learning_rate": 4.695173843362115e-06, "loss": 0.3932, "step": 3938 }, { "epoch": 0.9740356083086054, "grad_norm": 0.7812683265166083, "learning_rate": 4.6950182880674935e-06, "loss": 0.4217, "step": 3939 }, { "epoch": 0.9742828882294757, "grad_norm": 0.7603154077113041, "learning_rate": 4.694862695670623e-06, "loss": 0.4107, "step": 3940 }, { "epoch": 0.9745301681503462, "grad_norm": 0.7539347980917882, "learning_rate": 4.694707066174133e-06, "loss": 0.4051, "step": 3941 }, { "epoch": 0.9747774480712166, "grad_norm": 0.7863706446058237, "learning_rate": 4.694551399580656e-06, "loss": 0.4218, "step": 3942 }, { "epoch": 0.9750247279920871, "grad_norm": 0.7665569429715551, "learning_rate": 4.6943956958928215e-06, "loss": 0.4019, "step": 3943 }, { "epoch": 0.9752720079129574, "grad_norm": 0.799832166832461, "learning_rate": 4.694239955113262e-06, "loss": 0.3906, "step": 3944 }, { "epoch": 0.9755192878338279, "grad_norm": 0.8274704820268617, "learning_rate": 4.69408417724461e-06, "loss": 0.3743, "step": 3945 }, { "epoch": 0.9757665677546983, "grad_norm": 0.7752699483338866, "learning_rate": 4.6939283622894975e-06, "loss": 0.4137, "step": 3946 }, { "epoch": 0.9760138476755688, "grad_norm": 0.7891688093341738, "learning_rate": 4.693772510250559e-06, "loss": 0.4211, "step": 3947 }, { "epoch": 0.9762611275964391, "grad_norm": 0.7871416681868997, "learning_rate": 4.69361662113043e-06, "loss": 0.4214, "step": 3948 }, { "epoch": 0.9765084075173096, "grad_norm": 0.7961084106239589, "learning_rate": 4.693460694931744e-06, "loss": 0.3997, "step": 3949 }, { "epoch": 0.97675568743818, "grad_norm": 0.7892962345788587, "learning_rate": 4.693304731657138e-06, "loss": 0.405, "step": 3950 }, { "epoch": 0.9770029673590505, "grad_norm": 0.7894503245971667, "learning_rate": 4.6931487313092465e-06, "loss": 0.4026, "step": 3951 }, { "epoch": 0.9772502472799208, "grad_norm": 0.7703856475017746, "learning_rate": 4.692992693890706e-06, "loss": 0.4249, "step": 3952 }, { "epoch": 0.9774975272007913, "grad_norm": 0.8133799239732469, "learning_rate": 4.692836619404156e-06, "loss": 0.4114, "step": 3953 }, { "epoch": 0.9777448071216617, "grad_norm": 0.8152538089456904, "learning_rate": 4.692680507852235e-06, "loss": 0.3853, "step": 3954 }, { "epoch": 0.9779920870425322, "grad_norm": 0.8184149190064318, "learning_rate": 4.692524359237579e-06, "loss": 0.3976, "step": 3955 }, { "epoch": 0.9782393669634025, "grad_norm": 0.8254887145013703, "learning_rate": 4.69236817356283e-06, "loss": 0.3947, "step": 3956 }, { "epoch": 0.978486646884273, "grad_norm": 0.8164624279733338, "learning_rate": 4.692211950830626e-06, "loss": 0.3985, "step": 3957 }, { "epoch": 0.9787339268051434, "grad_norm": 0.7978082335627064, "learning_rate": 4.6920556910436085e-06, "loss": 0.4057, "step": 3958 }, { "epoch": 0.9789812067260139, "grad_norm": 0.7763542013199648, "learning_rate": 4.69189939420442e-06, "loss": 0.4137, "step": 3959 }, { "epoch": 0.9792284866468842, "grad_norm": 0.7984236860463884, "learning_rate": 4.6917430603157e-06, "loss": 0.41, "step": 3960 }, { "epoch": 0.9794757665677547, "grad_norm": 0.7842647035839468, "learning_rate": 4.691586689380092e-06, "loss": 0.3884, "step": 3961 }, { "epoch": 0.9797230464886251, "grad_norm": 0.7907516321710724, "learning_rate": 4.69143028140024e-06, "loss": 0.3949, "step": 3962 }, { "epoch": 0.9799703264094956, "grad_norm": 0.7992643147336413, "learning_rate": 4.691273836378787e-06, "loss": 0.4191, "step": 3963 }, { "epoch": 0.9802176063303659, "grad_norm": 0.796090473738687, "learning_rate": 4.691117354318377e-06, "loss": 0.3897, "step": 3964 }, { "epoch": 0.9804648862512364, "grad_norm": 0.7815628862657795, "learning_rate": 4.690960835221655e-06, "loss": 0.3789, "step": 3965 }, { "epoch": 0.9807121661721068, "grad_norm": 0.7507676442911566, "learning_rate": 4.690804279091268e-06, "loss": 0.3793, "step": 3966 }, { "epoch": 0.9809594460929772, "grad_norm": 0.7732006075013503, "learning_rate": 4.690647685929861e-06, "loss": 0.4298, "step": 3967 }, { "epoch": 0.9812067260138477, "grad_norm": 0.7922497219388176, "learning_rate": 4.69049105574008e-06, "loss": 0.4311, "step": 3968 }, { "epoch": 0.9814540059347181, "grad_norm": 0.762528197693143, "learning_rate": 4.690334388524576e-06, "loss": 0.4116, "step": 3969 }, { "epoch": 0.9817012858555886, "grad_norm": 0.7784231738630654, "learning_rate": 4.6901776842859926e-06, "loss": 0.4128, "step": 3970 }, { "epoch": 0.981948565776459, "grad_norm": 0.8111640731341906, "learning_rate": 4.690020943026982e-06, "loss": 0.3863, "step": 3971 }, { "epoch": 0.9821958456973294, "grad_norm": 0.783142213384446, "learning_rate": 4.689864164750192e-06, "loss": 0.3814, "step": 3972 }, { "epoch": 0.9824431256181998, "grad_norm": 0.7916818596859191, "learning_rate": 4.689707349458273e-06, "loss": 0.4082, "step": 3973 }, { "epoch": 0.9826904055390703, "grad_norm": 0.7979637978189713, "learning_rate": 4.689550497153876e-06, "loss": 0.4196, "step": 3974 }, { "epoch": 0.9829376854599406, "grad_norm": 0.7940260223007418, "learning_rate": 4.689393607839652e-06, "loss": 0.4083, "step": 3975 }, { "epoch": 0.9831849653808111, "grad_norm": 0.7639072225409764, "learning_rate": 4.6892366815182515e-06, "loss": 0.3994, "step": 3976 }, { "epoch": 0.9834322453016815, "grad_norm": 0.7817053106289357, "learning_rate": 4.689079718192329e-06, "loss": 0.3889, "step": 3977 }, { "epoch": 0.983679525222552, "grad_norm": 0.7598430403880667, "learning_rate": 4.688922717864537e-06, "loss": 0.4018, "step": 3978 }, { "epoch": 0.9839268051434223, "grad_norm": 0.8086789487803981, "learning_rate": 4.6887656805375296e-06, "loss": 0.3999, "step": 3979 }, { "epoch": 0.9841740850642928, "grad_norm": 0.7725826354053251, "learning_rate": 4.68860860621396e-06, "loss": 0.3951, "step": 3980 }, { "epoch": 0.9844213649851632, "grad_norm": 0.7875872055007198, "learning_rate": 4.688451494896485e-06, "loss": 0.4003, "step": 3981 }, { "epoch": 0.9846686449060337, "grad_norm": 0.7822097745681463, "learning_rate": 4.688294346587759e-06, "loss": 0.417, "step": 3982 }, { "epoch": 0.984915924826904, "grad_norm": 0.7809298685569199, "learning_rate": 4.688137161290438e-06, "loss": 0.3968, "step": 3983 }, { "epoch": 0.9851632047477745, "grad_norm": 0.7923832949438695, "learning_rate": 4.687979939007179e-06, "loss": 0.4056, "step": 3984 }, { "epoch": 0.9854104846686449, "grad_norm": 0.7708418159185525, "learning_rate": 4.687822679740641e-06, "loss": 0.4172, "step": 3985 }, { "epoch": 0.9856577645895154, "grad_norm": 0.7589092584911677, "learning_rate": 4.68766538349348e-06, "loss": 0.398, "step": 3986 }, { "epoch": 0.9859050445103857, "grad_norm": 0.7732339144056797, "learning_rate": 4.687508050268357e-06, "loss": 0.4171, "step": 3987 }, { "epoch": 0.9861523244312562, "grad_norm": 0.7643439359678004, "learning_rate": 4.6873506800679295e-06, "loss": 0.4489, "step": 3988 }, { "epoch": 0.9863996043521266, "grad_norm": 0.755783549108914, "learning_rate": 4.687193272894859e-06, "loss": 0.4125, "step": 3989 }, { "epoch": 0.9866468842729971, "grad_norm": 0.7987812317499954, "learning_rate": 4.6870358287518046e-06, "loss": 0.3959, "step": 3990 }, { "epoch": 0.9868941641938674, "grad_norm": 0.7920689363171113, "learning_rate": 4.686878347641428e-06, "loss": 0.4421, "step": 3991 }, { "epoch": 0.9871414441147379, "grad_norm": 0.7521273189464834, "learning_rate": 4.686720829566393e-06, "loss": 0.419, "step": 3992 }, { "epoch": 0.9873887240356083, "grad_norm": 0.7824768932828919, "learning_rate": 4.686563274529359e-06, "loss": 0.4081, "step": 3993 }, { "epoch": 0.9876360039564788, "grad_norm": 0.8262243329490202, "learning_rate": 4.686405682532992e-06, "loss": 0.3794, "step": 3994 }, { "epoch": 0.9878832838773491, "grad_norm": 0.7889887455036464, "learning_rate": 4.686248053579953e-06, "loss": 0.3703, "step": 3995 }, { "epoch": 0.9881305637982196, "grad_norm": 0.7825185955719633, "learning_rate": 4.686090387672909e-06, "loss": 0.4105, "step": 3996 }, { "epoch": 0.98837784371909, "grad_norm": 0.7783848883203941, "learning_rate": 4.685932684814524e-06, "loss": 0.4152, "step": 3997 }, { "epoch": 0.9886251236399605, "grad_norm": 0.791414411907457, "learning_rate": 4.6857749450074625e-06, "loss": 0.409, "step": 3998 }, { "epoch": 0.9888724035608308, "grad_norm": 0.8043289643137662, "learning_rate": 4.685617168254393e-06, "loss": 0.4222, "step": 3999 }, { "epoch": 0.9891196834817013, "grad_norm": 0.7714257806897765, "learning_rate": 4.68545935455798e-06, "loss": 0.4047, "step": 4000 }, { "epoch": 0.9893669634025717, "grad_norm": 0.7612816309042126, "learning_rate": 4.6853015039208924e-06, "loss": 0.3969, "step": 4001 }, { "epoch": 0.9896142433234422, "grad_norm": 0.8149288705742704, "learning_rate": 4.685143616345799e-06, "loss": 0.4114, "step": 4002 }, { "epoch": 0.9898615232443125, "grad_norm": 0.8201740720242746, "learning_rate": 4.684985691835367e-06, "loss": 0.3888, "step": 4003 }, { "epoch": 0.990108803165183, "grad_norm": 0.801323731002106, "learning_rate": 4.684827730392267e-06, "loss": 0.3877, "step": 4004 }, { "epoch": 0.9903560830860534, "grad_norm": 0.8193208923254229, "learning_rate": 4.6846697320191685e-06, "loss": 0.3644, "step": 4005 }, { "epoch": 0.9906033630069239, "grad_norm": 0.795362255642567, "learning_rate": 4.684511696718741e-06, "loss": 0.3919, "step": 4006 }, { "epoch": 0.9908506429277942, "grad_norm": 0.7751441931444772, "learning_rate": 4.684353624493658e-06, "loss": 0.4054, "step": 4007 }, { "epoch": 0.9910979228486647, "grad_norm": 0.8464714446984203, "learning_rate": 4.68419551534659e-06, "loss": 0.409, "step": 4008 }, { "epoch": 0.9913452027695351, "grad_norm": 0.7602446475018649, "learning_rate": 4.68403736928021e-06, "loss": 0.4328, "step": 4009 }, { "epoch": 0.9915924826904056, "grad_norm": 0.7966158870763892, "learning_rate": 4.683879186297191e-06, "loss": 0.3946, "step": 4010 }, { "epoch": 0.9918397626112759, "grad_norm": 0.7517907281222944, "learning_rate": 4.683720966400206e-06, "loss": 0.3914, "step": 4011 }, { "epoch": 0.9920870425321464, "grad_norm": 0.7889222304421734, "learning_rate": 4.683562709591931e-06, "loss": 0.3978, "step": 4012 }, { "epoch": 0.9923343224530168, "grad_norm": 0.7811980760364818, "learning_rate": 4.683404415875039e-06, "loss": 0.4237, "step": 4013 }, { "epoch": 0.9925816023738873, "grad_norm": 0.8277250098361327, "learning_rate": 4.683246085252207e-06, "loss": 0.3695, "step": 4014 }, { "epoch": 0.9928288822947576, "grad_norm": 0.7931595016445199, "learning_rate": 4.683087717726112e-06, "loss": 0.4025, "step": 4015 }, { "epoch": 0.9930761622156281, "grad_norm": 0.7703771521508883, "learning_rate": 4.682929313299428e-06, "loss": 0.3843, "step": 4016 }, { "epoch": 0.9933234421364985, "grad_norm": 0.7864352072212776, "learning_rate": 4.682770871974835e-06, "loss": 0.3914, "step": 4017 }, { "epoch": 0.993570722057369, "grad_norm": 0.8004356756819567, "learning_rate": 4.6826123937550115e-06, "loss": 0.4228, "step": 4018 }, { "epoch": 0.9938180019782393, "grad_norm": 0.767390308350493, "learning_rate": 4.682453878642634e-06, "loss": 0.4035, "step": 4019 }, { "epoch": 0.9940652818991098, "grad_norm": 0.7933226964116108, "learning_rate": 4.682295326640383e-06, "loss": 0.4025, "step": 4020 }, { "epoch": 0.9943125618199802, "grad_norm": 0.7925071712238786, "learning_rate": 4.68213673775094e-06, "loss": 0.3889, "step": 4021 }, { "epoch": 0.9945598417408507, "grad_norm": 0.7849972882849776, "learning_rate": 4.681978111976983e-06, "loss": 0.4121, "step": 4022 }, { "epoch": 0.994807121661721, "grad_norm": 0.7912155124220194, "learning_rate": 4.681819449321194e-06, "loss": 0.3771, "step": 4023 }, { "epoch": 0.9950544015825915, "grad_norm": 0.7829532894668891, "learning_rate": 4.681660749786257e-06, "loss": 0.3937, "step": 4024 }, { "epoch": 0.9953016815034619, "grad_norm": 0.8047332493651175, "learning_rate": 4.6815020133748514e-06, "loss": 0.392, "step": 4025 }, { "epoch": 0.9955489614243324, "grad_norm": 0.796903690277019, "learning_rate": 4.6813432400896615e-06, "loss": 0.4058, "step": 4026 }, { "epoch": 0.9957962413452027, "grad_norm": 0.7651108232160881, "learning_rate": 4.681184429933372e-06, "loss": 0.4037, "step": 4027 }, { "epoch": 0.9960435212660732, "grad_norm": 0.8032283012798399, "learning_rate": 4.681025582908666e-06, "loss": 0.3986, "step": 4028 }, { "epoch": 0.9962908011869436, "grad_norm": 0.7857611793933696, "learning_rate": 4.68086669901823e-06, "loss": 0.4026, "step": 4029 }, { "epoch": 0.996538081107814, "grad_norm": 0.7659698488027594, "learning_rate": 4.680707778264747e-06, "loss": 0.4015, "step": 4030 }, { "epoch": 0.9967853610286844, "grad_norm": 0.8012692642045438, "learning_rate": 4.680548820650905e-06, "loss": 0.3898, "step": 4031 }, { "epoch": 0.9970326409495549, "grad_norm": 0.7579225666375018, "learning_rate": 4.680389826179391e-06, "loss": 0.4337, "step": 4032 }, { "epoch": 0.9972799208704253, "grad_norm": 0.8076161741050162, "learning_rate": 4.680230794852892e-06, "loss": 0.406, "step": 4033 }, { "epoch": 0.9975272007912958, "grad_norm": 0.7625082478334948, "learning_rate": 4.680071726674097e-06, "loss": 0.4151, "step": 4034 }, { "epoch": 0.9977744807121661, "grad_norm": 0.7806980832371719, "learning_rate": 4.679912621645693e-06, "loss": 0.4038, "step": 4035 }, { "epoch": 0.9980217606330366, "grad_norm": 0.8086404913136939, "learning_rate": 4.6797534797703705e-06, "loss": 0.3958, "step": 4036 }, { "epoch": 0.998269040553907, "grad_norm": 0.759896651238656, "learning_rate": 4.679594301050819e-06, "loss": 0.4097, "step": 4037 }, { "epoch": 0.9985163204747775, "grad_norm": 0.7791843543913921, "learning_rate": 4.67943508548973e-06, "loss": 0.37, "step": 4038 }, { "epoch": 0.9987636003956478, "grad_norm": 0.7930431997735501, "learning_rate": 4.679275833089793e-06, "loss": 0.4134, "step": 4039 }, { "epoch": 0.9990108803165183, "grad_norm": 0.7597904031408639, "learning_rate": 4.679116543853702e-06, "loss": 0.4118, "step": 4040 }, { "epoch": 0.9992581602373887, "grad_norm": 0.7896276040674204, "learning_rate": 4.678957217784147e-06, "loss": 0.3747, "step": 4041 }, { "epoch": 0.9995054401582592, "grad_norm": 0.7923093015887974, "learning_rate": 4.678797854883823e-06, "loss": 0.4067, "step": 4042 }, { "epoch": 0.9997527200791295, "grad_norm": 0.769422575838774, "learning_rate": 4.678638455155424e-06, "loss": 0.4326, "step": 4043 }, { "epoch": 1.0, "grad_norm": 0.7984512022584634, "learning_rate": 4.6784790186016425e-06, "loss": 0.3915, "step": 4044 } ], "logging_steps": 1, "max_steps": 24264, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 4044, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 416699048263680.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }