{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013934369121438027, "grad_norm": 8.021376609802246, "learning_rate": 0.0, "loss": 0.7377, "step": 1 }, { "epoch": 0.00027868738242876054, "grad_norm": 9.807330131530762, "learning_rate": 1.953125e-08, "loss": 0.7476, "step": 2 }, { "epoch": 0.0004180310736431408, "grad_norm": 9.328646659851074, "learning_rate": 3.90625e-08, "loss": 0.7436, "step": 3 }, { "epoch": 0.0005573747648575211, "grad_norm": 9.633577346801758, "learning_rate": 5.859375000000001e-08, "loss": 0.7449, "step": 4 }, { "epoch": 0.0006967184560719013, "grad_norm": 10.230039596557617, "learning_rate": 7.8125e-08, "loss": 0.7509, "step": 5 }, { "epoch": 0.0008360621472862816, "grad_norm": 9.170586585998535, "learning_rate": 9.765625e-08, "loss": 0.7434, "step": 6 }, { "epoch": 0.0009754058385006619, "grad_norm": 10.135435104370117, "learning_rate": 1.1718750000000002e-07, "loss": 0.7485, "step": 7 }, { "epoch": 0.0011147495297150422, "grad_norm": 10.049454689025879, "learning_rate": 1.3671875000000001e-07, "loss": 0.7489, "step": 8 }, { "epoch": 0.0012540932209294225, "grad_norm": 9.72823715209961, "learning_rate": 1.5625e-07, "loss": 0.7464, "step": 9 }, { "epoch": 0.0013934369121438026, "grad_norm": 9.883057594299316, "learning_rate": 1.7578125e-07, "loss": 0.7483, "step": 10 }, { "epoch": 0.001532780603358183, "grad_norm": 9.826281547546387, "learning_rate": 1.953125e-07, "loss": 0.7477, "step": 11 }, { "epoch": 0.0016721242945725633, "grad_norm": 9.607656478881836, "learning_rate": 2.1484375e-07, "loss": 0.7457, "step": 12 }, { "epoch": 0.0018114679857869436, "grad_norm": 9.511650085449219, "learning_rate": 2.3437500000000003e-07, "loss": 0.7439, "step": 13 }, { "epoch": 0.0019508116770013237, "grad_norm": 9.928963661193848, "learning_rate": 2.5390625000000003e-07, "loss": 0.7465, "step": 14 }, { "epoch": 0.0020901553682157042, "grad_norm": 9.344914436340332, "learning_rate": 2.7343750000000003e-07, "loss": 0.7429, "step": 15 }, { "epoch": 0.0022294990594300844, "grad_norm": 9.727673530578613, "learning_rate": 2.9296875000000003e-07, "loss": 0.744, "step": 16 }, { "epoch": 0.0023688427506444645, "grad_norm": 9.393170356750488, "learning_rate": 3.125e-07, "loss": 0.7444, "step": 17 }, { "epoch": 0.002508186441858845, "grad_norm": 10.070064544677734, "learning_rate": 3.3203125e-07, "loss": 0.7472, "step": 18 }, { "epoch": 0.002647530133073225, "grad_norm": 10.06524658203125, "learning_rate": 3.515625e-07, "loss": 0.7476, "step": 19 }, { "epoch": 0.0027868738242876052, "grad_norm": 8.969830513000488, "learning_rate": 3.7109375e-07, "loss": 0.741, "step": 20 }, { "epoch": 0.0029262175155019858, "grad_norm": 9.432873725891113, "learning_rate": 3.90625e-07, "loss": 0.7407, "step": 21 }, { "epoch": 0.003065561206716366, "grad_norm": 9.764861106872559, "learning_rate": 4.1015625e-07, "loss": 0.7399, "step": 22 }, { "epoch": 0.003204904897930746, "grad_norm": 10.050787925720215, "learning_rate": 4.296875e-07, "loss": 0.7394, "step": 23 }, { "epoch": 0.0033442485891451265, "grad_norm": 9.288446426391602, "learning_rate": 4.4921875e-07, "loss": 0.7337, "step": 24 }, { "epoch": 0.0034835922803595066, "grad_norm": 9.135147094726562, "learning_rate": 4.6875000000000006e-07, "loss": 0.7329, "step": 25 }, { "epoch": 0.003622935971573887, "grad_norm": 9.170262336730957, "learning_rate": 4.8828125e-07, "loss": 0.7328, "step": 26 }, { "epoch": 0.0037622796627882673, "grad_norm": 9.357287406921387, "learning_rate": 5.078125000000001e-07, "loss": 0.7314, "step": 27 }, { "epoch": 0.0039016233540026474, "grad_norm": 9.925809860229492, "learning_rate": 5.2734375e-07, "loss": 0.7354, "step": 28 }, { "epoch": 0.0040409670452170275, "grad_norm": 9.612934112548828, "learning_rate": 5.468750000000001e-07, "loss": 0.732, "step": 29 }, { "epoch": 0.0041803107364314085, "grad_norm": 9.309930801391602, "learning_rate": 5.6640625e-07, "loss": 0.7198, "step": 30 }, { "epoch": 0.004319654427645789, "grad_norm": 9.547076225280762, "learning_rate": 5.859375000000001e-07, "loss": 0.7193, "step": 31 }, { "epoch": 0.004458998118860169, "grad_norm": 9.857612609863281, "learning_rate": 6.0546875e-07, "loss": 0.7247, "step": 32 }, { "epoch": 0.004598341810074549, "grad_norm": 9.420503616333008, "learning_rate": 6.25e-07, "loss": 0.7017, "step": 33 }, { "epoch": 0.004737685501288929, "grad_norm": 8.360392570495605, "learning_rate": 6.445312500000001e-07, "loss": 0.6981, "step": 34 }, { "epoch": 0.004877029192503309, "grad_norm": 8.884937286376953, "learning_rate": 6.640625e-07, "loss": 0.696, "step": 35 }, { "epoch": 0.00501637288371769, "grad_norm": 9.11793327331543, "learning_rate": 6.835937500000001e-07, "loss": 0.6946, "step": 36 }, { "epoch": 0.00515571657493207, "grad_norm": 8.401571273803711, "learning_rate": 7.03125e-07, "loss": 0.6914, "step": 37 }, { "epoch": 0.00529506026614645, "grad_norm": 8.655383110046387, "learning_rate": 7.226562500000001e-07, "loss": 0.6892, "step": 38 }, { "epoch": 0.00543440395736083, "grad_norm": 9.277316093444824, "learning_rate": 7.421875e-07, "loss": 0.6885, "step": 39 }, { "epoch": 0.0055737476485752105, "grad_norm": 9.427530288696289, "learning_rate": 7.617187500000001e-07, "loss": 0.6886, "step": 40 }, { "epoch": 0.005713091339789591, "grad_norm": 8.328143119812012, "learning_rate": 7.8125e-07, "loss": 0.6773, "step": 41 }, { "epoch": 0.0058524350310039715, "grad_norm": 8.14529800415039, "learning_rate": 8.007812500000001e-07, "loss": 0.6618, "step": 42 }, { "epoch": 0.005991778722218352, "grad_norm": 7.384437561035156, "learning_rate": 8.203125e-07, "loss": 0.6632, "step": 43 }, { "epoch": 0.006131122413432732, "grad_norm": 8.233144760131836, "learning_rate": 8.398437500000001e-07, "loss": 0.658, "step": 44 }, { "epoch": 0.006270466104647112, "grad_norm": 6.9439897537231445, "learning_rate": 8.59375e-07, "loss": 0.6564, "step": 45 }, { "epoch": 0.006409809795861492, "grad_norm": 7.498589515686035, "learning_rate": 8.789062500000001e-07, "loss": 0.6154, "step": 46 }, { "epoch": 0.006549153487075873, "grad_norm": 5.929362773895264, "learning_rate": 8.984375e-07, "loss": 0.6133, "step": 47 }, { "epoch": 0.006688497178290253, "grad_norm": 6.714421272277832, "learning_rate": 9.179687500000001e-07, "loss": 0.5944, "step": 48 }, { "epoch": 0.006827840869504633, "grad_norm": 7.684356212615967, "learning_rate": 9.375000000000001e-07, "loss": 0.5895, "step": 49 }, { "epoch": 0.006967184560719013, "grad_norm": 6.359445095062256, "learning_rate": 9.570312500000002e-07, "loss": 0.5912, "step": 50 }, { "epoch": 0.007106528251933393, "grad_norm": 6.881518840789795, "learning_rate": 9.765625e-07, "loss": 0.5891, "step": 51 }, { "epoch": 0.007245871943147774, "grad_norm": 6.29652214050293, "learning_rate": 9.9609375e-07, "loss": 0.5848, "step": 52 }, { "epoch": 0.0073852156343621545, "grad_norm": 6.886022567749023, "learning_rate": 1.0156250000000001e-06, "loss": 0.5726, "step": 53 }, { "epoch": 0.007524559325576535, "grad_norm": 6.045979976654053, "learning_rate": 1.0351562500000002e-06, "loss": 0.5838, "step": 54 }, { "epoch": 0.007663903016790915, "grad_norm": 6.510111331939697, "learning_rate": 1.0546875e-06, "loss": 0.5724, "step": 55 }, { "epoch": 0.007803246708005295, "grad_norm": 5.4720282554626465, "learning_rate": 1.07421875e-06, "loss": 0.5787, "step": 56 }, { "epoch": 0.007942590399219676, "grad_norm": 5.367983341217041, "learning_rate": 1.0937500000000001e-06, "loss": 0.563, "step": 57 }, { "epoch": 0.008081934090434055, "grad_norm": 6.395733833312988, "learning_rate": 1.1132812500000002e-06, "loss": 0.5341, "step": 58 }, { "epoch": 0.008221277781648436, "grad_norm": 5.883957386016846, "learning_rate": 1.1328125e-06, "loss": 0.5316, "step": 59 }, { "epoch": 0.008360621472862817, "grad_norm": 4.9043498039245605, "learning_rate": 1.15234375e-06, "loss": 0.53, "step": 60 }, { "epoch": 0.008499965164077196, "grad_norm": 4.62682580947876, "learning_rate": 1.1718750000000001e-06, "loss": 0.5136, "step": 61 }, { "epoch": 0.008639308855291577, "grad_norm": 4.123767375946045, "learning_rate": 1.1914062500000002e-06, "loss": 0.501, "step": 62 }, { "epoch": 0.008778652546505956, "grad_norm": 3.599135398864746, "learning_rate": 1.2109375e-06, "loss": 0.4989, "step": 63 }, { "epoch": 0.008917996237720337, "grad_norm": 3.8652231693267822, "learning_rate": 1.23046875e-06, "loss": 0.4624, "step": 64 }, { "epoch": 0.009057339928934717, "grad_norm": 3.4035747051239014, "learning_rate": 1.25e-06, "loss": 0.4697, "step": 65 }, { "epoch": 0.009196683620149098, "grad_norm": 3.9363739490509033, "learning_rate": 1.2695312500000002e-06, "loss": 0.4325, "step": 66 }, { "epoch": 0.009336027311363479, "grad_norm": 4.304762840270996, "learning_rate": 1.2890625000000002e-06, "loss": 0.4193, "step": 67 }, { "epoch": 0.009475371002577858, "grad_norm": 4.314105987548828, "learning_rate": 1.30859375e-06, "loss": 0.4001, "step": 68 }, { "epoch": 0.009614714693792239, "grad_norm": 3.014529228210449, "learning_rate": 1.328125e-06, "loss": 0.4533, "step": 69 }, { "epoch": 0.009754058385006618, "grad_norm": 3.8452749252319336, "learning_rate": 1.3476562500000001e-06, "loss": 0.403, "step": 70 }, { "epoch": 0.009893402076220999, "grad_norm": 2.513983726501465, "learning_rate": 1.3671875000000002e-06, "loss": 0.4543, "step": 71 }, { "epoch": 0.01003274576743538, "grad_norm": 3.4960575103759766, "learning_rate": 1.38671875e-06, "loss": 0.4025, "step": 72 }, { "epoch": 0.01017208945864976, "grad_norm": 1.8895436525344849, "learning_rate": 1.40625e-06, "loss": 0.4767, "step": 73 }, { "epoch": 0.01031143314986414, "grad_norm": 3.30134654045105, "learning_rate": 1.4257812500000001e-06, "loss": 0.4054, "step": 74 }, { "epoch": 0.01045077684107852, "grad_norm": 3.63547420501709, "learning_rate": 1.4453125000000002e-06, "loss": 0.3728, "step": 75 }, { "epoch": 0.0105901205322929, "grad_norm": 1.762001633644104, "learning_rate": 1.46484375e-06, "loss": 0.455, "step": 76 }, { "epoch": 0.010729464223507281, "grad_norm": 2.558823347091675, "learning_rate": 1.484375e-06, "loss": 0.4026, "step": 77 }, { "epoch": 0.01086880791472166, "grad_norm": 2.3660566806793213, "learning_rate": 1.5039062500000001e-06, "loss": 0.4034, "step": 78 }, { "epoch": 0.011008151605936042, "grad_norm": 2.0598206520080566, "learning_rate": 1.5234375000000002e-06, "loss": 0.4089, "step": 79 }, { "epoch": 0.011147495297150421, "grad_norm": 1.9902126789093018, "learning_rate": 1.54296875e-06, "loss": 0.3997, "step": 80 }, { "epoch": 0.011286838988364802, "grad_norm": 1.056702971458435, "learning_rate": 1.5625e-06, "loss": 0.4496, "step": 81 }, { "epoch": 0.011426182679579183, "grad_norm": 1.1707508563995361, "learning_rate": 1.5820312500000001e-06, "loss": 0.4332, "step": 82 }, { "epoch": 0.011565526370793562, "grad_norm": 0.8000664710998535, "learning_rate": 1.6015625000000002e-06, "loss": 0.4485, "step": 83 }, { "epoch": 0.011704870062007943, "grad_norm": 1.563494086265564, "learning_rate": 1.6210937500000002e-06, "loss": 0.3875, "step": 84 }, { "epoch": 0.011844213753222322, "grad_norm": 1.3950389623641968, "learning_rate": 1.640625e-06, "loss": 0.3918, "step": 85 }, { "epoch": 0.011983557444436703, "grad_norm": 1.427275538444519, "learning_rate": 1.6601562500000001e-06, "loss": 0.3832, "step": 86 }, { "epoch": 0.012122901135651083, "grad_norm": 1.7343196868896484, "learning_rate": 1.6796875000000002e-06, "loss": 0.3506, "step": 87 }, { "epoch": 0.012262244826865464, "grad_norm": 1.1223461627960205, "learning_rate": 1.6992187500000002e-06, "loss": 0.3845, "step": 88 }, { "epoch": 0.012401588518079844, "grad_norm": 1.1780928373336792, "learning_rate": 1.71875e-06, "loss": 0.3712, "step": 89 }, { "epoch": 0.012540932209294224, "grad_norm": 1.3207409381866455, "learning_rate": 1.7382812500000001e-06, "loss": 0.3475, "step": 90 }, { "epoch": 0.012680275900508605, "grad_norm": 1.0347950458526611, "learning_rate": 1.7578125000000002e-06, "loss": 0.3612, "step": 91 }, { "epoch": 0.012819619591722984, "grad_norm": 1.3203600645065308, "learning_rate": 1.7773437500000002e-06, "loss": 0.3311, "step": 92 }, { "epoch": 0.012958963282937365, "grad_norm": 0.5605036020278931, "learning_rate": 1.796875e-06, "loss": 0.4504, "step": 93 }, { "epoch": 0.013098306974151746, "grad_norm": 1.0460125207901, "learning_rate": 1.81640625e-06, "loss": 0.3426, "step": 94 }, { "epoch": 0.013237650665366125, "grad_norm": 1.086140513420105, "learning_rate": 1.8359375000000002e-06, "loss": 0.3378, "step": 95 }, { "epoch": 0.013376994356580506, "grad_norm": 1.3641337156295776, "learning_rate": 1.8554687500000002e-06, "loss": 0.3176, "step": 96 }, { "epoch": 0.013516338047794885, "grad_norm": 0.8380808234214783, "learning_rate": 1.8750000000000003e-06, "loss": 0.3538, "step": 97 }, { "epoch": 0.013655681739009266, "grad_norm": 0.9042176008224487, "learning_rate": 1.89453125e-06, "loss": 0.3488, "step": 98 }, { "epoch": 0.013795025430223647, "grad_norm": 0.34208759665489197, "learning_rate": 1.9140625000000004e-06, "loss": 0.4066, "step": 99 }, { "epoch": 0.013934369121438027, "grad_norm": 0.6845542788505554, "learning_rate": 1.93359375e-06, "loss": 0.347, "step": 100 }, { "epoch": 0.014073712812652408, "grad_norm": 1.0321247577667236, "learning_rate": 1.953125e-06, "loss": 0.3068, "step": 101 }, { "epoch": 0.014213056503866787, "grad_norm": 0.4854828417301178, "learning_rate": 1.97265625e-06, "loss": 0.4185, "step": 102 }, { "epoch": 0.014352400195081168, "grad_norm": 0.7432349920272827, "learning_rate": 1.9921875e-06, "loss": 0.3439, "step": 103 }, { "epoch": 0.014491743886295549, "grad_norm": 0.48063981533050537, "learning_rate": 2.01171875e-06, "loss": 0.3817, "step": 104 }, { "epoch": 0.014631087577509928, "grad_norm": 1.0298606157302856, "learning_rate": 2.0312500000000002e-06, "loss": 0.3163, "step": 105 }, { "epoch": 0.014770431268724309, "grad_norm": 0.4924507141113281, "learning_rate": 2.0507812500000003e-06, "loss": 0.3742, "step": 106 }, { "epoch": 0.014909774959938688, "grad_norm": 0.7968605756759644, "learning_rate": 2.0703125000000003e-06, "loss": 0.326, "step": 107 }, { "epoch": 0.01504911865115307, "grad_norm": 1.0793098211288452, "learning_rate": 2.08984375e-06, "loss": 0.2813, "step": 108 }, { "epoch": 0.01518846234236745, "grad_norm": 0.6162307858467102, "learning_rate": 2.109375e-06, "loss": 0.3407, "step": 109 }, { "epoch": 0.01532780603358183, "grad_norm": 0.6211869120597839, "learning_rate": 2.12890625e-06, "loss": 0.345, "step": 110 }, { "epoch": 0.01546714972479621, "grad_norm": 1.089192509651184, "learning_rate": 2.1484375e-06, "loss": 0.3055, "step": 111 }, { "epoch": 0.01560649341601059, "grad_norm": 1.0175081491470337, "learning_rate": 2.16796875e-06, "loss": 0.3534, "step": 112 }, { "epoch": 0.01574583710722497, "grad_norm": 0.6708502173423767, "learning_rate": 2.1875000000000002e-06, "loss": 0.3927, "step": 113 }, { "epoch": 0.01588518079843935, "grad_norm": 1.5823793411254883, "learning_rate": 2.2070312500000003e-06, "loss": 0.3448, "step": 114 }, { "epoch": 0.016024524489653733, "grad_norm": 1.2374241352081299, "learning_rate": 2.2265625000000003e-06, "loss": 0.294, "step": 115 }, { "epoch": 0.01616386818086811, "grad_norm": 1.844296932220459, "learning_rate": 2.2460937500000004e-06, "loss": 0.3935, "step": 116 }, { "epoch": 0.01630321187208249, "grad_norm": 1.850056529045105, "learning_rate": 2.265625e-06, "loss": 0.3162, "step": 117 }, { "epoch": 0.016442555563296872, "grad_norm": 2.2032482624053955, "learning_rate": 2.28515625e-06, "loss": 0.3111, "step": 118 }, { "epoch": 0.016581899254511253, "grad_norm": 2.9346210956573486, "learning_rate": 2.3046875e-06, "loss": 0.3676, "step": 119 }, { "epoch": 0.016721242945725634, "grad_norm": 1.4795796871185303, "learning_rate": 2.32421875e-06, "loss": 0.3704, "step": 120 }, { "epoch": 0.01686058663694001, "grad_norm": 1.664349913597107, "learning_rate": 2.3437500000000002e-06, "loss": 0.3797, "step": 121 }, { "epoch": 0.016999930328154392, "grad_norm": 1.8797165155410767, "learning_rate": 2.3632812500000003e-06, "loss": 0.3054, "step": 122 }, { "epoch": 0.017139274019368773, "grad_norm": 1.520815372467041, "learning_rate": 2.3828125000000003e-06, "loss": 0.2732, "step": 123 }, { "epoch": 0.017278617710583154, "grad_norm": 0.8875313997268677, "learning_rate": 2.4023437500000004e-06, "loss": 0.3473, "step": 124 }, { "epoch": 0.017417961401797532, "grad_norm": 1.764501690864563, "learning_rate": 2.421875e-06, "loss": 0.2857, "step": 125 }, { "epoch": 0.017557305093011913, "grad_norm": 1.002784252166748, "learning_rate": 2.44140625e-06, "loss": 0.293, "step": 126 }, { "epoch": 0.017696648784226294, "grad_norm": 1.1303249597549438, "learning_rate": 2.4609375e-06, "loss": 0.3245, "step": 127 }, { "epoch": 0.017835992475440675, "grad_norm": 0.94593745470047, "learning_rate": 2.48046875e-06, "loss": 0.3175, "step": 128 }, { "epoch": 0.017975336166655056, "grad_norm": 0.8359571695327759, "learning_rate": 2.5e-06, "loss": 0.3059, "step": 129 }, { "epoch": 0.018114679857869433, "grad_norm": 1.517887830734253, "learning_rate": 2.5195312500000003e-06, "loss": 0.2535, "step": 130 }, { "epoch": 0.018254023549083814, "grad_norm": 1.6622538566589355, "learning_rate": 2.5390625000000003e-06, "loss": 0.3201, "step": 131 }, { "epoch": 0.018393367240298195, "grad_norm": 1.6349177360534668, "learning_rate": 2.5585937500000004e-06, "loss": 0.3527, "step": 132 }, { "epoch": 0.018532710931512576, "grad_norm": 1.0122156143188477, "learning_rate": 2.5781250000000004e-06, "loss": 0.26, "step": 133 }, { "epoch": 0.018672054622726957, "grad_norm": 2.622164487838745, "learning_rate": 2.59765625e-06, "loss": 0.2734, "step": 134 }, { "epoch": 0.018811398313941335, "grad_norm": 1.447640061378479, "learning_rate": 2.6171875e-06, "loss": 0.2617, "step": 135 }, { "epoch": 0.018950742005155716, "grad_norm": 2.0198891162872314, "learning_rate": 2.63671875e-06, "loss": 0.3661, "step": 136 }, { "epoch": 0.019090085696370097, "grad_norm": 1.7591923475265503, "learning_rate": 2.65625e-06, "loss": 0.2949, "step": 137 }, { "epoch": 0.019229429387584478, "grad_norm": 0.7401490807533264, "learning_rate": 2.6757812500000002e-06, "loss": 0.3061, "step": 138 }, { "epoch": 0.01936877307879886, "grad_norm": 1.1000404357910156, "learning_rate": 2.6953125000000003e-06, "loss": 0.3293, "step": 139 }, { "epoch": 0.019508116770013236, "grad_norm": 1.1382228136062622, "learning_rate": 2.7148437500000003e-06, "loss": 0.2663, "step": 140 }, { "epoch": 0.019647460461227617, "grad_norm": 0.9800328016281128, "learning_rate": 2.7343750000000004e-06, "loss": 0.3472, "step": 141 }, { "epoch": 0.019786804152441998, "grad_norm": 0.8553122282028198, "learning_rate": 2.75390625e-06, "loss": 0.2804, "step": 142 }, { "epoch": 0.01992614784365638, "grad_norm": 0.9470166563987732, "learning_rate": 2.7734375e-06, "loss": 0.3396, "step": 143 }, { "epoch": 0.02006549153487076, "grad_norm": 0.7528599500656128, "learning_rate": 2.79296875e-06, "loss": 0.3051, "step": 144 }, { "epoch": 0.020204835226085138, "grad_norm": 1.7903907299041748, "learning_rate": 2.8125e-06, "loss": 0.2828, "step": 145 }, { "epoch": 0.02034417891729952, "grad_norm": 0.9599040746688843, "learning_rate": 2.8320312500000002e-06, "loss": 0.2625, "step": 146 }, { "epoch": 0.0204835226085139, "grad_norm": 1.2414008378982544, "learning_rate": 2.8515625000000003e-06, "loss": 0.3093, "step": 147 }, { "epoch": 0.02062286629972828, "grad_norm": 1.160372018814087, "learning_rate": 2.8710937500000003e-06, "loss": 0.2684, "step": 148 }, { "epoch": 0.02076220999094266, "grad_norm": 2.685065269470215, "learning_rate": 2.8906250000000004e-06, "loss": 0.2508, "step": 149 }, { "epoch": 0.02090155368215704, "grad_norm": 1.4086271524429321, "learning_rate": 2.9101562500000004e-06, "loss": 0.2735, "step": 150 }, { "epoch": 0.02104089737337142, "grad_norm": 0.5266318917274475, "learning_rate": 2.9296875e-06, "loss": 0.326, "step": 151 }, { "epoch": 0.0211802410645858, "grad_norm": 1.7606542110443115, "learning_rate": 2.94921875e-06, "loss": 0.3238, "step": 152 }, { "epoch": 0.021319584755800182, "grad_norm": 0.6608834862709045, "learning_rate": 2.96875e-06, "loss": 0.2142, "step": 153 }, { "epoch": 0.021458928447014563, "grad_norm": 0.6914610266685486, "learning_rate": 2.9882812500000002e-06, "loss": 0.274, "step": 154 }, { "epoch": 0.02159827213822894, "grad_norm": 0.7957499027252197, "learning_rate": 3.0078125000000003e-06, "loss": 0.2711, "step": 155 }, { "epoch": 0.02173761582944332, "grad_norm": 1.157928705215454, "learning_rate": 3.0273437500000003e-06, "loss": 0.2661, "step": 156 }, { "epoch": 0.021876959520657702, "grad_norm": 1.11233651638031, "learning_rate": 3.0468750000000004e-06, "loss": 0.2633, "step": 157 }, { "epoch": 0.022016303211872083, "grad_norm": 1.6993768215179443, "learning_rate": 3.0664062500000004e-06, "loss": 0.3352, "step": 158 }, { "epoch": 0.022155646903086464, "grad_norm": 1.4264522790908813, "learning_rate": 3.0859375e-06, "loss": 0.2385, "step": 159 }, { "epoch": 0.022294990594300842, "grad_norm": 1.7466444969177246, "learning_rate": 3.10546875e-06, "loss": 0.3183, "step": 160 }, { "epoch": 0.022434334285515223, "grad_norm": 0.6450313925743103, "learning_rate": 3.125e-06, "loss": 0.3149, "step": 161 }, { "epoch": 0.022573677976729604, "grad_norm": 2.2523245811462402, "learning_rate": 3.14453125e-06, "loss": 0.2268, "step": 162 }, { "epoch": 0.022713021667943985, "grad_norm": 1.7050520181655884, "learning_rate": 3.1640625000000003e-06, "loss": 0.2654, "step": 163 }, { "epoch": 0.022852365359158366, "grad_norm": 0.7723689079284668, "learning_rate": 3.1835937500000003e-06, "loss": 0.2076, "step": 164 }, { "epoch": 0.022991709050372743, "grad_norm": 1.983217477798462, "learning_rate": 3.2031250000000004e-06, "loss": 0.2693, "step": 165 }, { "epoch": 0.023131052741587124, "grad_norm": 0.9446669816970825, "learning_rate": 3.2226562500000004e-06, "loss": 0.282, "step": 166 }, { "epoch": 0.023270396432801505, "grad_norm": 1.460263967514038, "learning_rate": 3.2421875000000005e-06, "loss": 0.267, "step": 167 }, { "epoch": 0.023409740124015886, "grad_norm": 1.6765462160110474, "learning_rate": 3.26171875e-06, "loss": 0.2112, "step": 168 }, { "epoch": 0.023549083815230267, "grad_norm": 1.6032085418701172, "learning_rate": 3.28125e-06, "loss": 0.2144, "step": 169 }, { "epoch": 0.023688427506444645, "grad_norm": 1.4295040369033813, "learning_rate": 3.30078125e-06, "loss": 0.1939, "step": 170 }, { "epoch": 0.023827771197659026, "grad_norm": 1.1029198169708252, "learning_rate": 3.3203125000000002e-06, "loss": 0.2717, "step": 171 }, { "epoch": 0.023967114888873407, "grad_norm": 0.6396500468254089, "learning_rate": 3.3398437500000003e-06, "loss": 0.2028, "step": 172 }, { "epoch": 0.024106458580087788, "grad_norm": 1.2401297092437744, "learning_rate": 3.3593750000000003e-06, "loss": 0.223, "step": 173 }, { "epoch": 0.024245802271302165, "grad_norm": 1.1733909845352173, "learning_rate": 3.3789062500000004e-06, "loss": 0.2467, "step": 174 }, { "epoch": 0.024385145962516546, "grad_norm": 0.8268371224403381, "learning_rate": 3.3984375000000004e-06, "loss": 0.2933, "step": 175 }, { "epoch": 0.024524489653730927, "grad_norm": 1.0017307996749878, "learning_rate": 3.41796875e-06, "loss": 0.2563, "step": 176 }, { "epoch": 0.024663833344945308, "grad_norm": 0.49973830580711365, "learning_rate": 3.4375e-06, "loss": 0.201, "step": 177 }, { "epoch": 0.02480317703615969, "grad_norm": 1.372902512550354, "learning_rate": 3.45703125e-06, "loss": 0.2212, "step": 178 }, { "epoch": 0.024942520727374067, "grad_norm": 0.7623677849769592, "learning_rate": 3.4765625000000002e-06, "loss": 0.2004, "step": 179 }, { "epoch": 0.025081864418588447, "grad_norm": 1.4711778163909912, "learning_rate": 3.4960937500000003e-06, "loss": 0.1815, "step": 180 }, { "epoch": 0.02522120810980283, "grad_norm": 0.8796917796134949, "learning_rate": 3.5156250000000003e-06, "loss": 0.2411, "step": 181 }, { "epoch": 0.02536055180101721, "grad_norm": 1.4457226991653442, "learning_rate": 3.5351562500000004e-06, "loss": 0.2621, "step": 182 }, { "epoch": 0.02549989549223159, "grad_norm": 0.6419083476066589, "learning_rate": 3.5546875000000004e-06, "loss": 0.3162, "step": 183 }, { "epoch": 0.025639239183445968, "grad_norm": 1.4833141565322876, "learning_rate": 3.5742187500000005e-06, "loss": 0.2761, "step": 184 }, { "epoch": 0.02577858287466035, "grad_norm": 0.6906145811080933, "learning_rate": 3.59375e-06, "loss": 0.2297, "step": 185 }, { "epoch": 0.02591792656587473, "grad_norm": 1.0633000135421753, "learning_rate": 3.61328125e-06, "loss": 0.2768, "step": 186 }, { "epoch": 0.02605727025708911, "grad_norm": 0.9487593770027161, "learning_rate": 3.6328125e-06, "loss": 0.2583, "step": 187 }, { "epoch": 0.026196613948303492, "grad_norm": 1.6514251232147217, "learning_rate": 3.6523437500000003e-06, "loss": 0.263, "step": 188 }, { "epoch": 0.02633595763951787, "grad_norm": 1.3837698698043823, "learning_rate": 3.6718750000000003e-06, "loss": 0.2063, "step": 189 }, { "epoch": 0.02647530133073225, "grad_norm": 1.4917408227920532, "learning_rate": 3.6914062500000004e-06, "loss": 0.2637, "step": 190 }, { "epoch": 0.02661464502194663, "grad_norm": 0.8192497491836548, "learning_rate": 3.7109375000000004e-06, "loss": 0.2197, "step": 191 }, { "epoch": 0.026753988713161012, "grad_norm": 1.3909157514572144, "learning_rate": 3.7304687500000005e-06, "loss": 0.2987, "step": 192 }, { "epoch": 0.026893332404375393, "grad_norm": 1.4854021072387695, "learning_rate": 3.7500000000000005e-06, "loss": 0.3359, "step": 193 }, { "epoch": 0.02703267609558977, "grad_norm": 0.7651714086532593, "learning_rate": 3.76953125e-06, "loss": 0.2123, "step": 194 }, { "epoch": 0.02717201978680415, "grad_norm": 1.7868564128875732, "learning_rate": 3.7890625e-06, "loss": 0.2695, "step": 195 }, { "epoch": 0.027311363478018533, "grad_norm": 1.4414970874786377, "learning_rate": 3.8085937500000002e-06, "loss": 0.1972, "step": 196 }, { "epoch": 0.027450707169232914, "grad_norm": 2.049947738647461, "learning_rate": 3.828125000000001e-06, "loss": 0.284, "step": 197 }, { "epoch": 0.027590050860447295, "grad_norm": 1.483351230621338, "learning_rate": 3.84765625e-06, "loss": 0.2328, "step": 198 }, { "epoch": 0.027729394551661672, "grad_norm": 0.9208379983901978, "learning_rate": 3.8671875e-06, "loss": 0.2305, "step": 199 }, { "epoch": 0.027868738242876053, "grad_norm": 1.906101942062378, "learning_rate": 3.88671875e-06, "loss": 0.2604, "step": 200 }, { "epoch": 0.028008081934090434, "grad_norm": 0.4401322305202484, "learning_rate": 3.90625e-06, "loss": 0.2016, "step": 201 }, { "epoch": 0.028147425625304815, "grad_norm": 0.9388290047645569, "learning_rate": 3.92578125e-06, "loss": 0.1636, "step": 202 }, { "epoch": 0.028286769316519196, "grad_norm": 0.6221119165420532, "learning_rate": 3.9453125e-06, "loss": 0.1587, "step": 203 }, { "epoch": 0.028426113007733574, "grad_norm": 0.5492455363273621, "learning_rate": 3.96484375e-06, "loss": 0.1934, "step": 204 }, { "epoch": 0.028565456698947955, "grad_norm": 0.6674190163612366, "learning_rate": 3.984375e-06, "loss": 0.2313, "step": 205 }, { "epoch": 0.028704800390162336, "grad_norm": 0.8701089024543762, "learning_rate": 4.00390625e-06, "loss": 0.1856, "step": 206 }, { "epoch": 0.028844144081376717, "grad_norm": 2.171100616455078, "learning_rate": 4.0234375e-06, "loss": 0.2288, "step": 207 }, { "epoch": 0.028983487772591097, "grad_norm": 1.3176885843276978, "learning_rate": 4.0429687500000004e-06, "loss": 0.211, "step": 208 }, { "epoch": 0.029122831463805475, "grad_norm": 0.994116485118866, "learning_rate": 4.0625000000000005e-06, "loss": 0.157, "step": 209 }, { "epoch": 0.029262175155019856, "grad_norm": 1.1591472625732422, "learning_rate": 4.0820312500000005e-06, "loss": 0.2638, "step": 210 }, { "epoch": 0.029401518846234237, "grad_norm": 0.758394181728363, "learning_rate": 4.101562500000001e-06, "loss": 0.1788, "step": 211 }, { "epoch": 0.029540862537448618, "grad_norm": 0.7484501004219055, "learning_rate": 4.121093750000001e-06, "loss": 0.211, "step": 212 }, { "epoch": 0.029680206228663, "grad_norm": 0.6640965938568115, "learning_rate": 4.140625000000001e-06, "loss": 0.2369, "step": 213 }, { "epoch": 0.029819549919877376, "grad_norm": 0.43090274930000305, "learning_rate": 4.160156250000001e-06, "loss": 0.2679, "step": 214 }, { "epoch": 0.029958893611091757, "grad_norm": 0.6018099784851074, "learning_rate": 4.1796875e-06, "loss": 0.2843, "step": 215 }, { "epoch": 0.03009823730230614, "grad_norm": 0.9378985166549683, "learning_rate": 4.19921875e-06, "loss": 0.2822, "step": 216 }, { "epoch": 0.03023758099352052, "grad_norm": 1.267996072769165, "learning_rate": 4.21875e-06, "loss": 0.2105, "step": 217 }, { "epoch": 0.0303769246847349, "grad_norm": 0.9407188892364502, "learning_rate": 4.23828125e-06, "loss": 0.2304, "step": 218 }, { "epoch": 0.030516268375949278, "grad_norm": 1.6096389293670654, "learning_rate": 4.2578125e-06, "loss": 0.1932, "step": 219 }, { "epoch": 0.03065561206716366, "grad_norm": 1.1206459999084473, "learning_rate": 4.27734375e-06, "loss": 0.2149, "step": 220 }, { "epoch": 0.03079495575837804, "grad_norm": 1.0887712240219116, "learning_rate": 4.296875e-06, "loss": 0.2505, "step": 221 }, { "epoch": 0.03093429944959242, "grad_norm": 3.318112850189209, "learning_rate": 4.31640625e-06, "loss": 0.3304, "step": 222 }, { "epoch": 0.031073643140806798, "grad_norm": 4.569552898406982, "learning_rate": 4.3359375e-06, "loss": 0.3142, "step": 223 }, { "epoch": 0.03121298683202118, "grad_norm": 2.2822988033294678, "learning_rate": 4.35546875e-06, "loss": 0.2317, "step": 224 }, { "epoch": 0.031352330523235564, "grad_norm": 1.0244308710098267, "learning_rate": 4.3750000000000005e-06, "loss": 0.2279, "step": 225 }, { "epoch": 0.03149167421444994, "grad_norm": 1.909877896308899, "learning_rate": 4.3945312500000005e-06, "loss": 0.2111, "step": 226 }, { "epoch": 0.03163101790566432, "grad_norm": 0.8110335469245911, "learning_rate": 4.4140625000000006e-06, "loss": 0.1927, "step": 227 }, { "epoch": 0.0317703615968787, "grad_norm": 1.2062805891036987, "learning_rate": 4.433593750000001e-06, "loss": 0.256, "step": 228 }, { "epoch": 0.03190970528809308, "grad_norm": 1.209052324295044, "learning_rate": 4.453125000000001e-06, "loss": 0.2693, "step": 229 }, { "epoch": 0.032049048979307465, "grad_norm": 1.1695759296417236, "learning_rate": 4.472656250000001e-06, "loss": 0.2909, "step": 230 }, { "epoch": 0.03218839267052184, "grad_norm": 0.7102117538452148, "learning_rate": 4.492187500000001e-06, "loss": 0.2048, "step": 231 }, { "epoch": 0.03232773636173622, "grad_norm": 0.9185650944709778, "learning_rate": 4.51171875e-06, "loss": 0.2723, "step": 232 }, { "epoch": 0.032467080052950605, "grad_norm": 0.3543650805950165, "learning_rate": 4.53125e-06, "loss": 0.1904, "step": 233 }, { "epoch": 0.03260642374416498, "grad_norm": 0.8620002269744873, "learning_rate": 4.55078125e-06, "loss": 0.2967, "step": 234 }, { "epoch": 0.032745767435379367, "grad_norm": 0.7409037351608276, "learning_rate": 4.5703125e-06, "loss": 0.2259, "step": 235 }, { "epoch": 0.032885111126593744, "grad_norm": 0.8418070673942566, "learning_rate": 4.58984375e-06, "loss": 0.1998, "step": 236 }, { "epoch": 0.03302445481780812, "grad_norm": 0.8257443308830261, "learning_rate": 4.609375e-06, "loss": 0.2123, "step": 237 }, { "epoch": 0.033163798509022506, "grad_norm": 0.6759973168373108, "learning_rate": 4.62890625e-06, "loss": 0.1859, "step": 238 }, { "epoch": 0.033303142200236883, "grad_norm": 0.8184802532196045, "learning_rate": 4.6484375e-06, "loss": 0.1478, "step": 239 }, { "epoch": 0.03344248589145127, "grad_norm": 0.5959136486053467, "learning_rate": 4.66796875e-06, "loss": 0.2322, "step": 240 }, { "epoch": 0.033581829582665645, "grad_norm": 1.1653053760528564, "learning_rate": 4.6875000000000004e-06, "loss": 0.1823, "step": 241 }, { "epoch": 0.03372117327388002, "grad_norm": 0.8866743445396423, "learning_rate": 4.7070312500000005e-06, "loss": 0.1785, "step": 242 }, { "epoch": 0.03386051696509441, "grad_norm": 1.68141770362854, "learning_rate": 4.7265625000000005e-06, "loss": 0.2636, "step": 243 }, { "epoch": 0.033999860656308785, "grad_norm": 0.8148296475410461, "learning_rate": 4.746093750000001e-06, "loss": 0.2614, "step": 244 }, { "epoch": 0.03413920434752317, "grad_norm": 0.655866265296936, "learning_rate": 4.765625000000001e-06, "loss": 0.2172, "step": 245 }, { "epoch": 0.03427854803873755, "grad_norm": 0.5102150440216064, "learning_rate": 4.785156250000001e-06, "loss": 0.1514, "step": 246 }, { "epoch": 0.034417891729951924, "grad_norm": 2.2313151359558105, "learning_rate": 4.804687500000001e-06, "loss": 0.2093, "step": 247 }, { "epoch": 0.03455723542116631, "grad_norm": 0.7363331317901611, "learning_rate": 4.824218750000001e-06, "loss": 0.2098, "step": 248 }, { "epoch": 0.034696579112380686, "grad_norm": 0.8417456746101379, "learning_rate": 4.84375e-06, "loss": 0.2633, "step": 249 }, { "epoch": 0.034835922803595064, "grad_norm": 1.2555866241455078, "learning_rate": 4.86328125e-06, "loss": 0.1837, "step": 250 }, { "epoch": 0.03497526649480945, "grad_norm": 1.047256350517273, "learning_rate": 4.8828125e-06, "loss": 0.2111, "step": 251 }, { "epoch": 0.035114610186023826, "grad_norm": 1.5342527627944946, "learning_rate": 4.90234375e-06, "loss": 0.2967, "step": 252 }, { "epoch": 0.03525395387723821, "grad_norm": 0.7159642577171326, "learning_rate": 4.921875e-06, "loss": 0.2526, "step": 253 }, { "epoch": 0.03539329756845259, "grad_norm": 0.8672778010368347, "learning_rate": 4.94140625e-06, "loss": 0.1954, "step": 254 }, { "epoch": 0.035532641259666965, "grad_norm": 1.5154306888580322, "learning_rate": 4.9609375e-06, "loss": 0.2377, "step": 255 }, { "epoch": 0.03567198495088135, "grad_norm": 0.5544745922088623, "learning_rate": 4.98046875e-06, "loss": 0.2316, "step": 256 }, { "epoch": 0.03581132864209573, "grad_norm": 1.5655666589736938, "learning_rate": 5e-06, "loss": 0.2405, "step": 257 }, { "epoch": 0.03595067233331011, "grad_norm": 2.007209300994873, "learning_rate": 5.0195312500000005e-06, "loss": 0.2118, "step": 258 }, { "epoch": 0.03609001602452449, "grad_norm": 3.1762170791625977, "learning_rate": 5.0390625000000005e-06, "loss": 0.2525, "step": 259 }, { "epoch": 0.03622935971573887, "grad_norm": 0.955818772315979, "learning_rate": 5.0585937500000006e-06, "loss": 0.2029, "step": 260 }, { "epoch": 0.03636870340695325, "grad_norm": 1.2501811981201172, "learning_rate": 5.078125000000001e-06, "loss": 0.2128, "step": 261 }, { "epoch": 0.03650804709816763, "grad_norm": 0.9516337513923645, "learning_rate": 5.097656250000001e-06, "loss": 0.2374, "step": 262 }, { "epoch": 0.03664739078938201, "grad_norm": 1.2273920774459839, "learning_rate": 5.117187500000001e-06, "loss": 0.2577, "step": 263 }, { "epoch": 0.03678673448059639, "grad_norm": 0.7803658246994019, "learning_rate": 5.136718750000001e-06, "loss": 0.2609, "step": 264 }, { "epoch": 0.03692607817181077, "grad_norm": 1.5626821517944336, "learning_rate": 5.156250000000001e-06, "loss": 0.274, "step": 265 }, { "epoch": 0.03706542186302515, "grad_norm": 0.523383378982544, "learning_rate": 5.17578125e-06, "loss": 0.1786, "step": 266 }, { "epoch": 0.03720476555423953, "grad_norm": 3.2912957668304443, "learning_rate": 5.1953125e-06, "loss": 0.2123, "step": 267 }, { "epoch": 0.037344109245453914, "grad_norm": 3.8853061199188232, "learning_rate": 5.21484375e-06, "loss": 0.284, "step": 268 }, { "epoch": 0.03748345293666829, "grad_norm": 0.7732066512107849, "learning_rate": 5.234375e-06, "loss": 0.1987, "step": 269 }, { "epoch": 0.03762279662788267, "grad_norm": 2.2408299446105957, "learning_rate": 5.25390625e-06, "loss": 0.2093, "step": 270 }, { "epoch": 0.037762140319097054, "grad_norm": 2.377204179763794, "learning_rate": 5.2734375e-06, "loss": 0.2199, "step": 271 }, { "epoch": 0.03790148401031143, "grad_norm": 1.1620689630508423, "learning_rate": 5.29296875e-06, "loss": 0.2242, "step": 272 }, { "epoch": 0.038040827701525816, "grad_norm": 2.2323801517486572, "learning_rate": 5.3125e-06, "loss": 0.2277, "step": 273 }, { "epoch": 0.03818017139274019, "grad_norm": 1.2459794282913208, "learning_rate": 5.3320312500000004e-06, "loss": 0.1697, "step": 274 }, { "epoch": 0.03831951508395457, "grad_norm": 1.0518202781677246, "learning_rate": 5.3515625000000005e-06, "loss": 0.2103, "step": 275 }, { "epoch": 0.038458858775168955, "grad_norm": 0.569542407989502, "learning_rate": 5.3710937500000005e-06, "loss": 0.1573, "step": 276 }, { "epoch": 0.03859820246638333, "grad_norm": 1.577150821685791, "learning_rate": 5.390625000000001e-06, "loss": 0.2383, "step": 277 }, { "epoch": 0.03873754615759772, "grad_norm": 1.6120280027389526, "learning_rate": 5.410156250000001e-06, "loss": 0.2462, "step": 278 }, { "epoch": 0.038876889848812095, "grad_norm": 0.7995463013648987, "learning_rate": 5.429687500000001e-06, "loss": 0.2181, "step": 279 }, { "epoch": 0.03901623354002647, "grad_norm": 1.303452730178833, "learning_rate": 5.449218750000001e-06, "loss": 0.1912, "step": 280 }, { "epoch": 0.03915557723124086, "grad_norm": 3.2999753952026367, "learning_rate": 5.468750000000001e-06, "loss": 0.2723, "step": 281 }, { "epoch": 0.039294920922455234, "grad_norm": 1.200645089149475, "learning_rate": 5.488281250000001e-06, "loss": 0.1849, "step": 282 }, { "epoch": 0.03943426461366962, "grad_norm": 0.7598762512207031, "learning_rate": 5.5078125e-06, "loss": 0.245, "step": 283 }, { "epoch": 0.039573608304883996, "grad_norm": 1.266479253768921, "learning_rate": 5.52734375e-06, "loss": 0.3386, "step": 284 }, { "epoch": 0.039712951996098374, "grad_norm": 0.6694331169128418, "learning_rate": 5.546875e-06, "loss": 0.2183, "step": 285 }, { "epoch": 0.03985229568731276, "grad_norm": 0.920893669128418, "learning_rate": 5.56640625e-06, "loss": 0.2059, "step": 286 }, { "epoch": 0.039991639378527136, "grad_norm": 1.0388126373291016, "learning_rate": 5.5859375e-06, "loss": 0.2126, "step": 287 }, { "epoch": 0.04013098306974152, "grad_norm": 1.2837820053100586, "learning_rate": 5.60546875e-06, "loss": 0.1667, "step": 288 }, { "epoch": 0.0402703267609559, "grad_norm": 1.5905100107192993, "learning_rate": 5.625e-06, "loss": 0.2072, "step": 289 }, { "epoch": 0.040409670452170275, "grad_norm": 0.7201563119888306, "learning_rate": 5.64453125e-06, "loss": 0.1973, "step": 290 }, { "epoch": 0.04054901414338466, "grad_norm": 1.0041292905807495, "learning_rate": 5.6640625000000005e-06, "loss": 0.2291, "step": 291 }, { "epoch": 0.04068835783459904, "grad_norm": 0.757828414440155, "learning_rate": 5.6835937500000005e-06, "loss": 0.255, "step": 292 }, { "epoch": 0.04082770152581342, "grad_norm": 0.9463983178138733, "learning_rate": 5.7031250000000006e-06, "loss": 0.313, "step": 293 }, { "epoch": 0.0409670452170278, "grad_norm": 0.9470463395118713, "learning_rate": 5.722656250000001e-06, "loss": 0.2058, "step": 294 }, { "epoch": 0.04110638890824218, "grad_norm": 0.6795046925544739, "learning_rate": 5.742187500000001e-06, "loss": 0.1931, "step": 295 }, { "epoch": 0.04124573259945656, "grad_norm": 1.192853569984436, "learning_rate": 5.761718750000001e-06, "loss": 0.2598, "step": 296 }, { "epoch": 0.04138507629067094, "grad_norm": 1.641945481300354, "learning_rate": 5.781250000000001e-06, "loss": 0.1941, "step": 297 }, { "epoch": 0.04152441998188532, "grad_norm": 0.9575998783111572, "learning_rate": 5.800781250000001e-06, "loss": 0.1567, "step": 298 }, { "epoch": 0.0416637636730997, "grad_norm": 1.0720888376235962, "learning_rate": 5.820312500000001e-06, "loss": 0.1673, "step": 299 }, { "epoch": 0.04180310736431408, "grad_norm": 1.1555674076080322, "learning_rate": 5.83984375e-06, "loss": 0.238, "step": 300 }, { "epoch": 0.04194245105552846, "grad_norm": 1.090567946434021, "learning_rate": 5.859375e-06, "loss": 0.2616, "step": 301 }, { "epoch": 0.04208179474674284, "grad_norm": 2.670581102371216, "learning_rate": 5.87890625e-06, "loss": 0.2422, "step": 302 }, { "epoch": 0.042221138437957224, "grad_norm": 1.4655429124832153, "learning_rate": 5.8984375e-06, "loss": 0.1842, "step": 303 }, { "epoch": 0.0423604821291716, "grad_norm": 0.9775046110153198, "learning_rate": 5.91796875e-06, "loss": 0.2845, "step": 304 }, { "epoch": 0.04249982582038598, "grad_norm": 2.012024402618408, "learning_rate": 5.9375e-06, "loss": 0.2035, "step": 305 }, { "epoch": 0.042639169511600364, "grad_norm": 0.7346779704093933, "learning_rate": 5.95703125e-06, "loss": 0.2848, "step": 306 }, { "epoch": 0.04277851320281474, "grad_norm": 0.7348558902740479, "learning_rate": 5.9765625000000004e-06, "loss": 0.2218, "step": 307 }, { "epoch": 0.042917856894029126, "grad_norm": 1.8249315023422241, "learning_rate": 5.9960937500000005e-06, "loss": 0.1935, "step": 308 }, { "epoch": 0.0430572005852435, "grad_norm": 0.9249917268753052, "learning_rate": 6.0156250000000005e-06, "loss": 0.1271, "step": 309 }, { "epoch": 0.04319654427645788, "grad_norm": 1.6970033645629883, "learning_rate": 6.035156250000001e-06, "loss": 0.2477, "step": 310 }, { "epoch": 0.043335887967672265, "grad_norm": 1.8658331632614136, "learning_rate": 6.054687500000001e-06, "loss": 0.2065, "step": 311 }, { "epoch": 0.04347523165888664, "grad_norm": 1.0495853424072266, "learning_rate": 6.074218750000001e-06, "loss": 0.2754, "step": 312 }, { "epoch": 0.04361457535010103, "grad_norm": 0.7652114033699036, "learning_rate": 6.093750000000001e-06, "loss": 0.1825, "step": 313 }, { "epoch": 0.043753919041315405, "grad_norm": 0.5026959180831909, "learning_rate": 6.113281250000001e-06, "loss": 0.192, "step": 314 }, { "epoch": 0.04389326273252978, "grad_norm": 1.2418758869171143, "learning_rate": 6.132812500000001e-06, "loss": 0.2038, "step": 315 }, { "epoch": 0.04403260642374417, "grad_norm": 0.36877280473709106, "learning_rate": 6.152343750000001e-06, "loss": 0.1911, "step": 316 }, { "epoch": 0.044171950114958544, "grad_norm": 1.745977520942688, "learning_rate": 6.171875e-06, "loss": 0.2307, "step": 317 }, { "epoch": 0.04431129380617293, "grad_norm": 1.1172596216201782, "learning_rate": 6.19140625e-06, "loss": 0.2151, "step": 318 }, { "epoch": 0.044450637497387306, "grad_norm": 1.4864139556884766, "learning_rate": 6.2109375e-06, "loss": 0.1658, "step": 319 }, { "epoch": 0.044589981188601684, "grad_norm": 1.6988619565963745, "learning_rate": 6.23046875e-06, "loss": 0.1722, "step": 320 }, { "epoch": 0.04472932487981607, "grad_norm": 0.3790900409221649, "learning_rate": 6.25e-06, "loss": 0.1766, "step": 321 }, { "epoch": 0.044868668571030446, "grad_norm": 0.6940658092498779, "learning_rate": 6.26953125e-06, "loss": 0.1843, "step": 322 }, { "epoch": 0.04500801226224483, "grad_norm": 1.5682543516159058, "learning_rate": 6.2890625e-06, "loss": 0.2248, "step": 323 }, { "epoch": 0.04514735595345921, "grad_norm": 0.35302478075027466, "learning_rate": 6.3085937500000005e-06, "loss": 0.1425, "step": 324 }, { "epoch": 0.045286699644673585, "grad_norm": 2.371526002883911, "learning_rate": 6.3281250000000005e-06, "loss": 0.2174, "step": 325 }, { "epoch": 0.04542604333588797, "grad_norm": 1.5628489255905151, "learning_rate": 6.3476562500000006e-06, "loss": 0.2225, "step": 326 }, { "epoch": 0.04556538702710235, "grad_norm": 0.8372102975845337, "learning_rate": 6.367187500000001e-06, "loss": 0.1483, "step": 327 }, { "epoch": 0.04570473071831673, "grad_norm": 1.3894015550613403, "learning_rate": 6.386718750000001e-06, "loss": 0.2144, "step": 328 }, { "epoch": 0.04584407440953111, "grad_norm": 1.4348104000091553, "learning_rate": 6.406250000000001e-06, "loss": 0.2161, "step": 329 }, { "epoch": 0.045983418100745486, "grad_norm": 0.8482874631881714, "learning_rate": 6.425781250000001e-06, "loss": 0.2266, "step": 330 }, { "epoch": 0.04612276179195987, "grad_norm": 1.1041407585144043, "learning_rate": 6.445312500000001e-06, "loss": 0.2056, "step": 331 }, { "epoch": 0.04626210548317425, "grad_norm": 0.7737264633178711, "learning_rate": 6.464843750000001e-06, "loss": 0.195, "step": 332 }, { "epoch": 0.04640144917438863, "grad_norm": 0.7320677638053894, "learning_rate": 6.484375000000001e-06, "loss": 0.1794, "step": 333 }, { "epoch": 0.04654079286560301, "grad_norm": 1.2895909547805786, "learning_rate": 6.50390625e-06, "loss": 0.1923, "step": 334 }, { "epoch": 0.04668013655681739, "grad_norm": 0.6832213401794434, "learning_rate": 6.5234375e-06, "loss": 0.2097, "step": 335 }, { "epoch": 0.04681948024803177, "grad_norm": 1.1212739944458008, "learning_rate": 6.54296875e-06, "loss": 0.1692, "step": 336 }, { "epoch": 0.04695882393924615, "grad_norm": 0.9060540199279785, "learning_rate": 6.5625e-06, "loss": 0.1536, "step": 337 }, { "epoch": 0.047098167630460534, "grad_norm": 1.0962915420532227, "learning_rate": 6.58203125e-06, "loss": 0.2012, "step": 338 }, { "epoch": 0.04723751132167491, "grad_norm": 0.36767226457595825, "learning_rate": 6.6015625e-06, "loss": 0.1608, "step": 339 }, { "epoch": 0.04737685501288929, "grad_norm": 0.7625219821929932, "learning_rate": 6.6210937500000004e-06, "loss": 0.1523, "step": 340 }, { "epoch": 0.047516198704103674, "grad_norm": 0.48393890261650085, "learning_rate": 6.6406250000000005e-06, "loss": 0.1985, "step": 341 }, { "epoch": 0.04765554239531805, "grad_norm": 0.6653043031692505, "learning_rate": 6.6601562500000005e-06, "loss": 0.2247, "step": 342 }, { "epoch": 0.047794886086532436, "grad_norm": 0.687486469745636, "learning_rate": 6.679687500000001e-06, "loss": 0.195, "step": 343 }, { "epoch": 0.04793422977774681, "grad_norm": 0.5203015208244324, "learning_rate": 6.699218750000001e-06, "loss": 0.1891, "step": 344 }, { "epoch": 0.04807357346896119, "grad_norm": 0.8695482611656189, "learning_rate": 6.718750000000001e-06, "loss": 0.1918, "step": 345 }, { "epoch": 0.048212917160175575, "grad_norm": 0.5283342003822327, "learning_rate": 6.738281250000001e-06, "loss": 0.1568, "step": 346 }, { "epoch": 0.04835226085138995, "grad_norm": 0.6308654546737671, "learning_rate": 6.757812500000001e-06, "loss": 0.249, "step": 347 }, { "epoch": 0.04849160454260433, "grad_norm": 0.9943318963050842, "learning_rate": 6.777343750000001e-06, "loss": 0.1906, "step": 348 }, { "epoch": 0.048630948233818715, "grad_norm": 1.6549263000488281, "learning_rate": 6.796875000000001e-06, "loss": 0.2339, "step": 349 }, { "epoch": 0.04877029192503309, "grad_norm": 0.5583332777023315, "learning_rate": 6.816406250000001e-06, "loss": 0.1821, "step": 350 }, { "epoch": 0.04890963561624748, "grad_norm": 0.8090507388114929, "learning_rate": 6.8359375e-06, "loss": 0.1885, "step": 351 }, { "epoch": 0.049048979307461854, "grad_norm": 0.5787164568901062, "learning_rate": 6.85546875e-06, "loss": 0.1584, "step": 352 }, { "epoch": 0.04918832299867623, "grad_norm": 0.5660175681114197, "learning_rate": 6.875e-06, "loss": 0.1409, "step": 353 }, { "epoch": 0.049327666689890616, "grad_norm": 0.5207782983779907, "learning_rate": 6.89453125e-06, "loss": 0.2075, "step": 354 }, { "epoch": 0.049467010381104994, "grad_norm": 1.006770133972168, "learning_rate": 6.9140625e-06, "loss": 0.1671, "step": 355 }, { "epoch": 0.04960635407231938, "grad_norm": 1.1638282537460327, "learning_rate": 6.93359375e-06, "loss": 0.2149, "step": 356 }, { "epoch": 0.049745697763533755, "grad_norm": 1.3125817775726318, "learning_rate": 6.9531250000000004e-06, "loss": 0.2276, "step": 357 }, { "epoch": 0.04988504145474813, "grad_norm": 3.1580159664154053, "learning_rate": 6.9726562500000005e-06, "loss": 0.1896, "step": 358 }, { "epoch": 0.05002438514596252, "grad_norm": 2.7368078231811523, "learning_rate": 6.9921875000000006e-06, "loss": 0.1822, "step": 359 }, { "epoch": 0.050163728837176895, "grad_norm": 1.1446231603622437, "learning_rate": 7.011718750000001e-06, "loss": 0.2517, "step": 360 }, { "epoch": 0.05030307252839128, "grad_norm": 2.2737221717834473, "learning_rate": 7.031250000000001e-06, "loss": 0.2048, "step": 361 }, { "epoch": 0.05044241621960566, "grad_norm": 2.012911558151245, "learning_rate": 7.050781250000001e-06, "loss": 0.2245, "step": 362 }, { "epoch": 0.050581759910820034, "grad_norm": 1.4392945766448975, "learning_rate": 7.070312500000001e-06, "loss": 0.2086, "step": 363 }, { "epoch": 0.05072110360203442, "grad_norm": 0.8214359283447266, "learning_rate": 7.089843750000001e-06, "loss": 0.183, "step": 364 }, { "epoch": 0.050860447293248796, "grad_norm": 0.9956749081611633, "learning_rate": 7.109375000000001e-06, "loss": 0.1574, "step": 365 }, { "epoch": 0.05099979098446318, "grad_norm": 1.5801464319229126, "learning_rate": 7.128906250000001e-06, "loss": 0.1991, "step": 366 }, { "epoch": 0.05113913467567756, "grad_norm": 1.0189954042434692, "learning_rate": 7.148437500000001e-06, "loss": 0.1754, "step": 367 }, { "epoch": 0.051278478366891936, "grad_norm": 0.8087765574455261, "learning_rate": 7.16796875e-06, "loss": 0.1979, "step": 368 }, { "epoch": 0.05141782205810632, "grad_norm": 1.6426169872283936, "learning_rate": 7.1875e-06, "loss": 0.2413, "step": 369 }, { "epoch": 0.0515571657493207, "grad_norm": 0.9640676975250244, "learning_rate": 7.20703125e-06, "loss": 0.2411, "step": 370 }, { "epoch": 0.05169650944053508, "grad_norm": 0.569931149482727, "learning_rate": 7.2265625e-06, "loss": 0.2052, "step": 371 }, { "epoch": 0.05183585313174946, "grad_norm": 0.8305347561836243, "learning_rate": 7.24609375e-06, "loss": 0.1321, "step": 372 }, { "epoch": 0.05197519682296384, "grad_norm": 2.432321071624756, "learning_rate": 7.265625e-06, "loss": 0.2231, "step": 373 }, { "epoch": 0.05211454051417822, "grad_norm": 1.4741008281707764, "learning_rate": 7.2851562500000005e-06, "loss": 0.2137, "step": 374 }, { "epoch": 0.0522538842053926, "grad_norm": 0.7337501645088196, "learning_rate": 7.3046875000000005e-06, "loss": 0.2135, "step": 375 }, { "epoch": 0.052393227896606984, "grad_norm": 1.3235501050949097, "learning_rate": 7.3242187500000006e-06, "loss": 0.2292, "step": 376 }, { "epoch": 0.05253257158782136, "grad_norm": 0.8768891096115112, "learning_rate": 7.343750000000001e-06, "loss": 0.1976, "step": 377 }, { "epoch": 0.05267191527903574, "grad_norm": 0.7859050035476685, "learning_rate": 7.363281250000001e-06, "loss": 0.2069, "step": 378 }, { "epoch": 0.05281125897025012, "grad_norm": 1.242845892906189, "learning_rate": 7.382812500000001e-06, "loss": 0.2444, "step": 379 }, { "epoch": 0.0529506026614645, "grad_norm": 0.9969369769096375, "learning_rate": 7.402343750000001e-06, "loss": 0.1808, "step": 380 }, { "epoch": 0.053089946352678885, "grad_norm": 0.7126882076263428, "learning_rate": 7.421875000000001e-06, "loss": 0.1861, "step": 381 }, { "epoch": 0.05322929004389326, "grad_norm": 1.1107381582260132, "learning_rate": 7.441406250000001e-06, "loss": 0.2145, "step": 382 }, { "epoch": 0.05336863373510764, "grad_norm": 1.7700504064559937, "learning_rate": 7.460937500000001e-06, "loss": 0.1941, "step": 383 }, { "epoch": 0.053507977426322025, "grad_norm": 1.1243138313293457, "learning_rate": 7.480468750000001e-06, "loss": 0.1849, "step": 384 }, { "epoch": 0.0536473211175364, "grad_norm": 1.9287348985671997, "learning_rate": 7.500000000000001e-06, "loss": 0.1634, "step": 385 }, { "epoch": 0.053786664808750786, "grad_norm": 2.05293607711792, "learning_rate": 7.51953125e-06, "loss": 0.2132, "step": 386 }, { "epoch": 0.053926008499965164, "grad_norm": 0.6775907278060913, "learning_rate": 7.5390625e-06, "loss": 0.1939, "step": 387 }, { "epoch": 0.05406535219117954, "grad_norm": 2.2110214233398438, "learning_rate": 7.55859375e-06, "loss": 0.2675, "step": 388 }, { "epoch": 0.054204695882393926, "grad_norm": 0.8913717865943909, "learning_rate": 7.578125e-06, "loss": 0.1635, "step": 389 }, { "epoch": 0.0543440395736083, "grad_norm": 0.5030198097229004, "learning_rate": 7.5976562500000004e-06, "loss": 0.2047, "step": 390 }, { "epoch": 0.05448338326482269, "grad_norm": 1.2985706329345703, "learning_rate": 7.6171875000000005e-06, "loss": 0.1956, "step": 391 }, { "epoch": 0.054622726956037065, "grad_norm": 2.1328141689300537, "learning_rate": 7.63671875e-06, "loss": 0.2114, "step": 392 }, { "epoch": 0.05476207064725144, "grad_norm": 0.7189955115318298, "learning_rate": 7.656250000000001e-06, "loss": 0.2039, "step": 393 }, { "epoch": 0.05490141433846583, "grad_norm": 1.423165202140808, "learning_rate": 7.67578125e-06, "loss": 0.2244, "step": 394 }, { "epoch": 0.055040758029680205, "grad_norm": 1.3440827131271362, "learning_rate": 7.6953125e-06, "loss": 0.2135, "step": 395 }, { "epoch": 0.05518010172089459, "grad_norm": 0.43241891264915466, "learning_rate": 7.71484375e-06, "loss": 0.155, "step": 396 }, { "epoch": 0.05531944541210897, "grad_norm": 0.5960917472839355, "learning_rate": 7.734375e-06, "loss": 0.2027, "step": 397 }, { "epoch": 0.055458789103323344, "grad_norm": 1.1929659843444824, "learning_rate": 7.753906250000001e-06, "loss": 0.2022, "step": 398 }, { "epoch": 0.05559813279453773, "grad_norm": 0.5531989932060242, "learning_rate": 7.7734375e-06, "loss": 0.1712, "step": 399 }, { "epoch": 0.055737476485752106, "grad_norm": 0.5844118595123291, "learning_rate": 7.792968750000001e-06, "loss": 0.1361, "step": 400 }, { "epoch": 0.05587682017696649, "grad_norm": 1.2620089054107666, "learning_rate": 7.8125e-06, "loss": 0.2552, "step": 401 }, { "epoch": 0.05601616386818087, "grad_norm": 1.001563310623169, "learning_rate": 7.832031250000001e-06, "loss": 0.1531, "step": 402 }, { "epoch": 0.056155507559395246, "grad_norm": 0.743715763092041, "learning_rate": 7.8515625e-06, "loss": 0.2215, "step": 403 }, { "epoch": 0.05629485125060963, "grad_norm": 0.47788310050964355, "learning_rate": 7.871093750000001e-06, "loss": 0.1517, "step": 404 }, { "epoch": 0.05643419494182401, "grad_norm": 0.9211798310279846, "learning_rate": 7.890625e-06, "loss": 0.2126, "step": 405 }, { "epoch": 0.05657353863303839, "grad_norm": 0.4018145799636841, "learning_rate": 7.910156250000001e-06, "loss": 0.1681, "step": 406 }, { "epoch": 0.05671288232425277, "grad_norm": 0.42043012380599976, "learning_rate": 7.9296875e-06, "loss": 0.2078, "step": 407 }, { "epoch": 0.05685222601546715, "grad_norm": 0.63270503282547, "learning_rate": 7.949218750000001e-06, "loss": 0.1758, "step": 408 }, { "epoch": 0.05699156970668153, "grad_norm": 0.3479999899864197, "learning_rate": 7.96875e-06, "loss": 0.1531, "step": 409 }, { "epoch": 0.05713091339789591, "grad_norm": 0.585478663444519, "learning_rate": 7.988281250000001e-06, "loss": 0.1631, "step": 410 }, { "epoch": 0.057270257089110294, "grad_norm": 0.7849962115287781, "learning_rate": 8.0078125e-06, "loss": 0.207, "step": 411 }, { "epoch": 0.05740960078032467, "grad_norm": 0.504971981048584, "learning_rate": 8.02734375e-06, "loss": 0.1904, "step": 412 }, { "epoch": 0.05754894447153905, "grad_norm": 0.8090599775314331, "learning_rate": 8.046875e-06, "loss": 0.1917, "step": 413 }, { "epoch": 0.05768828816275343, "grad_norm": 0.8629535436630249, "learning_rate": 8.06640625e-06, "loss": 0.2099, "step": 414 }, { "epoch": 0.05782763185396781, "grad_norm": 0.42232540249824524, "learning_rate": 8.085937500000001e-06, "loss": 0.1828, "step": 415 }, { "epoch": 0.057966975545182195, "grad_norm": 0.9041046500205994, "learning_rate": 8.10546875e-06, "loss": 0.1592, "step": 416 }, { "epoch": 0.05810631923639657, "grad_norm": 0.5193017721176147, "learning_rate": 8.125000000000001e-06, "loss": 0.1897, "step": 417 }, { "epoch": 0.05824566292761095, "grad_norm": 0.46166402101516724, "learning_rate": 8.14453125e-06, "loss": 0.2315, "step": 418 }, { "epoch": 0.058385006618825334, "grad_norm": 0.40835264325141907, "learning_rate": 8.164062500000001e-06, "loss": 0.1688, "step": 419 }, { "epoch": 0.05852435031003971, "grad_norm": 0.8152537941932678, "learning_rate": 8.18359375e-06, "loss": 0.1807, "step": 420 }, { "epoch": 0.058663694001254096, "grad_norm": 0.4652497470378876, "learning_rate": 8.203125000000001e-06, "loss": 0.1678, "step": 421 }, { "epoch": 0.058803037692468474, "grad_norm": 0.3572303354740143, "learning_rate": 8.22265625e-06, "loss": 0.1537, "step": 422 }, { "epoch": 0.05894238138368285, "grad_norm": 0.4677634537220001, "learning_rate": 8.242187500000001e-06, "loss": 0.181, "step": 423 }, { "epoch": 0.059081725074897236, "grad_norm": 0.6456019282341003, "learning_rate": 8.26171875e-06, "loss": 0.2125, "step": 424 }, { "epoch": 0.05922106876611161, "grad_norm": 1.283217191696167, "learning_rate": 8.281250000000001e-06, "loss": 0.2469, "step": 425 }, { "epoch": 0.059360412457326, "grad_norm": 1.0500919818878174, "learning_rate": 8.30078125e-06, "loss": 0.2689, "step": 426 }, { "epoch": 0.059499756148540375, "grad_norm": 0.7168013453483582, "learning_rate": 8.320312500000001e-06, "loss": 0.1936, "step": 427 }, { "epoch": 0.05963909983975475, "grad_norm": 0.6392090320587158, "learning_rate": 8.33984375e-06, "loss": 0.1997, "step": 428 }, { "epoch": 0.05977844353096914, "grad_norm": 0.8216161727905273, "learning_rate": 8.359375e-06, "loss": 0.2087, "step": 429 }, { "epoch": 0.059917787222183515, "grad_norm": 0.7825489640235901, "learning_rate": 8.37890625e-06, "loss": 0.133, "step": 430 }, { "epoch": 0.0600571309133979, "grad_norm": 1.2595512866973877, "learning_rate": 8.3984375e-06, "loss": 0.1852, "step": 431 }, { "epoch": 0.06019647460461228, "grad_norm": 0.4944189488887787, "learning_rate": 8.417968750000001e-06, "loss": 0.1607, "step": 432 }, { "epoch": 0.060335818295826654, "grad_norm": 0.8500766754150391, "learning_rate": 8.4375e-06, "loss": 0.1627, "step": 433 }, { "epoch": 0.06047516198704104, "grad_norm": 0.7093019485473633, "learning_rate": 8.457031250000001e-06, "loss": 0.2243, "step": 434 }, { "epoch": 0.060614505678255416, "grad_norm": 0.7439914345741272, "learning_rate": 8.4765625e-06, "loss": 0.1535, "step": 435 }, { "epoch": 0.0607538493694698, "grad_norm": 1.2605758905410767, "learning_rate": 8.496093750000001e-06, "loss": 0.178, "step": 436 }, { "epoch": 0.06089319306068418, "grad_norm": 0.5525380373001099, "learning_rate": 8.515625e-06, "loss": 0.1692, "step": 437 }, { "epoch": 0.061032536751898556, "grad_norm": 0.8264906406402588, "learning_rate": 8.535156250000001e-06, "loss": 0.1547, "step": 438 }, { "epoch": 0.06117188044311294, "grad_norm": 0.9838054180145264, "learning_rate": 8.5546875e-06, "loss": 0.1531, "step": 439 }, { "epoch": 0.06131122413432732, "grad_norm": 0.8509569764137268, "learning_rate": 8.574218750000001e-06, "loss": 0.2063, "step": 440 }, { "epoch": 0.0614505678255417, "grad_norm": 0.6561834216117859, "learning_rate": 8.59375e-06, "loss": 0.1582, "step": 441 }, { "epoch": 0.06158991151675608, "grad_norm": 0.7113165855407715, "learning_rate": 8.613281250000001e-06, "loss": 0.204, "step": 442 }, { "epoch": 0.06172925520797046, "grad_norm": 0.7735342383384705, "learning_rate": 8.6328125e-06, "loss": 0.2034, "step": 443 }, { "epoch": 0.06186859889918484, "grad_norm": 1.8053336143493652, "learning_rate": 8.652343750000002e-06, "loss": 0.2133, "step": 444 }, { "epoch": 0.06200794259039922, "grad_norm": 1.597709059715271, "learning_rate": 8.671875e-06, "loss": 0.2454, "step": 445 }, { "epoch": 0.062147286281613597, "grad_norm": 1.1454098224639893, "learning_rate": 8.69140625e-06, "loss": 0.156, "step": 446 }, { "epoch": 0.06228662997282798, "grad_norm": 1.126125454902649, "learning_rate": 8.7109375e-06, "loss": 0.1815, "step": 447 }, { "epoch": 0.06242597366404236, "grad_norm": 0.9722925424575806, "learning_rate": 8.73046875e-06, "loss": 0.1754, "step": 448 }, { "epoch": 0.06256531735525674, "grad_norm": 1.069406270980835, "learning_rate": 8.750000000000001e-06, "loss": 0.1757, "step": 449 }, { "epoch": 0.06270466104647113, "grad_norm": 5.167445182800293, "learning_rate": 8.76953125e-06, "loss": 0.2917, "step": 450 }, { "epoch": 0.0628440047376855, "grad_norm": 0.7724747657775879, "learning_rate": 8.789062500000001e-06, "loss": 0.2255, "step": 451 }, { "epoch": 0.06298334842889988, "grad_norm": 0.8316136598587036, "learning_rate": 8.80859375e-06, "loss": 0.2177, "step": 452 }, { "epoch": 0.06312269212011426, "grad_norm": 0.6099687814712524, "learning_rate": 8.828125000000001e-06, "loss": 0.1671, "step": 453 }, { "epoch": 0.06326203581132864, "grad_norm": 0.9986627697944641, "learning_rate": 8.84765625e-06, "loss": 0.1617, "step": 454 }, { "epoch": 0.06340137950254303, "grad_norm": 0.79304039478302, "learning_rate": 8.867187500000001e-06, "loss": 0.1581, "step": 455 }, { "epoch": 0.0635407231937574, "grad_norm": 0.369170606136322, "learning_rate": 8.88671875e-06, "loss": 0.1461, "step": 456 }, { "epoch": 0.06368006688497178, "grad_norm": 0.6771777272224426, "learning_rate": 8.906250000000001e-06, "loss": 0.1625, "step": 457 }, { "epoch": 0.06381941057618616, "grad_norm": 2.043429136276245, "learning_rate": 8.92578125e-06, "loss": 0.2173, "step": 458 }, { "epoch": 0.06395875426740054, "grad_norm": 1.7122275829315186, "learning_rate": 8.945312500000001e-06, "loss": 0.1495, "step": 459 }, { "epoch": 0.06409809795861493, "grad_norm": 3.2461116313934326, "learning_rate": 8.96484375e-06, "loss": 0.2273, "step": 460 }, { "epoch": 0.06423744164982931, "grad_norm": 0.8951117992401123, "learning_rate": 8.984375000000002e-06, "loss": 0.215, "step": 461 }, { "epoch": 0.06437678534104369, "grad_norm": 2.1003808975219727, "learning_rate": 9.00390625e-06, "loss": 0.1746, "step": 462 }, { "epoch": 0.06451612903225806, "grad_norm": 0.9090794324874878, "learning_rate": 9.0234375e-06, "loss": 0.1589, "step": 463 }, { "epoch": 0.06465547272347244, "grad_norm": 0.736084520816803, "learning_rate": 9.042968750000001e-06, "loss": 0.1466, "step": 464 }, { "epoch": 0.06479481641468683, "grad_norm": 0.7821611762046814, "learning_rate": 9.0625e-06, "loss": 0.152, "step": 465 }, { "epoch": 0.06493416010590121, "grad_norm": 0.4295934736728668, "learning_rate": 9.082031250000001e-06, "loss": 0.2021, "step": 466 }, { "epoch": 0.06507350379711559, "grad_norm": 1.6494814157485962, "learning_rate": 9.1015625e-06, "loss": 0.2369, "step": 467 }, { "epoch": 0.06521284748832996, "grad_norm": 1.0359758138656616, "learning_rate": 9.121093750000001e-06, "loss": 0.1586, "step": 468 }, { "epoch": 0.06535219117954434, "grad_norm": 0.5630233287811279, "learning_rate": 9.140625e-06, "loss": 0.1608, "step": 469 }, { "epoch": 0.06549153487075873, "grad_norm": 0.6167742013931274, "learning_rate": 9.160156250000001e-06, "loss": 0.147, "step": 470 }, { "epoch": 0.06563087856197311, "grad_norm": 0.9881575107574463, "learning_rate": 9.1796875e-06, "loss": 0.2057, "step": 471 }, { "epoch": 0.06577022225318749, "grad_norm": 0.3875056505203247, "learning_rate": 9.199218750000001e-06, "loss": 0.2014, "step": 472 }, { "epoch": 0.06590956594440187, "grad_norm": 0.8542759418487549, "learning_rate": 9.21875e-06, "loss": 0.2033, "step": 473 }, { "epoch": 0.06604890963561624, "grad_norm": 1.0159021615982056, "learning_rate": 9.238281250000001e-06, "loss": 0.1697, "step": 474 }, { "epoch": 0.06618825332683063, "grad_norm": 1.0170438289642334, "learning_rate": 9.2578125e-06, "loss": 0.202, "step": 475 }, { "epoch": 0.06632759701804501, "grad_norm": 0.3327377736568451, "learning_rate": 9.277343750000001e-06, "loss": 0.1418, "step": 476 }, { "epoch": 0.06646694070925939, "grad_norm": 0.8775528073310852, "learning_rate": 9.296875e-06, "loss": 0.1703, "step": 477 }, { "epoch": 0.06660628440047377, "grad_norm": 0.327816367149353, "learning_rate": 9.316406250000002e-06, "loss": 0.134, "step": 478 }, { "epoch": 0.06674562809168814, "grad_norm": 2.1859312057495117, "learning_rate": 9.3359375e-06, "loss": 0.2285, "step": 479 }, { "epoch": 0.06688497178290254, "grad_norm": 1.3980540037155151, "learning_rate": 9.35546875e-06, "loss": 0.2461, "step": 480 }, { "epoch": 0.06702431547411691, "grad_norm": 0.9781203269958496, "learning_rate": 9.375000000000001e-06, "loss": 0.2236, "step": 481 }, { "epoch": 0.06716365916533129, "grad_norm": 2.3229639530181885, "learning_rate": 9.39453125e-06, "loss": 0.2015, "step": 482 }, { "epoch": 0.06730300285654567, "grad_norm": 0.867573082447052, "learning_rate": 9.414062500000001e-06, "loss": 0.2343, "step": 483 }, { "epoch": 0.06744234654776005, "grad_norm": 1.3535361289978027, "learning_rate": 9.43359375e-06, "loss": 0.1925, "step": 484 }, { "epoch": 0.06758169023897444, "grad_norm": 1.6669102907180786, "learning_rate": 9.453125000000001e-06, "loss": 0.2075, "step": 485 }, { "epoch": 0.06772103393018881, "grad_norm": 0.8127424716949463, "learning_rate": 9.47265625e-06, "loss": 0.1698, "step": 486 }, { "epoch": 0.06786037762140319, "grad_norm": 1.2546402215957642, "learning_rate": 9.492187500000001e-06, "loss": 0.1571, "step": 487 }, { "epoch": 0.06799972131261757, "grad_norm": 1.4505929946899414, "learning_rate": 9.51171875e-06, "loss": 0.1682, "step": 488 }, { "epoch": 0.06813906500383195, "grad_norm": 0.6182718873023987, "learning_rate": 9.531250000000001e-06, "loss": 0.1541, "step": 489 }, { "epoch": 0.06827840869504634, "grad_norm": 0.7869895696640015, "learning_rate": 9.55078125e-06, "loss": 0.175, "step": 490 }, { "epoch": 0.06841775238626072, "grad_norm": 0.8914532661437988, "learning_rate": 9.570312500000001e-06, "loss": 0.1896, "step": 491 }, { "epoch": 0.0685570960774751, "grad_norm": 0.5864881277084351, "learning_rate": 9.58984375e-06, "loss": 0.1719, "step": 492 }, { "epoch": 0.06869643976868947, "grad_norm": 0.4866710603237152, "learning_rate": 9.609375000000001e-06, "loss": 0.1491, "step": 493 }, { "epoch": 0.06883578345990385, "grad_norm": 0.8652053475379944, "learning_rate": 9.62890625e-06, "loss": 0.253, "step": 494 }, { "epoch": 0.06897512715111823, "grad_norm": 0.4419879913330078, "learning_rate": 9.648437500000002e-06, "loss": 0.1532, "step": 495 }, { "epoch": 0.06911447084233262, "grad_norm": 0.41157200932502747, "learning_rate": 9.66796875e-06, "loss": 0.14, "step": 496 }, { "epoch": 0.069253814533547, "grad_norm": 0.7131197452545166, "learning_rate": 9.6875e-06, "loss": 0.1268, "step": 497 }, { "epoch": 0.06939315822476137, "grad_norm": 0.8945165872573853, "learning_rate": 9.707031250000001e-06, "loss": 0.1509, "step": 498 }, { "epoch": 0.06953250191597575, "grad_norm": 0.8763072490692139, "learning_rate": 9.7265625e-06, "loss": 0.1792, "step": 499 }, { "epoch": 0.06967184560719013, "grad_norm": 1.214751124382019, "learning_rate": 9.746093750000001e-06, "loss": 0.1903, "step": 500 }, { "epoch": 0.06981118929840452, "grad_norm": 0.891090452671051, "learning_rate": 9.765625e-06, "loss": 0.1633, "step": 501 }, { "epoch": 0.0699505329896189, "grad_norm": 1.5323715209960938, "learning_rate": 9.785156250000001e-06, "loss": 0.2397, "step": 502 }, { "epoch": 0.07008987668083327, "grad_norm": 1.4367731809616089, "learning_rate": 9.8046875e-06, "loss": 0.1929, "step": 503 }, { "epoch": 0.07022922037204765, "grad_norm": 0.360797256231308, "learning_rate": 9.824218750000001e-06, "loss": 0.1684, "step": 504 }, { "epoch": 0.07036856406326203, "grad_norm": 0.7080640196800232, "learning_rate": 9.84375e-06, "loss": 0.1839, "step": 505 }, { "epoch": 0.07050790775447642, "grad_norm": 0.6318811774253845, "learning_rate": 9.863281250000001e-06, "loss": 0.1799, "step": 506 }, { "epoch": 0.0706472514456908, "grad_norm": 0.6616985201835632, "learning_rate": 9.8828125e-06, "loss": 0.1706, "step": 507 }, { "epoch": 0.07078659513690518, "grad_norm": 0.874865710735321, "learning_rate": 9.902343750000001e-06, "loss": 0.1701, "step": 508 }, { "epoch": 0.07092593882811955, "grad_norm": 0.6665449142456055, "learning_rate": 9.921875e-06, "loss": 0.1539, "step": 509 }, { "epoch": 0.07106528251933393, "grad_norm": 0.5997623205184937, "learning_rate": 9.941406250000002e-06, "loss": 0.1773, "step": 510 }, { "epoch": 0.07120462621054832, "grad_norm": 0.8770989775657654, "learning_rate": 9.9609375e-06, "loss": 0.2257, "step": 511 }, { "epoch": 0.0713439699017627, "grad_norm": 0.6978288292884827, "learning_rate": 9.980468750000002e-06, "loss": 0.2001, "step": 512 }, { "epoch": 0.07148331359297708, "grad_norm": 0.9682765007019043, "learning_rate": 1e-05, "loss": 0.1876, "step": 513 }, { "epoch": 0.07162265728419145, "grad_norm": 0.5565376877784729, "learning_rate": 9.999999444557077e-06, "loss": 0.1451, "step": 514 }, { "epoch": 0.07176200097540583, "grad_norm": 0.412929892539978, "learning_rate": 9.999997778228428e-06, "loss": 0.1649, "step": 515 }, { "epoch": 0.07190134466662022, "grad_norm": 0.7933184504508972, "learning_rate": 9.999995001014424e-06, "loss": 0.2037, "step": 516 }, { "epoch": 0.0720406883578346, "grad_norm": 0.9485891461372375, "learning_rate": 9.999991112915685e-06, "loss": 0.1941, "step": 517 }, { "epoch": 0.07218003204904898, "grad_norm": 0.497955858707428, "learning_rate": 9.999986113933071e-06, "loss": 0.2214, "step": 518 }, { "epoch": 0.07231937574026336, "grad_norm": 0.8916881084442139, "learning_rate": 9.999980004067694e-06, "loss": 0.2108, "step": 519 }, { "epoch": 0.07245871943147773, "grad_norm": 0.5969368815422058, "learning_rate": 9.99997278332091e-06, "loss": 0.1721, "step": 520 }, { "epoch": 0.07259806312269212, "grad_norm": 0.4283144474029541, "learning_rate": 9.999964451694328e-06, "loss": 0.1969, "step": 521 }, { "epoch": 0.0727374068139065, "grad_norm": 0.3585093319416046, "learning_rate": 9.999955009189795e-06, "loss": 0.1379, "step": 522 }, { "epoch": 0.07287675050512088, "grad_norm": 0.4045071005821228, "learning_rate": 9.999944455809408e-06, "loss": 0.2069, "step": 523 }, { "epoch": 0.07301609419633526, "grad_norm": 0.38140177726745605, "learning_rate": 9.999932791555516e-06, "loss": 0.1889, "step": 524 }, { "epoch": 0.07315543788754963, "grad_norm": 1.2157598733901978, "learning_rate": 9.999920016430706e-06, "loss": 0.2207, "step": 525 }, { "epoch": 0.07329478157876403, "grad_norm": 0.8454203605651855, "learning_rate": 9.99990613043782e-06, "loss": 0.2023, "step": 526 }, { "epoch": 0.0734341252699784, "grad_norm": 0.30748969316482544, "learning_rate": 9.999891133579941e-06, "loss": 0.1343, "step": 527 }, { "epoch": 0.07357346896119278, "grad_norm": 0.4787304103374481, "learning_rate": 9.999875025860401e-06, "loss": 0.1766, "step": 528 }, { "epoch": 0.07371281265240716, "grad_norm": 0.5255283117294312, "learning_rate": 9.99985780728278e-06, "loss": 0.255, "step": 529 }, { "epoch": 0.07385215634362154, "grad_norm": 0.6605222225189209, "learning_rate": 9.999839477850903e-06, "loss": 0.1699, "step": 530 }, { "epoch": 0.07399150003483593, "grad_norm": 0.29237353801727295, "learning_rate": 9.999820037568844e-06, "loss": 0.168, "step": 531 }, { "epoch": 0.0741308437260503, "grad_norm": 0.7213468551635742, "learning_rate": 9.999799486440917e-06, "loss": 0.197, "step": 532 }, { "epoch": 0.07427018741726468, "grad_norm": 0.3014366924762726, "learning_rate": 9.999777824471694e-06, "loss": 0.1342, "step": 533 }, { "epoch": 0.07440953110847906, "grad_norm": 0.40020355582237244, "learning_rate": 9.999755051665985e-06, "loss": 0.1357, "step": 534 }, { "epoch": 0.07454887479969344, "grad_norm": 0.8430357575416565, "learning_rate": 9.99973116802885e-06, "loss": 0.1717, "step": 535 }, { "epoch": 0.07468821849090783, "grad_norm": 1.1947262287139893, "learning_rate": 9.999706173565594e-06, "loss": 0.1369, "step": 536 }, { "epoch": 0.0748275621821222, "grad_norm": 1.584631085395813, "learning_rate": 9.999680068281773e-06, "loss": 0.1768, "step": 537 }, { "epoch": 0.07496690587333658, "grad_norm": 0.8552449941635132, "learning_rate": 9.999652852183184e-06, "loss": 0.2183, "step": 538 }, { "epoch": 0.07510624956455096, "grad_norm": 1.5015426874160767, "learning_rate": 9.999624525275875e-06, "loss": 0.2048, "step": 539 }, { "epoch": 0.07524559325576534, "grad_norm": 1.2087452411651611, "learning_rate": 9.99959508756614e-06, "loss": 0.2075, "step": 540 }, { "epoch": 0.07538493694697973, "grad_norm": 1.6872202157974243, "learning_rate": 9.99956453906052e-06, "loss": 0.1613, "step": 541 }, { "epoch": 0.07552428063819411, "grad_norm": 2.885582447052002, "learning_rate": 9.999532879765801e-06, "loss": 0.209, "step": 542 }, { "epoch": 0.07566362432940849, "grad_norm": 0.7183048129081726, "learning_rate": 9.999500109689018e-06, "loss": 0.19, "step": 543 }, { "epoch": 0.07580296802062286, "grad_norm": 0.2639847695827484, "learning_rate": 9.999466228837452e-06, "loss": 0.1392, "step": 544 }, { "epoch": 0.07594231171183724, "grad_norm": 1.1816743612289429, "learning_rate": 9.999431237218629e-06, "loss": 0.1832, "step": 545 }, { "epoch": 0.07608165540305163, "grad_norm": 0.9717331528663635, "learning_rate": 9.999395134840323e-06, "loss": 0.2123, "step": 546 }, { "epoch": 0.07622099909426601, "grad_norm": 0.2353295236825943, "learning_rate": 9.999357921710557e-06, "loss": 0.1374, "step": 547 }, { "epoch": 0.07636034278548039, "grad_norm": 0.5665003657341003, "learning_rate": 9.999319597837599e-06, "loss": 0.1784, "step": 548 }, { "epoch": 0.07649968647669476, "grad_norm": 0.9946545362472534, "learning_rate": 9.99928016322996e-06, "loss": 0.1854, "step": 549 }, { "epoch": 0.07663903016790914, "grad_norm": 1.2376691102981567, "learning_rate": 9.999239617896406e-06, "loss": 0.2043, "step": 550 }, { "epoch": 0.07677837385912353, "grad_norm": 0.8252826929092407, "learning_rate": 9.999197961845943e-06, "loss": 0.1821, "step": 551 }, { "epoch": 0.07691771755033791, "grad_norm": 0.6991438865661621, "learning_rate": 9.999155195087826e-06, "loss": 0.1661, "step": 552 }, { "epoch": 0.07705706124155229, "grad_norm": 0.44034847617149353, "learning_rate": 9.999111317631559e-06, "loss": 0.1755, "step": 553 }, { "epoch": 0.07719640493276667, "grad_norm": 0.7230649590492249, "learning_rate": 9.999066329486888e-06, "loss": 0.215, "step": 554 }, { "epoch": 0.07733574862398104, "grad_norm": 0.2734776735305786, "learning_rate": 9.999020230663809e-06, "loss": 0.1173, "step": 555 }, { "epoch": 0.07747509231519543, "grad_norm": 0.6473878026008606, "learning_rate": 9.998973021172564e-06, "loss": 0.1861, "step": 556 }, { "epoch": 0.07761443600640981, "grad_norm": 0.42749467492103577, "learning_rate": 9.998924701023645e-06, "loss": 0.1822, "step": 557 }, { "epoch": 0.07775377969762419, "grad_norm": 0.7306029200553894, "learning_rate": 9.998875270227781e-06, "loss": 0.1762, "step": 558 }, { "epoch": 0.07789312338883857, "grad_norm": 0.45930996537208557, "learning_rate": 9.99882472879596e-06, "loss": 0.1427, "step": 559 }, { "epoch": 0.07803246708005294, "grad_norm": 1.033310055732727, "learning_rate": 9.998773076739409e-06, "loss": 0.2076, "step": 560 }, { "epoch": 0.07817181077126734, "grad_norm": 0.3703937828540802, "learning_rate": 9.998720314069606e-06, "loss": 0.1365, "step": 561 }, { "epoch": 0.07831115446248171, "grad_norm": 1.4850897789001465, "learning_rate": 9.99866644079827e-06, "loss": 0.219, "step": 562 }, { "epoch": 0.07845049815369609, "grad_norm": 1.4156123399734497, "learning_rate": 9.998611456937373e-06, "loss": 0.2107, "step": 563 }, { "epoch": 0.07858984184491047, "grad_norm": 0.6207078695297241, "learning_rate": 9.99855536249913e-06, "loss": 0.238, "step": 564 }, { "epoch": 0.07872918553612485, "grad_norm": 1.0526546239852905, "learning_rate": 9.998498157496004e-06, "loss": 0.181, "step": 565 }, { "epoch": 0.07886852922733924, "grad_norm": 0.8352890610694885, "learning_rate": 9.998439841940706e-06, "loss": 0.1539, "step": 566 }, { "epoch": 0.07900787291855361, "grad_norm": 0.4416954219341278, "learning_rate": 9.998380415846191e-06, "loss": 0.1838, "step": 567 }, { "epoch": 0.07914721660976799, "grad_norm": 0.8625487685203552, "learning_rate": 9.998319879225662e-06, "loss": 0.2331, "step": 568 }, { "epoch": 0.07928656030098237, "grad_norm": 0.7630326747894287, "learning_rate": 9.998258232092571e-06, "loss": 0.1988, "step": 569 }, { "epoch": 0.07942590399219675, "grad_norm": 0.3081788122653961, "learning_rate": 9.998195474460613e-06, "loss": 0.1568, "step": 570 }, { "epoch": 0.07956524768341114, "grad_norm": 1.1103510856628418, "learning_rate": 9.998131606343729e-06, "loss": 0.1875, "step": 571 }, { "epoch": 0.07970459137462552, "grad_norm": 0.6479159593582153, "learning_rate": 9.998066627756113e-06, "loss": 0.1909, "step": 572 }, { "epoch": 0.0798439350658399, "grad_norm": 0.5789439082145691, "learning_rate": 9.9980005387122e-06, "loss": 0.1778, "step": 573 }, { "epoch": 0.07998327875705427, "grad_norm": 0.6172446012496948, "learning_rate": 9.997933339226675e-06, "loss": 0.2028, "step": 574 }, { "epoch": 0.08012262244826865, "grad_norm": 1.4369728565216064, "learning_rate": 9.997865029314464e-06, "loss": 0.2113, "step": 575 }, { "epoch": 0.08026196613948304, "grad_norm": 0.5846291780471802, "learning_rate": 9.997795608990749e-06, "loss": 0.241, "step": 576 }, { "epoch": 0.08040130983069742, "grad_norm": 0.8077232837677002, "learning_rate": 9.99772507827095e-06, "loss": 0.1699, "step": 577 }, { "epoch": 0.0805406535219118, "grad_norm": 0.9411321878433228, "learning_rate": 9.997653437170739e-06, "loss": 0.1775, "step": 578 }, { "epoch": 0.08067999721312617, "grad_norm": 0.3908841609954834, "learning_rate": 9.997580685706032e-06, "loss": 0.1673, "step": 579 }, { "epoch": 0.08081934090434055, "grad_norm": 0.9238132834434509, "learning_rate": 9.997506823892993e-06, "loss": 0.1662, "step": 580 }, { "epoch": 0.08095868459555494, "grad_norm": 0.5178993940353394, "learning_rate": 9.997431851748034e-06, "loss": 0.1778, "step": 581 }, { "epoch": 0.08109802828676932, "grad_norm": 0.6347031593322754, "learning_rate": 9.99735576928781e-06, "loss": 0.1522, "step": 582 }, { "epoch": 0.0812373719779837, "grad_norm": 1.279325246810913, "learning_rate": 9.997278576529228e-06, "loss": 0.2183, "step": 583 }, { "epoch": 0.08137671566919807, "grad_norm": 0.3728599548339844, "learning_rate": 9.997200273489434e-06, "loss": 0.142, "step": 584 }, { "epoch": 0.08151605936041245, "grad_norm": 0.7480508089065552, "learning_rate": 9.997120860185827e-06, "loss": 0.1422, "step": 585 }, { "epoch": 0.08165540305162684, "grad_norm": 0.9560132026672363, "learning_rate": 9.997040336636052e-06, "loss": 0.1531, "step": 586 }, { "epoch": 0.08179474674284122, "grad_norm": 0.841809093952179, "learning_rate": 9.996958702857997e-06, "loss": 0.1774, "step": 587 }, { "epoch": 0.0819340904340556, "grad_norm": 0.9784308671951294, "learning_rate": 9.996875958869803e-06, "loss": 0.1566, "step": 588 }, { "epoch": 0.08207343412526998, "grad_norm": 0.21596816182136536, "learning_rate": 9.996792104689849e-06, "loss": 0.1496, "step": 589 }, { "epoch": 0.08221277781648435, "grad_norm": 0.5095465779304504, "learning_rate": 9.99670714033677e-06, "loss": 0.1615, "step": 590 }, { "epoch": 0.08235212150769874, "grad_norm": 0.4513169527053833, "learning_rate": 9.996621065829442e-06, "loss": 0.1601, "step": 591 }, { "epoch": 0.08249146519891312, "grad_norm": 0.3721694350242615, "learning_rate": 9.996533881186986e-06, "loss": 0.1518, "step": 592 }, { "epoch": 0.0826308088901275, "grad_norm": 0.5562433004379272, "learning_rate": 9.996445586428776e-06, "loss": 0.1618, "step": 593 }, { "epoch": 0.08277015258134188, "grad_norm": 0.7895183563232422, "learning_rate": 9.996356181574425e-06, "loss": 0.1548, "step": 594 }, { "epoch": 0.08290949627255625, "grad_norm": 0.5432734489440918, "learning_rate": 9.9962656666438e-06, "loss": 0.1748, "step": 595 }, { "epoch": 0.08304883996377065, "grad_norm": 0.3639037311077118, "learning_rate": 9.996174041657012e-06, "loss": 0.142, "step": 596 }, { "epoch": 0.08318818365498502, "grad_norm": 0.4404639005661011, "learning_rate": 9.996081306634416e-06, "loss": 0.1608, "step": 597 }, { "epoch": 0.0833275273461994, "grad_norm": 0.4082886576652527, "learning_rate": 9.995987461596617e-06, "loss": 0.1254, "step": 598 }, { "epoch": 0.08346687103741378, "grad_norm": 0.4740399718284607, "learning_rate": 9.995892506564461e-06, "loss": 0.1626, "step": 599 }, { "epoch": 0.08360621472862816, "grad_norm": 0.7399467825889587, "learning_rate": 9.995796441559052e-06, "loss": 0.161, "step": 600 }, { "epoch": 0.08374555841984255, "grad_norm": 0.9268762469291687, "learning_rate": 9.995699266601728e-06, "loss": 0.162, "step": 601 }, { "epoch": 0.08388490211105692, "grad_norm": 1.035431146621704, "learning_rate": 9.995600981714082e-06, "loss": 0.1653, "step": 602 }, { "epoch": 0.0840242458022713, "grad_norm": 0.6291974782943726, "learning_rate": 9.995501586917949e-06, "loss": 0.1758, "step": 603 }, { "epoch": 0.08416358949348568, "grad_norm": 0.5564426183700562, "learning_rate": 9.99540108223541e-06, "loss": 0.1511, "step": 604 }, { "epoch": 0.08430293318470006, "grad_norm": 0.5959488153457642, "learning_rate": 9.9952994676888e-06, "loss": 0.1278, "step": 605 }, { "epoch": 0.08444227687591445, "grad_norm": 1.4553591012954712, "learning_rate": 9.995196743300693e-06, "loss": 0.2273, "step": 606 }, { "epoch": 0.08458162056712883, "grad_norm": 0.34581804275512695, "learning_rate": 9.995092909093911e-06, "loss": 0.1679, "step": 607 }, { "epoch": 0.0847209642583432, "grad_norm": 0.31367069482803345, "learning_rate": 9.994987965091525e-06, "loss": 0.1568, "step": 608 }, { "epoch": 0.08486030794955758, "grad_norm": 0.5813353061676025, "learning_rate": 9.994881911316849e-06, "loss": 0.1734, "step": 609 }, { "epoch": 0.08499965164077196, "grad_norm": 0.2720184326171875, "learning_rate": 9.99477474779345e-06, "loss": 0.1625, "step": 610 }, { "epoch": 0.08513899533198635, "grad_norm": 0.4173327684402466, "learning_rate": 9.994666474545133e-06, "loss": 0.1782, "step": 611 }, { "epoch": 0.08527833902320073, "grad_norm": 0.656448245048523, "learning_rate": 9.994557091595956e-06, "loss": 0.1849, "step": 612 }, { "epoch": 0.0854176827144151, "grad_norm": 0.312686949968338, "learning_rate": 9.99444659897022e-06, "loss": 0.1464, "step": 613 }, { "epoch": 0.08555702640562948, "grad_norm": 0.7816035151481628, "learning_rate": 9.994334996692476e-06, "loss": 0.1883, "step": 614 }, { "epoch": 0.08569637009684386, "grad_norm": 0.5803066492080688, "learning_rate": 9.994222284787519e-06, "loss": 0.1992, "step": 615 }, { "epoch": 0.08583571378805825, "grad_norm": 0.3375391960144043, "learning_rate": 9.99410846328039e-06, "loss": 0.1564, "step": 616 }, { "epoch": 0.08597505747927263, "grad_norm": 1.0493314266204834, "learning_rate": 9.993993532196376e-06, "loss": 0.1733, "step": 617 }, { "epoch": 0.086114401170487, "grad_norm": 0.7747969031333923, "learning_rate": 9.993877491561015e-06, "loss": 0.168, "step": 618 }, { "epoch": 0.08625374486170138, "grad_norm": 0.4252323508262634, "learning_rate": 9.99376034140009e-06, "loss": 0.1591, "step": 619 }, { "epoch": 0.08639308855291576, "grad_norm": 0.29228436946868896, "learning_rate": 9.993642081739623e-06, "loss": 0.1474, "step": 620 }, { "epoch": 0.08653243224413015, "grad_norm": 1.0178709030151367, "learning_rate": 9.993522712605895e-06, "loss": 0.1698, "step": 621 }, { "epoch": 0.08667177593534453, "grad_norm": 0.8711619973182678, "learning_rate": 9.993402234025422e-06, "loss": 0.171, "step": 622 }, { "epoch": 0.08681111962655891, "grad_norm": 0.8738051652908325, "learning_rate": 9.993280646024975e-06, "loss": 0.2091, "step": 623 }, { "epoch": 0.08695046331777329, "grad_norm": 1.094627022743225, "learning_rate": 9.993157948631566e-06, "loss": 0.2094, "step": 624 }, { "epoch": 0.08708980700898766, "grad_norm": 1.5746663808822632, "learning_rate": 9.993034141872459e-06, "loss": 0.2326, "step": 625 }, { "epoch": 0.08722915070020205, "grad_norm": 0.3765898644924164, "learning_rate": 9.992909225775157e-06, "loss": 0.1649, "step": 626 }, { "epoch": 0.08736849439141643, "grad_norm": 0.4195946753025055, "learning_rate": 9.992783200367414e-06, "loss": 0.2104, "step": 627 }, { "epoch": 0.08750783808263081, "grad_norm": 0.6065201759338379, "learning_rate": 9.992656065677234e-06, "loss": 0.1926, "step": 628 }, { "epoch": 0.08764718177384519, "grad_norm": 0.3900664746761322, "learning_rate": 9.992527821732858e-06, "loss": 0.1222, "step": 629 }, { "epoch": 0.08778652546505956, "grad_norm": 0.30178147554397583, "learning_rate": 9.992398468562782e-06, "loss": 0.1371, "step": 630 }, { "epoch": 0.08792586915627396, "grad_norm": 0.654692530632019, "learning_rate": 9.992268006195744e-06, "loss": 0.181, "step": 631 }, { "epoch": 0.08806521284748833, "grad_norm": 0.21772727370262146, "learning_rate": 9.992136434660733e-06, "loss": 0.1723, "step": 632 }, { "epoch": 0.08820455653870271, "grad_norm": 0.48321858048439026, "learning_rate": 9.992003753986976e-06, "loss": 0.163, "step": 633 }, { "epoch": 0.08834390022991709, "grad_norm": 0.20346024632453918, "learning_rate": 9.991869964203955e-06, "loss": 0.1439, "step": 634 }, { "epoch": 0.08848324392113147, "grad_norm": 0.24869860708713531, "learning_rate": 9.991735065341394e-06, "loss": 0.1532, "step": 635 }, { "epoch": 0.08862258761234586, "grad_norm": 0.17897701263427734, "learning_rate": 9.991599057429266e-06, "loss": 0.1422, "step": 636 }, { "epoch": 0.08876193130356023, "grad_norm": 1.4617507457733154, "learning_rate": 9.991461940497786e-06, "loss": 0.244, "step": 637 }, { "epoch": 0.08890127499477461, "grad_norm": 0.5234072804450989, "learning_rate": 9.991323714577421e-06, "loss": 0.1655, "step": 638 }, { "epoch": 0.08904061868598899, "grad_norm": 0.20457616448402405, "learning_rate": 9.99118437969888e-06, "loss": 0.171, "step": 639 }, { "epoch": 0.08917996237720337, "grad_norm": 0.7506306767463684, "learning_rate": 9.99104393589312e-06, "loss": 0.1833, "step": 640 }, { "epoch": 0.08931930606841776, "grad_norm": 1.6572221517562866, "learning_rate": 9.990902383191346e-06, "loss": 0.221, "step": 641 }, { "epoch": 0.08945864975963214, "grad_norm": 0.2738921344280243, "learning_rate": 9.990759721625005e-06, "loss": 0.1428, "step": 642 }, { "epoch": 0.08959799345084651, "grad_norm": 0.9100532531738281, "learning_rate": 9.990615951225797e-06, "loss": 0.1598, "step": 643 }, { "epoch": 0.08973733714206089, "grad_norm": 1.6465739011764526, "learning_rate": 9.99047107202566e-06, "loss": 0.2096, "step": 644 }, { "epoch": 0.08987668083327527, "grad_norm": 0.621289849281311, "learning_rate": 9.990325084056787e-06, "loss": 0.1738, "step": 645 }, { "epoch": 0.09001602452448966, "grad_norm": 0.7081321477890015, "learning_rate": 9.99017798735161e-06, "loss": 0.163, "step": 646 }, { "epoch": 0.09015536821570404, "grad_norm": 0.5263434648513794, "learning_rate": 9.990029781942814e-06, "loss": 0.1456, "step": 647 }, { "epoch": 0.09029471190691842, "grad_norm": 1.4475067853927612, "learning_rate": 9.989880467863323e-06, "loss": 0.1978, "step": 648 }, { "epoch": 0.09043405559813279, "grad_norm": 0.3361093997955322, "learning_rate": 9.989730045146313e-06, "loss": 0.1821, "step": 649 }, { "epoch": 0.09057339928934717, "grad_norm": 1.0251425504684448, "learning_rate": 9.989578513825205e-06, "loss": 0.2646, "step": 650 }, { "epoch": 0.09071274298056156, "grad_norm": 0.40581393241882324, "learning_rate": 9.989425873933666e-06, "loss": 0.1571, "step": 651 }, { "epoch": 0.09085208667177594, "grad_norm": 0.2690681517124176, "learning_rate": 9.989272125505606e-06, "loss": 0.1529, "step": 652 }, { "epoch": 0.09099143036299032, "grad_norm": 0.4862072169780731, "learning_rate": 9.98911726857519e-06, "loss": 0.1587, "step": 653 }, { "epoch": 0.0911307740542047, "grad_norm": 0.5859420299530029, "learning_rate": 9.988961303176818e-06, "loss": 0.1851, "step": 654 }, { "epoch": 0.09127011774541907, "grad_norm": 0.4218701720237732, "learning_rate": 9.988804229345146e-06, "loss": 0.1669, "step": 655 }, { "epoch": 0.09140946143663346, "grad_norm": 0.4752636253833771, "learning_rate": 9.98864604711507e-06, "loss": 0.161, "step": 656 }, { "epoch": 0.09154880512784784, "grad_norm": 1.6776171922683716, "learning_rate": 9.988486756521733e-06, "loss": 0.2299, "step": 657 }, { "epoch": 0.09168814881906222, "grad_norm": 0.25697049498558044, "learning_rate": 9.98832635760053e-06, "loss": 0.1221, "step": 658 }, { "epoch": 0.0918274925102766, "grad_norm": 0.31343886256217957, "learning_rate": 9.988164850387095e-06, "loss": 0.1949, "step": 659 }, { "epoch": 0.09196683620149097, "grad_norm": 0.557626485824585, "learning_rate": 9.988002234917312e-06, "loss": 0.2493, "step": 660 }, { "epoch": 0.09210617989270536, "grad_norm": 0.6337831020355225, "learning_rate": 9.987838511227311e-06, "loss": 0.1682, "step": 661 }, { "epoch": 0.09224552358391974, "grad_norm": 0.38289889693260193, "learning_rate": 9.987673679353467e-06, "loss": 0.1722, "step": 662 }, { "epoch": 0.09238486727513412, "grad_norm": 0.5195349454879761, "learning_rate": 9.987507739332401e-06, "loss": 0.1851, "step": 663 }, { "epoch": 0.0925242109663485, "grad_norm": 0.27029702067375183, "learning_rate": 9.987340691200984e-06, "loss": 0.1369, "step": 664 }, { "epoch": 0.09266355465756287, "grad_norm": 0.33835339546203613, "learning_rate": 9.987172534996326e-06, "loss": 0.161, "step": 665 }, { "epoch": 0.09280289834877727, "grad_norm": 0.2055782824754715, "learning_rate": 9.98700327075579e-06, "loss": 0.1159, "step": 666 }, { "epoch": 0.09294224203999164, "grad_norm": 0.20279181003570557, "learning_rate": 9.986832898516985e-06, "loss": 0.1668, "step": 667 }, { "epoch": 0.09308158573120602, "grad_norm": 0.44464462995529175, "learning_rate": 9.986661418317759e-06, "loss": 0.1955, "step": 668 }, { "epoch": 0.0932209294224204, "grad_norm": 0.29785990715026855, "learning_rate": 9.986488830196215e-06, "loss": 0.1531, "step": 669 }, { "epoch": 0.09336027311363478, "grad_norm": 0.3146425485610962, "learning_rate": 9.986315134190694e-06, "loss": 0.1175, "step": 670 }, { "epoch": 0.09349961680484917, "grad_norm": 0.7752662301063538, "learning_rate": 9.98614033033979e-06, "loss": 0.2141, "step": 671 }, { "epoch": 0.09363896049606354, "grad_norm": 0.6833397150039673, "learning_rate": 9.985964418682342e-06, "loss": 0.1983, "step": 672 }, { "epoch": 0.09377830418727792, "grad_norm": 0.5311076641082764, "learning_rate": 9.985787399257431e-06, "loss": 0.1511, "step": 673 }, { "epoch": 0.0939176478784923, "grad_norm": 1.417798638343811, "learning_rate": 9.985609272104387e-06, "loss": 0.2422, "step": 674 }, { "epoch": 0.09405699156970668, "grad_norm": 0.549278974533081, "learning_rate": 9.985430037262787e-06, "loss": 0.2243, "step": 675 }, { "epoch": 0.09419633526092107, "grad_norm": 0.6268471479415894, "learning_rate": 9.98524969477245e-06, "loss": 0.1735, "step": 676 }, { "epoch": 0.09433567895213545, "grad_norm": 0.750331461429596, "learning_rate": 9.985068244673449e-06, "loss": 0.1307, "step": 677 }, { "epoch": 0.09447502264334982, "grad_norm": 0.3439265787601471, "learning_rate": 9.984885687006093e-06, "loss": 0.2358, "step": 678 }, { "epoch": 0.0946143663345642, "grad_norm": 0.7415019273757935, "learning_rate": 9.984702021810944e-06, "loss": 0.2127, "step": 679 }, { "epoch": 0.09475371002577858, "grad_norm": 0.2516634464263916, "learning_rate": 9.98451724912881e-06, "loss": 0.1732, "step": 680 }, { "epoch": 0.09489305371699297, "grad_norm": 0.3729555606842041, "learning_rate": 9.984331369000739e-06, "loss": 0.1528, "step": 681 }, { "epoch": 0.09503239740820735, "grad_norm": 0.4780495762825012, "learning_rate": 9.984144381468035e-06, "loss": 0.1692, "step": 682 }, { "epoch": 0.09517174109942172, "grad_norm": 0.7914402484893799, "learning_rate": 9.983956286572238e-06, "loss": 0.2127, "step": 683 }, { "epoch": 0.0953110847906361, "grad_norm": 0.28256064653396606, "learning_rate": 9.983767084355141e-06, "loss": 0.1595, "step": 684 }, { "epoch": 0.09545042848185048, "grad_norm": 0.36112672090530396, "learning_rate": 9.983576774858776e-06, "loss": 0.1175, "step": 685 }, { "epoch": 0.09558977217306487, "grad_norm": 0.9187925457954407, "learning_rate": 9.983385358125432e-06, "loss": 0.1428, "step": 686 }, { "epoch": 0.09572911586427925, "grad_norm": 0.5879944562911987, "learning_rate": 9.983192834197633e-06, "loss": 0.172, "step": 687 }, { "epoch": 0.09586845955549363, "grad_norm": 0.4204654395580292, "learning_rate": 9.982999203118153e-06, "loss": 0.1414, "step": 688 }, { "epoch": 0.096007803246708, "grad_norm": 0.4331134259700775, "learning_rate": 9.982804464930016e-06, "loss": 0.2051, "step": 689 }, { "epoch": 0.09614714693792238, "grad_norm": 0.9506420493125916, "learning_rate": 9.982608619676485e-06, "loss": 0.1912, "step": 690 }, { "epoch": 0.09628649062913676, "grad_norm": 0.2801590859889984, "learning_rate": 9.982411667401076e-06, "loss": 0.1692, "step": 691 }, { "epoch": 0.09642583432035115, "grad_norm": 0.2534920275211334, "learning_rate": 9.982213608147541e-06, "loss": 0.1995, "step": 692 }, { "epoch": 0.09656517801156553, "grad_norm": 0.2647676169872284, "learning_rate": 9.982014441959891e-06, "loss": 0.1637, "step": 693 }, { "epoch": 0.0967045217027799, "grad_norm": 0.538061261177063, "learning_rate": 9.98181416888237e-06, "loss": 0.1623, "step": 694 }, { "epoch": 0.09684386539399428, "grad_norm": 0.4461873769760132, "learning_rate": 9.981612788959481e-06, "loss": 0.1748, "step": 695 }, { "epoch": 0.09698320908520866, "grad_norm": 0.496587336063385, "learning_rate": 9.981410302235962e-06, "loss": 0.1687, "step": 696 }, { "epoch": 0.09712255277642305, "grad_norm": 0.24348793923854828, "learning_rate": 9.9812067087568e-06, "loss": 0.1761, "step": 697 }, { "epoch": 0.09726189646763743, "grad_norm": 0.2859611213207245, "learning_rate": 9.98100200856723e-06, "loss": 0.1773, "step": 698 }, { "epoch": 0.0974012401588518, "grad_norm": 0.4924370050430298, "learning_rate": 9.980796201712734e-06, "loss": 0.1479, "step": 699 }, { "epoch": 0.09754058385006618, "grad_norm": 0.6326171159744263, "learning_rate": 9.980589288239034e-06, "loss": 0.1873, "step": 700 }, { "epoch": 0.09767992754128056, "grad_norm": 0.34135350584983826, "learning_rate": 9.980381268192103e-06, "loss": 0.1574, "step": 701 }, { "epoch": 0.09781927123249495, "grad_norm": 0.6209410429000854, "learning_rate": 9.980172141618159e-06, "loss": 0.199, "step": 702 }, { "epoch": 0.09795861492370933, "grad_norm": 0.20526902377605438, "learning_rate": 9.979961908563663e-06, "loss": 0.1409, "step": 703 }, { "epoch": 0.09809795861492371, "grad_norm": 0.7178646326065063, "learning_rate": 9.979750569075325e-06, "loss": 0.1647, "step": 704 }, { "epoch": 0.09823730230613809, "grad_norm": 0.7183959484100342, "learning_rate": 9.979538123200102e-06, "loss": 0.1958, "step": 705 }, { "epoch": 0.09837664599735246, "grad_norm": 0.3036815822124481, "learning_rate": 9.979324570985194e-06, "loss": 0.1525, "step": 706 }, { "epoch": 0.09851598968856685, "grad_norm": 0.4731093645095825, "learning_rate": 9.979109912478044e-06, "loss": 0.1603, "step": 707 }, { "epoch": 0.09865533337978123, "grad_norm": 1.053368091583252, "learning_rate": 9.978894147726346e-06, "loss": 0.2433, "step": 708 }, { "epoch": 0.09879467707099561, "grad_norm": 0.8223994970321655, "learning_rate": 9.97867727677804e-06, "loss": 0.1479, "step": 709 }, { "epoch": 0.09893402076220999, "grad_norm": 0.9187429547309875, "learning_rate": 9.978459299681306e-06, "loss": 0.1901, "step": 710 }, { "epoch": 0.09907336445342436, "grad_norm": 0.20909364521503448, "learning_rate": 9.978240216484579e-06, "loss": 0.1408, "step": 711 }, { "epoch": 0.09921270814463876, "grad_norm": 0.24706189334392548, "learning_rate": 9.978020027236529e-06, "loss": 0.1559, "step": 712 }, { "epoch": 0.09935205183585313, "grad_norm": 0.44406747817993164, "learning_rate": 9.977798731986079e-06, "loss": 0.1389, "step": 713 }, { "epoch": 0.09949139552706751, "grad_norm": 0.6721508502960205, "learning_rate": 9.977576330782397e-06, "loss": 0.2197, "step": 714 }, { "epoch": 0.09963073921828189, "grad_norm": 0.3360269069671631, "learning_rate": 9.977352823674893e-06, "loss": 0.1482, "step": 715 }, { "epoch": 0.09977008290949627, "grad_norm": 0.49204832315444946, "learning_rate": 9.977128210713227e-06, "loss": 0.1952, "step": 716 }, { "epoch": 0.09990942660071066, "grad_norm": 0.43043088912963867, "learning_rate": 9.976902491947303e-06, "loss": 0.1704, "step": 717 }, { "epoch": 0.10004877029192503, "grad_norm": 0.17117147147655487, "learning_rate": 9.976675667427268e-06, "loss": 0.1416, "step": 718 }, { "epoch": 0.10018811398313941, "grad_norm": 0.3339420557022095, "learning_rate": 9.976447737203521e-06, "loss": 0.2225, "step": 719 }, { "epoch": 0.10032745767435379, "grad_norm": 0.4926185607910156, "learning_rate": 9.976218701326701e-06, "loss": 0.157, "step": 720 }, { "epoch": 0.10046680136556817, "grad_norm": 0.3757495582103729, "learning_rate": 9.975988559847693e-06, "loss": 0.187, "step": 721 }, { "epoch": 0.10060614505678256, "grad_norm": 0.22776085138320923, "learning_rate": 9.975757312817634e-06, "loss": 0.13, "step": 722 }, { "epoch": 0.10074548874799694, "grad_norm": 0.23288287222385406, "learning_rate": 9.975524960287895e-06, "loss": 0.1266, "step": 723 }, { "epoch": 0.10088483243921131, "grad_norm": 0.4982161521911621, "learning_rate": 9.975291502310105e-06, "loss": 0.1595, "step": 724 }, { "epoch": 0.10102417613042569, "grad_norm": 0.20487506687641144, "learning_rate": 9.975056938936129e-06, "loss": 0.1244, "step": 725 }, { "epoch": 0.10116351982164007, "grad_norm": 0.4209713637828827, "learning_rate": 9.974821270218086e-06, "loss": 0.2517, "step": 726 }, { "epoch": 0.10130286351285446, "grad_norm": 0.27183017134666443, "learning_rate": 9.974584496208334e-06, "loss": 0.1428, "step": 727 }, { "epoch": 0.10144220720406884, "grad_norm": 0.3499148190021515, "learning_rate": 9.974346616959476e-06, "loss": 0.2059, "step": 728 }, { "epoch": 0.10158155089528322, "grad_norm": 0.3173540532588959, "learning_rate": 9.974107632524368e-06, "loss": 0.1651, "step": 729 }, { "epoch": 0.10172089458649759, "grad_norm": 0.45375412702560425, "learning_rate": 9.973867542956104e-06, "loss": 0.1508, "step": 730 }, { "epoch": 0.10186023827771197, "grad_norm": 0.5824233293533325, "learning_rate": 9.973626348308027e-06, "loss": 0.1598, "step": 731 }, { "epoch": 0.10199958196892636, "grad_norm": 0.4941546618938446, "learning_rate": 9.973384048633728e-06, "loss": 0.163, "step": 732 }, { "epoch": 0.10213892566014074, "grad_norm": 0.4011986553668976, "learning_rate": 9.973140643987034e-06, "loss": 0.127, "step": 733 }, { "epoch": 0.10227826935135512, "grad_norm": 0.5741795897483826, "learning_rate": 9.97289613442203e-06, "loss": 0.183, "step": 734 }, { "epoch": 0.1024176130425695, "grad_norm": 0.23235948383808136, "learning_rate": 9.972650519993037e-06, "loss": 0.1573, "step": 735 }, { "epoch": 0.10255695673378387, "grad_norm": 0.3575359284877777, "learning_rate": 9.972403800754626e-06, "loss": 0.1786, "step": 736 }, { "epoch": 0.10269630042499826, "grad_norm": 0.34315404295921326, "learning_rate": 9.972155976761613e-06, "loss": 0.1569, "step": 737 }, { "epoch": 0.10283564411621264, "grad_norm": 0.2217186838388443, "learning_rate": 9.971907048069058e-06, "loss": 0.1715, "step": 738 }, { "epoch": 0.10297498780742702, "grad_norm": 0.3328709900379181, "learning_rate": 9.971657014732268e-06, "loss": 0.1933, "step": 739 }, { "epoch": 0.1031143314986414, "grad_norm": 0.20713914930820465, "learning_rate": 9.971405876806792e-06, "loss": 0.1434, "step": 740 }, { "epoch": 0.10325367518985577, "grad_norm": 0.22948406636714935, "learning_rate": 9.971153634348431e-06, "loss": 0.1446, "step": 741 }, { "epoch": 0.10339301888107016, "grad_norm": 0.30341601371765137, "learning_rate": 9.970900287413225e-06, "loss": 0.1688, "step": 742 }, { "epoch": 0.10353236257228454, "grad_norm": 0.3110578954219818, "learning_rate": 9.970645836057464e-06, "loss": 0.1513, "step": 743 }, { "epoch": 0.10367170626349892, "grad_norm": 0.3142828047275543, "learning_rate": 9.970390280337681e-06, "loss": 0.1934, "step": 744 }, { "epoch": 0.1038110499547133, "grad_norm": 0.43507081270217896, "learning_rate": 9.970133620310652e-06, "loss": 0.1883, "step": 745 }, { "epoch": 0.10395039364592767, "grad_norm": 0.49034184217453003, "learning_rate": 9.969875856033402e-06, "loss": 0.1613, "step": 746 }, { "epoch": 0.10408973733714207, "grad_norm": 0.9753531813621521, "learning_rate": 9.969616987563202e-06, "loss": 0.1671, "step": 747 }, { "epoch": 0.10422908102835644, "grad_norm": 0.2022014707326889, "learning_rate": 9.969357014957564e-06, "loss": 0.1191, "step": 748 }, { "epoch": 0.10436842471957082, "grad_norm": 0.2975197434425354, "learning_rate": 9.969095938274251e-06, "loss": 0.1476, "step": 749 }, { "epoch": 0.1045077684107852, "grad_norm": 0.2665453255176544, "learning_rate": 9.968833757571268e-06, "loss": 0.1405, "step": 750 }, { "epoch": 0.10464711210199958, "grad_norm": 0.32093366980552673, "learning_rate": 9.968570472906862e-06, "loss": 0.1581, "step": 751 }, { "epoch": 0.10478645579321397, "grad_norm": 0.35785195231437683, "learning_rate": 9.968306084339534e-06, "loss": 0.1734, "step": 752 }, { "epoch": 0.10492579948442834, "grad_norm": 0.4058820307254791, "learning_rate": 9.96804059192802e-06, "loss": 0.1979, "step": 753 }, { "epoch": 0.10506514317564272, "grad_norm": 0.812142014503479, "learning_rate": 9.96777399573131e-06, "loss": 0.2005, "step": 754 }, { "epoch": 0.1052044868668571, "grad_norm": 0.40273749828338623, "learning_rate": 9.967506295808634e-06, "loss": 0.1453, "step": 755 }, { "epoch": 0.10534383055807148, "grad_norm": 0.5389613509178162, "learning_rate": 9.96723749221947e-06, "loss": 0.1647, "step": 756 }, { "epoch": 0.10548317424928587, "grad_norm": 0.8587306141853333, "learning_rate": 9.96696758502354e-06, "loss": 0.1604, "step": 757 }, { "epoch": 0.10562251794050025, "grad_norm": 0.46849319338798523, "learning_rate": 9.966696574280808e-06, "loss": 0.1594, "step": 758 }, { "epoch": 0.10576186163171462, "grad_norm": 0.6927437782287598, "learning_rate": 9.966424460051489e-06, "loss": 0.1942, "step": 759 }, { "epoch": 0.105901205322929, "grad_norm": 0.39422792196273804, "learning_rate": 9.96615124239604e-06, "loss": 0.1701, "step": 760 }, { "epoch": 0.10604054901414338, "grad_norm": 0.3484000265598297, "learning_rate": 9.965876921375165e-06, "loss": 0.1468, "step": 761 }, { "epoch": 0.10617989270535777, "grad_norm": 0.5243971347808838, "learning_rate": 9.965601497049812e-06, "loss": 0.1805, "step": 762 }, { "epoch": 0.10631923639657215, "grad_norm": 0.6266080737113953, "learning_rate": 9.965324969481172e-06, "loss": 0.1496, "step": 763 }, { "epoch": 0.10645858008778653, "grad_norm": 0.549302875995636, "learning_rate": 9.965047338730685e-06, "loss": 0.2221, "step": 764 }, { "epoch": 0.1065979237790009, "grad_norm": 0.27659863233566284, "learning_rate": 9.964768604860033e-06, "loss": 0.1365, "step": 765 }, { "epoch": 0.10673726747021528, "grad_norm": 0.43043795228004456, "learning_rate": 9.964488767931144e-06, "loss": 0.1687, "step": 766 }, { "epoch": 0.10687661116142967, "grad_norm": 0.8068141937255859, "learning_rate": 9.964207828006191e-06, "loss": 0.1885, "step": 767 }, { "epoch": 0.10701595485264405, "grad_norm": 0.2733873426914215, "learning_rate": 9.963925785147595e-06, "loss": 0.1572, "step": 768 }, { "epoch": 0.10715529854385843, "grad_norm": 0.5498738884925842, "learning_rate": 9.963642639418018e-06, "loss": 0.1599, "step": 769 }, { "epoch": 0.1072946422350728, "grad_norm": 1.031173586845398, "learning_rate": 9.963358390880367e-06, "loss": 0.1842, "step": 770 }, { "epoch": 0.10743398592628718, "grad_norm": 0.9715685844421387, "learning_rate": 9.963073039597798e-06, "loss": 0.2085, "step": 771 }, { "epoch": 0.10757332961750157, "grad_norm": 0.48879629373550415, "learning_rate": 9.962786585633708e-06, "loss": 0.147, "step": 772 }, { "epoch": 0.10771267330871595, "grad_norm": 0.8945350646972656, "learning_rate": 9.962499029051742e-06, "loss": 0.1508, "step": 773 }, { "epoch": 0.10785201699993033, "grad_norm": 0.871840238571167, "learning_rate": 9.962210369915787e-06, "loss": 0.1737, "step": 774 }, { "epoch": 0.1079913606911447, "grad_norm": 0.5530329942703247, "learning_rate": 9.961920608289977e-06, "loss": 0.1589, "step": 775 }, { "epoch": 0.10813070438235908, "grad_norm": 0.2471843808889389, "learning_rate": 9.96162974423869e-06, "loss": 0.1447, "step": 776 }, { "epoch": 0.10827004807357347, "grad_norm": 0.8243958353996277, "learning_rate": 9.961337777826549e-06, "loss": 0.1736, "step": 777 }, { "epoch": 0.10840939176478785, "grad_norm": 0.3769035041332245, "learning_rate": 9.961044709118425e-06, "loss": 0.2019, "step": 778 }, { "epoch": 0.10854873545600223, "grad_norm": 0.6450084447860718, "learning_rate": 9.960750538179428e-06, "loss": 0.1594, "step": 779 }, { "epoch": 0.1086880791472166, "grad_norm": 0.3740319311618805, "learning_rate": 9.960455265074918e-06, "loss": 0.1463, "step": 780 }, { "epoch": 0.10882742283843098, "grad_norm": 0.551361620426178, "learning_rate": 9.960158889870495e-06, "loss": 0.1796, "step": 781 }, { "epoch": 0.10896676652964538, "grad_norm": 0.6747917532920837, "learning_rate": 9.959861412632011e-06, "loss": 0.1786, "step": 782 }, { "epoch": 0.10910611022085975, "grad_norm": 0.4724183678627014, "learning_rate": 9.959562833425557e-06, "loss": 0.1692, "step": 783 }, { "epoch": 0.10924545391207413, "grad_norm": 0.6728743314743042, "learning_rate": 9.95926315231747e-06, "loss": 0.1987, "step": 784 }, { "epoch": 0.10938479760328851, "grad_norm": 0.4144642651081085, "learning_rate": 9.958962369374333e-06, "loss": 0.1967, "step": 785 }, { "epoch": 0.10952414129450289, "grad_norm": 0.43048977851867676, "learning_rate": 9.95866048466297e-06, "loss": 0.1927, "step": 786 }, { "epoch": 0.10966348498571728, "grad_norm": 0.21530990302562714, "learning_rate": 9.958357498250457e-06, "loss": 0.132, "step": 787 }, { "epoch": 0.10980282867693165, "grad_norm": 0.36171358823776245, "learning_rate": 9.95805341020411e-06, "loss": 0.218, "step": 788 }, { "epoch": 0.10994217236814603, "grad_norm": 0.3451308012008667, "learning_rate": 9.957748220591487e-06, "loss": 0.1539, "step": 789 }, { "epoch": 0.11008151605936041, "grad_norm": 0.5660595893859863, "learning_rate": 9.9574419294804e-06, "loss": 0.2125, "step": 790 }, { "epoch": 0.11022085975057479, "grad_norm": 0.4115775525569916, "learning_rate": 9.957134536938894e-06, "loss": 0.2011, "step": 791 }, { "epoch": 0.11036020344178918, "grad_norm": 0.2676725685596466, "learning_rate": 9.956826043035268e-06, "loss": 0.1527, "step": 792 }, { "epoch": 0.11049954713300356, "grad_norm": 0.7023073434829712, "learning_rate": 9.956516447838063e-06, "loss": 0.2011, "step": 793 }, { "epoch": 0.11063889082421793, "grad_norm": 0.6314655542373657, "learning_rate": 9.95620575141606e-06, "loss": 0.1476, "step": 794 }, { "epoch": 0.11077823451543231, "grad_norm": 0.28849905729293823, "learning_rate": 9.955893953838293e-06, "loss": 0.151, "step": 795 }, { "epoch": 0.11091757820664669, "grad_norm": 0.40623652935028076, "learning_rate": 9.955581055174034e-06, "loss": 0.1708, "step": 796 }, { "epoch": 0.11105692189786108, "grad_norm": 0.3782295882701874, "learning_rate": 9.9552670554928e-06, "loss": 0.176, "step": 797 }, { "epoch": 0.11119626558907546, "grad_norm": 0.2528710961341858, "learning_rate": 9.954951954864361e-06, "loss": 0.1708, "step": 798 }, { "epoch": 0.11133560928028984, "grad_norm": 0.19012829661369324, "learning_rate": 9.954635753358718e-06, "loss": 0.1697, "step": 799 }, { "epoch": 0.11147495297150421, "grad_norm": 0.38184183835983276, "learning_rate": 9.954318451046128e-06, "loss": 0.1541, "step": 800 }, { "epoch": 0.11161429666271859, "grad_norm": 0.46285539865493774, "learning_rate": 9.954000047997088e-06, "loss": 0.1882, "step": 801 }, { "epoch": 0.11175364035393298, "grad_norm": 0.2723150849342346, "learning_rate": 9.953680544282338e-06, "loss": 0.166, "step": 802 }, { "epoch": 0.11189298404514736, "grad_norm": 0.2436690628528595, "learning_rate": 9.953359939972866e-06, "loss": 0.1472, "step": 803 }, { "epoch": 0.11203232773636174, "grad_norm": 0.3085128962993622, "learning_rate": 9.953038235139902e-06, "loss": 0.1647, "step": 804 }, { "epoch": 0.11217167142757611, "grad_norm": 0.2381303608417511, "learning_rate": 9.952715429854923e-06, "loss": 0.1626, "step": 805 }, { "epoch": 0.11231101511879049, "grad_norm": 0.18367858231067657, "learning_rate": 9.952391524189646e-06, "loss": 0.1514, "step": 806 }, { "epoch": 0.11245035881000488, "grad_norm": 0.30449002981185913, "learning_rate": 9.952066518216039e-06, "loss": 0.1713, "step": 807 }, { "epoch": 0.11258970250121926, "grad_norm": 0.2129005789756775, "learning_rate": 9.951740412006308e-06, "loss": 0.1419, "step": 808 }, { "epoch": 0.11272904619243364, "grad_norm": 0.3205735683441162, "learning_rate": 9.95141320563291e-06, "loss": 0.1771, "step": 809 }, { "epoch": 0.11286838988364802, "grad_norm": 0.2510245740413666, "learning_rate": 9.951084899168537e-06, "loss": 0.1522, "step": 810 }, { "epoch": 0.11300773357486239, "grad_norm": 0.20192880928516388, "learning_rate": 9.950755492686138e-06, "loss": 0.1224, "step": 811 }, { "epoch": 0.11314707726607678, "grad_norm": 0.8969875574111938, "learning_rate": 9.950424986258893e-06, "loss": 0.1761, "step": 812 }, { "epoch": 0.11328642095729116, "grad_norm": 0.9069533944129944, "learning_rate": 9.950093379960238e-06, "loss": 0.1521, "step": 813 }, { "epoch": 0.11342576464850554, "grad_norm": 0.343321293592453, "learning_rate": 9.949760673863846e-06, "loss": 0.1662, "step": 814 }, { "epoch": 0.11356510833971992, "grad_norm": 0.28168898820877075, "learning_rate": 9.949426868043638e-06, "loss": 0.1387, "step": 815 }, { "epoch": 0.1137044520309343, "grad_norm": 0.15967294573783875, "learning_rate": 9.949091962573775e-06, "loss": 0.1326, "step": 816 }, { "epoch": 0.11384379572214869, "grad_norm": 0.17493095993995667, "learning_rate": 9.94875595752867e-06, "loss": 0.1103, "step": 817 }, { "epoch": 0.11398313941336306, "grad_norm": 0.8041445016860962, "learning_rate": 9.948418852982973e-06, "loss": 0.193, "step": 818 }, { "epoch": 0.11412248310457744, "grad_norm": 0.3261888027191162, "learning_rate": 9.948080649011582e-06, "loss": 0.1683, "step": 819 }, { "epoch": 0.11426182679579182, "grad_norm": 0.18657444417476654, "learning_rate": 9.947741345689635e-06, "loss": 0.1573, "step": 820 }, { "epoch": 0.1144011704870062, "grad_norm": 0.4956912398338318, "learning_rate": 9.947400943092522e-06, "loss": 0.1912, "step": 821 }, { "epoch": 0.11454051417822059, "grad_norm": 0.80642169713974, "learning_rate": 9.94705944129587e-06, "loss": 0.2087, "step": 822 }, { "epoch": 0.11467985786943496, "grad_norm": 0.3093682527542114, "learning_rate": 9.946716840375552e-06, "loss": 0.2086, "step": 823 }, { "epoch": 0.11481920156064934, "grad_norm": 0.557414710521698, "learning_rate": 9.946373140407688e-06, "loss": 0.1778, "step": 824 }, { "epoch": 0.11495854525186372, "grad_norm": 0.3128092885017395, "learning_rate": 9.946028341468642e-06, "loss": 0.1495, "step": 825 }, { "epoch": 0.1150978889430781, "grad_norm": 0.27858051657676697, "learning_rate": 9.945682443635015e-06, "loss": 0.1399, "step": 826 }, { "epoch": 0.11523723263429249, "grad_norm": 0.17638470232486725, "learning_rate": 9.945335446983662e-06, "loss": 0.1243, "step": 827 }, { "epoch": 0.11537657632550687, "grad_norm": 0.36747339367866516, "learning_rate": 9.944987351591677e-06, "loss": 0.1394, "step": 828 }, { "epoch": 0.11551592001672124, "grad_norm": 0.40098536014556885, "learning_rate": 9.944638157536399e-06, "loss": 0.1558, "step": 829 }, { "epoch": 0.11565526370793562, "grad_norm": 0.4392043352127075, "learning_rate": 9.94428786489541e-06, "loss": 0.2409, "step": 830 }, { "epoch": 0.11579460739915, "grad_norm": 0.36302706599235535, "learning_rate": 9.943936473746539e-06, "loss": 0.173, "step": 831 }, { "epoch": 0.11593395109036439, "grad_norm": 0.46922430396080017, "learning_rate": 9.943583984167853e-06, "loss": 0.1716, "step": 832 }, { "epoch": 0.11607329478157877, "grad_norm": 0.3461764454841614, "learning_rate": 9.94323039623767e-06, "loss": 0.1624, "step": 833 }, { "epoch": 0.11621263847279314, "grad_norm": 0.37814584374427795, "learning_rate": 9.942875710034549e-06, "loss": 0.1385, "step": 834 }, { "epoch": 0.11635198216400752, "grad_norm": 0.6719272136688232, "learning_rate": 9.942519925637293e-06, "loss": 0.1667, "step": 835 }, { "epoch": 0.1164913258552219, "grad_norm": 0.18273763358592987, "learning_rate": 9.942163043124951e-06, "loss": 0.1311, "step": 836 }, { "epoch": 0.11663066954643629, "grad_norm": 0.37601611018180847, "learning_rate": 9.941805062576811e-06, "loss": 0.1476, "step": 837 }, { "epoch": 0.11677001323765067, "grad_norm": 0.19994185864925385, "learning_rate": 9.941445984072408e-06, "loss": 0.1574, "step": 838 }, { "epoch": 0.11690935692886505, "grad_norm": 0.23210544884204865, "learning_rate": 9.941085807691524e-06, "loss": 0.1503, "step": 839 }, { "epoch": 0.11704870062007942, "grad_norm": 0.37118300795555115, "learning_rate": 9.94072453351418e-06, "loss": 0.1478, "step": 840 }, { "epoch": 0.1171880443112938, "grad_norm": 0.24109064042568207, "learning_rate": 9.940362161620644e-06, "loss": 0.1501, "step": 841 }, { "epoch": 0.11732738800250819, "grad_norm": 0.35554203391075134, "learning_rate": 9.939998692091427e-06, "loss": 0.1448, "step": 842 }, { "epoch": 0.11746673169372257, "grad_norm": 0.23700891435146332, "learning_rate": 9.939634125007279e-06, "loss": 0.1299, "step": 843 }, { "epoch": 0.11760607538493695, "grad_norm": 0.2835671603679657, "learning_rate": 9.939268460449205e-06, "loss": 0.1409, "step": 844 }, { "epoch": 0.11774541907615133, "grad_norm": 0.4481561481952667, "learning_rate": 9.938901698498444e-06, "loss": 0.1608, "step": 845 }, { "epoch": 0.1178847627673657, "grad_norm": 0.7954100966453552, "learning_rate": 9.938533839236483e-06, "loss": 0.182, "step": 846 }, { "epoch": 0.1180241064585801, "grad_norm": 0.26421838998794556, "learning_rate": 9.938164882745051e-06, "loss": 0.1467, "step": 847 }, { "epoch": 0.11816345014979447, "grad_norm": 0.9120105504989624, "learning_rate": 9.937794829106122e-06, "loss": 0.2234, "step": 848 }, { "epoch": 0.11830279384100885, "grad_norm": 0.6031692624092102, "learning_rate": 9.937423678401913e-06, "loss": 0.1614, "step": 849 }, { "epoch": 0.11844213753222323, "grad_norm": 0.38588178157806396, "learning_rate": 9.937051430714888e-06, "loss": 0.1298, "step": 850 }, { "epoch": 0.1185814812234376, "grad_norm": 0.1802603006362915, "learning_rate": 9.936678086127749e-06, "loss": 0.1407, "step": 851 }, { "epoch": 0.118720824914652, "grad_norm": 0.4848536252975464, "learning_rate": 9.936303644723446e-06, "loss": 0.1355, "step": 852 }, { "epoch": 0.11886016860586637, "grad_norm": 0.8608602285385132, "learning_rate": 9.93592810658517e-06, "loss": 0.2207, "step": 853 }, { "epoch": 0.11899951229708075, "grad_norm": 0.26573795080184937, "learning_rate": 9.935551471796358e-06, "loss": 0.1303, "step": 854 }, { "epoch": 0.11913885598829513, "grad_norm": 0.39721110463142395, "learning_rate": 9.935173740440692e-06, "loss": 0.1443, "step": 855 }, { "epoch": 0.1192781996795095, "grad_norm": 0.47564101219177246, "learning_rate": 9.93479491260209e-06, "loss": 0.1737, "step": 856 }, { "epoch": 0.1194175433707239, "grad_norm": 0.6284323334693909, "learning_rate": 9.934414988364722e-06, "loss": 0.188, "step": 857 }, { "epoch": 0.11955688706193827, "grad_norm": 0.5147808790206909, "learning_rate": 9.934033967812998e-06, "loss": 0.1661, "step": 858 }, { "epoch": 0.11969623075315265, "grad_norm": 0.6852324604988098, "learning_rate": 9.933651851031573e-06, "loss": 0.1903, "step": 859 }, { "epoch": 0.11983557444436703, "grad_norm": 0.30405697226524353, "learning_rate": 9.933268638105345e-06, "loss": 0.1444, "step": 860 }, { "epoch": 0.11997491813558141, "grad_norm": 0.38763782382011414, "learning_rate": 9.932884329119452e-06, "loss": 0.1691, "step": 861 }, { "epoch": 0.1201142618267958, "grad_norm": 0.41387590765953064, "learning_rate": 9.932498924159281e-06, "loss": 0.1741, "step": 862 }, { "epoch": 0.12025360551801018, "grad_norm": 0.3295454680919647, "learning_rate": 9.93211242331046e-06, "loss": 0.1423, "step": 863 }, { "epoch": 0.12039294920922455, "grad_norm": 0.3993915021419525, "learning_rate": 9.931724826658861e-06, "loss": 0.2526, "step": 864 }, { "epoch": 0.12053229290043893, "grad_norm": 0.27351170778274536, "learning_rate": 9.931336134290598e-06, "loss": 0.1872, "step": 865 }, { "epoch": 0.12067163659165331, "grad_norm": 0.2748868763446808, "learning_rate": 9.930946346292032e-06, "loss": 0.1721, "step": 866 }, { "epoch": 0.1208109802828677, "grad_norm": 0.38461679220199585, "learning_rate": 9.930555462749762e-06, "loss": 0.1796, "step": 867 }, { "epoch": 0.12095032397408208, "grad_norm": 0.15496966242790222, "learning_rate": 9.930163483750636e-06, "loss": 0.1161, "step": 868 }, { "epoch": 0.12108966766529645, "grad_norm": 0.389727920293808, "learning_rate": 9.92977040938174e-06, "loss": 0.1775, "step": 869 }, { "epoch": 0.12122901135651083, "grad_norm": 0.2401205599308014, "learning_rate": 9.929376239730408e-06, "loss": 0.1695, "step": 870 }, { "epoch": 0.12136835504772521, "grad_norm": 0.2245350033044815, "learning_rate": 9.928980974884215e-06, "loss": 0.175, "step": 871 }, { "epoch": 0.1215076987389396, "grad_norm": 0.37961289286613464, "learning_rate": 9.928584614930981e-06, "loss": 0.1824, "step": 872 }, { "epoch": 0.12164704243015398, "grad_norm": 0.2539222538471222, "learning_rate": 9.928187159958764e-06, "loss": 0.1609, "step": 873 }, { "epoch": 0.12178638612136836, "grad_norm": 0.24904067814350128, "learning_rate": 9.927788610055875e-06, "loss": 0.2063, "step": 874 }, { "epoch": 0.12192572981258273, "grad_norm": 0.2732439637184143, "learning_rate": 9.92738896531086e-06, "loss": 0.158, "step": 875 }, { "epoch": 0.12206507350379711, "grad_norm": 0.3286319673061371, "learning_rate": 9.926988225812511e-06, "loss": 0.1948, "step": 876 }, { "epoch": 0.1222044171950115, "grad_norm": 0.11751377582550049, "learning_rate": 9.926586391649863e-06, "loss": 0.1025, "step": 877 }, { "epoch": 0.12234376088622588, "grad_norm": 0.12550054490566254, "learning_rate": 9.926183462912196e-06, "loss": 0.144, "step": 878 }, { "epoch": 0.12248310457744026, "grad_norm": 0.30522578954696655, "learning_rate": 9.925779439689028e-06, "loss": 0.1703, "step": 879 }, { "epoch": 0.12262244826865464, "grad_norm": 0.3352541923522949, "learning_rate": 9.925374322070126e-06, "loss": 0.1688, "step": 880 }, { "epoch": 0.12276179195986901, "grad_norm": 0.242270827293396, "learning_rate": 9.9249681101455e-06, "loss": 0.1613, "step": 881 }, { "epoch": 0.1229011356510834, "grad_norm": 0.23448246717453003, "learning_rate": 9.924560804005397e-06, "loss": 0.1308, "step": 882 }, { "epoch": 0.12304047934229778, "grad_norm": 0.40116140246391296, "learning_rate": 9.924152403740315e-06, "loss": 0.1753, "step": 883 }, { "epoch": 0.12317982303351216, "grad_norm": 0.2133597731590271, "learning_rate": 9.923742909440987e-06, "loss": 0.1715, "step": 884 }, { "epoch": 0.12331916672472654, "grad_norm": 0.4675905108451843, "learning_rate": 9.923332321198396e-06, "loss": 0.2014, "step": 885 }, { "epoch": 0.12345851041594091, "grad_norm": 0.4165858030319214, "learning_rate": 9.922920639103766e-06, "loss": 0.1906, "step": 886 }, { "epoch": 0.12359785410715529, "grad_norm": 0.31204283237457275, "learning_rate": 9.92250786324856e-06, "loss": 0.1989, "step": 887 }, { "epoch": 0.12373719779836968, "grad_norm": 0.4381273090839386, "learning_rate": 9.922093993724492e-06, "loss": 0.1623, "step": 888 }, { "epoch": 0.12387654148958406, "grad_norm": 0.1668417602777481, "learning_rate": 9.92167903062351e-06, "loss": 0.1511, "step": 889 }, { "epoch": 0.12401588518079844, "grad_norm": 0.2485543042421341, "learning_rate": 9.921262974037813e-06, "loss": 0.169, "step": 890 }, { "epoch": 0.12415522887201282, "grad_norm": 0.5236531496047974, "learning_rate": 9.920845824059836e-06, "loss": 0.148, "step": 891 }, { "epoch": 0.12429457256322719, "grad_norm": 0.3865138292312622, "learning_rate": 9.920427580782263e-06, "loss": 0.1714, "step": 892 }, { "epoch": 0.12443391625444158, "grad_norm": 0.24228382110595703, "learning_rate": 9.920008244298016e-06, "loss": 0.1589, "step": 893 }, { "epoch": 0.12457325994565596, "grad_norm": 0.3543763756752014, "learning_rate": 9.919587814700262e-06, "loss": 0.152, "step": 894 }, { "epoch": 0.12471260363687034, "grad_norm": 0.4127757251262665, "learning_rate": 9.919166292082414e-06, "loss": 0.172, "step": 895 }, { "epoch": 0.12485194732808472, "grad_norm": 0.4265235662460327, "learning_rate": 9.91874367653812e-06, "loss": 0.1658, "step": 896 }, { "epoch": 0.1249912910192991, "grad_norm": 0.7250176072120667, "learning_rate": 9.91831996816128e-06, "loss": 0.1888, "step": 897 }, { "epoch": 0.12513063471051347, "grad_norm": 0.28945279121398926, "learning_rate": 9.917895167046027e-06, "loss": 0.1682, "step": 898 }, { "epoch": 0.12526997840172785, "grad_norm": 0.40410977602005005, "learning_rate": 9.917469273286749e-06, "loss": 0.1627, "step": 899 }, { "epoch": 0.12540932209294225, "grad_norm": 0.48594576120376587, "learning_rate": 9.917042286978064e-06, "loss": 0.1434, "step": 900 }, { "epoch": 0.12554866578415663, "grad_norm": 1.0298365354537964, "learning_rate": 9.916614208214841e-06, "loss": 0.2005, "step": 901 }, { "epoch": 0.125688009475371, "grad_norm": 0.44825994968414307, "learning_rate": 9.91618503709219e-06, "loss": 0.1431, "step": 902 }, { "epoch": 0.1258273531665854, "grad_norm": 0.47254055738449097, "learning_rate": 9.915754773705461e-06, "loss": 0.2095, "step": 903 }, { "epoch": 0.12596669685779976, "grad_norm": 0.3819425702095032, "learning_rate": 9.915323418150252e-06, "loss": 0.1552, "step": 904 }, { "epoch": 0.12610604054901414, "grad_norm": 0.3585364520549774, "learning_rate": 9.914890970522397e-06, "loss": 0.1523, "step": 905 }, { "epoch": 0.12624538424022852, "grad_norm": 0.4230147898197174, "learning_rate": 9.914457430917977e-06, "loss": 0.1956, "step": 906 }, { "epoch": 0.1263847279314429, "grad_norm": 0.3201293349266052, "learning_rate": 9.914022799433315e-06, "loss": 0.1941, "step": 907 }, { "epoch": 0.12652407162265727, "grad_norm": 0.2755069136619568, "learning_rate": 9.913587076164976e-06, "loss": 0.1517, "step": 908 }, { "epoch": 0.12666341531387165, "grad_norm": 1.1163281202316284, "learning_rate": 9.913150261209767e-06, "loss": 0.1982, "step": 909 }, { "epoch": 0.12680275900508606, "grad_norm": 0.24398061633110046, "learning_rate": 9.91271235466474e-06, "loss": 0.1578, "step": 910 }, { "epoch": 0.12694210269630044, "grad_norm": 0.6096394062042236, "learning_rate": 9.912273356627188e-06, "loss": 0.1586, "step": 911 }, { "epoch": 0.1270814463875148, "grad_norm": 0.8095985651016235, "learning_rate": 9.911833267194643e-06, "loss": 0.2178, "step": 912 }, { "epoch": 0.1272207900787292, "grad_norm": 0.12132446467876434, "learning_rate": 9.911392086464886e-06, "loss": 0.1062, "step": 913 }, { "epoch": 0.12736013376994357, "grad_norm": 0.5594218969345093, "learning_rate": 9.910949814535936e-06, "loss": 0.1704, "step": 914 }, { "epoch": 0.12749947746115795, "grad_norm": 0.3971444368362427, "learning_rate": 9.910506451506056e-06, "loss": 0.1383, "step": 915 }, { "epoch": 0.12763882115237232, "grad_norm": 0.1618366688489914, "learning_rate": 9.910061997473753e-06, "loss": 0.1108, "step": 916 }, { "epoch": 0.1277781648435867, "grad_norm": 0.23516510426998138, "learning_rate": 9.909616452537772e-06, "loss": 0.14, "step": 917 }, { "epoch": 0.12791750853480108, "grad_norm": 0.17964723706245422, "learning_rate": 9.909169816797102e-06, "loss": 0.1401, "step": 918 }, { "epoch": 0.12805685222601546, "grad_norm": 0.33789902925491333, "learning_rate": 9.908722090350979e-06, "loss": 0.1633, "step": 919 }, { "epoch": 0.12819619591722986, "grad_norm": 0.2844981551170349, "learning_rate": 9.908273273298874e-06, "loss": 0.1465, "step": 920 }, { "epoch": 0.12833553960844424, "grad_norm": 0.29199209809303284, "learning_rate": 9.907823365740507e-06, "loss": 0.1563, "step": 921 }, { "epoch": 0.12847488329965862, "grad_norm": 0.26529091596603394, "learning_rate": 9.907372367775834e-06, "loss": 0.1439, "step": 922 }, { "epoch": 0.128614226990873, "grad_norm": 0.16844888031482697, "learning_rate": 9.906920279505058e-06, "loss": 0.1417, "step": 923 }, { "epoch": 0.12875357068208737, "grad_norm": 0.7735025882720947, "learning_rate": 9.906467101028625e-06, "loss": 0.2361, "step": 924 }, { "epoch": 0.12889291437330175, "grad_norm": 0.4671840965747833, "learning_rate": 9.906012832447219e-06, "loss": 0.2081, "step": 925 }, { "epoch": 0.12903225806451613, "grad_norm": 0.5544021725654602, "learning_rate": 9.905557473861764e-06, "loss": 0.1638, "step": 926 }, { "epoch": 0.1291716017557305, "grad_norm": 0.5191493630409241, "learning_rate": 9.905101025373438e-06, "loss": 0.1925, "step": 927 }, { "epoch": 0.12931094544694488, "grad_norm": 0.30720728635787964, "learning_rate": 9.904643487083648e-06, "loss": 0.1289, "step": 928 }, { "epoch": 0.12945028913815926, "grad_norm": 0.1470063030719757, "learning_rate": 9.90418485909405e-06, "loss": 0.1377, "step": 929 }, { "epoch": 0.12958963282937366, "grad_norm": 0.32302504777908325, "learning_rate": 9.903725141506539e-06, "loss": 0.1302, "step": 930 }, { "epoch": 0.12972897652058804, "grad_norm": 0.40204471349716187, "learning_rate": 9.903264334423258e-06, "loss": 0.1694, "step": 931 }, { "epoch": 0.12986832021180242, "grad_norm": 0.4243897497653961, "learning_rate": 9.902802437946584e-06, "loss": 0.1402, "step": 932 }, { "epoch": 0.1300076639030168, "grad_norm": 0.494113028049469, "learning_rate": 9.902339452179142e-06, "loss": 0.1822, "step": 933 }, { "epoch": 0.13014700759423117, "grad_norm": 1.187561273574829, "learning_rate": 9.901875377223796e-06, "loss": 0.2404, "step": 934 }, { "epoch": 0.13028635128544555, "grad_norm": 0.44202446937561035, "learning_rate": 9.901410213183653e-06, "loss": 0.1658, "step": 935 }, { "epoch": 0.13042569497665993, "grad_norm": 0.3686000406742096, "learning_rate": 9.900943960162061e-06, "loss": 0.251, "step": 936 }, { "epoch": 0.1305650386678743, "grad_norm": 0.40844789147377014, "learning_rate": 9.900476618262612e-06, "loss": 0.1469, "step": 937 }, { "epoch": 0.13070438235908868, "grad_norm": 0.31203964352607727, "learning_rate": 9.900008187589138e-06, "loss": 0.1561, "step": 938 }, { "epoch": 0.13084372605030306, "grad_norm": 0.5010191798210144, "learning_rate": 9.899538668245713e-06, "loss": 0.148, "step": 939 }, { "epoch": 0.13098306974151747, "grad_norm": 0.5023465156555176, "learning_rate": 9.899068060336656e-06, "loss": 0.1953, "step": 940 }, { "epoch": 0.13112241343273184, "grad_norm": 0.28516095876693726, "learning_rate": 9.898596363966523e-06, "loss": 0.1554, "step": 941 }, { "epoch": 0.13126175712394622, "grad_norm": 0.8086040616035461, "learning_rate": 9.898123579240115e-06, "loss": 0.1779, "step": 942 }, { "epoch": 0.1314011008151606, "grad_norm": 0.47388747334480286, "learning_rate": 9.897649706262474e-06, "loss": 0.1305, "step": 943 }, { "epoch": 0.13154044450637498, "grad_norm": 0.19371749460697174, "learning_rate": 9.897174745138883e-06, "loss": 0.1703, "step": 944 }, { "epoch": 0.13167978819758935, "grad_norm": 0.4778497517108917, "learning_rate": 9.896698695974866e-06, "loss": 0.139, "step": 945 }, { "epoch": 0.13181913188880373, "grad_norm": 0.40674251317977905, "learning_rate": 9.896221558876195e-06, "loss": 0.1604, "step": 946 }, { "epoch": 0.1319584755800181, "grad_norm": 0.5583987236022949, "learning_rate": 9.895743333948875e-06, "loss": 0.1651, "step": 947 }, { "epoch": 0.1320978192712325, "grad_norm": 0.21929463744163513, "learning_rate": 9.895264021299158e-06, "loss": 0.115, "step": 948 }, { "epoch": 0.13223716296244686, "grad_norm": 0.28525716066360474, "learning_rate": 9.894783621033538e-06, "loss": 0.1674, "step": 949 }, { "epoch": 0.13237650665366127, "grad_norm": 0.6307267546653748, "learning_rate": 9.894302133258747e-06, "loss": 0.2414, "step": 950 }, { "epoch": 0.13251585034487565, "grad_norm": 0.3515221178531647, "learning_rate": 9.893819558081759e-06, "loss": 0.1479, "step": 951 }, { "epoch": 0.13265519403609002, "grad_norm": 1.2109507322311401, "learning_rate": 9.893335895609792e-06, "loss": 0.2038, "step": 952 }, { "epoch": 0.1327945377273044, "grad_norm": 0.44239139556884766, "learning_rate": 9.892851145950308e-06, "loss": 0.2001, "step": 953 }, { "epoch": 0.13293388141851878, "grad_norm": 0.5626450181007385, "learning_rate": 9.892365309211005e-06, "loss": 0.1572, "step": 954 }, { "epoch": 0.13307322510973316, "grad_norm": 0.3033396899700165, "learning_rate": 9.891878385499825e-06, "loss": 0.1621, "step": 955 }, { "epoch": 0.13321256880094753, "grad_norm": 0.3078019917011261, "learning_rate": 9.891390374924949e-06, "loss": 0.1368, "step": 956 }, { "epoch": 0.1333519124921619, "grad_norm": 0.2090400606393814, "learning_rate": 9.890901277594806e-06, "loss": 0.1833, "step": 957 }, { "epoch": 0.1334912561833763, "grad_norm": 0.16588038206100464, "learning_rate": 9.89041109361806e-06, "loss": 0.1226, "step": 958 }, { "epoch": 0.13363059987459067, "grad_norm": 0.3323328197002411, "learning_rate": 9.889919823103618e-06, "loss": 0.1603, "step": 959 }, { "epoch": 0.13376994356580507, "grad_norm": 0.3620232343673706, "learning_rate": 9.889427466160633e-06, "loss": 0.2114, "step": 960 }, { "epoch": 0.13390928725701945, "grad_norm": 0.3904270827770233, "learning_rate": 9.888934022898488e-06, "loss": 0.1652, "step": 961 }, { "epoch": 0.13404863094823383, "grad_norm": 0.22664201259613037, "learning_rate": 9.888439493426824e-06, "loss": 0.1584, "step": 962 }, { "epoch": 0.1341879746394482, "grad_norm": 0.6369572281837463, "learning_rate": 9.887943877855505e-06, "loss": 0.161, "step": 963 }, { "epoch": 0.13432731833066258, "grad_norm": 0.31803277134895325, "learning_rate": 9.887447176294653e-06, "loss": 0.1539, "step": 964 }, { "epoch": 0.13446666202187696, "grad_norm": 0.22399258613586426, "learning_rate": 9.88694938885462e-06, "loss": 0.1432, "step": 965 }, { "epoch": 0.13460600571309134, "grad_norm": 0.31422632932662964, "learning_rate": 9.886450515646005e-06, "loss": 0.1833, "step": 966 }, { "epoch": 0.13474534940430571, "grad_norm": 0.3049098551273346, "learning_rate": 9.885950556779644e-06, "loss": 0.1636, "step": 967 }, { "epoch": 0.1348846930955201, "grad_norm": 0.22110894322395325, "learning_rate": 9.885449512366617e-06, "loss": 0.1723, "step": 968 }, { "epoch": 0.13502403678673447, "grad_norm": 0.19928312301635742, "learning_rate": 9.884947382518247e-06, "loss": 0.1687, "step": 969 }, { "epoch": 0.13516338047794887, "grad_norm": 0.43535545468330383, "learning_rate": 9.88444416734609e-06, "loss": 0.1536, "step": 970 }, { "epoch": 0.13530272416916325, "grad_norm": 0.3480134904384613, "learning_rate": 9.883939866961956e-06, "loss": 0.1704, "step": 971 }, { "epoch": 0.13544206786037763, "grad_norm": 0.17556802928447723, "learning_rate": 9.883434481477885e-06, "loss": 0.1719, "step": 972 }, { "epoch": 0.135581411551592, "grad_norm": 0.27539122104644775, "learning_rate": 9.882928011006163e-06, "loss": 0.1575, "step": 973 }, { "epoch": 0.13572075524280638, "grad_norm": 0.5618157386779785, "learning_rate": 9.882420455659316e-06, "loss": 0.1779, "step": 974 }, { "epoch": 0.13586009893402076, "grad_norm": 0.25760599970817566, "learning_rate": 9.881911815550111e-06, "loss": 0.1604, "step": 975 }, { "epoch": 0.13599944262523514, "grad_norm": 0.2657250463962555, "learning_rate": 9.881402090791556e-06, "loss": 0.1591, "step": 976 }, { "epoch": 0.13613878631644952, "grad_norm": 0.3206520080566406, "learning_rate": 9.880891281496901e-06, "loss": 0.1879, "step": 977 }, { "epoch": 0.1362781300076639, "grad_norm": 0.19306233525276184, "learning_rate": 9.880379387779637e-06, "loss": 0.1325, "step": 978 }, { "epoch": 0.13641747369887827, "grad_norm": 0.24849435687065125, "learning_rate": 9.879866409753493e-06, "loss": 0.1723, "step": 979 }, { "epoch": 0.13655681739009268, "grad_norm": 0.27842363715171814, "learning_rate": 9.879352347532442e-06, "loss": 0.1581, "step": 980 }, { "epoch": 0.13669616108130705, "grad_norm": 0.2633057236671448, "learning_rate": 9.878837201230697e-06, "loss": 0.1346, "step": 981 }, { "epoch": 0.13683550477252143, "grad_norm": 0.22316665947437286, "learning_rate": 9.878320970962712e-06, "loss": 0.2007, "step": 982 }, { "epoch": 0.1369748484637358, "grad_norm": 0.16968286037445068, "learning_rate": 9.877803656843182e-06, "loss": 0.1394, "step": 983 }, { "epoch": 0.1371141921549502, "grad_norm": 0.21896810829639435, "learning_rate": 9.877285258987039e-06, "loss": 0.1985, "step": 984 }, { "epoch": 0.13725353584616456, "grad_norm": 0.2982581853866577, "learning_rate": 9.876765777509463e-06, "loss": 0.1454, "step": 985 }, { "epoch": 0.13739287953737894, "grad_norm": 0.2709868550300598, "learning_rate": 9.87624521252587e-06, "loss": 0.1582, "step": 986 }, { "epoch": 0.13753222322859332, "grad_norm": 0.33929914236068726, "learning_rate": 9.875723564151918e-06, "loss": 0.1971, "step": 987 }, { "epoch": 0.1376715669198077, "grad_norm": 0.27367138862609863, "learning_rate": 9.875200832503505e-06, "loss": 0.1535, "step": 988 }, { "epoch": 0.13781091061102207, "grad_norm": 0.2716500759124756, "learning_rate": 9.874677017696769e-06, "loss": 0.1597, "step": 989 }, { "epoch": 0.13795025430223645, "grad_norm": 0.25709068775177, "learning_rate": 9.87415211984809e-06, "loss": 0.1469, "step": 990 }, { "epoch": 0.13808959799345086, "grad_norm": 0.5261138081550598, "learning_rate": 9.873626139074088e-06, "loss": 0.1909, "step": 991 }, { "epoch": 0.13822894168466524, "grad_norm": 0.34576788544654846, "learning_rate": 9.873099075491626e-06, "loss": 0.1559, "step": 992 }, { "epoch": 0.1383682853758796, "grad_norm": 0.30247634649276733, "learning_rate": 9.872570929217804e-06, "loss": 0.1337, "step": 993 }, { "epoch": 0.138507629067094, "grad_norm": 0.2992444336414337, "learning_rate": 9.872041700369965e-06, "loss": 0.1706, "step": 994 }, { "epoch": 0.13864697275830837, "grad_norm": 0.28277093172073364, "learning_rate": 9.871511389065689e-06, "loss": 0.1671, "step": 995 }, { "epoch": 0.13878631644952275, "grad_norm": 0.5637606978416443, "learning_rate": 9.870979995422803e-06, "loss": 0.1676, "step": 996 }, { "epoch": 0.13892566014073712, "grad_norm": 0.1127137765288353, "learning_rate": 9.870447519559366e-06, "loss": 0.1324, "step": 997 }, { "epoch": 0.1390650038319515, "grad_norm": 0.2526048719882965, "learning_rate": 9.869913961593685e-06, "loss": 0.1235, "step": 998 }, { "epoch": 0.13920434752316588, "grad_norm": 0.48341768980026245, "learning_rate": 9.869379321644306e-06, "loss": 0.1867, "step": 999 }, { "epoch": 0.13934369121438026, "grad_norm": 0.5609840154647827, "learning_rate": 9.868843599830009e-06, "loss": 0.2091, "step": 1000 }, { "epoch": 0.13948303490559466, "grad_norm": 0.24299801886081696, "learning_rate": 9.868306796269822e-06, "loss": 0.1569, "step": 1001 }, { "epoch": 0.13962237859680904, "grad_norm": 0.3140122890472412, "learning_rate": 9.86776891108301e-06, "loss": 0.1518, "step": 1002 }, { "epoch": 0.13976172228802342, "grad_norm": 0.6477231979370117, "learning_rate": 9.86722994438908e-06, "loss": 0.2129, "step": 1003 }, { "epoch": 0.1399010659792378, "grad_norm": 0.1750383973121643, "learning_rate": 9.866689896307778e-06, "loss": 0.1287, "step": 1004 }, { "epoch": 0.14004040967045217, "grad_norm": 0.19919858872890472, "learning_rate": 9.866148766959087e-06, "loss": 0.1307, "step": 1005 }, { "epoch": 0.14017975336166655, "grad_norm": 0.15444974601268768, "learning_rate": 9.865606556463239e-06, "loss": 0.1316, "step": 1006 }, { "epoch": 0.14031909705288093, "grad_norm": 0.17677688598632812, "learning_rate": 9.865063264940695e-06, "loss": 0.1289, "step": 1007 }, { "epoch": 0.1404584407440953, "grad_norm": 0.33839157223701477, "learning_rate": 9.864518892512167e-06, "loss": 0.1446, "step": 1008 }, { "epoch": 0.14059778443530968, "grad_norm": 0.22510011494159698, "learning_rate": 9.863973439298597e-06, "loss": 0.1727, "step": 1009 }, { "epoch": 0.14073712812652406, "grad_norm": 0.2355165183544159, "learning_rate": 9.863426905421179e-06, "loss": 0.148, "step": 1010 }, { "epoch": 0.14087647181773846, "grad_norm": 0.2101445496082306, "learning_rate": 9.862879291001334e-06, "loss": 0.1167, "step": 1011 }, { "epoch": 0.14101581550895284, "grad_norm": 0.3166971504688263, "learning_rate": 9.862330596160732e-06, "loss": 0.2312, "step": 1012 }, { "epoch": 0.14115515920016722, "grad_norm": 0.15310271084308624, "learning_rate": 9.861780821021282e-06, "loss": 0.1464, "step": 1013 }, { "epoch": 0.1412945028913816, "grad_norm": 0.2953778803348541, "learning_rate": 9.861229965705129e-06, "loss": 0.1218, "step": 1014 }, { "epoch": 0.14143384658259597, "grad_norm": 0.3416615128517151, "learning_rate": 9.86067803033466e-06, "loss": 0.1919, "step": 1015 }, { "epoch": 0.14157319027381035, "grad_norm": 0.12646622955799103, "learning_rate": 9.860125015032506e-06, "loss": 0.1369, "step": 1016 }, { "epoch": 0.14171253396502473, "grad_norm": 0.6032209396362305, "learning_rate": 9.859570919921533e-06, "loss": 0.1687, "step": 1017 }, { "epoch": 0.1418518776562391, "grad_norm": 0.20254184305667877, "learning_rate": 9.859015745124844e-06, "loss": 0.1356, "step": 1018 }, { "epoch": 0.14199122134745348, "grad_norm": 0.3411158621311188, "learning_rate": 9.858459490765792e-06, "loss": 0.1497, "step": 1019 }, { "epoch": 0.14213056503866786, "grad_norm": 0.2697802484035492, "learning_rate": 9.857902156967961e-06, "loss": 0.1563, "step": 1020 }, { "epoch": 0.14226990872988227, "grad_norm": 0.23170235753059387, "learning_rate": 9.857343743855178e-06, "loss": 0.1375, "step": 1021 }, { "epoch": 0.14240925242109664, "grad_norm": 0.364153653383255, "learning_rate": 9.856784251551512e-06, "loss": 0.1634, "step": 1022 }, { "epoch": 0.14254859611231102, "grad_norm": 0.2806490659713745, "learning_rate": 9.856223680181267e-06, "loss": 0.1417, "step": 1023 }, { "epoch": 0.1426879398035254, "grad_norm": 0.5387818813323975, "learning_rate": 9.85566202986899e-06, "loss": 0.2307, "step": 1024 }, { "epoch": 0.14282728349473978, "grad_norm": 0.3346903920173645, "learning_rate": 9.855099300739463e-06, "loss": 0.1713, "step": 1025 }, { "epoch": 0.14296662718595415, "grad_norm": 0.24605032801628113, "learning_rate": 9.854535492917718e-06, "loss": 0.1386, "step": 1026 }, { "epoch": 0.14310597087716853, "grad_norm": 0.34629812836647034, "learning_rate": 9.853970606529018e-06, "loss": 0.1147, "step": 1027 }, { "epoch": 0.1432453145683829, "grad_norm": 0.4120827913284302, "learning_rate": 9.853404641698866e-06, "loss": 0.1758, "step": 1028 }, { "epoch": 0.1433846582595973, "grad_norm": 0.45639848709106445, "learning_rate": 9.85283759855301e-06, "loss": 0.1643, "step": 1029 }, { "epoch": 0.14352400195081166, "grad_norm": 0.19637399911880493, "learning_rate": 9.852269477217428e-06, "loss": 0.144, "step": 1030 }, { "epoch": 0.14366334564202607, "grad_norm": 0.23119448125362396, "learning_rate": 9.85170027781835e-06, "loss": 0.164, "step": 1031 }, { "epoch": 0.14380268933324045, "grad_norm": 0.326837956905365, "learning_rate": 9.851130000482236e-06, "loss": 0.1312, "step": 1032 }, { "epoch": 0.14394203302445482, "grad_norm": 0.5583389401435852, "learning_rate": 9.85055864533579e-06, "loss": 0.1568, "step": 1033 }, { "epoch": 0.1440813767156692, "grad_norm": 0.3262408971786499, "learning_rate": 9.849986212505952e-06, "loss": 0.1879, "step": 1034 }, { "epoch": 0.14422072040688358, "grad_norm": 0.3253825604915619, "learning_rate": 9.849412702119905e-06, "loss": 0.1587, "step": 1035 }, { "epoch": 0.14436006409809796, "grad_norm": 0.19238020479679108, "learning_rate": 9.848838114305069e-06, "loss": 0.1659, "step": 1036 }, { "epoch": 0.14449940778931233, "grad_norm": 0.3065122663974762, "learning_rate": 9.848262449189105e-06, "loss": 0.1615, "step": 1037 }, { "epoch": 0.1446387514805267, "grad_norm": 0.516493558883667, "learning_rate": 9.847685706899913e-06, "loss": 0.164, "step": 1038 }, { "epoch": 0.1447780951717411, "grad_norm": 0.6980692744255066, "learning_rate": 9.84710788756563e-06, "loss": 0.1574, "step": 1039 }, { "epoch": 0.14491743886295547, "grad_norm": 0.41812431812286377, "learning_rate": 9.846528991314638e-06, "loss": 0.1385, "step": 1040 }, { "epoch": 0.14505678255416987, "grad_norm": 0.3032841086387634, "learning_rate": 9.845949018275551e-06, "loss": 0.159, "step": 1041 }, { "epoch": 0.14519612624538425, "grad_norm": 0.3564783036708832, "learning_rate": 9.845367968577229e-06, "loss": 0.168, "step": 1042 }, { "epoch": 0.14533546993659863, "grad_norm": 0.5821123123168945, "learning_rate": 9.844785842348764e-06, "loss": 0.1581, "step": 1043 }, { "epoch": 0.145474813627813, "grad_norm": 0.9370611906051636, "learning_rate": 9.844202639719492e-06, "loss": 0.2435, "step": 1044 }, { "epoch": 0.14561415731902738, "grad_norm": 0.22352561354637146, "learning_rate": 9.84361836081899e-06, "loss": 0.1554, "step": 1045 }, { "epoch": 0.14575350101024176, "grad_norm": 0.43473148345947266, "learning_rate": 9.84303300577707e-06, "loss": 0.1609, "step": 1046 }, { "epoch": 0.14589284470145614, "grad_norm": 0.3235587775707245, "learning_rate": 9.842446574723786e-06, "loss": 0.1498, "step": 1047 }, { "epoch": 0.14603218839267051, "grad_norm": 0.3364776074886322, "learning_rate": 9.841859067789425e-06, "loss": 0.1762, "step": 1048 }, { "epoch": 0.1461715320838849, "grad_norm": 0.45861896872520447, "learning_rate": 9.841270485104522e-06, "loss": 0.175, "step": 1049 }, { "epoch": 0.14631087577509927, "grad_norm": 0.2554834485054016, "learning_rate": 9.840680826799845e-06, "loss": 0.1182, "step": 1050 }, { "epoch": 0.14645021946631367, "grad_norm": 0.41988685727119446, "learning_rate": 9.840090093006403e-06, "loss": 0.1711, "step": 1051 }, { "epoch": 0.14658956315752805, "grad_norm": 0.4416755735874176, "learning_rate": 9.839498283855444e-06, "loss": 0.1599, "step": 1052 }, { "epoch": 0.14672890684874243, "grad_norm": 0.27413859963417053, "learning_rate": 9.838905399478453e-06, "loss": 0.1363, "step": 1053 }, { "epoch": 0.1468682505399568, "grad_norm": 0.20786415040493011, "learning_rate": 9.838311440007159e-06, "loss": 0.1249, "step": 1054 }, { "epoch": 0.14700759423117118, "grad_norm": 0.5078989267349243, "learning_rate": 9.83771640557352e-06, "loss": 0.1586, "step": 1055 }, { "epoch": 0.14714693792238556, "grad_norm": 0.7565958499908447, "learning_rate": 9.837120296309744e-06, "loss": 0.2786, "step": 1056 }, { "epoch": 0.14728628161359994, "grad_norm": 0.19463472068309784, "learning_rate": 9.836523112348271e-06, "loss": 0.165, "step": 1057 }, { "epoch": 0.14742562530481432, "grad_norm": 0.38280853629112244, "learning_rate": 9.835924853821783e-06, "loss": 0.1533, "step": 1058 }, { "epoch": 0.1475649689960287, "grad_norm": 0.7889944911003113, "learning_rate": 9.8353255208632e-06, "loss": 0.1794, "step": 1059 }, { "epoch": 0.14770431268724307, "grad_norm": 0.19664636254310608, "learning_rate": 9.834725113605676e-06, "loss": 0.1607, "step": 1060 }, { "epoch": 0.14784365637845748, "grad_norm": 0.2786829471588135, "learning_rate": 9.83412363218261e-06, "loss": 0.1257, "step": 1061 }, { "epoch": 0.14798300006967186, "grad_norm": 0.382292240858078, "learning_rate": 9.833521076727638e-06, "loss": 0.1506, "step": 1062 }, { "epoch": 0.14812234376088623, "grad_norm": 0.21993573009967804, "learning_rate": 9.832917447374637e-06, "loss": 0.1507, "step": 1063 }, { "epoch": 0.1482616874521006, "grad_norm": 0.2855024039745331, "learning_rate": 9.832312744257715e-06, "loss": 0.1432, "step": 1064 }, { "epoch": 0.148401031143315, "grad_norm": 0.3274870216846466, "learning_rate": 9.831706967511223e-06, "loss": 0.1702, "step": 1065 }, { "epoch": 0.14854037483452937, "grad_norm": 0.32278749346733093, "learning_rate": 9.831100117269755e-06, "loss": 0.1923, "step": 1066 }, { "epoch": 0.14867971852574374, "grad_norm": 0.27855682373046875, "learning_rate": 9.830492193668135e-06, "loss": 0.1308, "step": 1067 }, { "epoch": 0.14881906221695812, "grad_norm": 0.22692076861858368, "learning_rate": 9.829883196841433e-06, "loss": 0.1599, "step": 1068 }, { "epoch": 0.1489584059081725, "grad_norm": 0.1812678873538971, "learning_rate": 9.829273126924952e-06, "loss": 0.1393, "step": 1069 }, { "epoch": 0.14909774959938688, "grad_norm": 0.40220072865486145, "learning_rate": 9.828661984054238e-06, "loss": 0.1588, "step": 1070 }, { "epoch": 0.14923709329060128, "grad_norm": 0.4371553063392639, "learning_rate": 9.82804976836507e-06, "loss": 0.1559, "step": 1071 }, { "epoch": 0.14937643698181566, "grad_norm": 0.15248432755470276, "learning_rate": 9.827436479993468e-06, "loss": 0.126, "step": 1072 }, { "epoch": 0.14951578067303004, "grad_norm": 0.18034546077251434, "learning_rate": 9.826822119075694e-06, "loss": 0.115, "step": 1073 }, { "epoch": 0.1496551243642444, "grad_norm": 0.597968578338623, "learning_rate": 9.826206685748242e-06, "loss": 0.1664, "step": 1074 }, { "epoch": 0.1497944680554588, "grad_norm": 0.3815983533859253, "learning_rate": 9.825590180147852e-06, "loss": 0.1263, "step": 1075 }, { "epoch": 0.14993381174667317, "grad_norm": 0.32972386479377747, "learning_rate": 9.82497260241149e-06, "loss": 0.1334, "step": 1076 }, { "epoch": 0.15007315543788755, "grad_norm": 0.2547137141227722, "learning_rate": 9.824353952676375e-06, "loss": 0.1674, "step": 1077 }, { "epoch": 0.15021249912910192, "grad_norm": 0.22880646586418152, "learning_rate": 9.823734231079953e-06, "loss": 0.1233, "step": 1078 }, { "epoch": 0.1503518428203163, "grad_norm": 0.49729329347610474, "learning_rate": 9.823113437759912e-06, "loss": 0.1743, "step": 1079 }, { "epoch": 0.15049118651153068, "grad_norm": 0.27257922291755676, "learning_rate": 9.822491572854178e-06, "loss": 0.1813, "step": 1080 }, { "epoch": 0.15063053020274508, "grad_norm": 0.18854980170726776, "learning_rate": 9.821868636500917e-06, "loss": 0.1799, "step": 1081 }, { "epoch": 0.15076987389395946, "grad_norm": 0.23174896836280823, "learning_rate": 9.82124462883853e-06, "loss": 0.159, "step": 1082 }, { "epoch": 0.15090921758517384, "grad_norm": 0.33370867371559143, "learning_rate": 9.820619550005656e-06, "loss": 0.1821, "step": 1083 }, { "epoch": 0.15104856127638822, "grad_norm": 0.27798181772232056, "learning_rate": 9.819993400141176e-06, "loss": 0.142, "step": 1084 }, { "epoch": 0.1511879049676026, "grad_norm": 0.46113187074661255, "learning_rate": 9.819366179384204e-06, "loss": 0.1629, "step": 1085 }, { "epoch": 0.15132724865881697, "grad_norm": 0.18750672042369843, "learning_rate": 9.818737887874097e-06, "loss": 0.1367, "step": 1086 }, { "epoch": 0.15146659235003135, "grad_norm": 0.19340980052947998, "learning_rate": 9.818108525750442e-06, "loss": 0.1558, "step": 1087 }, { "epoch": 0.15160593604124573, "grad_norm": 0.25383418798446655, "learning_rate": 9.817478093153074e-06, "loss": 0.129, "step": 1088 }, { "epoch": 0.1517452797324601, "grad_norm": 0.3613433837890625, "learning_rate": 9.816846590222058e-06, "loss": 0.1676, "step": 1089 }, { "epoch": 0.15188462342367448, "grad_norm": 0.1779477298259735, "learning_rate": 9.8162140170977e-06, "loss": 0.1387, "step": 1090 }, { "epoch": 0.15202396711488889, "grad_norm": 0.17910508811473846, "learning_rate": 9.815580373920543e-06, "loss": 0.1489, "step": 1091 }, { "epoch": 0.15216331080610326, "grad_norm": 0.20827417075634003, "learning_rate": 9.81494566083137e-06, "loss": 0.1604, "step": 1092 }, { "epoch": 0.15230265449731764, "grad_norm": 0.2543523907661438, "learning_rate": 9.814309877971195e-06, "loss": 0.1632, "step": 1093 }, { "epoch": 0.15244199818853202, "grad_norm": 0.28122735023498535, "learning_rate": 9.81367302548128e-06, "loss": 0.1537, "step": 1094 }, { "epoch": 0.1525813418797464, "grad_norm": 0.17046013474464417, "learning_rate": 9.813035103503116e-06, "loss": 0.1606, "step": 1095 }, { "epoch": 0.15272068557096077, "grad_norm": 0.26000475883483887, "learning_rate": 9.812396112178437e-06, "loss": 0.1577, "step": 1096 }, { "epoch": 0.15286002926217515, "grad_norm": 0.27920863032341003, "learning_rate": 9.811756051649209e-06, "loss": 0.1776, "step": 1097 }, { "epoch": 0.15299937295338953, "grad_norm": 0.33189690113067627, "learning_rate": 9.811114922057642e-06, "loss": 0.18, "step": 1098 }, { "epoch": 0.1531387166446039, "grad_norm": 0.22311322391033173, "learning_rate": 9.810472723546178e-06, "loss": 0.172, "step": 1099 }, { "epoch": 0.15327806033581828, "grad_norm": 0.23011435568332672, "learning_rate": 9.8098294562575e-06, "loss": 0.1384, "step": 1100 }, { "epoch": 0.1534174040270327, "grad_norm": 0.271467924118042, "learning_rate": 9.809185120334528e-06, "loss": 0.1718, "step": 1101 }, { "epoch": 0.15355674771824707, "grad_norm": 0.26454079151153564, "learning_rate": 9.808539715920415e-06, "loss": 0.178, "step": 1102 }, { "epoch": 0.15369609140946144, "grad_norm": 0.1906944364309311, "learning_rate": 9.807893243158562e-06, "loss": 0.1521, "step": 1103 }, { "epoch": 0.15383543510067582, "grad_norm": 0.2249043583869934, "learning_rate": 9.807245702192593e-06, "loss": 0.1557, "step": 1104 }, { "epoch": 0.1539747787918902, "grad_norm": 0.1765286773443222, "learning_rate": 9.80659709316638e-06, "loss": 0.1469, "step": 1105 }, { "epoch": 0.15411412248310458, "grad_norm": 0.17803500592708588, "learning_rate": 9.805947416224034e-06, "loss": 0.1453, "step": 1106 }, { "epoch": 0.15425346617431895, "grad_norm": 0.37202784419059753, "learning_rate": 9.80529667150989e-06, "loss": 0.1502, "step": 1107 }, { "epoch": 0.15439280986553333, "grad_norm": 0.29637032747268677, "learning_rate": 9.804644859168534e-06, "loss": 0.1108, "step": 1108 }, { "epoch": 0.1545321535567477, "grad_norm": 0.19074931740760803, "learning_rate": 9.80399197934478e-06, "loss": 0.1487, "step": 1109 }, { "epoch": 0.1546714972479621, "grad_norm": 0.21795891225337982, "learning_rate": 9.803338032183686e-06, "loss": 0.1182, "step": 1110 }, { "epoch": 0.1548108409391765, "grad_norm": 0.2720772325992584, "learning_rate": 9.802683017830544e-06, "loss": 0.1372, "step": 1111 }, { "epoch": 0.15495018463039087, "grad_norm": 0.27828067541122437, "learning_rate": 9.802026936430883e-06, "loss": 0.1572, "step": 1112 }, { "epoch": 0.15508952832160525, "grad_norm": 0.23876835405826569, "learning_rate": 9.801369788130468e-06, "loss": 0.1435, "step": 1113 }, { "epoch": 0.15522887201281962, "grad_norm": 0.29007941484451294, "learning_rate": 9.800711573075303e-06, "loss": 0.1691, "step": 1114 }, { "epoch": 0.155368215704034, "grad_norm": 0.1444951891899109, "learning_rate": 9.80005229141163e-06, "loss": 0.1699, "step": 1115 }, { "epoch": 0.15550755939524838, "grad_norm": 0.15327656269073486, "learning_rate": 9.799391943285923e-06, "loss": 0.1539, "step": 1116 }, { "epoch": 0.15564690308646276, "grad_norm": 0.21740415692329407, "learning_rate": 9.798730528844899e-06, "loss": 0.1742, "step": 1117 }, { "epoch": 0.15578624677767713, "grad_norm": 0.1892087161540985, "learning_rate": 9.79806804823551e-06, "loss": 0.1601, "step": 1118 }, { "epoch": 0.1559255904688915, "grad_norm": 0.30692532658576965, "learning_rate": 9.79740450160494e-06, "loss": 0.1697, "step": 1119 }, { "epoch": 0.1560649341601059, "grad_norm": 0.15670430660247803, "learning_rate": 9.796739889100617e-06, "loss": 0.1177, "step": 1120 }, { "epoch": 0.1562042778513203, "grad_norm": 0.3263852596282959, "learning_rate": 9.796074210870204e-06, "loss": 0.1984, "step": 1121 }, { "epoch": 0.15634362154253467, "grad_norm": 0.35354188084602356, "learning_rate": 9.795407467061596e-06, "loss": 0.179, "step": 1122 }, { "epoch": 0.15648296523374905, "grad_norm": 0.2896989583969116, "learning_rate": 9.794739657822929e-06, "loss": 0.164, "step": 1123 }, { "epoch": 0.15662230892496343, "grad_norm": 0.34937676787376404, "learning_rate": 9.794070783302576e-06, "loss": 0.1443, "step": 1124 }, { "epoch": 0.1567616526161778, "grad_norm": 0.20753681659698486, "learning_rate": 9.793400843649146e-06, "loss": 0.1427, "step": 1125 }, { "epoch": 0.15690099630739218, "grad_norm": 0.3672422766685486, "learning_rate": 9.792729839011484e-06, "loss": 0.1712, "step": 1126 }, { "epoch": 0.15704033999860656, "grad_norm": 0.3024674355983734, "learning_rate": 9.792057769538672e-06, "loss": 0.2072, "step": 1127 }, { "epoch": 0.15717968368982094, "grad_norm": 0.177614226937294, "learning_rate": 9.791384635380028e-06, "loss": 0.1402, "step": 1128 }, { "epoch": 0.15731902738103531, "grad_norm": 0.4005560874938965, "learning_rate": 9.790710436685105e-06, "loss": 0.1734, "step": 1129 }, { "epoch": 0.1574583710722497, "grad_norm": 0.18818476796150208, "learning_rate": 9.790035173603699e-06, "loss": 0.151, "step": 1130 }, { "epoch": 0.1575977147634641, "grad_norm": 0.26390793919563293, "learning_rate": 9.789358846285835e-06, "loss": 0.1612, "step": 1131 }, { "epoch": 0.15773705845467847, "grad_norm": 0.6184574365615845, "learning_rate": 9.788681454881778e-06, "loss": 0.2025, "step": 1132 }, { "epoch": 0.15787640214589285, "grad_norm": 0.29473933577537537, "learning_rate": 9.78800299954203e-06, "loss": 0.1756, "step": 1133 }, { "epoch": 0.15801574583710723, "grad_norm": 0.15973049402236938, "learning_rate": 9.787323480417328e-06, "loss": 0.13, "step": 1134 }, { "epoch": 0.1581550895283216, "grad_norm": 0.16656267642974854, "learning_rate": 9.786642897658645e-06, "loss": 0.139, "step": 1135 }, { "epoch": 0.15829443321953598, "grad_norm": 0.4086163640022278, "learning_rate": 9.78596125141719e-06, "loss": 0.1644, "step": 1136 }, { "epoch": 0.15843377691075036, "grad_norm": 0.5475366115570068, "learning_rate": 9.785278541844409e-06, "loss": 0.2063, "step": 1137 }, { "epoch": 0.15857312060196474, "grad_norm": 0.32481586933135986, "learning_rate": 9.784594769091989e-06, "loss": 0.1386, "step": 1138 }, { "epoch": 0.15871246429317912, "grad_norm": 0.1911095827817917, "learning_rate": 9.783909933311844e-06, "loss": 0.1381, "step": 1139 }, { "epoch": 0.1588518079843935, "grad_norm": 0.29105064272880554, "learning_rate": 9.78322403465613e-06, "loss": 0.1959, "step": 1140 }, { "epoch": 0.1589911516756079, "grad_norm": 0.4263208508491516, "learning_rate": 9.782537073277238e-06, "loss": 0.2007, "step": 1141 }, { "epoch": 0.15913049536682228, "grad_norm": 0.23344963788986206, "learning_rate": 9.781849049327796e-06, "loss": 0.142, "step": 1142 }, { "epoch": 0.15926983905803666, "grad_norm": 0.2117600440979004, "learning_rate": 9.781159962960667e-06, "loss": 0.1591, "step": 1143 }, { "epoch": 0.15940918274925103, "grad_norm": 0.6355187296867371, "learning_rate": 9.78046981432895e-06, "loss": 0.186, "step": 1144 }, { "epoch": 0.1595485264404654, "grad_norm": 0.2347707897424698, "learning_rate": 9.77977860358598e-06, "loss": 0.2141, "step": 1145 }, { "epoch": 0.1596878701316798, "grad_norm": 0.5226815342903137, "learning_rate": 9.779086330885328e-06, "loss": 0.1623, "step": 1146 }, { "epoch": 0.15982721382289417, "grad_norm": 0.8192313313484192, "learning_rate": 9.778392996380803e-06, "loss": 0.1649, "step": 1147 }, { "epoch": 0.15996655751410854, "grad_norm": 0.22320276498794556, "learning_rate": 9.777698600226446e-06, "loss": 0.1403, "step": 1148 }, { "epoch": 0.16010590120532292, "grad_norm": 0.4262777268886566, "learning_rate": 9.777003142576536e-06, "loss": 0.1398, "step": 1149 }, { "epoch": 0.1602452448965373, "grad_norm": 0.8086885213851929, "learning_rate": 9.77630662358559e-06, "loss": 0.1961, "step": 1150 }, { "epoch": 0.1603845885877517, "grad_norm": 0.35801777243614197, "learning_rate": 9.775609043408356e-06, "loss": 0.1696, "step": 1151 }, { "epoch": 0.16052393227896608, "grad_norm": 0.20847852528095245, "learning_rate": 9.774910402199821e-06, "loss": 0.1469, "step": 1152 }, { "epoch": 0.16066327597018046, "grad_norm": 0.6615159511566162, "learning_rate": 9.774210700115209e-06, "loss": 0.1769, "step": 1153 }, { "epoch": 0.16080261966139484, "grad_norm": 0.43010440468788147, "learning_rate": 9.773509937309978e-06, "loss": 0.1806, "step": 1154 }, { "epoch": 0.1609419633526092, "grad_norm": 0.3793630599975586, "learning_rate": 9.772808113939819e-06, "loss": 0.2126, "step": 1155 }, { "epoch": 0.1610813070438236, "grad_norm": 0.39194512367248535, "learning_rate": 9.77210523016066e-06, "loss": 0.1386, "step": 1156 }, { "epoch": 0.16122065073503797, "grad_norm": 0.29756227135658264, "learning_rate": 9.771401286128668e-06, "loss": 0.1344, "step": 1157 }, { "epoch": 0.16135999442625235, "grad_norm": 0.3799450099468231, "learning_rate": 9.770696282000245e-06, "loss": 0.1668, "step": 1158 }, { "epoch": 0.16149933811746672, "grad_norm": 0.3162807822227478, "learning_rate": 9.769990217932023e-06, "loss": 0.1606, "step": 1159 }, { "epoch": 0.1616386818086811, "grad_norm": 0.4260508716106415, "learning_rate": 9.769283094080878e-06, "loss": 0.1979, "step": 1160 }, { "epoch": 0.1617780254998955, "grad_norm": 0.20079770684242249, "learning_rate": 9.768574910603912e-06, "loss": 0.1892, "step": 1161 }, { "epoch": 0.16191736919110988, "grad_norm": 0.33751893043518066, "learning_rate": 9.767865667658472e-06, "loss": 0.1119, "step": 1162 }, { "epoch": 0.16205671288232426, "grad_norm": 0.2533831000328064, "learning_rate": 9.76715536540213e-06, "loss": 0.1431, "step": 1163 }, { "epoch": 0.16219605657353864, "grad_norm": 0.3203471899032593, "learning_rate": 9.766444003992704e-06, "loss": 0.1413, "step": 1164 }, { "epoch": 0.16233540026475302, "grad_norm": 0.3060317933559418, "learning_rate": 9.765731583588237e-06, "loss": 0.1699, "step": 1165 }, { "epoch": 0.1624747439559674, "grad_norm": 0.26273760199546814, "learning_rate": 9.765018104347017e-06, "loss": 0.2042, "step": 1166 }, { "epoch": 0.16261408764718177, "grad_norm": 0.2554643452167511, "learning_rate": 9.764303566427561e-06, "loss": 0.1676, "step": 1167 }, { "epoch": 0.16275343133839615, "grad_norm": 0.2299203723669052, "learning_rate": 9.763587969988626e-06, "loss": 0.1309, "step": 1168 }, { "epoch": 0.16289277502961053, "grad_norm": 0.36066630482673645, "learning_rate": 9.762871315189198e-06, "loss": 0.2019, "step": 1169 }, { "epoch": 0.1630321187208249, "grad_norm": 0.21280640363693237, "learning_rate": 9.7621536021885e-06, "loss": 0.1853, "step": 1170 }, { "epoch": 0.1631714624120393, "grad_norm": 0.29758697748184204, "learning_rate": 9.761434831145995e-06, "loss": 0.1314, "step": 1171 }, { "epoch": 0.1633108061032537, "grad_norm": 0.2765107750892639, "learning_rate": 9.760715002221375e-06, "loss": 0.1386, "step": 1172 }, { "epoch": 0.16345014979446806, "grad_norm": 0.21144601702690125, "learning_rate": 9.759994115574571e-06, "loss": 0.1552, "step": 1173 }, { "epoch": 0.16358949348568244, "grad_norm": 0.3049066662788391, "learning_rate": 9.759272171365746e-06, "loss": 0.1589, "step": 1174 }, { "epoch": 0.16372883717689682, "grad_norm": 0.20349138975143433, "learning_rate": 9.758549169755302e-06, "loss": 0.1453, "step": 1175 }, { "epoch": 0.1638681808681112, "grad_norm": 0.4158312678337097, "learning_rate": 9.757825110903872e-06, "loss": 0.1296, "step": 1176 }, { "epoch": 0.16400752455932557, "grad_norm": 0.48783135414123535, "learning_rate": 9.757099994972323e-06, "loss": 0.1567, "step": 1177 }, { "epoch": 0.16414686825053995, "grad_norm": 0.24924179911613464, "learning_rate": 9.756373822121762e-06, "loss": 0.1576, "step": 1178 }, { "epoch": 0.16428621194175433, "grad_norm": 0.1847054660320282, "learning_rate": 9.75564659251353e-06, "loss": 0.19, "step": 1179 }, { "epoch": 0.1644255556329687, "grad_norm": 0.3439066410064697, "learning_rate": 9.754918306309197e-06, "loss": 0.1473, "step": 1180 }, { "epoch": 0.16456489932418308, "grad_norm": 0.11570898443460464, "learning_rate": 9.754188963670573e-06, "loss": 0.112, "step": 1181 }, { "epoch": 0.1647042430153975, "grad_norm": 0.3279511332511902, "learning_rate": 9.753458564759701e-06, "loss": 0.1881, "step": 1182 }, { "epoch": 0.16484358670661187, "grad_norm": 0.18313807249069214, "learning_rate": 9.752727109738859e-06, "loss": 0.1716, "step": 1183 }, { "epoch": 0.16498293039782624, "grad_norm": 0.2894497513771057, "learning_rate": 9.751994598770563e-06, "loss": 0.1332, "step": 1184 }, { "epoch": 0.16512227408904062, "grad_norm": 0.4223545789718628, "learning_rate": 9.751261032017553e-06, "loss": 0.1754, "step": 1185 }, { "epoch": 0.165261617780255, "grad_norm": 0.20049448311328888, "learning_rate": 9.750526409642818e-06, "loss": 0.1323, "step": 1186 }, { "epoch": 0.16540096147146938, "grad_norm": 0.2085534930229187, "learning_rate": 9.749790731809568e-06, "loss": 0.1385, "step": 1187 }, { "epoch": 0.16554030516268375, "grad_norm": 0.19256681203842163, "learning_rate": 9.74905399868126e-06, "loss": 0.1766, "step": 1188 }, { "epoch": 0.16567964885389813, "grad_norm": 0.19816969335079193, "learning_rate": 9.748316210421573e-06, "loss": 0.1457, "step": 1189 }, { "epoch": 0.1658189925451125, "grad_norm": 0.1490800976753235, "learning_rate": 9.747577367194432e-06, "loss": 0.1278, "step": 1190 }, { "epoch": 0.1659583362363269, "grad_norm": 0.25959497690200806, "learning_rate": 9.74683746916399e-06, "loss": 0.1267, "step": 1191 }, { "epoch": 0.1660976799275413, "grad_norm": 0.18701641261577606, "learning_rate": 9.746096516494632e-06, "loss": 0.1435, "step": 1192 }, { "epoch": 0.16623702361875567, "grad_norm": 0.2995084226131439, "learning_rate": 9.745354509350983e-06, "loss": 0.1528, "step": 1193 }, { "epoch": 0.16637636730997005, "grad_norm": 0.20512571930885315, "learning_rate": 9.744611447897902e-06, "loss": 0.1253, "step": 1194 }, { "epoch": 0.16651571100118442, "grad_norm": 0.18238455057144165, "learning_rate": 9.743867332300478e-06, "loss": 0.1288, "step": 1195 }, { "epoch": 0.1666550546923988, "grad_norm": 0.32189711928367615, "learning_rate": 9.743122162724038e-06, "loss": 0.1483, "step": 1196 }, { "epoch": 0.16679439838361318, "grad_norm": 0.3797447383403778, "learning_rate": 9.742375939334141e-06, "loss": 0.1612, "step": 1197 }, { "epoch": 0.16693374207482756, "grad_norm": 0.16932347416877747, "learning_rate": 9.74162866229658e-06, "loss": 0.1393, "step": 1198 }, { "epoch": 0.16707308576604193, "grad_norm": 0.22706876695156097, "learning_rate": 9.740880331777383e-06, "loss": 0.1298, "step": 1199 }, { "epoch": 0.1672124294572563, "grad_norm": 0.5215122699737549, "learning_rate": 9.740130947942812e-06, "loss": 0.173, "step": 1200 }, { "epoch": 0.1673517731484707, "grad_norm": 0.2242976874113083, "learning_rate": 9.739380510959365e-06, "loss": 0.1466, "step": 1201 }, { "epoch": 0.1674911168396851, "grad_norm": 0.1563362032175064, "learning_rate": 9.738629020993769e-06, "loss": 0.1174, "step": 1202 }, { "epoch": 0.16763046053089947, "grad_norm": 0.40460991859436035, "learning_rate": 9.737876478212989e-06, "loss": 0.1718, "step": 1203 }, { "epoch": 0.16776980422211385, "grad_norm": 0.48099204897880554, "learning_rate": 9.737122882784225e-06, "loss": 0.1349, "step": 1204 }, { "epoch": 0.16790914791332823, "grad_norm": 0.1594274640083313, "learning_rate": 9.736368234874904e-06, "loss": 0.1484, "step": 1205 }, { "epoch": 0.1680484916045426, "grad_norm": 0.3521426320075989, "learning_rate": 9.735612534652697e-06, "loss": 0.1268, "step": 1206 }, { "epoch": 0.16818783529575698, "grad_norm": 0.3925263583660126, "learning_rate": 9.734855782285499e-06, "loss": 0.1186, "step": 1207 }, { "epoch": 0.16832717898697136, "grad_norm": 0.356486439704895, "learning_rate": 9.734097977941446e-06, "loss": 0.1775, "step": 1208 }, { "epoch": 0.16846652267818574, "grad_norm": 0.258466511964798, "learning_rate": 9.733339121788903e-06, "loss": 0.1815, "step": 1209 }, { "epoch": 0.16860586636940011, "grad_norm": 0.3020472526550293, "learning_rate": 9.73257921399647e-06, "loss": 0.1709, "step": 1210 }, { "epoch": 0.1687452100606145, "grad_norm": 0.24800415337085724, "learning_rate": 9.731818254732983e-06, "loss": 0.1444, "step": 1211 }, { "epoch": 0.1688845537518289, "grad_norm": 0.19716058671474457, "learning_rate": 9.73105624416751e-06, "loss": 0.1266, "step": 1212 }, { "epoch": 0.16902389744304328, "grad_norm": 0.34232550859451294, "learning_rate": 9.73029318246935e-06, "loss": 0.1605, "step": 1213 }, { "epoch": 0.16916324113425765, "grad_norm": 0.22067780792713165, "learning_rate": 9.72952906980804e-06, "loss": 0.1468, "step": 1214 }, { "epoch": 0.16930258482547203, "grad_norm": 0.28617390990257263, "learning_rate": 9.72876390635335e-06, "loss": 0.1857, "step": 1215 }, { "epoch": 0.1694419285166864, "grad_norm": 0.5377190709114075, "learning_rate": 9.727997692275275e-06, "loss": 0.139, "step": 1216 }, { "epoch": 0.16958127220790079, "grad_norm": 0.3418028950691223, "learning_rate": 9.727230427744058e-06, "loss": 0.1771, "step": 1217 }, { "epoch": 0.16972061589911516, "grad_norm": 0.17958025634288788, "learning_rate": 9.726462112930165e-06, "loss": 0.1804, "step": 1218 }, { "epoch": 0.16985995959032954, "grad_norm": 0.2021266222000122, "learning_rate": 9.725692748004295e-06, "loss": 0.1478, "step": 1219 }, { "epoch": 0.16999930328154392, "grad_norm": 0.21095286309719086, "learning_rate": 9.724922333137385e-06, "loss": 0.1613, "step": 1220 }, { "epoch": 0.1701386469727583, "grad_norm": 0.21536317467689514, "learning_rate": 9.724150868500607e-06, "loss": 0.1489, "step": 1221 }, { "epoch": 0.1702779906639727, "grad_norm": 0.21384312212467194, "learning_rate": 9.72337835426536e-06, "loss": 0.1688, "step": 1222 }, { "epoch": 0.17041733435518708, "grad_norm": 0.16369479894638062, "learning_rate": 9.722604790603279e-06, "loss": 0.155, "step": 1223 }, { "epoch": 0.17055667804640146, "grad_norm": 0.10130150616168976, "learning_rate": 9.721830177686231e-06, "loss": 0.1087, "step": 1224 }, { "epoch": 0.17069602173761583, "grad_norm": 0.11817577481269836, "learning_rate": 9.72105451568632e-06, "loss": 0.1002, "step": 1225 }, { "epoch": 0.1708353654288302, "grad_norm": 0.32122963666915894, "learning_rate": 9.720277804775879e-06, "loss": 0.1325, "step": 1226 }, { "epoch": 0.1709747091200446, "grad_norm": 0.39603134989738464, "learning_rate": 9.719500045127475e-06, "loss": 0.1676, "step": 1227 }, { "epoch": 0.17111405281125897, "grad_norm": 0.38982146978378296, "learning_rate": 9.718721236913909e-06, "loss": 0.1581, "step": 1228 }, { "epoch": 0.17125339650247334, "grad_norm": 0.39573049545288086, "learning_rate": 9.717941380308216e-06, "loss": 0.1969, "step": 1229 }, { "epoch": 0.17139274019368772, "grad_norm": 0.17656446993350983, "learning_rate": 9.717160475483659e-06, "loss": 0.164, "step": 1230 }, { "epoch": 0.1715320838849021, "grad_norm": 0.4645046889781952, "learning_rate": 9.71637852261374e-06, "loss": 0.197, "step": 1231 }, { "epoch": 0.1716714275761165, "grad_norm": 0.3499719500541687, "learning_rate": 9.71559552187219e-06, "loss": 0.1385, "step": 1232 }, { "epoch": 0.17181077126733088, "grad_norm": 0.15052728354930878, "learning_rate": 9.714811473432973e-06, "loss": 0.1432, "step": 1233 }, { "epoch": 0.17195011495854526, "grad_norm": 0.22081851959228516, "learning_rate": 9.714026377470287e-06, "loss": 0.1285, "step": 1234 }, { "epoch": 0.17208945864975964, "grad_norm": 0.5158572196960449, "learning_rate": 9.713240234158565e-06, "loss": 0.1775, "step": 1235 }, { "epoch": 0.172228802340974, "grad_norm": 0.30262571573257446, "learning_rate": 9.712453043672467e-06, "loss": 0.1287, "step": 1236 }, { "epoch": 0.1723681460321884, "grad_norm": 0.23830926418304443, "learning_rate": 9.71166480618689e-06, "loss": 0.1403, "step": 1237 }, { "epoch": 0.17250748972340277, "grad_norm": 0.5850203633308411, "learning_rate": 9.71087552187696e-06, "loss": 0.178, "step": 1238 }, { "epoch": 0.17264683341461715, "grad_norm": 0.22076360881328583, "learning_rate": 9.710085190918044e-06, "loss": 0.1326, "step": 1239 }, { "epoch": 0.17278617710583152, "grad_norm": 0.3472118079662323, "learning_rate": 9.70929381348573e-06, "loss": 0.1551, "step": 1240 }, { "epoch": 0.1729255207970459, "grad_norm": 0.3423045873641968, "learning_rate": 9.708501389755846e-06, "loss": 0.1552, "step": 1241 }, { "epoch": 0.1730648644882603, "grad_norm": 0.2576720416545868, "learning_rate": 9.70770791990445e-06, "loss": 0.1426, "step": 1242 }, { "epoch": 0.17320420817947468, "grad_norm": 0.4530787467956543, "learning_rate": 9.706913404107832e-06, "loss": 0.1717, "step": 1243 }, { "epoch": 0.17334355187068906, "grad_norm": 0.16962087154388428, "learning_rate": 9.706117842542517e-06, "loss": 0.1528, "step": 1244 }, { "epoch": 0.17348289556190344, "grad_norm": 0.14852772653102875, "learning_rate": 9.70532123538526e-06, "loss": 0.1407, "step": 1245 }, { "epoch": 0.17362223925311782, "grad_norm": 0.30879494547843933, "learning_rate": 9.704523582813049e-06, "loss": 0.1594, "step": 1246 }, { "epoch": 0.1737615829443322, "grad_norm": 0.45072269439697266, "learning_rate": 9.703724885003102e-06, "loss": 0.1635, "step": 1247 }, { "epoch": 0.17390092663554657, "grad_norm": 0.38117071986198425, "learning_rate": 9.702925142132876e-06, "loss": 0.1416, "step": 1248 }, { "epoch": 0.17404027032676095, "grad_norm": 0.17224760353565216, "learning_rate": 9.70212435438005e-06, "loss": 0.131, "step": 1249 }, { "epoch": 0.17417961401797533, "grad_norm": 0.33689531683921814, "learning_rate": 9.701322521922549e-06, "loss": 0.1559, "step": 1250 }, { "epoch": 0.1743189577091897, "grad_norm": 0.5340781807899475, "learning_rate": 9.700519644938513e-06, "loss": 0.2089, "step": 1251 }, { "epoch": 0.1744583014004041, "grad_norm": 0.221132293343544, "learning_rate": 9.699715723606327e-06, "loss": 0.1765, "step": 1252 }, { "epoch": 0.1745976450916185, "grad_norm": 0.18410727381706238, "learning_rate": 9.698910758104603e-06, "loss": 0.1332, "step": 1253 }, { "epoch": 0.17473698878283286, "grad_norm": 0.798984706401825, "learning_rate": 9.698104748612187e-06, "loss": 0.227, "step": 1254 }, { "epoch": 0.17487633247404724, "grad_norm": 0.3766033947467804, "learning_rate": 9.697297695308157e-06, "loss": 0.1956, "step": 1255 }, { "epoch": 0.17501567616526162, "grad_norm": 0.12012020498514175, "learning_rate": 9.696489598371817e-06, "loss": 0.113, "step": 1256 }, { "epoch": 0.175155019856476, "grad_norm": 0.6193284392356873, "learning_rate": 9.695680457982713e-06, "loss": 0.1679, "step": 1257 }, { "epoch": 0.17529436354769037, "grad_norm": 0.381464421749115, "learning_rate": 9.694870274320616e-06, "loss": 0.1316, "step": 1258 }, { "epoch": 0.17543370723890475, "grad_norm": 0.2596857249736786, "learning_rate": 9.694059047565529e-06, "loss": 0.1954, "step": 1259 }, { "epoch": 0.17557305093011913, "grad_norm": 0.662144124507904, "learning_rate": 9.69324677789769e-06, "loss": 0.1236, "step": 1260 }, { "epoch": 0.1757123946213335, "grad_norm": 0.3160862326622009, "learning_rate": 9.692433465497562e-06, "loss": 0.0954, "step": 1261 }, { "epoch": 0.1758517383125479, "grad_norm": 0.2603904604911804, "learning_rate": 9.69161911054585e-06, "loss": 0.1703, "step": 1262 }, { "epoch": 0.1759910820037623, "grad_norm": 0.16517479717731476, "learning_rate": 9.690803713223485e-06, "loss": 0.1693, "step": 1263 }, { "epoch": 0.17613042569497667, "grad_norm": 0.28174930810928345, "learning_rate": 9.689987273711626e-06, "loss": 0.1456, "step": 1264 }, { "epoch": 0.17626976938619104, "grad_norm": 0.3082026243209839, "learning_rate": 9.68916979219167e-06, "loss": 0.1513, "step": 1265 }, { "epoch": 0.17640911307740542, "grad_norm": 0.24335236847400665, "learning_rate": 9.68835126884524e-06, "loss": 0.1331, "step": 1266 }, { "epoch": 0.1765484567686198, "grad_norm": 0.3271634876728058, "learning_rate": 9.687531703854196e-06, "loss": 0.1781, "step": 1267 }, { "epoch": 0.17668780045983418, "grad_norm": 0.13569220900535583, "learning_rate": 9.686711097400625e-06, "loss": 0.1387, "step": 1268 }, { "epoch": 0.17682714415104855, "grad_norm": 0.46070218086242676, "learning_rate": 9.685889449666849e-06, "loss": 0.1614, "step": 1269 }, { "epoch": 0.17696648784226293, "grad_norm": 0.23965203762054443, "learning_rate": 9.685066760835417e-06, "loss": 0.1886, "step": 1270 }, { "epoch": 0.1771058315334773, "grad_norm": 0.22249387204647064, "learning_rate": 9.684243031089113e-06, "loss": 0.1781, "step": 1271 }, { "epoch": 0.17724517522469171, "grad_norm": 0.2068474441766739, "learning_rate": 9.68341826061095e-06, "loss": 0.1596, "step": 1272 }, { "epoch": 0.1773845189159061, "grad_norm": 0.2866107225418091, "learning_rate": 9.682592449584174e-06, "loss": 0.1587, "step": 1273 }, { "epoch": 0.17752386260712047, "grad_norm": 0.2475089281797409, "learning_rate": 9.68176559819226e-06, "loss": 0.1664, "step": 1274 }, { "epoch": 0.17766320629833485, "grad_norm": 0.2750874161720276, "learning_rate": 9.680937706618919e-06, "loss": 0.1879, "step": 1275 }, { "epoch": 0.17780254998954922, "grad_norm": 0.15523813664913177, "learning_rate": 9.680108775048087e-06, "loss": 0.1343, "step": 1276 }, { "epoch": 0.1779418936807636, "grad_norm": 0.19517451524734497, "learning_rate": 9.679278803663932e-06, "loss": 0.1484, "step": 1277 }, { "epoch": 0.17808123737197798, "grad_norm": 0.21209822595119476, "learning_rate": 9.678447792650858e-06, "loss": 0.151, "step": 1278 }, { "epoch": 0.17822058106319236, "grad_norm": 0.3152253031730652, "learning_rate": 9.677615742193495e-06, "loss": 0.1443, "step": 1279 }, { "epoch": 0.17835992475440673, "grad_norm": 0.13666653633117676, "learning_rate": 9.676782652476705e-06, "loss": 0.1243, "step": 1280 }, { "epoch": 0.1784992684456211, "grad_norm": 0.20554547011852264, "learning_rate": 9.675948523685583e-06, "loss": 0.159, "step": 1281 }, { "epoch": 0.17863861213683552, "grad_norm": 0.1425786316394806, "learning_rate": 9.675113356005453e-06, "loss": 0.1484, "step": 1282 }, { "epoch": 0.1787779558280499, "grad_norm": 0.30077382922172546, "learning_rate": 9.674277149621869e-06, "loss": 0.158, "step": 1283 }, { "epoch": 0.17891729951926427, "grad_norm": 0.2733464539051056, "learning_rate": 9.673439904720619e-06, "loss": 0.1828, "step": 1284 }, { "epoch": 0.17905664321047865, "grad_norm": 0.25564736127853394, "learning_rate": 9.672601621487718e-06, "loss": 0.1424, "step": 1285 }, { "epoch": 0.17919598690169303, "grad_norm": 0.2759866714477539, "learning_rate": 9.671762300109415e-06, "loss": 0.1429, "step": 1286 }, { "epoch": 0.1793353305929074, "grad_norm": 0.3346361815929413, "learning_rate": 9.670921940772186e-06, "loss": 0.2239, "step": 1287 }, { "epoch": 0.17947467428412178, "grad_norm": 0.19410696625709534, "learning_rate": 9.670080543662742e-06, "loss": 0.1865, "step": 1288 }, { "epoch": 0.17961401797533616, "grad_norm": 0.49575886130332947, "learning_rate": 9.669238108968018e-06, "loss": 0.1919, "step": 1289 }, { "epoch": 0.17975336166655054, "grad_norm": 0.19690904021263123, "learning_rate": 9.668394636875188e-06, "loss": 0.1734, "step": 1290 }, { "epoch": 0.17989270535776491, "grad_norm": 0.24774867296218872, "learning_rate": 9.667550127571653e-06, "loss": 0.1415, "step": 1291 }, { "epoch": 0.18003204904897932, "grad_norm": 0.4920285642147064, "learning_rate": 9.666704581245041e-06, "loss": 0.1981, "step": 1292 }, { "epoch": 0.1801713927401937, "grad_norm": 0.21379975974559784, "learning_rate": 9.665857998083212e-06, "loss": 0.1521, "step": 1293 }, { "epoch": 0.18031073643140808, "grad_norm": 0.29818111658096313, "learning_rate": 9.66501037827426e-06, "loss": 0.1792, "step": 1294 }, { "epoch": 0.18045008012262245, "grad_norm": 0.20864811539649963, "learning_rate": 9.664161722006506e-06, "loss": 0.129, "step": 1295 }, { "epoch": 0.18058942381383683, "grad_norm": 0.3035504221916199, "learning_rate": 9.663312029468504e-06, "loss": 0.1751, "step": 1296 }, { "epoch": 0.1807287675050512, "grad_norm": 0.3117918074131012, "learning_rate": 9.662461300849031e-06, "loss": 0.1199, "step": 1297 }, { "epoch": 0.18086811119626559, "grad_norm": 0.29823100566864014, "learning_rate": 9.661609536337104e-06, "loss": 0.189, "step": 1298 }, { "epoch": 0.18100745488747996, "grad_norm": 0.4280351400375366, "learning_rate": 9.660756736121964e-06, "loss": 0.1893, "step": 1299 }, { "epoch": 0.18114679857869434, "grad_norm": 0.2225537747144699, "learning_rate": 9.659902900393086e-06, "loss": 0.1309, "step": 1300 }, { "epoch": 0.18128614226990872, "grad_norm": 0.1639932543039322, "learning_rate": 9.659048029340169e-06, "loss": 0.152, "step": 1301 }, { "epoch": 0.18142548596112312, "grad_norm": 0.2769934833049774, "learning_rate": 9.658192123153149e-06, "loss": 0.1338, "step": 1302 }, { "epoch": 0.1815648296523375, "grad_norm": 0.26527562737464905, "learning_rate": 9.657335182022187e-06, "loss": 0.1697, "step": 1303 }, { "epoch": 0.18170417334355188, "grad_norm": 0.47396010160446167, "learning_rate": 9.656477206137675e-06, "loss": 0.1911, "step": 1304 }, { "epoch": 0.18184351703476626, "grad_norm": 0.4031538963317871, "learning_rate": 9.655618195690239e-06, "loss": 0.2167, "step": 1305 }, { "epoch": 0.18198286072598063, "grad_norm": 0.1916797161102295, "learning_rate": 9.654758150870728e-06, "loss": 0.1524, "step": 1306 }, { "epoch": 0.182122204417195, "grad_norm": 0.42143288254737854, "learning_rate": 9.653897071870226e-06, "loss": 0.1773, "step": 1307 }, { "epoch": 0.1822615481084094, "grad_norm": 0.24793843924999237, "learning_rate": 9.653034958880045e-06, "loss": 0.1709, "step": 1308 }, { "epoch": 0.18240089179962377, "grad_norm": 0.2282688021659851, "learning_rate": 9.652171812091728e-06, "loss": 0.163, "step": 1309 }, { "epoch": 0.18254023549083814, "grad_norm": 0.3483678102493286, "learning_rate": 9.651307631697044e-06, "loss": 0.1645, "step": 1310 }, { "epoch": 0.18267957918205252, "grad_norm": 0.16490839421749115, "learning_rate": 9.650442417887995e-06, "loss": 0.1378, "step": 1311 }, { "epoch": 0.18281892287326693, "grad_norm": 0.18325623869895935, "learning_rate": 9.649576170856814e-06, "loss": 0.1298, "step": 1312 }, { "epoch": 0.1829582665644813, "grad_norm": 0.20451800525188446, "learning_rate": 9.64870889079596e-06, "loss": 0.1418, "step": 1313 }, { "epoch": 0.18309761025569568, "grad_norm": 0.24855254590511322, "learning_rate": 9.64784057789812e-06, "loss": 0.1478, "step": 1314 }, { "epoch": 0.18323695394691006, "grad_norm": 0.3050850033760071, "learning_rate": 9.646971232356215e-06, "loss": 0.1824, "step": 1315 }, { "epoch": 0.18337629763812444, "grad_norm": 0.35360708832740784, "learning_rate": 9.646100854363396e-06, "loss": 0.1381, "step": 1316 }, { "epoch": 0.1835156413293388, "grad_norm": 0.48434728384017944, "learning_rate": 9.64522944411304e-06, "loss": 0.1803, "step": 1317 }, { "epoch": 0.1836549850205532, "grad_norm": 0.26054173707962036, "learning_rate": 9.644357001798752e-06, "loss": 0.1508, "step": 1318 }, { "epoch": 0.18379432871176757, "grad_norm": 0.25732675194740295, "learning_rate": 9.643483527614372e-06, "loss": 0.1225, "step": 1319 }, { "epoch": 0.18393367240298195, "grad_norm": 0.1787521094083786, "learning_rate": 9.642609021753964e-06, "loss": 0.1548, "step": 1320 }, { "epoch": 0.18407301609419632, "grad_norm": 0.2922101318836212, "learning_rate": 9.641733484411823e-06, "loss": 0.1341, "step": 1321 }, { "epoch": 0.18421235978541073, "grad_norm": 0.46315765380859375, "learning_rate": 9.640856915782477e-06, "loss": 0.1754, "step": 1322 }, { "epoch": 0.1843517034766251, "grad_norm": 0.17870889604091644, "learning_rate": 9.639979316060675e-06, "loss": 0.1621, "step": 1323 }, { "epoch": 0.18449104716783948, "grad_norm": 0.42611637711524963, "learning_rate": 9.639100685441403e-06, "loss": 0.143, "step": 1324 }, { "epoch": 0.18463039085905386, "grad_norm": 0.2910546660423279, "learning_rate": 9.638221024119869e-06, "loss": 0.153, "step": 1325 }, { "epoch": 0.18476973455026824, "grad_norm": 0.23241792619228363, "learning_rate": 9.637340332291518e-06, "loss": 0.1487, "step": 1326 }, { "epoch": 0.18490907824148262, "grad_norm": 0.2095266580581665, "learning_rate": 9.636458610152015e-06, "loss": 0.1702, "step": 1327 }, { "epoch": 0.185048421932697, "grad_norm": 0.20193786919116974, "learning_rate": 9.635575857897264e-06, "loss": 0.1448, "step": 1328 }, { "epoch": 0.18518776562391137, "grad_norm": 0.19296640157699585, "learning_rate": 9.634692075723386e-06, "loss": 0.1139, "step": 1329 }, { "epoch": 0.18532710931512575, "grad_norm": 0.2501770555973053, "learning_rate": 9.633807263826745e-06, "loss": 0.1618, "step": 1330 }, { "epoch": 0.18546645300634013, "grad_norm": 0.35823529958724976, "learning_rate": 9.632921422403918e-06, "loss": 0.1518, "step": 1331 }, { "epoch": 0.18560579669755453, "grad_norm": 0.2987440824508667, "learning_rate": 9.632034551651723e-06, "loss": 0.1719, "step": 1332 }, { "epoch": 0.1857451403887689, "grad_norm": 0.3133537769317627, "learning_rate": 9.631146651767202e-06, "loss": 0.1494, "step": 1333 }, { "epoch": 0.1858844840799833, "grad_norm": 0.45084795355796814, "learning_rate": 9.630257722947625e-06, "loss": 0.1488, "step": 1334 }, { "epoch": 0.18602382777119766, "grad_norm": 0.24699540436267853, "learning_rate": 9.629367765390494e-06, "loss": 0.1412, "step": 1335 }, { "epoch": 0.18616317146241204, "grad_norm": 0.2833060324192047, "learning_rate": 9.628476779293536e-06, "loss": 0.1342, "step": 1336 }, { "epoch": 0.18630251515362642, "grad_norm": 0.19924423098564148, "learning_rate": 9.627584764854706e-06, "loss": 0.1103, "step": 1337 }, { "epoch": 0.1864418588448408, "grad_norm": 0.19817467033863068, "learning_rate": 9.626691722272193e-06, "loss": 0.1508, "step": 1338 }, { "epoch": 0.18658120253605517, "grad_norm": 0.37161409854888916, "learning_rate": 9.625797651744406e-06, "loss": 0.1248, "step": 1339 }, { "epoch": 0.18672054622726955, "grad_norm": 0.44575536251068115, "learning_rate": 9.62490255346999e-06, "loss": 0.1506, "step": 1340 }, { "epoch": 0.18685988991848393, "grad_norm": 0.21289511024951935, "learning_rate": 9.624006427647817e-06, "loss": 0.1554, "step": 1341 }, { "epoch": 0.18699923360969833, "grad_norm": 0.3041161000728607, "learning_rate": 9.623109274476982e-06, "loss": 0.1456, "step": 1342 }, { "epoch": 0.1871385773009127, "grad_norm": 0.2542140781879425, "learning_rate": 9.622211094156812e-06, "loss": 0.1578, "step": 1343 }, { "epoch": 0.1872779209921271, "grad_norm": 0.3451121747493744, "learning_rate": 9.621311886886866e-06, "loss": 0.2063, "step": 1344 }, { "epoch": 0.18741726468334147, "grad_norm": 0.23592160642147064, "learning_rate": 9.620411652866926e-06, "loss": 0.1572, "step": 1345 }, { "epoch": 0.18755660837455584, "grad_norm": 0.2408890426158905, "learning_rate": 9.619510392297e-06, "loss": 0.1741, "step": 1346 }, { "epoch": 0.18769595206577022, "grad_norm": 0.32894453406333923, "learning_rate": 9.618608105377331e-06, "loss": 0.1597, "step": 1347 }, { "epoch": 0.1878352957569846, "grad_norm": 0.26757293939590454, "learning_rate": 9.617704792308387e-06, "loss": 0.1712, "step": 1348 }, { "epoch": 0.18797463944819898, "grad_norm": 0.3154507577419281, "learning_rate": 9.61680045329086e-06, "loss": 0.141, "step": 1349 }, { "epoch": 0.18811398313941335, "grad_norm": 0.3843487799167633, "learning_rate": 9.615895088525677e-06, "loss": 0.1793, "step": 1350 }, { "epoch": 0.18825332683062773, "grad_norm": 0.2509647607803345, "learning_rate": 9.614988698213987e-06, "loss": 0.1353, "step": 1351 }, { "epoch": 0.18839267052184214, "grad_norm": 0.530786395072937, "learning_rate": 9.614081282557172e-06, "loss": 0.1823, "step": 1352 }, { "epoch": 0.18853201421305651, "grad_norm": 0.3617441654205322, "learning_rate": 9.613172841756835e-06, "loss": 0.1468, "step": 1353 }, { "epoch": 0.1886713579042709, "grad_norm": 0.34898677468299866, "learning_rate": 9.612263376014815e-06, "loss": 0.1912, "step": 1354 }, { "epoch": 0.18881070159548527, "grad_norm": 0.3008996844291687, "learning_rate": 9.611352885533171e-06, "loss": 0.1946, "step": 1355 }, { "epoch": 0.18895004528669965, "grad_norm": 0.3738798201084137, "learning_rate": 9.610441370514196e-06, "loss": 0.1391, "step": 1356 }, { "epoch": 0.18908938897791402, "grad_norm": 0.23026151955127716, "learning_rate": 9.609528831160407e-06, "loss": 0.1808, "step": 1357 }, { "epoch": 0.1892287326691284, "grad_norm": 0.2737427353858948, "learning_rate": 9.608615267674548e-06, "loss": 0.1853, "step": 1358 }, { "epoch": 0.18936807636034278, "grad_norm": 0.33767369389533997, "learning_rate": 9.607700680259593e-06, "loss": 0.1554, "step": 1359 }, { "epoch": 0.18950742005155716, "grad_norm": 0.3629973828792572, "learning_rate": 9.606785069118742e-06, "loss": 0.1694, "step": 1360 }, { "epoch": 0.18964676374277153, "grad_norm": 0.2763581871986389, "learning_rate": 9.605868434455426e-06, "loss": 0.1354, "step": 1361 }, { "epoch": 0.18978610743398594, "grad_norm": 0.3763597309589386, "learning_rate": 9.604950776473294e-06, "loss": 0.1477, "step": 1362 }, { "epoch": 0.18992545112520032, "grad_norm": 0.371249258518219, "learning_rate": 9.604032095376234e-06, "loss": 0.1763, "step": 1363 }, { "epoch": 0.1900647948164147, "grad_norm": 0.4416034519672394, "learning_rate": 9.603112391368354e-06, "loss": 0.1514, "step": 1364 }, { "epoch": 0.19020413850762907, "grad_norm": 0.5337738990783691, "learning_rate": 9.602191664653992e-06, "loss": 0.189, "step": 1365 }, { "epoch": 0.19034348219884345, "grad_norm": 0.43252885341644287, "learning_rate": 9.601269915437713e-06, "loss": 0.1763, "step": 1366 }, { "epoch": 0.19048282589005783, "grad_norm": 0.30910569429397583, "learning_rate": 9.600347143924305e-06, "loss": 0.1312, "step": 1367 }, { "epoch": 0.1906221695812722, "grad_norm": 0.2031663954257965, "learning_rate": 9.599423350318791e-06, "loss": 0.1559, "step": 1368 }, { "epoch": 0.19076151327248658, "grad_norm": 0.3180152475833893, "learning_rate": 9.598498534826414e-06, "loss": 0.1531, "step": 1369 }, { "epoch": 0.19090085696370096, "grad_norm": 0.17969432473182678, "learning_rate": 9.597572697652649e-06, "loss": 0.1352, "step": 1370 }, { "epoch": 0.19104020065491534, "grad_norm": 0.23121578991413116, "learning_rate": 9.596645839003196e-06, "loss": 0.1033, "step": 1371 }, { "epoch": 0.19117954434612974, "grad_norm": 0.27502861618995667, "learning_rate": 9.595717959083978e-06, "loss": 0.1838, "step": 1372 }, { "epoch": 0.19131888803734412, "grad_norm": 0.4507421553134918, "learning_rate": 9.594789058101154e-06, "loss": 0.2051, "step": 1373 }, { "epoch": 0.1914582317285585, "grad_norm": 0.250270277261734, "learning_rate": 9.593859136261102e-06, "loss": 0.1644, "step": 1374 }, { "epoch": 0.19159757541977288, "grad_norm": 0.16571825742721558, "learning_rate": 9.592928193770427e-06, "loss": 0.1605, "step": 1375 }, { "epoch": 0.19173691911098725, "grad_norm": 0.2030767947435379, "learning_rate": 9.591996230835968e-06, "loss": 0.1831, "step": 1376 }, { "epoch": 0.19187626280220163, "grad_norm": 0.5369263291358948, "learning_rate": 9.591063247664783e-06, "loss": 0.1427, "step": 1377 }, { "epoch": 0.192015606493416, "grad_norm": 0.34052857756614685, "learning_rate": 9.59012924446416e-06, "loss": 0.1452, "step": 1378 }, { "epoch": 0.19215495018463039, "grad_norm": 0.2028743028640747, "learning_rate": 9.589194221441614e-06, "loss": 0.1708, "step": 1379 }, { "epoch": 0.19229429387584476, "grad_norm": 0.13110385835170746, "learning_rate": 9.588258178804884e-06, "loss": 0.1357, "step": 1380 }, { "epoch": 0.19243363756705914, "grad_norm": 0.19221588969230652, "learning_rate": 9.587321116761938e-06, "loss": 0.1511, "step": 1381 }, { "epoch": 0.19257298125827352, "grad_norm": 0.14668233692646027, "learning_rate": 9.586383035520972e-06, "loss": 0.1397, "step": 1382 }, { "epoch": 0.19271232494948792, "grad_norm": 0.17434653639793396, "learning_rate": 9.585443935290403e-06, "loss": 0.129, "step": 1383 }, { "epoch": 0.1928516686407023, "grad_norm": 0.30965402722358704, "learning_rate": 9.58450381627888e-06, "loss": 0.1491, "step": 1384 }, { "epoch": 0.19299101233191668, "grad_norm": 0.21060562133789062, "learning_rate": 9.583562678695275e-06, "loss": 0.147, "step": 1385 }, { "epoch": 0.19313035602313106, "grad_norm": 0.23262493312358856, "learning_rate": 9.582620522748686e-06, "loss": 0.1111, "step": 1386 }, { "epoch": 0.19326969971434543, "grad_norm": 0.23758940398693085, "learning_rate": 9.58167734864844e-06, "loss": 0.1613, "step": 1387 }, { "epoch": 0.1934090434055598, "grad_norm": 0.2535330653190613, "learning_rate": 9.58073315660409e-06, "loss": 0.1298, "step": 1388 }, { "epoch": 0.1935483870967742, "grad_norm": 0.4106731116771698, "learning_rate": 9.579787946825411e-06, "loss": 0.1899, "step": 1389 }, { "epoch": 0.19368773078798857, "grad_norm": 0.2403911054134369, "learning_rate": 9.57884171952241e-06, "loss": 0.1438, "step": 1390 }, { "epoch": 0.19382707447920294, "grad_norm": 0.445232629776001, "learning_rate": 9.577894474905314e-06, "loss": 0.161, "step": 1391 }, { "epoch": 0.19396641817041732, "grad_norm": 0.24736352264881134, "learning_rate": 9.576946213184583e-06, "loss": 0.1411, "step": 1392 }, { "epoch": 0.19410576186163173, "grad_norm": 0.22723399102687836, "learning_rate": 9.575996934570896e-06, "loss": 0.1577, "step": 1393 }, { "epoch": 0.1942451055528461, "grad_norm": 0.2596050798892975, "learning_rate": 9.57504663927516e-06, "loss": 0.1706, "step": 1394 }, { "epoch": 0.19438444924406048, "grad_norm": 0.16147339344024658, "learning_rate": 9.574095327508513e-06, "loss": 0.1222, "step": 1395 }, { "epoch": 0.19452379293527486, "grad_norm": 0.19892629981040955, "learning_rate": 9.573142999482313e-06, "loss": 0.1584, "step": 1396 }, { "epoch": 0.19466313662648924, "grad_norm": 0.20197097957134247, "learning_rate": 9.572189655408144e-06, "loss": 0.1433, "step": 1397 }, { "epoch": 0.1948024803177036, "grad_norm": 0.1925116777420044, "learning_rate": 9.571235295497818e-06, "loss": 0.141, "step": 1398 }, { "epoch": 0.194941824008918, "grad_norm": 0.21574990451335907, "learning_rate": 9.570279919963373e-06, "loss": 0.1505, "step": 1399 }, { "epoch": 0.19508116770013237, "grad_norm": 0.21540169417858124, "learning_rate": 9.569323529017071e-06, "loss": 0.153, "step": 1400 }, { "epoch": 0.19522051139134675, "grad_norm": 0.8681233525276184, "learning_rate": 9.568366122871399e-06, "loss": 0.1896, "step": 1401 }, { "epoch": 0.19535985508256112, "grad_norm": 0.22936180233955383, "learning_rate": 9.567407701739075e-06, "loss": 0.137, "step": 1402 }, { "epoch": 0.19549919877377553, "grad_norm": 0.43365418910980225, "learning_rate": 9.566448265833034e-06, "loss": 0.1325, "step": 1403 }, { "epoch": 0.1956385424649899, "grad_norm": 0.6542822122573853, "learning_rate": 9.56548781536644e-06, "loss": 0.2045, "step": 1404 }, { "epoch": 0.19577788615620428, "grad_norm": 0.1725963056087494, "learning_rate": 9.564526350552689e-06, "loss": 0.14, "step": 1405 }, { "epoch": 0.19591722984741866, "grad_norm": 0.14528732001781464, "learning_rate": 9.56356387160539e-06, "loss": 0.1149, "step": 1406 }, { "epoch": 0.19605657353863304, "grad_norm": 0.68516606092453, "learning_rate": 9.562600378738389e-06, "loss": 0.1647, "step": 1407 }, { "epoch": 0.19619591722984742, "grad_norm": 0.2682974934577942, "learning_rate": 9.561635872165747e-06, "loss": 0.1389, "step": 1408 }, { "epoch": 0.1963352609210618, "grad_norm": 0.2794683277606964, "learning_rate": 9.56067035210176e-06, "loss": 0.1527, "step": 1409 }, { "epoch": 0.19647460461227617, "grad_norm": 0.5702189803123474, "learning_rate": 9.559703818760943e-06, "loss": 0.1813, "step": 1410 }, { "epoch": 0.19661394830349055, "grad_norm": 0.3125520646572113, "learning_rate": 9.558736272358036e-06, "loss": 0.1623, "step": 1411 }, { "epoch": 0.19675329199470493, "grad_norm": 0.3310501277446747, "learning_rate": 9.557767713108009e-06, "loss": 0.2042, "step": 1412 }, { "epoch": 0.19689263568591933, "grad_norm": 0.1431940197944641, "learning_rate": 9.55679814122605e-06, "loss": 0.125, "step": 1413 }, { "epoch": 0.1970319793771337, "grad_norm": 0.29279446601867676, "learning_rate": 9.555827556927578e-06, "loss": 0.1332, "step": 1414 }, { "epoch": 0.1971713230683481, "grad_norm": 0.15241152048110962, "learning_rate": 9.554855960428234e-06, "loss": 0.1044, "step": 1415 }, { "epoch": 0.19731066675956246, "grad_norm": 0.12480594217777252, "learning_rate": 9.553883351943882e-06, "loss": 0.1296, "step": 1416 }, { "epoch": 0.19745001045077684, "grad_norm": 0.26448187232017517, "learning_rate": 9.55290973169062e-06, "loss": 0.1993, "step": 1417 }, { "epoch": 0.19758935414199122, "grad_norm": 0.2370356172323227, "learning_rate": 9.55193509988476e-06, "loss": 0.1383, "step": 1418 }, { "epoch": 0.1977286978332056, "grad_norm": 0.21401244401931763, "learning_rate": 9.55095945674284e-06, "loss": 0.1359, "step": 1419 }, { "epoch": 0.19786804152441997, "grad_norm": 0.12230684608221054, "learning_rate": 9.549982802481632e-06, "loss": 0.1404, "step": 1420 }, { "epoch": 0.19800738521563435, "grad_norm": 0.305006206035614, "learning_rate": 9.549005137318122e-06, "loss": 0.1432, "step": 1421 }, { "epoch": 0.19814672890684873, "grad_norm": 0.27913734316825867, "learning_rate": 9.548026461469527e-06, "loss": 0.1513, "step": 1422 }, { "epoch": 0.19828607259806313, "grad_norm": 0.15234744548797607, "learning_rate": 9.547046775153285e-06, "loss": 0.1358, "step": 1423 }, { "epoch": 0.1984254162892775, "grad_norm": 0.20580318570137024, "learning_rate": 9.54606607858706e-06, "loss": 0.1771, "step": 1424 }, { "epoch": 0.1985647599804919, "grad_norm": 0.5592332482337952, "learning_rate": 9.545084371988743e-06, "loss": 0.1907, "step": 1425 }, { "epoch": 0.19870410367170627, "grad_norm": 0.20477671921253204, "learning_rate": 9.54410165557644e-06, "loss": 0.1553, "step": 1426 }, { "epoch": 0.19884344736292064, "grad_norm": 0.24376894533634186, "learning_rate": 9.543117929568497e-06, "loss": 0.1227, "step": 1427 }, { "epoch": 0.19898279105413502, "grad_norm": 0.3334439992904663, "learning_rate": 9.542133194183469e-06, "loss": 0.1413, "step": 1428 }, { "epoch": 0.1991221347453494, "grad_norm": 0.19279789924621582, "learning_rate": 9.541147449640145e-06, "loss": 0.1396, "step": 1429 }, { "epoch": 0.19926147843656378, "grad_norm": 0.3869915306568146, "learning_rate": 9.540160696157532e-06, "loss": 0.1936, "step": 1430 }, { "epoch": 0.19940082212777815, "grad_norm": 0.23151251673698425, "learning_rate": 9.539172933954867e-06, "loss": 0.1433, "step": 1431 }, { "epoch": 0.19954016581899253, "grad_norm": 0.23896926641464233, "learning_rate": 9.538184163251608e-06, "loss": 0.1714, "step": 1432 }, { "epoch": 0.19967950951020694, "grad_norm": 0.4770006835460663, "learning_rate": 9.537194384267436e-06, "loss": 0.1776, "step": 1433 }, { "epoch": 0.19981885320142131, "grad_norm": 0.4601276218891144, "learning_rate": 9.536203597222259e-06, "loss": 0.174, "step": 1434 }, { "epoch": 0.1999581968926357, "grad_norm": 0.1903223991394043, "learning_rate": 9.535211802336204e-06, "loss": 0.1242, "step": 1435 }, { "epoch": 0.20009754058385007, "grad_norm": 0.20011666417121887, "learning_rate": 9.534218999829627e-06, "loss": 0.1502, "step": 1436 }, { "epoch": 0.20023688427506445, "grad_norm": 0.17638519406318665, "learning_rate": 9.533225189923107e-06, "loss": 0.1327, "step": 1437 }, { "epoch": 0.20037622796627882, "grad_norm": 0.43688687682151794, "learning_rate": 9.532230372837446e-06, "loss": 0.1523, "step": 1438 }, { "epoch": 0.2005155716574932, "grad_norm": 0.36539891362190247, "learning_rate": 9.531234548793667e-06, "loss": 0.2047, "step": 1439 }, { "epoch": 0.20065491534870758, "grad_norm": 0.3162056505680084, "learning_rate": 9.530237718013023e-06, "loss": 0.1386, "step": 1440 }, { "epoch": 0.20079425903992196, "grad_norm": 0.3914373219013214, "learning_rate": 9.529239880716983e-06, "loss": 0.1358, "step": 1441 }, { "epoch": 0.20093360273113633, "grad_norm": 0.173303484916687, "learning_rate": 9.528241037127247e-06, "loss": 0.1514, "step": 1442 }, { "epoch": 0.20107294642235074, "grad_norm": 0.1902550607919693, "learning_rate": 9.527241187465735e-06, "loss": 0.1173, "step": 1443 }, { "epoch": 0.20121229011356512, "grad_norm": 0.24859841167926788, "learning_rate": 9.526240331954589e-06, "loss": 0.1762, "step": 1444 }, { "epoch": 0.2013516338047795, "grad_norm": 0.21596495807170868, "learning_rate": 9.525238470816176e-06, "loss": 0.1548, "step": 1445 }, { "epoch": 0.20149097749599387, "grad_norm": 0.17190438508987427, "learning_rate": 9.524235604273088e-06, "loss": 0.1523, "step": 1446 }, { "epoch": 0.20163032118720825, "grad_norm": 0.1692861169576645, "learning_rate": 9.523231732548139e-06, "loss": 0.1119, "step": 1447 }, { "epoch": 0.20176966487842263, "grad_norm": 0.13185052573680878, "learning_rate": 9.522226855864366e-06, "loss": 0.1183, "step": 1448 }, { "epoch": 0.201909008569637, "grad_norm": 0.1535182148218155, "learning_rate": 9.521220974445032e-06, "loss": 0.1271, "step": 1449 }, { "epoch": 0.20204835226085138, "grad_norm": 0.5472874641418457, "learning_rate": 9.520214088513616e-06, "loss": 0.1523, "step": 1450 }, { "epoch": 0.20218769595206576, "grad_norm": 0.48481351137161255, "learning_rate": 9.519206198293828e-06, "loss": 0.152, "step": 1451 }, { "epoch": 0.20232703964328014, "grad_norm": 0.4116688668727875, "learning_rate": 9.5181973040096e-06, "loss": 0.1434, "step": 1452 }, { "epoch": 0.20246638333449454, "grad_norm": 0.26589664816856384, "learning_rate": 9.517187405885082e-06, "loss": 0.1856, "step": 1453 }, { "epoch": 0.20260572702570892, "grad_norm": 0.41878607869148254, "learning_rate": 9.516176504144652e-06, "loss": 0.1568, "step": 1454 }, { "epoch": 0.2027450707169233, "grad_norm": 0.2569368779659271, "learning_rate": 9.515164599012908e-06, "loss": 0.1196, "step": 1455 }, { "epoch": 0.20288441440813768, "grad_norm": 0.3549622595310211, "learning_rate": 9.514151690714672e-06, "loss": 0.1561, "step": 1456 }, { "epoch": 0.20302375809935205, "grad_norm": 0.43105876445770264, "learning_rate": 9.513137779474992e-06, "loss": 0.1005, "step": 1457 }, { "epoch": 0.20316310179056643, "grad_norm": 0.6845743656158447, "learning_rate": 9.512122865519135e-06, "loss": 0.198, "step": 1458 }, { "epoch": 0.2033024454817808, "grad_norm": 0.34279704093933105, "learning_rate": 9.511106949072588e-06, "loss": 0.1567, "step": 1459 }, { "epoch": 0.20344178917299519, "grad_norm": 0.3064875602722168, "learning_rate": 9.51009003036107e-06, "loss": 0.1241, "step": 1460 }, { "epoch": 0.20358113286420956, "grad_norm": 0.47662508487701416, "learning_rate": 9.509072109610514e-06, "loss": 0.1495, "step": 1461 }, { "epoch": 0.20372047655542394, "grad_norm": 0.3569803535938263, "learning_rate": 9.508053187047077e-06, "loss": 0.1251, "step": 1462 }, { "epoch": 0.20385982024663835, "grad_norm": 0.3570084273815155, "learning_rate": 9.507033262897142e-06, "loss": 0.1277, "step": 1463 }, { "epoch": 0.20399916393785272, "grad_norm": 0.12971125543117523, "learning_rate": 9.506012337387315e-06, "loss": 0.1188, "step": 1464 }, { "epoch": 0.2041385076290671, "grad_norm": 0.21676596999168396, "learning_rate": 9.504990410744422e-06, "loss": 0.1568, "step": 1465 }, { "epoch": 0.20427785132028148, "grad_norm": 0.5947558879852295, "learning_rate": 9.503967483195509e-06, "loss": 0.2028, "step": 1466 }, { "epoch": 0.20441719501149586, "grad_norm": 0.39976292848587036, "learning_rate": 9.502943554967848e-06, "loss": 0.1305, "step": 1467 }, { "epoch": 0.20455653870271023, "grad_norm": 0.17601469159126282, "learning_rate": 9.501918626288935e-06, "loss": 0.1367, "step": 1468 }, { "epoch": 0.2046958823939246, "grad_norm": 0.2677792012691498, "learning_rate": 9.500892697386482e-06, "loss": 0.1563, "step": 1469 }, { "epoch": 0.204835226085139, "grad_norm": 0.2929821014404297, "learning_rate": 9.499865768488429e-06, "loss": 0.1584, "step": 1470 }, { "epoch": 0.20497456977635337, "grad_norm": 0.4531896412372589, "learning_rate": 9.498837839822936e-06, "loss": 0.157, "step": 1471 }, { "epoch": 0.20511391346756774, "grad_norm": 0.6550073623657227, "learning_rate": 9.497808911618385e-06, "loss": 0.1829, "step": 1472 }, { "epoch": 0.20525325715878215, "grad_norm": 0.2006564736366272, "learning_rate": 9.496778984103381e-06, "loss": 0.1692, "step": 1473 }, { "epoch": 0.20539260084999653, "grad_norm": 0.5355167984962463, "learning_rate": 9.49574805750675e-06, "loss": 0.1733, "step": 1474 }, { "epoch": 0.2055319445412109, "grad_norm": 0.33659470081329346, "learning_rate": 9.49471613205754e-06, "loss": 0.1591, "step": 1475 }, { "epoch": 0.20567128823242528, "grad_norm": 0.3988312780857086, "learning_rate": 9.493683207985022e-06, "loss": 0.1544, "step": 1476 }, { "epoch": 0.20581063192363966, "grad_norm": 0.21908509731292725, "learning_rate": 9.492649285518688e-06, "loss": 0.1667, "step": 1477 }, { "epoch": 0.20594997561485404, "grad_norm": 0.14477452635765076, "learning_rate": 9.49161436488825e-06, "loss": 0.1192, "step": 1478 }, { "epoch": 0.2060893193060684, "grad_norm": 0.21859437227249146, "learning_rate": 9.490578446323646e-06, "loss": 0.1426, "step": 1479 }, { "epoch": 0.2062286629972828, "grad_norm": 0.24444660544395447, "learning_rate": 9.489541530055034e-06, "loss": 0.1411, "step": 1480 }, { "epoch": 0.20636800668849717, "grad_norm": 0.19723421335220337, "learning_rate": 9.488503616312793e-06, "loss": 0.1553, "step": 1481 }, { "epoch": 0.20650735037971155, "grad_norm": 0.19770447909832, "learning_rate": 9.48746470532752e-06, "loss": 0.1688, "step": 1482 }, { "epoch": 0.20664669407092595, "grad_norm": 0.22088666260242462, "learning_rate": 9.48642479733004e-06, "loss": 0.1475, "step": 1483 }, { "epoch": 0.20678603776214033, "grad_norm": 0.16527138650417328, "learning_rate": 9.4853838925514e-06, "loss": 0.143, "step": 1484 }, { "epoch": 0.2069253814533547, "grad_norm": 0.16749946773052216, "learning_rate": 9.484341991222858e-06, "loss": 0.1563, "step": 1485 }, { "epoch": 0.20706472514456908, "grad_norm": 0.17866715788841248, "learning_rate": 9.483299093575909e-06, "loss": 0.1349, "step": 1486 }, { "epoch": 0.20720406883578346, "grad_norm": 0.1692681461572647, "learning_rate": 9.482255199842254e-06, "loss": 0.1328, "step": 1487 }, { "epoch": 0.20734341252699784, "grad_norm": 0.23583845794200897, "learning_rate": 9.481210310253826e-06, "loss": 0.1678, "step": 1488 }, { "epoch": 0.20748275621821222, "grad_norm": 0.3391638994216919, "learning_rate": 9.480164425042775e-06, "loss": 0.1465, "step": 1489 }, { "epoch": 0.2076220999094266, "grad_norm": 0.16837061941623688, "learning_rate": 9.479117544441472e-06, "loss": 0.1055, "step": 1490 }, { "epoch": 0.20776144360064097, "grad_norm": 0.4029042422771454, "learning_rate": 9.47806966868251e-06, "loss": 0.1448, "step": 1491 }, { "epoch": 0.20790078729185535, "grad_norm": 0.26380202174186707, "learning_rate": 9.477020797998707e-06, "loss": 0.113, "step": 1492 }, { "epoch": 0.20804013098306975, "grad_norm": 0.11222163587808609, "learning_rate": 9.47597093262309e-06, "loss": 0.1006, "step": 1493 }, { "epoch": 0.20817947467428413, "grad_norm": 0.2650095522403717, "learning_rate": 9.474920072788925e-06, "loss": 0.1719, "step": 1494 }, { "epoch": 0.2083188183654985, "grad_norm": 0.10965844243764877, "learning_rate": 9.47386821872968e-06, "loss": 0.1005, "step": 1495 }, { "epoch": 0.2084581620567129, "grad_norm": 0.2725619673728943, "learning_rate": 9.47281537067906e-06, "loss": 0.1213, "step": 1496 }, { "epoch": 0.20859750574792726, "grad_norm": 0.327112078666687, "learning_rate": 9.471761528870978e-06, "loss": 0.1533, "step": 1497 }, { "epoch": 0.20873684943914164, "grad_norm": 0.18362164497375488, "learning_rate": 9.470706693539578e-06, "loss": 0.1328, "step": 1498 }, { "epoch": 0.20887619313035602, "grad_norm": 0.2171531617641449, "learning_rate": 9.469650864919217e-06, "loss": 0.1589, "step": 1499 }, { "epoch": 0.2090155368215704, "grad_norm": 0.22346390783786774, "learning_rate": 9.46859404324448e-06, "loss": 0.1574, "step": 1500 }, { "epoch": 0.20915488051278477, "grad_norm": 0.2036323994398117, "learning_rate": 9.467536228750166e-06, "loss": 0.1316, "step": 1501 }, { "epoch": 0.20929422420399915, "grad_norm": 0.29070720076560974, "learning_rate": 9.466477421671296e-06, "loss": 0.1568, "step": 1502 }, { "epoch": 0.20943356789521356, "grad_norm": 0.1821918487548828, "learning_rate": 9.465417622243116e-06, "loss": 0.1386, "step": 1503 }, { "epoch": 0.20957291158642793, "grad_norm": 0.2802189886569977, "learning_rate": 9.464356830701086e-06, "loss": 0.1381, "step": 1504 }, { "epoch": 0.2097122552776423, "grad_norm": 0.3650803565979004, "learning_rate": 9.463295047280892e-06, "loss": 0.162, "step": 1505 }, { "epoch": 0.2098515989688567, "grad_norm": 0.3979160487651825, "learning_rate": 9.462232272218437e-06, "loss": 0.1636, "step": 1506 }, { "epoch": 0.20999094266007107, "grad_norm": 0.20374272763729095, "learning_rate": 9.461168505749847e-06, "loss": 0.1702, "step": 1507 }, { "epoch": 0.21013028635128544, "grad_norm": 0.36264514923095703, "learning_rate": 9.460103748111462e-06, "loss": 0.1412, "step": 1508 }, { "epoch": 0.21026963004249982, "grad_norm": 0.33607763051986694, "learning_rate": 9.459037999539852e-06, "loss": 0.1495, "step": 1509 }, { "epoch": 0.2104089737337142, "grad_norm": 0.3551587760448456, "learning_rate": 9.4579712602718e-06, "loss": 0.2054, "step": 1510 }, { "epoch": 0.21054831742492858, "grad_norm": 0.17293336987495422, "learning_rate": 9.456903530544312e-06, "loss": 0.1242, "step": 1511 }, { "epoch": 0.21068766111614295, "grad_norm": 0.1891024261713028, "learning_rate": 9.455834810594611e-06, "loss": 0.1765, "step": 1512 }, { "epoch": 0.21082700480735736, "grad_norm": 0.18511657416820526, "learning_rate": 9.454765100660144e-06, "loss": 0.132, "step": 1513 }, { "epoch": 0.21096634849857174, "grad_norm": 0.14413703978061676, "learning_rate": 9.453694400978576e-06, "loss": 0.124, "step": 1514 }, { "epoch": 0.21110569218978611, "grad_norm": 0.2463129460811615, "learning_rate": 9.452622711787793e-06, "loss": 0.1515, "step": 1515 }, { "epoch": 0.2112450358810005, "grad_norm": 0.33308762311935425, "learning_rate": 9.451550033325896e-06, "loss": 0.1998, "step": 1516 }, { "epoch": 0.21138437957221487, "grad_norm": 0.5104802250862122, "learning_rate": 9.450476365831214e-06, "loss": 0.1977, "step": 1517 }, { "epoch": 0.21152372326342925, "grad_norm": 0.23125188052654266, "learning_rate": 9.449401709542289e-06, "loss": 0.129, "step": 1518 }, { "epoch": 0.21166306695464362, "grad_norm": 0.34015634655952454, "learning_rate": 9.448326064697886e-06, "loss": 0.1412, "step": 1519 }, { "epoch": 0.211802410645858, "grad_norm": 0.19615493714809418, "learning_rate": 9.447249431536987e-06, "loss": 0.115, "step": 1520 }, { "epoch": 0.21194175433707238, "grad_norm": 0.22072814404964447, "learning_rate": 9.446171810298799e-06, "loss": 0.1545, "step": 1521 }, { "epoch": 0.21208109802828676, "grad_norm": 0.13405591249465942, "learning_rate": 9.44509320122274e-06, "loss": 0.1321, "step": 1522 }, { "epoch": 0.21222044171950116, "grad_norm": 0.2514348328113556, "learning_rate": 9.444013604548457e-06, "loss": 0.1179, "step": 1523 }, { "epoch": 0.21235978541071554, "grad_norm": 0.17241235077381134, "learning_rate": 9.442933020515808e-06, "loss": 0.1223, "step": 1524 }, { "epoch": 0.21249912910192992, "grad_norm": 0.2908344864845276, "learning_rate": 9.441851449364878e-06, "loss": 0.1772, "step": 1525 }, { "epoch": 0.2126384727931443, "grad_norm": 0.19237381219863892, "learning_rate": 9.440768891335962e-06, "loss": 0.1368, "step": 1526 }, { "epoch": 0.21277781648435867, "grad_norm": 0.43872639536857605, "learning_rate": 9.439685346669585e-06, "loss": 0.167, "step": 1527 }, { "epoch": 0.21291716017557305, "grad_norm": 0.30191123485565186, "learning_rate": 9.438600815606483e-06, "loss": 0.1283, "step": 1528 }, { "epoch": 0.21305650386678743, "grad_norm": 0.30566367506980896, "learning_rate": 9.437515298387617e-06, "loss": 0.1199, "step": 1529 }, { "epoch": 0.2131958475580018, "grad_norm": 0.4329836368560791, "learning_rate": 9.436428795254159e-06, "loss": 0.1756, "step": 1530 }, { "epoch": 0.21333519124921618, "grad_norm": 0.15857593715190887, "learning_rate": 9.43534130644751e-06, "loss": 0.1355, "step": 1531 }, { "epoch": 0.21347453494043056, "grad_norm": 0.32947805523872375, "learning_rate": 9.43425283220928e-06, "loss": 0.18, "step": 1532 }, { "epoch": 0.21361387863164497, "grad_norm": 0.31074896454811096, "learning_rate": 9.43316337278131e-06, "loss": 0.1633, "step": 1533 }, { "epoch": 0.21375322232285934, "grad_norm": 0.2643822729587555, "learning_rate": 9.432072928405648e-06, "loss": 0.1359, "step": 1534 }, { "epoch": 0.21389256601407372, "grad_norm": 0.19306248426437378, "learning_rate": 9.430981499324567e-06, "loss": 0.1452, "step": 1535 }, { "epoch": 0.2140319097052881, "grad_norm": 0.17079511284828186, "learning_rate": 9.429889085780559e-06, "loss": 0.1236, "step": 1536 }, { "epoch": 0.21417125339650248, "grad_norm": 0.22778141498565674, "learning_rate": 9.42879568801633e-06, "loss": 0.131, "step": 1537 }, { "epoch": 0.21431059708771685, "grad_norm": 0.3648608326911926, "learning_rate": 9.427701306274812e-06, "loss": 0.152, "step": 1538 }, { "epoch": 0.21444994077893123, "grad_norm": 0.16650404036045074, "learning_rate": 9.42660594079915e-06, "loss": 0.1264, "step": 1539 }, { "epoch": 0.2145892844701456, "grad_norm": 0.14107099175453186, "learning_rate": 9.42550959183271e-06, "loss": 0.1248, "step": 1540 }, { "epoch": 0.21472862816135999, "grad_norm": 0.2715483009815216, "learning_rate": 9.424412259619073e-06, "loss": 0.1356, "step": 1541 }, { "epoch": 0.21486797185257436, "grad_norm": 0.3826341927051544, "learning_rate": 9.423313944402043e-06, "loss": 0.1553, "step": 1542 }, { "epoch": 0.21500731554378877, "grad_norm": 0.257630854845047, "learning_rate": 9.422214646425641e-06, "loss": 0.1423, "step": 1543 }, { "epoch": 0.21514665923500315, "grad_norm": 0.17225614190101624, "learning_rate": 9.421114365934105e-06, "loss": 0.1659, "step": 1544 }, { "epoch": 0.21528600292621752, "grad_norm": 0.2823266386985779, "learning_rate": 9.420013103171893e-06, "loss": 0.1484, "step": 1545 }, { "epoch": 0.2154253466174319, "grad_norm": 0.6427784562110901, "learning_rate": 9.418910858383681e-06, "loss": 0.1837, "step": 1546 }, { "epoch": 0.21556469030864628, "grad_norm": 0.1958610713481903, "learning_rate": 9.41780763181436e-06, "loss": 0.1254, "step": 1547 }, { "epoch": 0.21570403399986066, "grad_norm": 0.1951184868812561, "learning_rate": 9.416703423709044e-06, "loss": 0.1508, "step": 1548 }, { "epoch": 0.21584337769107503, "grad_norm": 0.3488282561302185, "learning_rate": 9.415598234313064e-06, "loss": 0.1364, "step": 1549 }, { "epoch": 0.2159827213822894, "grad_norm": 0.4070911705493927, "learning_rate": 9.414492063871964e-06, "loss": 0.1423, "step": 1550 }, { "epoch": 0.2161220650735038, "grad_norm": 0.42547714710235596, "learning_rate": 9.413384912631512e-06, "loss": 0.1682, "step": 1551 }, { "epoch": 0.21626140876471817, "grad_norm": 0.24832481145858765, "learning_rate": 9.412276780837692e-06, "loss": 0.178, "step": 1552 }, { "epoch": 0.21640075245593257, "grad_norm": 0.4923345148563385, "learning_rate": 9.411167668736707e-06, "loss": 0.1382, "step": 1553 }, { "epoch": 0.21654009614714695, "grad_norm": 0.16695646941661835, "learning_rate": 9.410057576574974e-06, "loss": 0.1298, "step": 1554 }, { "epoch": 0.21667943983836133, "grad_norm": 0.30572545528411865, "learning_rate": 9.408946504599131e-06, "loss": 0.1661, "step": 1555 }, { "epoch": 0.2168187835295757, "grad_norm": 0.2896764874458313, "learning_rate": 9.40783445305603e-06, "loss": 0.1572, "step": 1556 }, { "epoch": 0.21695812722079008, "grad_norm": 0.5667753219604492, "learning_rate": 9.406721422192748e-06, "loss": 0.1764, "step": 1557 }, { "epoch": 0.21709747091200446, "grad_norm": 0.22204653918743134, "learning_rate": 9.405607412256573e-06, "loss": 0.1277, "step": 1558 }, { "epoch": 0.21723681460321884, "grad_norm": 0.20583584904670715, "learning_rate": 9.404492423495012e-06, "loss": 0.1456, "step": 1559 }, { "epoch": 0.2173761582944332, "grad_norm": 0.5481559038162231, "learning_rate": 9.403376456155792e-06, "loss": 0.1541, "step": 1560 }, { "epoch": 0.2175155019856476, "grad_norm": 0.46231716871261597, "learning_rate": 9.402259510486855e-06, "loss": 0.1797, "step": 1561 }, { "epoch": 0.21765484567686197, "grad_norm": 0.28906574845314026, "learning_rate": 9.401141586736359e-06, "loss": 0.142, "step": 1562 }, { "epoch": 0.21779418936807637, "grad_norm": 0.5207429528236389, "learning_rate": 9.400022685152683e-06, "loss": 0.2374, "step": 1563 }, { "epoch": 0.21793353305929075, "grad_norm": 0.39743030071258545, "learning_rate": 9.398902805984417e-06, "loss": 0.185, "step": 1564 }, { "epoch": 0.21807287675050513, "grad_norm": 0.18674638867378235, "learning_rate": 9.397781949480381e-06, "loss": 0.1479, "step": 1565 }, { "epoch": 0.2182122204417195, "grad_norm": 0.35186460614204407, "learning_rate": 9.396660115889596e-06, "loss": 0.1478, "step": 1566 }, { "epoch": 0.21835156413293388, "grad_norm": 0.6052587628364563, "learning_rate": 9.395537305461312e-06, "loss": 0.1595, "step": 1567 }, { "epoch": 0.21849090782414826, "grad_norm": 0.1978813111782074, "learning_rate": 9.394413518444989e-06, "loss": 0.1201, "step": 1568 }, { "epoch": 0.21863025151536264, "grad_norm": 0.2662149965763092, "learning_rate": 9.39328875509031e-06, "loss": 0.1724, "step": 1569 }, { "epoch": 0.21876959520657702, "grad_norm": 0.31039106845855713, "learning_rate": 9.39216301564717e-06, "loss": 0.1645, "step": 1570 }, { "epoch": 0.2189089388977914, "grad_norm": 0.23188607394695282, "learning_rate": 9.391036300365681e-06, "loss": 0.1202, "step": 1571 }, { "epoch": 0.21904828258900577, "grad_norm": 0.15819214284420013, "learning_rate": 9.389908609496177e-06, "loss": 0.1604, "step": 1572 }, { "epoch": 0.21918762628022015, "grad_norm": 0.21548080444335938, "learning_rate": 9.388779943289204e-06, "loss": 0.1254, "step": 1573 }, { "epoch": 0.21932696997143455, "grad_norm": 0.33895808458328247, "learning_rate": 9.387650301995523e-06, "loss": 0.1633, "step": 1574 }, { "epoch": 0.21946631366264893, "grad_norm": 0.3601374328136444, "learning_rate": 9.386519685866117e-06, "loss": 0.1672, "step": 1575 }, { "epoch": 0.2196056573538633, "grad_norm": 0.18845413625240326, "learning_rate": 9.385388095152184e-06, "loss": 0.1403, "step": 1576 }, { "epoch": 0.2197450010450777, "grad_norm": 0.6698979735374451, "learning_rate": 9.384255530105136e-06, "loss": 0.1801, "step": 1577 }, { "epoch": 0.21988434473629206, "grad_norm": 0.23389247059822083, "learning_rate": 9.383121990976602e-06, "loss": 0.1499, "step": 1578 }, { "epoch": 0.22002368842750644, "grad_norm": 0.26225510239601135, "learning_rate": 9.381987478018431e-06, "loss": 0.1304, "step": 1579 }, { "epoch": 0.22016303211872082, "grad_norm": 0.30328434705734253, "learning_rate": 9.380851991482685e-06, "loss": 0.172, "step": 1580 }, { "epoch": 0.2203023758099352, "grad_norm": 0.5947482585906982, "learning_rate": 9.379715531621642e-06, "loss": 0.2004, "step": 1581 }, { "epoch": 0.22044171950114957, "grad_norm": 0.39184626936912537, "learning_rate": 9.3785780986878e-06, "loss": 0.1831, "step": 1582 }, { "epoch": 0.22058106319236395, "grad_norm": 0.2459092140197754, "learning_rate": 9.377439692933869e-06, "loss": 0.1173, "step": 1583 }, { "epoch": 0.22072040688357836, "grad_norm": 0.36770766973495483, "learning_rate": 9.376300314612775e-06, "loss": 0.1124, "step": 1584 }, { "epoch": 0.22085975057479273, "grad_norm": 0.2882489860057831, "learning_rate": 9.375159963977668e-06, "loss": 0.1708, "step": 1585 }, { "epoch": 0.2209990942660071, "grad_norm": 0.33858829736709595, "learning_rate": 9.374018641281898e-06, "loss": 0.1475, "step": 1586 }, { "epoch": 0.2211384379572215, "grad_norm": 0.2830744981765747, "learning_rate": 9.37287634677905e-06, "loss": 0.124, "step": 1587 }, { "epoch": 0.22127778164843587, "grad_norm": 0.11457280814647675, "learning_rate": 9.371733080722911e-06, "loss": 0.1155, "step": 1588 }, { "epoch": 0.22141712533965024, "grad_norm": 0.230549156665802, "learning_rate": 9.37058884336749e-06, "loss": 0.1431, "step": 1589 }, { "epoch": 0.22155646903086462, "grad_norm": 0.19483058154582977, "learning_rate": 9.36944363496701e-06, "loss": 0.1321, "step": 1590 }, { "epoch": 0.221695812722079, "grad_norm": 0.7218474745750427, "learning_rate": 9.368297455775911e-06, "loss": 0.18, "step": 1591 }, { "epoch": 0.22183515641329338, "grad_norm": 0.11094489693641663, "learning_rate": 9.367150306048847e-06, "loss": 0.1023, "step": 1592 }, { "epoch": 0.22197450010450775, "grad_norm": 0.13904337584972382, "learning_rate": 9.36600218604069e-06, "loss": 0.1151, "step": 1593 }, { "epoch": 0.22211384379572216, "grad_norm": 0.25142887234687805, "learning_rate": 9.364853096006523e-06, "loss": 0.1462, "step": 1594 }, { "epoch": 0.22225318748693654, "grad_norm": 0.3566407859325409, "learning_rate": 9.36370303620165e-06, "loss": 0.179, "step": 1595 }, { "epoch": 0.22239253117815092, "grad_norm": 0.20794977247714996, "learning_rate": 9.362552006881588e-06, "loss": 0.123, "step": 1596 }, { "epoch": 0.2225318748693653, "grad_norm": 0.2809351086616516, "learning_rate": 9.361400008302068e-06, "loss": 0.1655, "step": 1597 }, { "epoch": 0.22267121856057967, "grad_norm": 0.21655228734016418, "learning_rate": 9.36024704071904e-06, "loss": 0.1192, "step": 1598 }, { "epoch": 0.22281056225179405, "grad_norm": 0.3159007132053375, "learning_rate": 9.359093104388663e-06, "loss": 0.1477, "step": 1599 }, { "epoch": 0.22294990594300843, "grad_norm": 0.238515704870224, "learning_rate": 9.35793819956732e-06, "loss": 0.1488, "step": 1600 }, { "epoch": 0.2230892496342228, "grad_norm": 0.3586064577102661, "learning_rate": 9.356782326511602e-06, "loss": 0.1956, "step": 1601 }, { "epoch": 0.22322859332543718, "grad_norm": 0.2787145674228668, "learning_rate": 9.355625485478319e-06, "loss": 0.2055, "step": 1602 }, { "epoch": 0.22336793701665156, "grad_norm": 0.4257158041000366, "learning_rate": 9.354467676724491e-06, "loss": 0.167, "step": 1603 }, { "epoch": 0.22350728070786596, "grad_norm": 0.4240961968898773, "learning_rate": 9.353308900507361e-06, "loss": 0.1457, "step": 1604 }, { "epoch": 0.22364662439908034, "grad_norm": 0.4017748534679413, "learning_rate": 9.352149157084383e-06, "loss": 0.1922, "step": 1605 }, { "epoch": 0.22378596809029472, "grad_norm": 0.5171815156936646, "learning_rate": 9.350988446713221e-06, "loss": 0.1705, "step": 1606 }, { "epoch": 0.2239253117815091, "grad_norm": 0.18125654757022858, "learning_rate": 9.349826769651762e-06, "loss": 0.1397, "step": 1607 }, { "epoch": 0.22406465547272347, "grad_norm": 0.3637584149837494, "learning_rate": 9.348664126158103e-06, "loss": 0.1761, "step": 1608 }, { "epoch": 0.22420399916393785, "grad_norm": 0.1931033730506897, "learning_rate": 9.347500516490555e-06, "loss": 0.1145, "step": 1609 }, { "epoch": 0.22434334285515223, "grad_norm": 0.28988412022590637, "learning_rate": 9.346335940907648e-06, "loss": 0.17, "step": 1610 }, { "epoch": 0.2244826865463666, "grad_norm": 0.16759128868579865, "learning_rate": 9.345170399668127e-06, "loss": 0.1408, "step": 1611 }, { "epoch": 0.22462203023758098, "grad_norm": 0.11925852298736572, "learning_rate": 9.344003893030942e-06, "loss": 0.093, "step": 1612 }, { "epoch": 0.22476137392879536, "grad_norm": 0.18251986801624298, "learning_rate": 9.342836421255268e-06, "loss": 0.1278, "step": 1613 }, { "epoch": 0.22490071762000977, "grad_norm": 0.33188289403915405, "learning_rate": 9.341667984600489e-06, "loss": 0.1797, "step": 1614 }, { "epoch": 0.22504006131122414, "grad_norm": 0.3549938499927521, "learning_rate": 9.340498583326208e-06, "loss": 0.1198, "step": 1615 }, { "epoch": 0.22517940500243852, "grad_norm": 0.09310402721166611, "learning_rate": 9.339328217692233e-06, "loss": 0.0968, "step": 1616 }, { "epoch": 0.2253187486936529, "grad_norm": 0.44291073083877563, "learning_rate": 9.3381568879586e-06, "loss": 0.1604, "step": 1617 }, { "epoch": 0.22545809238486728, "grad_norm": 0.3384919762611389, "learning_rate": 9.336984594385547e-06, "loss": 0.1509, "step": 1618 }, { "epoch": 0.22559743607608165, "grad_norm": 0.18597844243049622, "learning_rate": 9.335811337233533e-06, "loss": 0.1243, "step": 1619 }, { "epoch": 0.22573677976729603, "grad_norm": 0.24832844734191895, "learning_rate": 9.334637116763227e-06, "loss": 0.1301, "step": 1620 }, { "epoch": 0.2258761234585104, "grad_norm": 0.14898347854614258, "learning_rate": 9.333461933235517e-06, "loss": 0.1306, "step": 1621 }, { "epoch": 0.22601546714972479, "grad_norm": 0.17517875134944916, "learning_rate": 9.332285786911498e-06, "loss": 0.1364, "step": 1622 }, { "epoch": 0.22615481084093916, "grad_norm": 0.3520864248275757, "learning_rate": 9.331108678052485e-06, "loss": 0.1293, "step": 1623 }, { "epoch": 0.22629415453215357, "grad_norm": 0.28960761427879333, "learning_rate": 9.329930606920005e-06, "loss": 0.1627, "step": 1624 }, { "epoch": 0.22643349822336795, "grad_norm": 0.28244921565055847, "learning_rate": 9.3287515737758e-06, "loss": 0.1419, "step": 1625 }, { "epoch": 0.22657284191458232, "grad_norm": 0.14398279786109924, "learning_rate": 9.32757157888182e-06, "loss": 0.1007, "step": 1626 }, { "epoch": 0.2267121856057967, "grad_norm": 0.26613649725914, "learning_rate": 9.326390622500236e-06, "loss": 0.1324, "step": 1627 }, { "epoch": 0.22685152929701108, "grad_norm": 0.2679789662361145, "learning_rate": 9.32520870489343e-06, "loss": 0.126, "step": 1628 }, { "epoch": 0.22699087298822546, "grad_norm": 0.3129565417766571, "learning_rate": 9.324025826323995e-06, "loss": 0.1542, "step": 1629 }, { "epoch": 0.22713021667943983, "grad_norm": 0.20204375684261322, "learning_rate": 9.322841987054741e-06, "loss": 0.1096, "step": 1630 }, { "epoch": 0.2272695603706542, "grad_norm": 0.22915467619895935, "learning_rate": 9.321657187348689e-06, "loss": 0.1127, "step": 1631 }, { "epoch": 0.2274089040618686, "grad_norm": 0.5177916884422302, "learning_rate": 9.320471427469076e-06, "loss": 0.1439, "step": 1632 }, { "epoch": 0.22754824775308297, "grad_norm": 0.36612990498542786, "learning_rate": 9.319284707679348e-06, "loss": 0.1477, "step": 1633 }, { "epoch": 0.22768759144429737, "grad_norm": 0.24670910835266113, "learning_rate": 9.31809702824317e-06, "loss": 0.2133, "step": 1634 }, { "epoch": 0.22782693513551175, "grad_norm": 0.4615837633609772, "learning_rate": 9.316908389424416e-06, "loss": 0.155, "step": 1635 }, { "epoch": 0.22796627882672613, "grad_norm": 0.5710688233375549, "learning_rate": 9.315718791487175e-06, "loss": 0.2129, "step": 1636 }, { "epoch": 0.2281056225179405, "grad_norm": 0.45960044860839844, "learning_rate": 9.314528234695747e-06, "loss": 0.1301, "step": 1637 }, { "epoch": 0.22824496620915488, "grad_norm": 1.2845940589904785, "learning_rate": 9.31333671931465e-06, "loss": 0.176, "step": 1638 }, { "epoch": 0.22838430990036926, "grad_norm": 0.37350019812583923, "learning_rate": 9.312144245608608e-06, "loss": 0.2036, "step": 1639 }, { "epoch": 0.22852365359158364, "grad_norm": 0.380226731300354, "learning_rate": 9.31095081384256e-06, "loss": 0.1781, "step": 1640 }, { "epoch": 0.22866299728279801, "grad_norm": 0.27346253395080566, "learning_rate": 9.309756424281664e-06, "loss": 0.1346, "step": 1641 }, { "epoch": 0.2288023409740124, "grad_norm": 0.18419818580150604, "learning_rate": 9.308561077191284e-06, "loss": 0.1591, "step": 1642 }, { "epoch": 0.22894168466522677, "grad_norm": 0.2408846765756607, "learning_rate": 9.307364772837e-06, "loss": 0.1151, "step": 1643 }, { "epoch": 0.22908102835644117, "grad_norm": 0.25749310851097107, "learning_rate": 9.306167511484601e-06, "loss": 0.142, "step": 1644 }, { "epoch": 0.22922037204765555, "grad_norm": 0.24721163511276245, "learning_rate": 9.304969293400092e-06, "loss": 0.1376, "step": 1645 }, { "epoch": 0.22935971573886993, "grad_norm": 0.46639570593833923, "learning_rate": 9.303770118849692e-06, "loss": 0.1883, "step": 1646 }, { "epoch": 0.2294990594300843, "grad_norm": 0.29740551114082336, "learning_rate": 9.302569988099825e-06, "loss": 0.144, "step": 1647 }, { "epoch": 0.22963840312129868, "grad_norm": 0.34575924277305603, "learning_rate": 9.301368901417138e-06, "loss": 0.1735, "step": 1648 }, { "epoch": 0.22977774681251306, "grad_norm": 0.4573255181312561, "learning_rate": 9.300166859068482e-06, "loss": 0.1519, "step": 1649 }, { "epoch": 0.22991709050372744, "grad_norm": 0.29382383823394775, "learning_rate": 9.298963861320927e-06, "loss": 0.1404, "step": 1650 }, { "epoch": 0.23005643419494182, "grad_norm": 0.2081262618303299, "learning_rate": 9.297759908441747e-06, "loss": 0.1333, "step": 1651 }, { "epoch": 0.2301957778861562, "grad_norm": 0.2974257469177246, "learning_rate": 9.296555000698435e-06, "loss": 0.1357, "step": 1652 }, { "epoch": 0.23033512157737057, "grad_norm": 0.17753711342811584, "learning_rate": 9.295349138358693e-06, "loss": 0.1349, "step": 1653 }, { "epoch": 0.23047446526858498, "grad_norm": 0.21708141267299652, "learning_rate": 9.294142321690438e-06, "loss": 0.1357, "step": 1654 }, { "epoch": 0.23061380895979935, "grad_norm": 0.17476092278957367, "learning_rate": 9.292934550961796e-06, "loss": 0.1317, "step": 1655 }, { "epoch": 0.23075315265101373, "grad_norm": 0.3297169804573059, "learning_rate": 9.291725826441107e-06, "loss": 0.1947, "step": 1656 }, { "epoch": 0.2308924963422281, "grad_norm": 0.3532029092311859, "learning_rate": 9.29051614839692e-06, "loss": 0.1407, "step": 1657 }, { "epoch": 0.2310318400334425, "grad_norm": 0.2226722240447998, "learning_rate": 9.289305517098e-06, "loss": 0.1521, "step": 1658 }, { "epoch": 0.23117118372465686, "grad_norm": 0.16249965131282806, "learning_rate": 9.28809393281332e-06, "loss": 0.1226, "step": 1659 }, { "epoch": 0.23131052741587124, "grad_norm": 0.348035991191864, "learning_rate": 9.286881395812066e-06, "loss": 0.1991, "step": 1660 }, { "epoch": 0.23144987110708562, "grad_norm": 0.22970376908779144, "learning_rate": 9.285667906363637e-06, "loss": 0.1508, "step": 1661 }, { "epoch": 0.2315892147983, "grad_norm": 0.1610359251499176, "learning_rate": 9.284453464737644e-06, "loss": 0.1235, "step": 1662 }, { "epoch": 0.23172855848951437, "grad_norm": 0.24537727236747742, "learning_rate": 9.283238071203907e-06, "loss": 0.143, "step": 1663 }, { "epoch": 0.23186790218072878, "grad_norm": 0.4211762547492981, "learning_rate": 9.282021726032457e-06, "loss": 0.1448, "step": 1664 }, { "epoch": 0.23200724587194316, "grad_norm": 0.18069428205490112, "learning_rate": 9.280804429493542e-06, "loss": 0.1484, "step": 1665 }, { "epoch": 0.23214658956315753, "grad_norm": 0.23616167902946472, "learning_rate": 9.279586181857613e-06, "loss": 0.145, "step": 1666 }, { "epoch": 0.2322859332543719, "grad_norm": 0.24091148376464844, "learning_rate": 9.278366983395341e-06, "loss": 0.1144, "step": 1667 }, { "epoch": 0.2324252769455863, "grad_norm": 0.5719103217124939, "learning_rate": 9.277146834377601e-06, "loss": 0.1818, "step": 1668 }, { "epoch": 0.23256462063680067, "grad_norm": 0.2997611463069916, "learning_rate": 9.275925735075484e-06, "loss": 0.1295, "step": 1669 }, { "epoch": 0.23270396432801504, "grad_norm": 0.12433838844299316, "learning_rate": 9.274703685760287e-06, "loss": 0.092, "step": 1670 }, { "epoch": 0.23284330801922942, "grad_norm": 0.1993897557258606, "learning_rate": 9.273480686703526e-06, "loss": 0.1445, "step": 1671 }, { "epoch": 0.2329826517104438, "grad_norm": 0.2401997447013855, "learning_rate": 9.272256738176924e-06, "loss": 0.1087, "step": 1672 }, { "epoch": 0.23312199540165818, "grad_norm": 0.7541149258613586, "learning_rate": 9.271031840452409e-06, "loss": 0.221, "step": 1673 }, { "epoch": 0.23326133909287258, "grad_norm": 0.203342467546463, "learning_rate": 9.26980599380213e-06, "loss": 0.1297, "step": 1674 }, { "epoch": 0.23340068278408696, "grad_norm": 0.22818300127983093, "learning_rate": 9.268579198498438e-06, "loss": 0.1704, "step": 1675 }, { "epoch": 0.23354002647530134, "grad_norm": 0.19734053313732147, "learning_rate": 9.267351454813904e-06, "loss": 0.1458, "step": 1676 }, { "epoch": 0.23367937016651572, "grad_norm": 0.11494249850511551, "learning_rate": 9.266122763021302e-06, "loss": 0.1358, "step": 1677 }, { "epoch": 0.2338187138577301, "grad_norm": 0.3707859218120575, "learning_rate": 9.264893123393618e-06, "loss": 0.1658, "step": 1678 }, { "epoch": 0.23395805754894447, "grad_norm": 0.13236398994922638, "learning_rate": 9.26366253620405e-06, "loss": 0.1441, "step": 1679 }, { "epoch": 0.23409740124015885, "grad_norm": 0.3211853504180908, "learning_rate": 9.26243100172601e-06, "loss": 0.1577, "step": 1680 }, { "epoch": 0.23423674493137323, "grad_norm": 0.18704359233379364, "learning_rate": 9.261198520233113e-06, "loss": 0.1572, "step": 1681 }, { "epoch": 0.2343760886225876, "grad_norm": 0.17897410690784454, "learning_rate": 9.25996509199919e-06, "loss": 0.1374, "step": 1682 }, { "epoch": 0.23451543231380198, "grad_norm": 0.22488999366760254, "learning_rate": 9.258730717298281e-06, "loss": 0.1289, "step": 1683 }, { "epoch": 0.23465477600501639, "grad_norm": 0.23052118718624115, "learning_rate": 9.257495396404635e-06, "loss": 0.163, "step": 1684 }, { "epoch": 0.23479411969623076, "grad_norm": 0.24786268174648285, "learning_rate": 9.256259129592711e-06, "loss": 0.1374, "step": 1685 }, { "epoch": 0.23493346338744514, "grad_norm": 0.40667805075645447, "learning_rate": 9.255021917137181e-06, "loss": 0.1704, "step": 1686 }, { "epoch": 0.23507280707865952, "grad_norm": 0.2145306020975113, "learning_rate": 9.253783759312924e-06, "loss": 0.1675, "step": 1687 }, { "epoch": 0.2352121507698739, "grad_norm": 0.19605165719985962, "learning_rate": 9.252544656395033e-06, "loss": 0.1277, "step": 1688 }, { "epoch": 0.23535149446108827, "grad_norm": 0.5625879168510437, "learning_rate": 9.251304608658806e-06, "loss": 0.1572, "step": 1689 }, { "epoch": 0.23549083815230265, "grad_norm": 0.2602996230125427, "learning_rate": 9.250063616379754e-06, "loss": 0.1645, "step": 1690 }, { "epoch": 0.23563018184351703, "grad_norm": 0.5283105969429016, "learning_rate": 9.248821679833596e-06, "loss": 0.1605, "step": 1691 }, { "epoch": 0.2357695255347314, "grad_norm": 0.22253288328647614, "learning_rate": 9.247578799296263e-06, "loss": 0.1426, "step": 1692 }, { "epoch": 0.23590886922594578, "grad_norm": 0.2583664357662201, "learning_rate": 9.246334975043896e-06, "loss": 0.1491, "step": 1693 }, { "epoch": 0.2360482129171602, "grad_norm": 0.3853068947792053, "learning_rate": 9.245090207352842e-06, "loss": 0.1551, "step": 1694 }, { "epoch": 0.23618755660837457, "grad_norm": 0.2308436930179596, "learning_rate": 9.243844496499661e-06, "loss": 0.1391, "step": 1695 }, { "epoch": 0.23632690029958894, "grad_norm": 0.31323400139808655, "learning_rate": 9.242597842761123e-06, "loss": 0.1442, "step": 1696 }, { "epoch": 0.23646624399080332, "grad_norm": 0.331382155418396, "learning_rate": 9.241350246414203e-06, "loss": 0.1769, "step": 1697 }, { "epoch": 0.2366055876820177, "grad_norm": 0.24285316467285156, "learning_rate": 9.24010170773609e-06, "loss": 0.1456, "step": 1698 }, { "epoch": 0.23674493137323208, "grad_norm": 0.228899285197258, "learning_rate": 9.23885222700418e-06, "loss": 0.1735, "step": 1699 }, { "epoch": 0.23688427506444645, "grad_norm": 0.32293540239334106, "learning_rate": 9.237601804496081e-06, "loss": 0.1262, "step": 1700 }, { "epoch": 0.23702361875566083, "grad_norm": 0.2090945541858673, "learning_rate": 9.236350440489608e-06, "loss": 0.158, "step": 1701 }, { "epoch": 0.2371629624468752, "grad_norm": 0.14681343734264374, "learning_rate": 9.235098135262783e-06, "loss": 0.1302, "step": 1702 }, { "epoch": 0.23730230613808959, "grad_norm": 0.34705597162246704, "learning_rate": 9.233844889093842e-06, "loss": 0.1514, "step": 1703 }, { "epoch": 0.237441649829304, "grad_norm": 0.23209241032600403, "learning_rate": 9.232590702261227e-06, "loss": 0.1497, "step": 1704 }, { "epoch": 0.23758099352051837, "grad_norm": 0.17661409080028534, "learning_rate": 9.23133557504359e-06, "loss": 0.1595, "step": 1705 }, { "epoch": 0.23772033721173275, "grad_norm": 0.6432665586471558, "learning_rate": 9.23007950771979e-06, "loss": 0.1693, "step": 1706 }, { "epoch": 0.23785968090294712, "grad_norm": 0.22270525991916656, "learning_rate": 9.228822500568898e-06, "loss": 0.1641, "step": 1707 }, { "epoch": 0.2379990245941615, "grad_norm": 0.23657888174057007, "learning_rate": 9.227564553870192e-06, "loss": 0.1521, "step": 1708 }, { "epoch": 0.23813836828537588, "grad_norm": 0.22193600237369537, "learning_rate": 9.226305667903159e-06, "loss": 0.1682, "step": 1709 }, { "epoch": 0.23827771197659026, "grad_norm": 0.2622225284576416, "learning_rate": 9.225045842947496e-06, "loss": 0.1322, "step": 1710 }, { "epoch": 0.23841705566780463, "grad_norm": 0.2527066469192505, "learning_rate": 9.223785079283106e-06, "loss": 0.1149, "step": 1711 }, { "epoch": 0.238556399359019, "grad_norm": 0.2560746967792511, "learning_rate": 9.2225233771901e-06, "loss": 0.1367, "step": 1712 }, { "epoch": 0.2386957430502334, "grad_norm": 0.2676650285720825, "learning_rate": 9.221260736948803e-06, "loss": 0.1451, "step": 1713 }, { "epoch": 0.2388350867414478, "grad_norm": 0.18908043205738068, "learning_rate": 9.219997158839743e-06, "loss": 0.1432, "step": 1714 }, { "epoch": 0.23897443043266217, "grad_norm": 0.2997601628303528, "learning_rate": 9.21873264314366e-06, "loss": 0.1587, "step": 1715 }, { "epoch": 0.23911377412387655, "grad_norm": 0.15618987381458282, "learning_rate": 9.217467190141498e-06, "loss": 0.1017, "step": 1716 }, { "epoch": 0.23925311781509093, "grad_norm": 0.14642909169197083, "learning_rate": 9.216200800114412e-06, "loss": 0.1385, "step": 1717 }, { "epoch": 0.2393924615063053, "grad_norm": 0.29088032245635986, "learning_rate": 9.214933473343765e-06, "loss": 0.1521, "step": 1718 }, { "epoch": 0.23953180519751968, "grad_norm": 0.5289713740348816, "learning_rate": 9.213665210111131e-06, "loss": 0.1531, "step": 1719 }, { "epoch": 0.23967114888873406, "grad_norm": 0.3020109236240387, "learning_rate": 9.212396010698286e-06, "loss": 0.1667, "step": 1720 }, { "epoch": 0.23981049257994844, "grad_norm": 0.14413033425807953, "learning_rate": 9.211125875387217e-06, "loss": 0.1084, "step": 1721 }, { "epoch": 0.23994983627116281, "grad_norm": 0.12606243789196014, "learning_rate": 9.209854804460121e-06, "loss": 0.1144, "step": 1722 }, { "epoch": 0.2400891799623772, "grad_norm": 0.621715784072876, "learning_rate": 9.208582798199402e-06, "loss": 0.172, "step": 1723 }, { "epoch": 0.2402285236535916, "grad_norm": 0.3493179678916931, "learning_rate": 9.207309856887664e-06, "loss": 0.1567, "step": 1724 }, { "epoch": 0.24036786734480597, "grad_norm": 0.5076096057891846, "learning_rate": 9.206035980807734e-06, "loss": 0.1636, "step": 1725 }, { "epoch": 0.24050721103602035, "grad_norm": 0.1784847527742386, "learning_rate": 9.204761170242635e-06, "loss": 0.1363, "step": 1726 }, { "epoch": 0.24064655472723473, "grad_norm": 0.2529459595680237, "learning_rate": 9.203485425475598e-06, "loss": 0.134, "step": 1727 }, { "epoch": 0.2407858984184491, "grad_norm": 0.20645293593406677, "learning_rate": 9.202208746790069e-06, "loss": 0.1188, "step": 1728 }, { "epoch": 0.24092524210966348, "grad_norm": 0.2367623895406723, "learning_rate": 9.200931134469692e-06, "loss": 0.1587, "step": 1729 }, { "epoch": 0.24106458580087786, "grad_norm": 0.24439607560634613, "learning_rate": 9.199652588798327e-06, "loss": 0.1322, "step": 1730 }, { "epoch": 0.24120392949209224, "grad_norm": 0.19485366344451904, "learning_rate": 9.198373110060037e-06, "loss": 0.1318, "step": 1731 }, { "epoch": 0.24134327318330662, "grad_norm": 0.362411767244339, "learning_rate": 9.197092698539092e-06, "loss": 0.1674, "step": 1732 }, { "epoch": 0.241482616874521, "grad_norm": 0.22726905345916748, "learning_rate": 9.19581135451997e-06, "loss": 0.1648, "step": 1733 }, { "epoch": 0.2416219605657354, "grad_norm": 0.2768399715423584, "learning_rate": 9.194529078287358e-06, "loss": 0.1526, "step": 1734 }, { "epoch": 0.24176130425694978, "grad_norm": 0.3365817070007324, "learning_rate": 9.193245870126147e-06, "loss": 0.123, "step": 1735 }, { "epoch": 0.24190064794816415, "grad_norm": 0.19181637465953827, "learning_rate": 9.191961730321437e-06, "loss": 0.1352, "step": 1736 }, { "epoch": 0.24203999163937853, "grad_norm": 0.12637647986412048, "learning_rate": 9.190676659158535e-06, "loss": 0.1344, "step": 1737 }, { "epoch": 0.2421793353305929, "grad_norm": 0.12638087570667267, "learning_rate": 9.189390656922955e-06, "loss": 0.1171, "step": 1738 }, { "epoch": 0.2423186790218073, "grad_norm": 0.27888190746307373, "learning_rate": 9.188103723900414e-06, "loss": 0.1711, "step": 1739 }, { "epoch": 0.24245802271302166, "grad_norm": 0.2284240573644638, "learning_rate": 9.186815860376843e-06, "loss": 0.1366, "step": 1740 }, { "epoch": 0.24259736640423604, "grad_norm": 0.39330556988716125, "learning_rate": 9.185527066638375e-06, "loss": 0.1722, "step": 1741 }, { "epoch": 0.24273671009545042, "grad_norm": 0.32610973715782166, "learning_rate": 9.184237342971349e-06, "loss": 0.1239, "step": 1742 }, { "epoch": 0.2428760537866648, "grad_norm": 0.26762574911117554, "learning_rate": 9.182946689662314e-06, "loss": 0.1579, "step": 1743 }, { "epoch": 0.2430153974778792, "grad_norm": 0.39487385749816895, "learning_rate": 9.181655106998023e-06, "loss": 0.1875, "step": 1744 }, { "epoch": 0.24315474116909358, "grad_norm": 0.19924341142177582, "learning_rate": 9.180362595265435e-06, "loss": 0.1038, "step": 1745 }, { "epoch": 0.24329408486030796, "grad_norm": 0.4598081409931183, "learning_rate": 9.179069154751718e-06, "loss": 0.1617, "step": 1746 }, { "epoch": 0.24343342855152234, "grad_norm": 0.23876316845417023, "learning_rate": 9.177774785744245e-06, "loss": 0.1401, "step": 1747 }, { "epoch": 0.2435727722427367, "grad_norm": 0.20330235362052917, "learning_rate": 9.176479488530594e-06, "loss": 0.1395, "step": 1748 }, { "epoch": 0.2437121159339511, "grad_norm": 0.2580684721469879, "learning_rate": 9.175183263398553e-06, "loss": 0.1333, "step": 1749 }, { "epoch": 0.24385145962516547, "grad_norm": 0.406691312789917, "learning_rate": 9.17388611063611e-06, "loss": 0.1468, "step": 1750 }, { "epoch": 0.24399080331637985, "grad_norm": 0.6918083429336548, "learning_rate": 9.172588030531467e-06, "loss": 0.1773, "step": 1751 }, { "epoch": 0.24413014700759422, "grad_norm": 0.3015018701553345, "learning_rate": 9.171289023373022e-06, "loss": 0.1767, "step": 1752 }, { "epoch": 0.2442694906988086, "grad_norm": 0.3672984838485718, "learning_rate": 9.16998908944939e-06, "loss": 0.1608, "step": 1753 }, { "epoch": 0.244408834390023, "grad_norm": 0.19938796758651733, "learning_rate": 9.168688229049386e-06, "loss": 0.1261, "step": 1754 }, { "epoch": 0.24454817808123738, "grad_norm": 0.374238520860672, "learning_rate": 9.167386442462029e-06, "loss": 0.1451, "step": 1755 }, { "epoch": 0.24468752177245176, "grad_norm": 0.2284046709537506, "learning_rate": 9.166083729976547e-06, "loss": 0.141, "step": 1756 }, { "epoch": 0.24482686546366614, "grad_norm": 0.1707335263490677, "learning_rate": 9.164780091882374e-06, "loss": 0.1282, "step": 1757 }, { "epoch": 0.24496620915488052, "grad_norm": 0.3656597137451172, "learning_rate": 9.163475528469148e-06, "loss": 0.1315, "step": 1758 }, { "epoch": 0.2451055528460949, "grad_norm": 0.46711742877960205, "learning_rate": 9.162170040026714e-06, "loss": 0.1237, "step": 1759 }, { "epoch": 0.24524489653730927, "grad_norm": 0.29273995757102966, "learning_rate": 9.16086362684512e-06, "loss": 0.1682, "step": 1760 }, { "epoch": 0.24538424022852365, "grad_norm": 0.22405241429805756, "learning_rate": 9.159556289214623e-06, "loss": 0.1499, "step": 1761 }, { "epoch": 0.24552358391973803, "grad_norm": 0.1451958566904068, "learning_rate": 9.158248027425683e-06, "loss": 0.1429, "step": 1762 }, { "epoch": 0.2456629276109524, "grad_norm": 0.28121525049209595, "learning_rate": 9.156938841768965e-06, "loss": 0.1626, "step": 1763 }, { "epoch": 0.2458022713021668, "grad_norm": 0.24136769771575928, "learning_rate": 9.155628732535342e-06, "loss": 0.1256, "step": 1764 }, { "epoch": 0.24594161499338119, "grad_norm": 0.35462310910224915, "learning_rate": 9.15431770001589e-06, "loss": 0.1528, "step": 1765 }, { "epoch": 0.24608095868459556, "grad_norm": 0.17025144398212433, "learning_rate": 9.153005744501886e-06, "loss": 0.1497, "step": 1766 }, { "epoch": 0.24622030237580994, "grad_norm": 0.21823106706142426, "learning_rate": 9.151692866284824e-06, "loss": 0.1766, "step": 1767 }, { "epoch": 0.24635964606702432, "grad_norm": 0.23426289856433868, "learning_rate": 9.150379065656389e-06, "loss": 0.0984, "step": 1768 }, { "epoch": 0.2464989897582387, "grad_norm": 0.44498032331466675, "learning_rate": 9.149064342908482e-06, "loss": 0.1646, "step": 1769 }, { "epoch": 0.24663833344945307, "grad_norm": 0.19109581410884857, "learning_rate": 9.147748698333203e-06, "loss": 0.1641, "step": 1770 }, { "epoch": 0.24677767714066745, "grad_norm": 0.2645459473133087, "learning_rate": 9.146432132222858e-06, "loss": 0.1418, "step": 1771 }, { "epoch": 0.24691702083188183, "grad_norm": 0.19025015830993652, "learning_rate": 9.145114644869957e-06, "loss": 0.1459, "step": 1772 }, { "epoch": 0.2470563645230962, "grad_norm": 0.272447407245636, "learning_rate": 9.143796236567218e-06, "loss": 0.1762, "step": 1773 }, { "epoch": 0.24719570821431058, "grad_norm": 0.23119761049747467, "learning_rate": 9.142476907607558e-06, "loss": 0.1393, "step": 1774 }, { "epoch": 0.247335051905525, "grad_norm": 0.21848173439502716, "learning_rate": 9.141156658284104e-06, "loss": 0.1217, "step": 1775 }, { "epoch": 0.24747439559673937, "grad_norm": 0.2246558964252472, "learning_rate": 9.139835488890186e-06, "loss": 0.1347, "step": 1776 }, { "epoch": 0.24761373928795374, "grad_norm": 0.27138465642929077, "learning_rate": 9.138513399719335e-06, "loss": 0.1579, "step": 1777 }, { "epoch": 0.24775308297916812, "grad_norm": 0.27850013971328735, "learning_rate": 9.13719039106529e-06, "loss": 0.1586, "step": 1778 }, { "epoch": 0.2478924266703825, "grad_norm": 0.22944647073745728, "learning_rate": 9.135866463221994e-06, "loss": 0.1553, "step": 1779 }, { "epoch": 0.24803177036159688, "grad_norm": 0.2465527355670929, "learning_rate": 9.134541616483594e-06, "loss": 0.1656, "step": 1780 }, { "epoch": 0.24817111405281125, "grad_norm": 0.4914076328277588, "learning_rate": 9.13321585114444e-06, "loss": 0.1498, "step": 1781 }, { "epoch": 0.24831045774402563, "grad_norm": 0.17460806667804718, "learning_rate": 9.131889167499086e-06, "loss": 0.1218, "step": 1782 }, { "epoch": 0.24844980143524, "grad_norm": 0.21288900077342987, "learning_rate": 9.130561565842293e-06, "loss": 0.1667, "step": 1783 }, { "epoch": 0.24858914512645439, "grad_norm": 0.1588551104068756, "learning_rate": 9.129233046469021e-06, "loss": 0.1342, "step": 1784 }, { "epoch": 0.2487284888176688, "grad_norm": 0.20537522435188293, "learning_rate": 9.12790360967444e-06, "loss": 0.152, "step": 1785 }, { "epoch": 0.24886783250888317, "grad_norm": 0.34647616744041443, "learning_rate": 9.126573255753917e-06, "loss": 0.1622, "step": 1786 }, { "epoch": 0.24900717620009755, "grad_norm": 0.5812463760375977, "learning_rate": 9.125241985003028e-06, "loss": 0.1946, "step": 1787 }, { "epoch": 0.24914651989131192, "grad_norm": 0.2114432007074356, "learning_rate": 9.123909797717551e-06, "loss": 0.1748, "step": 1788 }, { "epoch": 0.2492858635825263, "grad_norm": 0.2923901379108429, "learning_rate": 9.122576694193467e-06, "loss": 0.1402, "step": 1789 }, { "epoch": 0.24942520727374068, "grad_norm": 0.389528751373291, "learning_rate": 9.121242674726962e-06, "loss": 0.1587, "step": 1790 }, { "epoch": 0.24956455096495506, "grad_norm": 0.1458260864019394, "learning_rate": 9.119907739614424e-06, "loss": 0.1317, "step": 1791 }, { "epoch": 0.24970389465616943, "grad_norm": 0.3170439898967743, "learning_rate": 9.118571889152445e-06, "loss": 0.1528, "step": 1792 }, { "epoch": 0.2498432383473838, "grad_norm": 0.36070457100868225, "learning_rate": 9.117235123637822e-06, "loss": 0.1595, "step": 1793 }, { "epoch": 0.2499825820385982, "grad_norm": 0.0908508151769638, "learning_rate": 9.115897443367552e-06, "loss": 0.1123, "step": 1794 }, { "epoch": 0.25012192572981257, "grad_norm": 0.12188560515642166, "learning_rate": 9.114558848638836e-06, "loss": 0.113, "step": 1795 }, { "epoch": 0.25026126942102694, "grad_norm": 0.14279623329639435, "learning_rate": 9.113219339749084e-06, "loss": 0.1365, "step": 1796 }, { "epoch": 0.2504006131122413, "grad_norm": 0.22544194757938385, "learning_rate": 9.1118789169959e-06, "loss": 0.1585, "step": 1797 }, { "epoch": 0.2505399568034557, "grad_norm": 0.13123765587806702, "learning_rate": 9.110537580677094e-06, "loss": 0.1694, "step": 1798 }, { "epoch": 0.2506793004946701, "grad_norm": 0.2683120667934418, "learning_rate": 9.109195331090685e-06, "loss": 0.1447, "step": 1799 }, { "epoch": 0.2508186441858845, "grad_norm": 0.18912433087825775, "learning_rate": 9.10785216853489e-06, "loss": 0.1342, "step": 1800 }, { "epoch": 0.2509579878770989, "grad_norm": 0.11382788419723511, "learning_rate": 9.106508093308123e-06, "loss": 0.1209, "step": 1801 }, { "epoch": 0.25109733156831326, "grad_norm": 0.14589868485927582, "learning_rate": 9.105163105709011e-06, "loss": 0.115, "step": 1802 }, { "epoch": 0.25123667525952764, "grad_norm": 0.3222620487213135, "learning_rate": 9.103817206036383e-06, "loss": 0.1969, "step": 1803 }, { "epoch": 0.251376018950742, "grad_norm": 0.13839440047740936, "learning_rate": 9.10247039458926e-06, "loss": 0.1063, "step": 1804 }, { "epoch": 0.2515153626419564, "grad_norm": 0.1419767141342163, "learning_rate": 9.101122671666878e-06, "loss": 0.1414, "step": 1805 }, { "epoch": 0.2516547063331708, "grad_norm": 0.20335766673088074, "learning_rate": 9.09977403756867e-06, "loss": 0.1404, "step": 1806 }, { "epoch": 0.25179405002438515, "grad_norm": 0.14934353530406952, "learning_rate": 9.098424492594268e-06, "loss": 0.1254, "step": 1807 }, { "epoch": 0.25193339371559953, "grad_norm": 0.11058908700942993, "learning_rate": 9.097074037043512e-06, "loss": 0.1253, "step": 1808 }, { "epoch": 0.2520727374068139, "grad_norm": 0.2419983148574829, "learning_rate": 9.095722671216443e-06, "loss": 0.1533, "step": 1809 }, { "epoch": 0.2522120810980283, "grad_norm": 0.283284068107605, "learning_rate": 9.094370395413306e-06, "loss": 0.212, "step": 1810 }, { "epoch": 0.25235142478924266, "grad_norm": 0.3865135610103607, "learning_rate": 9.09301720993454e-06, "loss": 0.207, "step": 1811 }, { "epoch": 0.25249076848045704, "grad_norm": 0.3888664245605469, "learning_rate": 9.091663115080797e-06, "loss": 0.1593, "step": 1812 }, { "epoch": 0.2526301121716714, "grad_norm": 0.2641243040561676, "learning_rate": 9.090308111152924e-06, "loss": 0.1419, "step": 1813 }, { "epoch": 0.2527694558628858, "grad_norm": 0.2422063797712326, "learning_rate": 9.08895219845197e-06, "loss": 0.166, "step": 1814 }, { "epoch": 0.25290879955410017, "grad_norm": 0.28822189569473267, "learning_rate": 9.087595377279192e-06, "loss": 0.1478, "step": 1815 }, { "epoch": 0.25304814324531455, "grad_norm": 0.46472176909446716, "learning_rate": 9.086237647936043e-06, "loss": 0.1808, "step": 1816 }, { "epoch": 0.2531874869365289, "grad_norm": 0.21256506443023682, "learning_rate": 9.084879010724177e-06, "loss": 0.1748, "step": 1817 }, { "epoch": 0.2533268306277433, "grad_norm": 0.38943231105804443, "learning_rate": 9.083519465945456e-06, "loss": 0.1502, "step": 1818 }, { "epoch": 0.2534661743189577, "grad_norm": 0.4280785322189331, "learning_rate": 9.082159013901937e-06, "loss": 0.1607, "step": 1819 }, { "epoch": 0.2536055180101721, "grad_norm": 0.4269370138645172, "learning_rate": 9.080797654895883e-06, "loss": 0.1907, "step": 1820 }, { "epoch": 0.2537448617013865, "grad_norm": 0.3312249481678009, "learning_rate": 9.079435389229755e-06, "loss": 0.1681, "step": 1821 }, { "epoch": 0.25388420539260087, "grad_norm": 0.46297964453697205, "learning_rate": 9.07807221720622e-06, "loss": 0.1511, "step": 1822 }, { "epoch": 0.25402354908381525, "grad_norm": 0.3135543465614319, "learning_rate": 9.07670813912814e-06, "loss": 0.1268, "step": 1823 }, { "epoch": 0.2541628927750296, "grad_norm": 0.5303459763526917, "learning_rate": 9.075343155298589e-06, "loss": 0.1596, "step": 1824 }, { "epoch": 0.254302236466244, "grad_norm": 0.2883036434650421, "learning_rate": 9.073977266020826e-06, "loss": 0.1289, "step": 1825 }, { "epoch": 0.2544415801574584, "grad_norm": 0.28297579288482666, "learning_rate": 9.072610471598327e-06, "loss": 0.1591, "step": 1826 }, { "epoch": 0.25458092384867276, "grad_norm": 0.3670741319656372, "learning_rate": 9.07124277233476e-06, "loss": 0.1202, "step": 1827 }, { "epoch": 0.25472026753988714, "grad_norm": 0.27492988109588623, "learning_rate": 9.069874168533996e-06, "loss": 0.1665, "step": 1828 }, { "epoch": 0.2548596112311015, "grad_norm": 0.4456004798412323, "learning_rate": 9.068504660500111e-06, "loss": 0.1383, "step": 1829 }, { "epoch": 0.2549989549223159, "grad_norm": 0.28840047121047974, "learning_rate": 9.067134248537374e-06, "loss": 0.1342, "step": 1830 }, { "epoch": 0.25513829861353027, "grad_norm": 0.3344220519065857, "learning_rate": 9.065762932950262e-06, "loss": 0.167, "step": 1831 }, { "epoch": 0.25527764230474465, "grad_norm": 0.22109733521938324, "learning_rate": 9.06439071404345e-06, "loss": 0.1546, "step": 1832 }, { "epoch": 0.255416985995959, "grad_norm": 0.1959979236125946, "learning_rate": 9.063017592121812e-06, "loss": 0.1396, "step": 1833 }, { "epoch": 0.2555563296871734, "grad_norm": 0.4138856530189514, "learning_rate": 9.061643567490425e-06, "loss": 0.1512, "step": 1834 }, { "epoch": 0.2556956733783878, "grad_norm": 0.6163581609725952, "learning_rate": 9.060268640454565e-06, "loss": 0.1803, "step": 1835 }, { "epoch": 0.25583501706960216, "grad_norm": 0.23222146928310394, "learning_rate": 9.058892811319713e-06, "loss": 0.1316, "step": 1836 }, { "epoch": 0.25597436076081653, "grad_norm": 0.527908980846405, "learning_rate": 9.057516080391544e-06, "loss": 0.1588, "step": 1837 }, { "epoch": 0.2561137044520309, "grad_norm": 0.4137364625930786, "learning_rate": 9.056138447975936e-06, "loss": 0.1505, "step": 1838 }, { "epoch": 0.2562530481432453, "grad_norm": 0.17063003778457642, "learning_rate": 9.05475991437897e-06, "loss": 0.1227, "step": 1839 }, { "epoch": 0.2563923918344597, "grad_norm": 0.35991859436035156, "learning_rate": 9.053380479906919e-06, "loss": 0.1892, "step": 1840 }, { "epoch": 0.2565317355256741, "grad_norm": 0.2664371430873871, "learning_rate": 9.052000144866269e-06, "loss": 0.1294, "step": 1841 }, { "epoch": 0.2566710792168885, "grad_norm": 0.22829607129096985, "learning_rate": 9.050618909563693e-06, "loss": 0.1653, "step": 1842 }, { "epoch": 0.25681042290810285, "grad_norm": 0.12440832704305649, "learning_rate": 9.049236774306073e-06, "loss": 0.1106, "step": 1843 }, { "epoch": 0.25694976659931723, "grad_norm": 0.23421697318553925, "learning_rate": 9.04785373940049e-06, "loss": 0.1335, "step": 1844 }, { "epoch": 0.2570891102905316, "grad_norm": 0.20110191404819489, "learning_rate": 9.046469805154218e-06, "loss": 0.1586, "step": 1845 }, { "epoch": 0.257228453981746, "grad_norm": 0.2794843018054962, "learning_rate": 9.045084971874738e-06, "loss": 0.1562, "step": 1846 }, { "epoch": 0.25736779767296036, "grad_norm": 0.20786069333553314, "learning_rate": 9.043699239869727e-06, "loss": 0.1338, "step": 1847 }, { "epoch": 0.25750714136417474, "grad_norm": 0.22807441651821136, "learning_rate": 9.042312609447066e-06, "loss": 0.1327, "step": 1848 }, { "epoch": 0.2576464850553891, "grad_norm": 0.3136560916900635, "learning_rate": 9.040925080914832e-06, "loss": 0.1518, "step": 1849 }, { "epoch": 0.2577858287466035, "grad_norm": 0.3309148848056793, "learning_rate": 9.039536654581297e-06, "loss": 0.1463, "step": 1850 }, { "epoch": 0.2579251724378179, "grad_norm": 0.23321430385112762, "learning_rate": 9.038147330754944e-06, "loss": 0.1373, "step": 1851 }, { "epoch": 0.25806451612903225, "grad_norm": 0.14822044968605042, "learning_rate": 9.036757109744447e-06, "loss": 0.1338, "step": 1852 }, { "epoch": 0.25820385982024663, "grad_norm": 0.2086091786623001, "learning_rate": 9.035365991858679e-06, "loss": 0.1275, "step": 1853 }, { "epoch": 0.258343203511461, "grad_norm": 0.300189346075058, "learning_rate": 9.033973977406718e-06, "loss": 0.166, "step": 1854 }, { "epoch": 0.2584825472026754, "grad_norm": 0.16014112532138824, "learning_rate": 9.032581066697836e-06, "loss": 0.1178, "step": 1855 }, { "epoch": 0.25862189089388976, "grad_norm": 0.17092739045619965, "learning_rate": 9.031187260041505e-06, "loss": 0.1424, "step": 1856 }, { "epoch": 0.25876123458510414, "grad_norm": 0.21620585024356842, "learning_rate": 9.0297925577474e-06, "loss": 0.1294, "step": 1857 }, { "epoch": 0.2589005782763185, "grad_norm": 0.2095920592546463, "learning_rate": 9.028396960125392e-06, "loss": 0.1223, "step": 1858 }, { "epoch": 0.2590399219675329, "grad_norm": 0.2439785748720169, "learning_rate": 9.027000467485547e-06, "loss": 0.1371, "step": 1859 }, { "epoch": 0.2591792656587473, "grad_norm": 0.3099682629108429, "learning_rate": 9.025603080138136e-06, "loss": 0.1744, "step": 1860 }, { "epoch": 0.2593186093499617, "grad_norm": 0.32893940806388855, "learning_rate": 9.024204798393627e-06, "loss": 0.1373, "step": 1861 }, { "epoch": 0.2594579530411761, "grad_norm": 0.22487081587314606, "learning_rate": 9.022805622562687e-06, "loss": 0.1493, "step": 1862 }, { "epoch": 0.25959729673239046, "grad_norm": 0.2199648916721344, "learning_rate": 9.02140555295618e-06, "loss": 0.1274, "step": 1863 }, { "epoch": 0.25973664042360484, "grad_norm": 0.20690064132213593, "learning_rate": 9.020004589885167e-06, "loss": 0.1218, "step": 1864 }, { "epoch": 0.2598759841148192, "grad_norm": 0.23494938015937805, "learning_rate": 9.018602733660915e-06, "loss": 0.1373, "step": 1865 }, { "epoch": 0.2600153278060336, "grad_norm": 0.1472770869731903, "learning_rate": 9.01719998459488e-06, "loss": 0.1181, "step": 1866 }, { "epoch": 0.26015467149724797, "grad_norm": 0.10632935166358948, "learning_rate": 9.015796342998724e-06, "loss": 0.1051, "step": 1867 }, { "epoch": 0.26029401518846235, "grad_norm": 0.17877551913261414, "learning_rate": 9.014391809184302e-06, "loss": 0.136, "step": 1868 }, { "epoch": 0.2604333588796767, "grad_norm": 0.22206206619739532, "learning_rate": 9.01298638346367e-06, "loss": 0.1252, "step": 1869 }, { "epoch": 0.2605727025708911, "grad_norm": 0.15702305734157562, "learning_rate": 9.011580066149081e-06, "loss": 0.127, "step": 1870 }, { "epoch": 0.2607120462621055, "grad_norm": 0.44095510244369507, "learning_rate": 9.010172857552989e-06, "loss": 0.1864, "step": 1871 }, { "epoch": 0.26085138995331986, "grad_norm": 0.22538045048713684, "learning_rate": 9.008764757988042e-06, "loss": 0.1192, "step": 1872 }, { "epoch": 0.26099073364453423, "grad_norm": 0.29083362221717834, "learning_rate": 9.007355767767085e-06, "loss": 0.1255, "step": 1873 }, { "epoch": 0.2611300773357486, "grad_norm": 0.2409420758485794, "learning_rate": 9.005945887203167e-06, "loss": 0.1869, "step": 1874 }, { "epoch": 0.261269421026963, "grad_norm": 0.21523086726665497, "learning_rate": 9.004535116609532e-06, "loss": 0.1482, "step": 1875 }, { "epoch": 0.26140876471817737, "grad_norm": 0.3111552298069, "learning_rate": 9.003123456299617e-06, "loss": 0.1822, "step": 1876 }, { "epoch": 0.26154810840939174, "grad_norm": 0.3511328399181366, "learning_rate": 9.001710906587064e-06, "loss": 0.1862, "step": 1877 }, { "epoch": 0.2616874521006061, "grad_norm": 0.4875585734844208, "learning_rate": 9.000297467785708e-06, "loss": 0.1381, "step": 1878 }, { "epoch": 0.2618267957918205, "grad_norm": 0.31814008951187134, "learning_rate": 8.998883140209582e-06, "loss": 0.1782, "step": 1879 }, { "epoch": 0.26196613948303493, "grad_norm": 0.17440766096115112, "learning_rate": 8.99746792417292e-06, "loss": 0.1368, "step": 1880 }, { "epoch": 0.2621054831742493, "grad_norm": 0.2958487868309021, "learning_rate": 8.996051819990148e-06, "loss": 0.1603, "step": 1881 }, { "epoch": 0.2622448268654637, "grad_norm": 0.13866877555847168, "learning_rate": 8.994634827975892e-06, "loss": 0.1257, "step": 1882 }, { "epoch": 0.26238417055667806, "grad_norm": 0.39515572786331177, "learning_rate": 8.993216948444978e-06, "loss": 0.1365, "step": 1883 }, { "epoch": 0.26252351424789244, "grad_norm": 0.14906840026378632, "learning_rate": 8.991798181712423e-06, "loss": 0.1348, "step": 1884 }, { "epoch": 0.2626628579391068, "grad_norm": 0.24327734112739563, "learning_rate": 8.99037852809345e-06, "loss": 0.1164, "step": 1885 }, { "epoch": 0.2628022016303212, "grad_norm": 0.15063266456127167, "learning_rate": 8.988957987903467e-06, "loss": 0.1128, "step": 1886 }, { "epoch": 0.2629415453215356, "grad_norm": 0.3392021656036377, "learning_rate": 8.987536561458088e-06, "loss": 0.1618, "step": 1887 }, { "epoch": 0.26308088901274995, "grad_norm": 0.22522084414958954, "learning_rate": 8.986114249073122e-06, "loss": 0.1396, "step": 1888 }, { "epoch": 0.26322023270396433, "grad_norm": 0.19249317049980164, "learning_rate": 8.984691051064576e-06, "loss": 0.1142, "step": 1889 }, { "epoch": 0.2633595763951787, "grad_norm": 0.16022562980651855, "learning_rate": 8.98326696774865e-06, "loss": 0.1304, "step": 1890 }, { "epoch": 0.2634989200863931, "grad_norm": 0.11116141080856323, "learning_rate": 8.981841999441743e-06, "loss": 0.1173, "step": 1891 }, { "epoch": 0.26363826377760746, "grad_norm": 0.172471284866333, "learning_rate": 8.980416146460452e-06, "loss": 0.1089, "step": 1892 }, { "epoch": 0.26377760746882184, "grad_norm": 0.2516050934791565, "learning_rate": 8.978989409121565e-06, "loss": 0.141, "step": 1893 }, { "epoch": 0.2639169511600362, "grad_norm": 0.10902474820613861, "learning_rate": 8.977561787742074e-06, "loss": 0.1068, "step": 1894 }, { "epoch": 0.2640562948512506, "grad_norm": 0.19637073576450348, "learning_rate": 8.976133282639166e-06, "loss": 0.1379, "step": 1895 }, { "epoch": 0.264195638542465, "grad_norm": 0.20737436413764954, "learning_rate": 8.974703894130218e-06, "loss": 0.1604, "step": 1896 }, { "epoch": 0.26433498223367935, "grad_norm": 0.2518463432788849, "learning_rate": 8.973273622532806e-06, "loss": 0.1948, "step": 1897 }, { "epoch": 0.2644743259248937, "grad_norm": 0.252856969833374, "learning_rate": 8.97184246816471e-06, "loss": 0.1824, "step": 1898 }, { "epoch": 0.2646136696161081, "grad_norm": 0.32094448804855347, "learning_rate": 8.970410431343892e-06, "loss": 0.1879, "step": 1899 }, { "epoch": 0.26475301330732254, "grad_norm": 0.1817636340856552, "learning_rate": 8.968977512388524e-06, "loss": 0.1142, "step": 1900 }, { "epoch": 0.2648923569985369, "grad_norm": 0.6435189843177795, "learning_rate": 8.967543711616968e-06, "loss": 0.2145, "step": 1901 }, { "epoch": 0.2650317006897513, "grad_norm": 0.17166920006275177, "learning_rate": 8.966109029347777e-06, "loss": 0.1651, "step": 1902 }, { "epoch": 0.26517104438096567, "grad_norm": 0.26558035612106323, "learning_rate": 8.96467346589971e-06, "loss": 0.1404, "step": 1903 }, { "epoch": 0.26531038807218005, "grad_norm": 0.3551652431488037, "learning_rate": 8.963237021591714e-06, "loss": 0.1356, "step": 1904 }, { "epoch": 0.2654497317633944, "grad_norm": 0.20853784680366516, "learning_rate": 8.961799696742933e-06, "loss": 0.1183, "step": 1905 }, { "epoch": 0.2655890754546088, "grad_norm": 0.21857759356498718, "learning_rate": 8.960361491672708e-06, "loss": 0.1456, "step": 1906 }, { "epoch": 0.2657284191458232, "grad_norm": 0.2607150673866272, "learning_rate": 8.958922406700578e-06, "loss": 0.1409, "step": 1907 }, { "epoch": 0.26586776283703756, "grad_norm": 0.18319612741470337, "learning_rate": 8.957482442146271e-06, "loss": 0.1405, "step": 1908 }, { "epoch": 0.26600710652825194, "grad_norm": 0.2624933421611786, "learning_rate": 8.956041598329716e-06, "loss": 0.1348, "step": 1909 }, { "epoch": 0.2661464502194663, "grad_norm": 0.2444200962781906, "learning_rate": 8.954599875571039e-06, "loss": 0.1843, "step": 1910 }, { "epoch": 0.2662857939106807, "grad_norm": 0.4591536819934845, "learning_rate": 8.953157274190552e-06, "loss": 0.162, "step": 1911 }, { "epoch": 0.26642513760189507, "grad_norm": 0.2135055959224701, "learning_rate": 8.951713794508771e-06, "loss": 0.1201, "step": 1912 }, { "epoch": 0.26656448129310945, "grad_norm": 0.13013629615306854, "learning_rate": 8.950269436846405e-06, "loss": 0.1143, "step": 1913 }, { "epoch": 0.2667038249843238, "grad_norm": 0.14406566321849823, "learning_rate": 8.948824201524355e-06, "loss": 0.1174, "step": 1914 }, { "epoch": 0.2668431686755382, "grad_norm": 0.40177303552627563, "learning_rate": 8.947378088863722e-06, "loss": 0.1846, "step": 1915 }, { "epoch": 0.2669825123667526, "grad_norm": 0.34512194991111755, "learning_rate": 8.945931099185798e-06, "loss": 0.1672, "step": 1916 }, { "epoch": 0.26712185605796696, "grad_norm": 0.26702016592025757, "learning_rate": 8.94448323281207e-06, "loss": 0.1694, "step": 1917 }, { "epoch": 0.26726119974918133, "grad_norm": 0.15436911582946777, "learning_rate": 8.943034490064222e-06, "loss": 0.1213, "step": 1918 }, { "epoch": 0.2674005434403957, "grad_norm": 0.16917477548122406, "learning_rate": 8.941584871264131e-06, "loss": 0.1135, "step": 1919 }, { "epoch": 0.26753988713161014, "grad_norm": 0.1498951017856598, "learning_rate": 8.940134376733869e-06, "loss": 0.1427, "step": 1920 }, { "epoch": 0.2676792308228245, "grad_norm": 0.20932814478874207, "learning_rate": 8.938683006795704e-06, "loss": 0.1301, "step": 1921 }, { "epoch": 0.2678185745140389, "grad_norm": 0.18000216782093048, "learning_rate": 8.937230761772098e-06, "loss": 0.1221, "step": 1922 }, { "epoch": 0.2679579182052533, "grad_norm": 0.4600371718406677, "learning_rate": 8.935777641985704e-06, "loss": 0.1494, "step": 1923 }, { "epoch": 0.26809726189646765, "grad_norm": 0.2833157777786255, "learning_rate": 8.934323647759373e-06, "loss": 0.1605, "step": 1924 }, { "epoch": 0.26823660558768203, "grad_norm": 0.2852308750152588, "learning_rate": 8.932868779416148e-06, "loss": 0.1272, "step": 1925 }, { "epoch": 0.2683759492788964, "grad_norm": 0.31947052478790283, "learning_rate": 8.931413037279271e-06, "loss": 0.1377, "step": 1926 }, { "epoch": 0.2685152929701108, "grad_norm": 0.36586859822273254, "learning_rate": 8.929956421672172e-06, "loss": 0.1447, "step": 1927 }, { "epoch": 0.26865463666132516, "grad_norm": 0.23925736546516418, "learning_rate": 8.92849893291848e-06, "loss": 0.1309, "step": 1928 }, { "epoch": 0.26879398035253954, "grad_norm": 0.1950359344482422, "learning_rate": 8.927040571342014e-06, "loss": 0.1285, "step": 1929 }, { "epoch": 0.2689333240437539, "grad_norm": 0.30858296155929565, "learning_rate": 8.92558133726679e-06, "loss": 0.1438, "step": 1930 }, { "epoch": 0.2690726677349683, "grad_norm": 0.2978014051914215, "learning_rate": 8.924121231017012e-06, "loss": 0.1633, "step": 1931 }, { "epoch": 0.2692120114261827, "grad_norm": 0.26589664816856384, "learning_rate": 8.922660252917088e-06, "loss": 0.1891, "step": 1932 }, { "epoch": 0.26935135511739705, "grad_norm": 0.25252556800842285, "learning_rate": 8.92119840329161e-06, "loss": 0.1519, "step": 1933 }, { "epoch": 0.26949069880861143, "grad_norm": 0.20621615648269653, "learning_rate": 8.919735682465372e-06, "loss": 0.1155, "step": 1934 }, { "epoch": 0.2696300424998258, "grad_norm": 0.25406351685523987, "learning_rate": 8.918272090763352e-06, "loss": 0.1543, "step": 1935 }, { "epoch": 0.2697693861910402, "grad_norm": 0.12559187412261963, "learning_rate": 8.91680762851073e-06, "loss": 0.1347, "step": 1936 }, { "epoch": 0.26990872988225456, "grad_norm": 0.24722769856452942, "learning_rate": 8.915342296032874e-06, "loss": 0.1697, "step": 1937 }, { "epoch": 0.27004807357346894, "grad_norm": 0.1471726894378662, "learning_rate": 8.913876093655351e-06, "loss": 0.1549, "step": 1938 }, { "epoch": 0.2701874172646833, "grad_norm": 0.31364330649375916, "learning_rate": 8.912409021703914e-06, "loss": 0.1182, "step": 1939 }, { "epoch": 0.27032676095589775, "grad_norm": 0.5830516219139099, "learning_rate": 8.910941080504514e-06, "loss": 0.1634, "step": 1940 }, { "epoch": 0.2704661046471121, "grad_norm": 0.1769903302192688, "learning_rate": 8.909472270383293e-06, "loss": 0.1884, "step": 1941 }, { "epoch": 0.2706054483383265, "grad_norm": 0.2938484847545624, "learning_rate": 8.90800259166659e-06, "loss": 0.1346, "step": 1942 }, { "epoch": 0.2707447920295409, "grad_norm": 0.3872935175895691, "learning_rate": 8.906532044680933e-06, "loss": 0.1535, "step": 1943 }, { "epoch": 0.27088413572075526, "grad_norm": 0.292231023311615, "learning_rate": 8.905060629753041e-06, "loss": 0.1551, "step": 1944 }, { "epoch": 0.27102347941196964, "grad_norm": 0.1801270842552185, "learning_rate": 8.903588347209833e-06, "loss": 0.1339, "step": 1945 }, { "epoch": 0.271162823103184, "grad_norm": 0.3374931812286377, "learning_rate": 8.902115197378414e-06, "loss": 0.1536, "step": 1946 }, { "epoch": 0.2713021667943984, "grad_norm": 0.18394450843334198, "learning_rate": 8.900641180586086e-06, "loss": 0.1641, "step": 1947 }, { "epoch": 0.27144151048561277, "grad_norm": 0.32290026545524597, "learning_rate": 8.89916629716034e-06, "loss": 0.1441, "step": 1948 }, { "epoch": 0.27158085417682715, "grad_norm": 0.36150285601615906, "learning_rate": 8.897690547428861e-06, "loss": 0.1446, "step": 1949 }, { "epoch": 0.2717201978680415, "grad_norm": 0.23146945238113403, "learning_rate": 8.89621393171953e-06, "loss": 0.1543, "step": 1950 }, { "epoch": 0.2718595415592559, "grad_norm": 0.1570659726858139, "learning_rate": 8.894736450360415e-06, "loss": 0.1264, "step": 1951 }, { "epoch": 0.2719988852504703, "grad_norm": 0.5714474320411682, "learning_rate": 8.893258103679779e-06, "loss": 0.195, "step": 1952 }, { "epoch": 0.27213822894168466, "grad_norm": 0.32974204421043396, "learning_rate": 8.891778892006077e-06, "loss": 0.181, "step": 1953 }, { "epoch": 0.27227757263289903, "grad_norm": 0.2417234480381012, "learning_rate": 8.890298815667956e-06, "loss": 0.1455, "step": 1954 }, { "epoch": 0.2724169163241134, "grad_norm": 0.16793321073055267, "learning_rate": 8.888817874994254e-06, "loss": 0.1251, "step": 1955 }, { "epoch": 0.2725562600153278, "grad_norm": 0.3530118763446808, "learning_rate": 8.887336070314005e-06, "loss": 0.1255, "step": 1956 }, { "epoch": 0.27269560370654217, "grad_norm": 0.575602114200592, "learning_rate": 8.88585340195643e-06, "loss": 0.1584, "step": 1957 }, { "epoch": 0.27283494739775654, "grad_norm": 0.33857667446136475, "learning_rate": 8.884369870250945e-06, "loss": 0.1633, "step": 1958 }, { "epoch": 0.2729742910889709, "grad_norm": 0.20752450823783875, "learning_rate": 8.882885475527156e-06, "loss": 0.1022, "step": 1959 }, { "epoch": 0.27311363478018535, "grad_norm": 0.3163542151451111, "learning_rate": 8.881400218114861e-06, "loss": 0.1753, "step": 1960 }, { "epoch": 0.27325297847139973, "grad_norm": 0.3193841576576233, "learning_rate": 8.879914098344053e-06, "loss": 0.1055, "step": 1961 }, { "epoch": 0.2733923221626141, "grad_norm": 0.47589731216430664, "learning_rate": 8.878427116544912e-06, "loss": 0.1669, "step": 1962 }, { "epoch": 0.2735316658538285, "grad_norm": 0.21013018488883972, "learning_rate": 8.876939273047813e-06, "loss": 0.1716, "step": 1963 }, { "epoch": 0.27367100954504286, "grad_norm": 0.1565125435590744, "learning_rate": 8.875450568183318e-06, "loss": 0.145, "step": 1964 }, { "epoch": 0.27381035323625724, "grad_norm": 0.4177800714969635, "learning_rate": 8.873961002282185e-06, "loss": 0.1568, "step": 1965 }, { "epoch": 0.2739496969274716, "grad_norm": 0.1611185520887375, "learning_rate": 8.872470575675361e-06, "loss": 0.1338, "step": 1966 }, { "epoch": 0.274089040618686, "grad_norm": 0.22750572860240936, "learning_rate": 8.870979288693985e-06, "loss": 0.1331, "step": 1967 }, { "epoch": 0.2742283843099004, "grad_norm": 0.252237468957901, "learning_rate": 8.86948714166939e-06, "loss": 0.1218, "step": 1968 }, { "epoch": 0.27436772800111475, "grad_norm": 0.14271196722984314, "learning_rate": 8.86799413493309e-06, "loss": 0.1386, "step": 1969 }, { "epoch": 0.27450707169232913, "grad_norm": 0.3053579330444336, "learning_rate": 8.866500268816803e-06, "loss": 0.1512, "step": 1970 }, { "epoch": 0.2746464153835435, "grad_norm": 0.2177601456642151, "learning_rate": 8.865005543652428e-06, "loss": 0.1403, "step": 1971 }, { "epoch": 0.2747857590747579, "grad_norm": 0.14583681523799896, "learning_rate": 8.863509959772064e-06, "loss": 0.1148, "step": 1972 }, { "epoch": 0.27492510276597226, "grad_norm": 0.31365641951560974, "learning_rate": 8.86201351750799e-06, "loss": 0.1607, "step": 1973 }, { "epoch": 0.27506444645718664, "grad_norm": 0.14980018138885498, "learning_rate": 8.860516217192683e-06, "loss": 0.1232, "step": 1974 }, { "epoch": 0.275203790148401, "grad_norm": 0.2540495991706848, "learning_rate": 8.85901805915881e-06, "loss": 0.1206, "step": 1975 }, { "epoch": 0.2753431338396154, "grad_norm": 0.20388339459896088, "learning_rate": 8.85751904373923e-06, "loss": 0.1359, "step": 1976 }, { "epoch": 0.2754824775308298, "grad_norm": 0.12256712466478348, "learning_rate": 8.856019171266984e-06, "loss": 0.1145, "step": 1977 }, { "epoch": 0.27562182122204415, "grad_norm": 0.22244535386562347, "learning_rate": 8.854518442075313e-06, "loss": 0.1622, "step": 1978 }, { "epoch": 0.2757611649132585, "grad_norm": 0.21017540991306305, "learning_rate": 8.853016856497646e-06, "loss": 0.1321, "step": 1979 }, { "epoch": 0.2759005086044729, "grad_norm": 0.1748930811882019, "learning_rate": 8.8515144148676e-06, "loss": 0.161, "step": 1980 }, { "epoch": 0.27603985229568734, "grad_norm": 0.2341972291469574, "learning_rate": 8.85001111751898e-06, "loss": 0.1929, "step": 1981 }, { "epoch": 0.2761791959869017, "grad_norm": 0.18964016437530518, "learning_rate": 8.848506964785789e-06, "loss": 0.1222, "step": 1982 }, { "epoch": 0.2763185396781161, "grad_norm": 0.14814667403697968, "learning_rate": 8.847001957002211e-06, "loss": 0.1564, "step": 1983 }, { "epoch": 0.27645788336933047, "grad_norm": 0.24149642884731293, "learning_rate": 8.845496094502628e-06, "loss": 0.1561, "step": 1984 }, { "epoch": 0.27659722706054485, "grad_norm": 0.10579300671815872, "learning_rate": 8.843989377621606e-06, "loss": 0.1151, "step": 1985 }, { "epoch": 0.2767365707517592, "grad_norm": 0.14471282064914703, "learning_rate": 8.842481806693906e-06, "loss": 0.1166, "step": 1986 }, { "epoch": 0.2768759144429736, "grad_norm": 0.22579559683799744, "learning_rate": 8.840973382054472e-06, "loss": 0.1337, "step": 1987 }, { "epoch": 0.277015258134188, "grad_norm": 0.22470445930957794, "learning_rate": 8.839464104038445e-06, "loss": 0.1125, "step": 1988 }, { "epoch": 0.27715460182540236, "grad_norm": 0.13552448153495789, "learning_rate": 8.83795397298115e-06, "loss": 0.1288, "step": 1989 }, { "epoch": 0.27729394551661674, "grad_norm": 0.11368247121572495, "learning_rate": 8.836442989218104e-06, "loss": 0.1038, "step": 1990 }, { "epoch": 0.2774332892078311, "grad_norm": 0.18588605523109436, "learning_rate": 8.834931153085014e-06, "loss": 0.1327, "step": 1991 }, { "epoch": 0.2775726328990455, "grad_norm": 0.2549978792667389, "learning_rate": 8.833418464917774e-06, "loss": 0.1128, "step": 1992 }, { "epoch": 0.27771197659025987, "grad_norm": 0.2214653044939041, "learning_rate": 8.831904925052468e-06, "loss": 0.1903, "step": 1993 }, { "epoch": 0.27785132028147425, "grad_norm": 0.23088963329792023, "learning_rate": 8.830390533825373e-06, "loss": 0.1292, "step": 1994 }, { "epoch": 0.2779906639726886, "grad_norm": 0.30841612815856934, "learning_rate": 8.828875291572951e-06, "loss": 0.1746, "step": 1995 }, { "epoch": 0.278130007663903, "grad_norm": 0.25264936685562134, "learning_rate": 8.827359198631854e-06, "loss": 0.1139, "step": 1996 }, { "epoch": 0.2782693513551174, "grad_norm": 0.1461860090494156, "learning_rate": 8.825842255338923e-06, "loss": 0.1325, "step": 1997 }, { "epoch": 0.27840869504633176, "grad_norm": 0.15443815290927887, "learning_rate": 8.824324462031189e-06, "loss": 0.1372, "step": 1998 }, { "epoch": 0.27854803873754613, "grad_norm": 0.40535059571266174, "learning_rate": 8.822805819045869e-06, "loss": 0.2418, "step": 1999 }, { "epoch": 0.2786873824287605, "grad_norm": 0.2136220932006836, "learning_rate": 8.821286326720372e-06, "loss": 0.1426, "step": 2000 }, { "epoch": 0.27882672611997494, "grad_norm": 0.19994471967220306, "learning_rate": 8.819765985392297e-06, "loss": 0.14, "step": 2001 }, { "epoch": 0.2789660698111893, "grad_norm": 0.1731433868408203, "learning_rate": 8.818244795399425e-06, "loss": 0.135, "step": 2002 }, { "epoch": 0.2791054135024037, "grad_norm": 0.38761427998542786, "learning_rate": 8.81672275707973e-06, "loss": 0.1887, "step": 2003 }, { "epoch": 0.2792447571936181, "grad_norm": 0.20869798958301544, "learning_rate": 8.815199870771378e-06, "loss": 0.1584, "step": 2004 }, { "epoch": 0.27938410088483245, "grad_norm": 0.31513580679893494, "learning_rate": 8.813676136812717e-06, "loss": 0.1242, "step": 2005 }, { "epoch": 0.27952344457604683, "grad_norm": 0.1626405119895935, "learning_rate": 8.812151555542286e-06, "loss": 0.1342, "step": 2006 }, { "epoch": 0.2796627882672612, "grad_norm": 0.19551031291484833, "learning_rate": 8.81062612729881e-06, "loss": 0.1583, "step": 2007 }, { "epoch": 0.2798021319584756, "grad_norm": 0.11320306360721588, "learning_rate": 8.80909985242121e-06, "loss": 0.1086, "step": 2008 }, { "epoch": 0.27994147564968996, "grad_norm": 0.12344880402088165, "learning_rate": 8.807572731248583e-06, "loss": 0.1145, "step": 2009 }, { "epoch": 0.28008081934090434, "grad_norm": 0.23486551642417908, "learning_rate": 8.806044764120226e-06, "loss": 0.1325, "step": 2010 }, { "epoch": 0.2802201630321187, "grad_norm": 0.36675670742988586, "learning_rate": 8.804515951375615e-06, "loss": 0.1957, "step": 2011 }, { "epoch": 0.2803595067233331, "grad_norm": 0.2941606640815735, "learning_rate": 8.802986293354418e-06, "loss": 0.1465, "step": 2012 }, { "epoch": 0.2804988504145475, "grad_norm": 0.2671109735965729, "learning_rate": 8.80145579039649e-06, "loss": 0.1348, "step": 2013 }, { "epoch": 0.28063819410576185, "grad_norm": 0.22316522896289825, "learning_rate": 8.799924442841873e-06, "loss": 0.1086, "step": 2014 }, { "epoch": 0.28077753779697623, "grad_norm": 0.3466989994049072, "learning_rate": 8.798392251030801e-06, "loss": 0.1304, "step": 2015 }, { "epoch": 0.2809168814881906, "grad_norm": 0.14841048419475555, "learning_rate": 8.796859215303688e-06, "loss": 0.1212, "step": 2016 }, { "epoch": 0.281056225179405, "grad_norm": 0.2994566559791565, "learning_rate": 8.795325336001143e-06, "loss": 0.1355, "step": 2017 }, { "epoch": 0.28119556887061936, "grad_norm": 0.328892320394516, "learning_rate": 8.793790613463956e-06, "loss": 0.1453, "step": 2018 }, { "epoch": 0.28133491256183374, "grad_norm": 0.22416262328624725, "learning_rate": 8.792255048033106e-06, "loss": 0.182, "step": 2019 }, { "epoch": 0.2814742562530481, "grad_norm": 0.3028537631034851, "learning_rate": 8.790718640049767e-06, "loss": 0.1422, "step": 2020 }, { "epoch": 0.28161359994426255, "grad_norm": 0.38933029770851135, "learning_rate": 8.789181389855288e-06, "loss": 0.1689, "step": 2021 }, { "epoch": 0.2817529436354769, "grad_norm": 0.2309274673461914, "learning_rate": 8.787643297791214e-06, "loss": 0.1985, "step": 2022 }, { "epoch": 0.2818922873266913, "grad_norm": 0.14449457824230194, "learning_rate": 8.78610436419927e-06, "loss": 0.1248, "step": 2023 }, { "epoch": 0.2820316310179057, "grad_norm": 0.22126415371894836, "learning_rate": 8.784564589421373e-06, "loss": 0.1434, "step": 2024 }, { "epoch": 0.28217097470912006, "grad_norm": 0.33082062005996704, "learning_rate": 8.783023973799632e-06, "loss": 0.1559, "step": 2025 }, { "epoch": 0.28231031840033444, "grad_norm": 0.24310584366321564, "learning_rate": 8.78148251767633e-06, "loss": 0.1193, "step": 2026 }, { "epoch": 0.2824496620915488, "grad_norm": 0.41824114322662354, "learning_rate": 8.779940221393946e-06, "loss": 0.1382, "step": 2027 }, { "epoch": 0.2825890057827632, "grad_norm": 0.25792598724365234, "learning_rate": 8.778397085295141e-06, "loss": 0.1665, "step": 2028 }, { "epoch": 0.28272834947397757, "grad_norm": 0.23889663815498352, "learning_rate": 8.776853109722765e-06, "loss": 0.1247, "step": 2029 }, { "epoch": 0.28286769316519195, "grad_norm": 0.157027468085289, "learning_rate": 8.775308295019857e-06, "loss": 0.1074, "step": 2030 }, { "epoch": 0.2830070368564063, "grad_norm": 0.49104127287864685, "learning_rate": 8.773762641529637e-06, "loss": 0.1603, "step": 2031 }, { "epoch": 0.2831463805476207, "grad_norm": 0.27664539217948914, "learning_rate": 8.772216149595515e-06, "loss": 0.1461, "step": 2032 }, { "epoch": 0.2832857242388351, "grad_norm": 0.1387529820203781, "learning_rate": 8.770668819561085e-06, "loss": 0.1162, "step": 2033 }, { "epoch": 0.28342506793004946, "grad_norm": 0.5351938009262085, "learning_rate": 8.769120651770128e-06, "loss": 0.1498, "step": 2034 }, { "epoch": 0.28356441162126383, "grad_norm": 0.4131740927696228, "learning_rate": 8.767571646566615e-06, "loss": 0.1612, "step": 2035 }, { "epoch": 0.2837037553124782, "grad_norm": 0.15756915509700775, "learning_rate": 8.766021804294697e-06, "loss": 0.122, "step": 2036 }, { "epoch": 0.2838430990036926, "grad_norm": 0.21452130377292633, "learning_rate": 8.764471125298712e-06, "loss": 0.1246, "step": 2037 }, { "epoch": 0.28398244269490697, "grad_norm": 0.20889326930046082, "learning_rate": 8.76291960992319e-06, "loss": 0.1432, "step": 2038 }, { "epoch": 0.28412178638612134, "grad_norm": 0.39308178424835205, "learning_rate": 8.761367258512838e-06, "loss": 0.1599, "step": 2039 }, { "epoch": 0.2842611300773357, "grad_norm": 0.3575601279735565, "learning_rate": 8.759814071412554e-06, "loss": 0.1865, "step": 2040 }, { "epoch": 0.28440047376855015, "grad_norm": 0.1552668958902359, "learning_rate": 8.758260048967421e-06, "loss": 0.1356, "step": 2041 }, { "epoch": 0.28453981745976453, "grad_norm": 0.09805548936128616, "learning_rate": 8.75670519152271e-06, "loss": 0.1173, "step": 2042 }, { "epoch": 0.2846791611509789, "grad_norm": 0.27260029315948486, "learning_rate": 8.755149499423871e-06, "loss": 0.14, "step": 2043 }, { "epoch": 0.2848185048421933, "grad_norm": 0.20189537107944489, "learning_rate": 8.753592973016545e-06, "loss": 0.1252, "step": 2044 }, { "epoch": 0.28495784853340766, "grad_norm": 0.3434407711029053, "learning_rate": 8.752035612646557e-06, "loss": 0.2115, "step": 2045 }, { "epoch": 0.28509719222462204, "grad_norm": 0.2993614077568054, "learning_rate": 8.750477418659914e-06, "loss": 0.1455, "step": 2046 }, { "epoch": 0.2852365359158364, "grad_norm": 0.13643425703048706, "learning_rate": 8.748918391402816e-06, "loss": 0.1324, "step": 2047 }, { "epoch": 0.2853758796070508, "grad_norm": 0.17761987447738647, "learning_rate": 8.74735853122164e-06, "loss": 0.1054, "step": 2048 }, { "epoch": 0.2855152232982652, "grad_norm": 0.31496453285217285, "learning_rate": 8.745797838462951e-06, "loss": 0.1461, "step": 2049 }, { "epoch": 0.28565456698947955, "grad_norm": 0.24405620992183685, "learning_rate": 8.7442363134735e-06, "loss": 0.1442, "step": 2050 }, { "epoch": 0.28579391068069393, "grad_norm": 0.14725911617279053, "learning_rate": 8.742673956600225e-06, "loss": 0.1407, "step": 2051 }, { "epoch": 0.2859332543719083, "grad_norm": 0.18891046941280365, "learning_rate": 8.741110768190242e-06, "loss": 0.1306, "step": 2052 }, { "epoch": 0.2860725980631227, "grad_norm": 0.23391830921173096, "learning_rate": 8.739546748590857e-06, "loss": 0.1116, "step": 2053 }, { "epoch": 0.28621194175433706, "grad_norm": 0.3319946229457855, "learning_rate": 8.73798189814956e-06, "loss": 0.1361, "step": 2054 }, { "epoch": 0.28635128544555144, "grad_norm": 0.24168723821640015, "learning_rate": 8.736416217214026e-06, "loss": 0.1369, "step": 2055 }, { "epoch": 0.2864906291367658, "grad_norm": 0.1315011829137802, "learning_rate": 8.734849706132112e-06, "loss": 0.1271, "step": 2056 }, { "epoch": 0.2866299728279802, "grad_norm": 0.19784674048423767, "learning_rate": 8.733282365251858e-06, "loss": 0.1364, "step": 2057 }, { "epoch": 0.2867693165191946, "grad_norm": 0.10295633226633072, "learning_rate": 8.731714194921498e-06, "loss": 0.1154, "step": 2058 }, { "epoch": 0.28690866021040895, "grad_norm": 0.27723804116249084, "learning_rate": 8.73014519548944e-06, "loss": 0.1525, "step": 2059 }, { "epoch": 0.2870480039016233, "grad_norm": 0.29744020104408264, "learning_rate": 8.72857536730428e-06, "loss": 0.1611, "step": 2060 }, { "epoch": 0.28718734759283776, "grad_norm": 0.11784835159778595, "learning_rate": 8.7270047107148e-06, "loss": 0.1227, "step": 2061 }, { "epoch": 0.28732669128405214, "grad_norm": 0.25910454988479614, "learning_rate": 8.72543322606996e-06, "loss": 0.1515, "step": 2062 }, { "epoch": 0.2874660349752665, "grad_norm": 0.386528342962265, "learning_rate": 8.72386091371891e-06, "loss": 0.1579, "step": 2063 }, { "epoch": 0.2876053786664809, "grad_norm": 0.0988343358039856, "learning_rate": 8.722287774010983e-06, "loss": 0.1076, "step": 2064 }, { "epoch": 0.28774472235769527, "grad_norm": 0.2279839664697647, "learning_rate": 8.720713807295692e-06, "loss": 0.161, "step": 2065 }, { "epoch": 0.28788406604890965, "grad_norm": 0.24291785061359406, "learning_rate": 8.71913901392274e-06, "loss": 0.1576, "step": 2066 }, { "epoch": 0.288023409740124, "grad_norm": 0.2869592010974884, "learning_rate": 8.71756339424201e-06, "loss": 0.154, "step": 2067 }, { "epoch": 0.2881627534313384, "grad_norm": 0.29500535130500793, "learning_rate": 8.715986948603566e-06, "loss": 0.1384, "step": 2068 }, { "epoch": 0.2883020971225528, "grad_norm": 0.38261842727661133, "learning_rate": 8.71440967735766e-06, "loss": 0.1514, "step": 2069 }, { "epoch": 0.28844144081376716, "grad_norm": 0.20995937287807465, "learning_rate": 8.712831580854724e-06, "loss": 0.132, "step": 2070 }, { "epoch": 0.28858078450498154, "grad_norm": 0.28830400109291077, "learning_rate": 8.711252659445378e-06, "loss": 0.161, "step": 2071 }, { "epoch": 0.2887201281961959, "grad_norm": 0.19328932464122772, "learning_rate": 8.709672913480418e-06, "loss": 0.138, "step": 2072 }, { "epoch": 0.2888594718874103, "grad_norm": 0.22414003312587738, "learning_rate": 8.70809234331083e-06, "loss": 0.1475, "step": 2073 }, { "epoch": 0.28899881557862467, "grad_norm": 0.19871322810649872, "learning_rate": 8.706510949287782e-06, "loss": 0.1688, "step": 2074 }, { "epoch": 0.28913815926983905, "grad_norm": 0.23015964031219482, "learning_rate": 8.70492873176262e-06, "loss": 0.1525, "step": 2075 }, { "epoch": 0.2892775029610534, "grad_norm": 0.3158590495586395, "learning_rate": 8.703345691086882e-06, "loss": 0.1173, "step": 2076 }, { "epoch": 0.2894168466522678, "grad_norm": 0.184052973985672, "learning_rate": 8.701761827612278e-06, "loss": 0.1333, "step": 2077 }, { "epoch": 0.2895561903434822, "grad_norm": 0.1418401598930359, "learning_rate": 8.700177141690708e-06, "loss": 0.132, "step": 2078 }, { "epoch": 0.28969553403469656, "grad_norm": 0.18283790349960327, "learning_rate": 8.698591633674256e-06, "loss": 0.1597, "step": 2079 }, { "epoch": 0.28983487772591093, "grad_norm": 0.16327965259552002, "learning_rate": 8.697005303915183e-06, "loss": 0.1318, "step": 2080 }, { "epoch": 0.28997422141712537, "grad_norm": 0.2627761960029602, "learning_rate": 8.695418152765933e-06, "loss": 0.1406, "step": 2081 }, { "epoch": 0.29011356510833974, "grad_norm": 0.24852564930915833, "learning_rate": 8.693830180579139e-06, "loss": 0.1334, "step": 2082 }, { "epoch": 0.2902529087995541, "grad_norm": 0.25174349546432495, "learning_rate": 8.69224138770761e-06, "loss": 0.1309, "step": 2083 }, { "epoch": 0.2903922524907685, "grad_norm": 0.2788793444633484, "learning_rate": 8.69065177450434e-06, "loss": 0.1761, "step": 2084 }, { "epoch": 0.2905315961819829, "grad_norm": 0.1130206435918808, "learning_rate": 8.689061341322505e-06, "loss": 0.1106, "step": 2085 }, { "epoch": 0.29067093987319725, "grad_norm": 0.12699930369853973, "learning_rate": 8.687470088515464e-06, "loss": 0.1274, "step": 2086 }, { "epoch": 0.29081028356441163, "grad_norm": 0.18661701679229736, "learning_rate": 8.685878016436753e-06, "loss": 0.1392, "step": 2087 }, { "epoch": 0.290949627255626, "grad_norm": 0.32788994908332825, "learning_rate": 8.684285125440099e-06, "loss": 0.1355, "step": 2088 }, { "epoch": 0.2910889709468404, "grad_norm": 0.13274770975112915, "learning_rate": 8.682691415879402e-06, "loss": 0.1003, "step": 2089 }, { "epoch": 0.29122831463805476, "grad_norm": 0.3124416768550873, "learning_rate": 8.681096888108751e-06, "loss": 0.1398, "step": 2090 }, { "epoch": 0.29136765832926914, "grad_norm": 0.28860795497894287, "learning_rate": 8.679501542482412e-06, "loss": 0.1515, "step": 2091 }, { "epoch": 0.2915070020204835, "grad_norm": 0.18785566091537476, "learning_rate": 8.677905379354834e-06, "loss": 0.1422, "step": 2092 }, { "epoch": 0.2916463457116979, "grad_norm": 0.4801212251186371, "learning_rate": 8.67630839908065e-06, "loss": 0.1912, "step": 2093 }, { "epoch": 0.2917856894029123, "grad_norm": 0.3011344373226166, "learning_rate": 8.674710602014672e-06, "loss": 0.1696, "step": 2094 }, { "epoch": 0.29192503309412665, "grad_norm": 0.28738513588905334, "learning_rate": 8.673111988511892e-06, "loss": 0.1413, "step": 2095 }, { "epoch": 0.29206437678534103, "grad_norm": 0.14162486791610718, "learning_rate": 8.671512558927483e-06, "loss": 0.1153, "step": 2096 }, { "epoch": 0.2922037204765554, "grad_norm": 0.26707080006599426, "learning_rate": 8.669912313616811e-06, "loss": 0.1775, "step": 2097 }, { "epoch": 0.2923430641677698, "grad_norm": 0.13786719739437103, "learning_rate": 8.668311252935407e-06, "loss": 0.1324, "step": 2098 }, { "epoch": 0.29248240785898416, "grad_norm": 0.10831575095653534, "learning_rate": 8.66670937723899e-06, "loss": 0.1206, "step": 2099 }, { "epoch": 0.29262175155019854, "grad_norm": 0.23223674297332764, "learning_rate": 8.665106686883461e-06, "loss": 0.15, "step": 2100 }, { "epoch": 0.29276109524141297, "grad_norm": 0.15868151187896729, "learning_rate": 8.663503182224906e-06, "loss": 0.1284, "step": 2101 }, { "epoch": 0.29290043893262735, "grad_norm": 0.17104491591453552, "learning_rate": 8.66189886361958e-06, "loss": 0.139, "step": 2102 }, { "epoch": 0.2930397826238417, "grad_norm": 0.25621363520622253, "learning_rate": 8.660293731423929e-06, "loss": 0.2003, "step": 2103 }, { "epoch": 0.2931791263150561, "grad_norm": 0.12555401027202606, "learning_rate": 8.658687785994579e-06, "loss": 0.1377, "step": 2104 }, { "epoch": 0.2933184700062705, "grad_norm": 0.14150230586528778, "learning_rate": 8.657081027688332e-06, "loss": 0.1492, "step": 2105 }, { "epoch": 0.29345781369748486, "grad_norm": 0.11737605184316635, "learning_rate": 8.655473456862172e-06, "loss": 0.1267, "step": 2106 }, { "epoch": 0.29359715738869924, "grad_norm": 0.27220648527145386, "learning_rate": 8.653865073873265e-06, "loss": 0.1359, "step": 2107 }, { "epoch": 0.2937365010799136, "grad_norm": 0.16068032383918762, "learning_rate": 8.652255879078959e-06, "loss": 0.1251, "step": 2108 }, { "epoch": 0.293875844771128, "grad_norm": 0.21454639732837677, "learning_rate": 8.650645872836779e-06, "loss": 0.1333, "step": 2109 }, { "epoch": 0.29401518846234237, "grad_norm": 0.1992761343717575, "learning_rate": 8.649035055504431e-06, "loss": 0.1395, "step": 2110 }, { "epoch": 0.29415453215355675, "grad_norm": 0.12721183896064758, "learning_rate": 8.647423427439804e-06, "loss": 0.1087, "step": 2111 }, { "epoch": 0.2942938758447711, "grad_norm": 0.23923629522323608, "learning_rate": 8.645810989000962e-06, "loss": 0.1203, "step": 2112 }, { "epoch": 0.2944332195359855, "grad_norm": 0.1772688776254654, "learning_rate": 8.644197740546153e-06, "loss": 0.1409, "step": 2113 }, { "epoch": 0.2945725632271999, "grad_norm": 0.11981125921010971, "learning_rate": 8.642583682433808e-06, "loss": 0.1215, "step": 2114 }, { "epoch": 0.29471190691841426, "grad_norm": 0.20711754262447357, "learning_rate": 8.640968815022529e-06, "loss": 0.1487, "step": 2115 }, { "epoch": 0.29485125060962863, "grad_norm": 0.13919195532798767, "learning_rate": 8.6393531386711e-06, "loss": 0.1348, "step": 2116 }, { "epoch": 0.294990594300843, "grad_norm": 0.16860787570476532, "learning_rate": 8.637736653738496e-06, "loss": 0.1458, "step": 2117 }, { "epoch": 0.2951299379920574, "grad_norm": 0.17088733613491058, "learning_rate": 8.636119360583857e-06, "loss": 0.1321, "step": 2118 }, { "epoch": 0.29526928168327177, "grad_norm": 0.14947520196437836, "learning_rate": 8.63450125956651e-06, "loss": 0.1226, "step": 2119 }, { "epoch": 0.29540862537448614, "grad_norm": 0.13914436101913452, "learning_rate": 8.63288235104596e-06, "loss": 0.1324, "step": 2120 }, { "epoch": 0.2955479690657006, "grad_norm": 0.14653271436691284, "learning_rate": 8.631262635381892e-06, "loss": 0.1138, "step": 2121 }, { "epoch": 0.29568731275691496, "grad_norm": 0.10921651870012283, "learning_rate": 8.629642112934169e-06, "loss": 0.1089, "step": 2122 }, { "epoch": 0.29582665644812933, "grad_norm": 0.11913470923900604, "learning_rate": 8.628020784062837e-06, "loss": 0.0975, "step": 2123 }, { "epoch": 0.2959660001393437, "grad_norm": 0.3312835097312927, "learning_rate": 8.626398649128113e-06, "loss": 0.141, "step": 2124 }, { "epoch": 0.2961053438305581, "grad_norm": 0.10867452621459961, "learning_rate": 8.624775708490403e-06, "loss": 0.1017, "step": 2125 }, { "epoch": 0.29624468752177247, "grad_norm": 0.21731841564178467, "learning_rate": 8.623151962510284e-06, "loss": 0.1506, "step": 2126 }, { "epoch": 0.29638403121298684, "grad_norm": 0.18366484344005585, "learning_rate": 8.621527411548517e-06, "loss": 0.1398, "step": 2127 }, { "epoch": 0.2965233749042012, "grad_norm": 0.2208486795425415, "learning_rate": 8.619902055966043e-06, "loss": 0.1821, "step": 2128 }, { "epoch": 0.2966627185954156, "grad_norm": 0.2634929120540619, "learning_rate": 8.618275896123973e-06, "loss": 0.1611, "step": 2129 }, { "epoch": 0.29680206228663, "grad_norm": 0.28776809573173523, "learning_rate": 8.616648932383607e-06, "loss": 0.139, "step": 2130 }, { "epoch": 0.29694140597784435, "grad_norm": 0.2612351179122925, "learning_rate": 8.615021165106415e-06, "loss": 0.1473, "step": 2131 }, { "epoch": 0.29708074966905873, "grad_norm": 0.2113099843263626, "learning_rate": 8.613392594654056e-06, "loss": 0.137, "step": 2132 }, { "epoch": 0.2972200933602731, "grad_norm": 0.16161151230335236, "learning_rate": 8.611763221388356e-06, "loss": 0.112, "step": 2133 }, { "epoch": 0.2973594370514875, "grad_norm": 0.15048910677433014, "learning_rate": 8.610133045671325e-06, "loss": 0.1288, "step": 2134 }, { "epoch": 0.29749878074270186, "grad_norm": 0.2274533212184906, "learning_rate": 8.608502067865155e-06, "loss": 0.1345, "step": 2135 }, { "epoch": 0.29763812443391624, "grad_norm": 0.1872674524784088, "learning_rate": 8.606870288332206e-06, "loss": 0.1199, "step": 2136 }, { "epoch": 0.2977774681251306, "grad_norm": 0.1876722127199173, "learning_rate": 8.605237707435028e-06, "loss": 0.1392, "step": 2137 }, { "epoch": 0.297916811816345, "grad_norm": 0.15016329288482666, "learning_rate": 8.603604325536338e-06, "loss": 0.1283, "step": 2138 }, { "epoch": 0.2980561555075594, "grad_norm": 0.2584426999092102, "learning_rate": 8.60197014299904e-06, "loss": 0.1342, "step": 2139 }, { "epoch": 0.29819549919877375, "grad_norm": 0.16816255450248718, "learning_rate": 8.600335160186208e-06, "loss": 0.1676, "step": 2140 }, { "epoch": 0.2983348428899882, "grad_norm": 0.12293771654367447, "learning_rate": 8.598699377461104e-06, "loss": 0.1329, "step": 2141 }, { "epoch": 0.29847418658120256, "grad_norm": 0.16861872375011444, "learning_rate": 8.597062795187157e-06, "loss": 0.1078, "step": 2142 }, { "epoch": 0.29861353027241694, "grad_norm": 0.3093007504940033, "learning_rate": 8.595425413727979e-06, "loss": 0.1574, "step": 2143 }, { "epoch": 0.2987528739636313, "grad_norm": 0.2473069578409195, "learning_rate": 8.593787233447357e-06, "loss": 0.1534, "step": 2144 }, { "epoch": 0.2988922176548457, "grad_norm": 0.15264195203781128, "learning_rate": 8.592148254709262e-06, "loss": 0.1097, "step": 2145 }, { "epoch": 0.29903156134606007, "grad_norm": 0.1802988499403, "learning_rate": 8.590508477877834e-06, "loss": 0.145, "step": 2146 }, { "epoch": 0.29917090503727445, "grad_norm": 0.16985459625720978, "learning_rate": 8.588867903317395e-06, "loss": 0.1868, "step": 2147 }, { "epoch": 0.2993102487284888, "grad_norm": 0.22414128482341766, "learning_rate": 8.587226531392443e-06, "loss": 0.165, "step": 2148 }, { "epoch": 0.2994495924197032, "grad_norm": 0.3141110837459564, "learning_rate": 8.585584362467652e-06, "loss": 0.1626, "step": 2149 }, { "epoch": 0.2995889361109176, "grad_norm": 0.27086082100868225, "learning_rate": 8.583941396907877e-06, "loss": 0.1209, "step": 2150 }, { "epoch": 0.29972827980213196, "grad_norm": 0.2162880301475525, "learning_rate": 8.582297635078149e-06, "loss": 0.1393, "step": 2151 }, { "epoch": 0.29986762349334634, "grad_norm": 0.25978296995162964, "learning_rate": 8.58065307734367e-06, "loss": 0.1145, "step": 2152 }, { "epoch": 0.3000069671845607, "grad_norm": 0.18887026607990265, "learning_rate": 8.579007724069823e-06, "loss": 0.1614, "step": 2153 }, { "epoch": 0.3001463108757751, "grad_norm": 0.2099285125732422, "learning_rate": 8.577361575622171e-06, "loss": 0.104, "step": 2154 }, { "epoch": 0.30028565456698947, "grad_norm": 0.5295179486274719, "learning_rate": 8.575714632366451e-06, "loss": 0.1476, "step": 2155 }, { "epoch": 0.30042499825820385, "grad_norm": 0.30344200134277344, "learning_rate": 8.574066894668573e-06, "loss": 0.1526, "step": 2156 }, { "epoch": 0.3005643419494182, "grad_norm": 0.27292367815971375, "learning_rate": 8.57241836289463e-06, "loss": 0.1841, "step": 2157 }, { "epoch": 0.3007036856406326, "grad_norm": 0.37108609080314636, "learning_rate": 8.570769037410885e-06, "loss": 0.1548, "step": 2158 }, { "epoch": 0.300843029331847, "grad_norm": 0.34157097339630127, "learning_rate": 8.56911891858378e-06, "loss": 0.175, "step": 2159 }, { "epoch": 0.30098237302306136, "grad_norm": 0.3498985469341278, "learning_rate": 8.56746800677994e-06, "loss": 0.142, "step": 2160 }, { "epoch": 0.3011217167142758, "grad_norm": 0.1708317995071411, "learning_rate": 8.565816302366151e-06, "loss": 0.1046, "step": 2161 }, { "epoch": 0.30126106040549017, "grad_norm": 0.12223107367753983, "learning_rate": 8.564163805709393e-06, "loss": 0.1218, "step": 2162 }, { "epoch": 0.30140040409670454, "grad_norm": 0.15756894648075104, "learning_rate": 8.562510517176807e-06, "loss": 0.118, "step": 2163 }, { "epoch": 0.3015397477879189, "grad_norm": 0.20377227663993835, "learning_rate": 8.560856437135716e-06, "loss": 0.1733, "step": 2164 }, { "epoch": 0.3016790914791333, "grad_norm": 0.29113414883613586, "learning_rate": 8.559201565953623e-06, "loss": 0.1454, "step": 2165 }, { "epoch": 0.3018184351703477, "grad_norm": 0.24115751683712006, "learning_rate": 8.557545903998197e-06, "loss": 0.1486, "step": 2166 }, { "epoch": 0.30195777886156205, "grad_norm": 0.19002097845077515, "learning_rate": 8.555889451637294e-06, "loss": 0.1674, "step": 2167 }, { "epoch": 0.30209712255277643, "grad_norm": 0.19795076549053192, "learning_rate": 8.554232209238935e-06, "loss": 0.1521, "step": 2168 }, { "epoch": 0.3022364662439908, "grad_norm": 0.2837347686290741, "learning_rate": 8.552574177171326e-06, "loss": 0.1428, "step": 2169 }, { "epoch": 0.3023758099352052, "grad_norm": 0.1976095288991928, "learning_rate": 8.55091535580284e-06, "loss": 0.124, "step": 2170 }, { "epoch": 0.30251515362641956, "grad_norm": 0.2126205563545227, "learning_rate": 8.54925574550203e-06, "loss": 0.1703, "step": 2171 }, { "epoch": 0.30265449731763394, "grad_norm": 0.3373163640499115, "learning_rate": 8.547595346637624e-06, "loss": 0.1447, "step": 2172 }, { "epoch": 0.3027938410088483, "grad_norm": 0.1719200611114502, "learning_rate": 8.545934159578527e-06, "loss": 0.1491, "step": 2173 }, { "epoch": 0.3029331847000627, "grad_norm": 0.2412387877702713, "learning_rate": 8.544272184693814e-06, "loss": 0.1453, "step": 2174 }, { "epoch": 0.3030725283912771, "grad_norm": 0.17239204049110413, "learning_rate": 8.542609422352738e-06, "loss": 0.1524, "step": 2175 }, { "epoch": 0.30321187208249145, "grad_norm": 0.24811311066150665, "learning_rate": 8.540945872924728e-06, "loss": 0.1721, "step": 2176 }, { "epoch": 0.30335121577370583, "grad_norm": 0.271136075258255, "learning_rate": 8.539281536779388e-06, "loss": 0.1434, "step": 2177 }, { "epoch": 0.3034905594649202, "grad_norm": 0.29757794737815857, "learning_rate": 8.537616414286491e-06, "loss": 0.1272, "step": 2178 }, { "epoch": 0.3036299031561346, "grad_norm": 0.17393863201141357, "learning_rate": 8.535950505815993e-06, "loss": 0.1133, "step": 2179 }, { "epoch": 0.30376924684734896, "grad_norm": 0.15720568597316742, "learning_rate": 8.53428381173802e-06, "loss": 0.1525, "step": 2180 }, { "epoch": 0.30390859053856334, "grad_norm": 0.11727342009544373, "learning_rate": 8.532616332422872e-06, "loss": 0.1468, "step": 2181 }, { "epoch": 0.30404793422977777, "grad_norm": 0.15490339696407318, "learning_rate": 8.530948068241028e-06, "loss": 0.1191, "step": 2182 }, { "epoch": 0.30418727792099215, "grad_norm": 0.24909062683582306, "learning_rate": 8.529279019563133e-06, "loss": 0.1254, "step": 2183 }, { "epoch": 0.3043266216122065, "grad_norm": 0.22353430092334747, "learning_rate": 8.527609186760017e-06, "loss": 0.1463, "step": 2184 }, { "epoch": 0.3044659653034209, "grad_norm": 0.16523800790309906, "learning_rate": 8.525938570202676e-06, "loss": 0.1195, "step": 2185 }, { "epoch": 0.3046053089946353, "grad_norm": 0.22920365631580353, "learning_rate": 8.524267170262283e-06, "loss": 0.1612, "step": 2186 }, { "epoch": 0.30474465268584966, "grad_norm": 0.09196074306964874, "learning_rate": 8.522594987310184e-06, "loss": 0.1065, "step": 2187 }, { "epoch": 0.30488399637706404, "grad_norm": 0.202069491147995, "learning_rate": 8.520922021717903e-06, "loss": 0.1335, "step": 2188 }, { "epoch": 0.3050233400682784, "grad_norm": 0.30281323194503784, "learning_rate": 8.519248273857132e-06, "loss": 0.1481, "step": 2189 }, { "epoch": 0.3051626837594928, "grad_norm": 0.17051389813423157, "learning_rate": 8.51757374409974e-06, "loss": 0.1334, "step": 2190 }, { "epoch": 0.30530202745070717, "grad_norm": 0.197818323969841, "learning_rate": 8.51589843281777e-06, "loss": 0.1766, "step": 2191 }, { "epoch": 0.30544137114192155, "grad_norm": 0.12399482727050781, "learning_rate": 8.514222340383438e-06, "loss": 0.1256, "step": 2192 }, { "epoch": 0.3055807148331359, "grad_norm": 0.3262777328491211, "learning_rate": 8.512545467169133e-06, "loss": 0.14, "step": 2193 }, { "epoch": 0.3057200585243503, "grad_norm": 0.13740792870521545, "learning_rate": 8.510867813547417e-06, "loss": 0.1278, "step": 2194 }, { "epoch": 0.3058594022155647, "grad_norm": 0.2732554078102112, "learning_rate": 8.509189379891029e-06, "loss": 0.1348, "step": 2195 }, { "epoch": 0.30599874590677906, "grad_norm": 0.1634773313999176, "learning_rate": 8.507510166572875e-06, "loss": 0.1421, "step": 2196 }, { "epoch": 0.30613808959799343, "grad_norm": 0.16212278604507446, "learning_rate": 8.50583017396604e-06, "loss": 0.1476, "step": 2197 }, { "epoch": 0.3062774332892078, "grad_norm": 0.27314209938049316, "learning_rate": 8.504149402443782e-06, "loss": 0.1237, "step": 2198 }, { "epoch": 0.3064167769804222, "grad_norm": 0.2959073483943939, "learning_rate": 8.502467852379526e-06, "loss": 0.1359, "step": 2199 }, { "epoch": 0.30655612067163657, "grad_norm": 0.11668670922517776, "learning_rate": 8.500785524146875e-06, "loss": 0.1117, "step": 2200 }, { "epoch": 0.30669546436285094, "grad_norm": 0.19793397188186646, "learning_rate": 8.499102418119607e-06, "loss": 0.1273, "step": 2201 }, { "epoch": 0.3068348080540654, "grad_norm": 0.15447235107421875, "learning_rate": 8.497418534671666e-06, "loss": 0.1393, "step": 2202 }, { "epoch": 0.30697415174527976, "grad_norm": 0.22713761031627655, "learning_rate": 8.495733874177176e-06, "loss": 0.1297, "step": 2203 }, { "epoch": 0.30711349543649413, "grad_norm": 0.29090580344200134, "learning_rate": 8.494048437010427e-06, "loss": 0.1495, "step": 2204 }, { "epoch": 0.3072528391277085, "grad_norm": 0.12017729133367538, "learning_rate": 8.492362223545884e-06, "loss": 0.1292, "step": 2205 }, { "epoch": 0.3073921828189229, "grad_norm": 0.16115669906139374, "learning_rate": 8.49067523415819e-06, "loss": 0.1253, "step": 2206 }, { "epoch": 0.30753152651013727, "grad_norm": 0.3175918161869049, "learning_rate": 8.48898746922215e-06, "loss": 0.1586, "step": 2207 }, { "epoch": 0.30767087020135164, "grad_norm": 0.25756317377090454, "learning_rate": 8.487298929112751e-06, "loss": 0.1523, "step": 2208 }, { "epoch": 0.307810213892566, "grad_norm": 0.20243537425994873, "learning_rate": 8.485609614205146e-06, "loss": 0.1621, "step": 2209 }, { "epoch": 0.3079495575837804, "grad_norm": 0.2257983237504959, "learning_rate": 8.483919524874661e-06, "loss": 0.115, "step": 2210 }, { "epoch": 0.3080889012749948, "grad_norm": 0.3034466505050659, "learning_rate": 8.482228661496797e-06, "loss": 0.1326, "step": 2211 }, { "epoch": 0.30822824496620915, "grad_norm": 0.26462435722351074, "learning_rate": 8.480537024447227e-06, "loss": 0.1492, "step": 2212 }, { "epoch": 0.30836758865742353, "grad_norm": 0.18944118916988373, "learning_rate": 8.478844614101792e-06, "loss": 0.1357, "step": 2213 }, { "epoch": 0.3085069323486379, "grad_norm": 0.11002016812562943, "learning_rate": 8.477151430836505e-06, "loss": 0.1086, "step": 2214 }, { "epoch": 0.3086462760398523, "grad_norm": 0.20702563226222992, "learning_rate": 8.475457475027555e-06, "loss": 0.1287, "step": 2215 }, { "epoch": 0.30878561973106666, "grad_norm": 0.24342839419841766, "learning_rate": 8.473762747051302e-06, "loss": 0.1563, "step": 2216 }, { "epoch": 0.30892496342228104, "grad_norm": 0.14113061130046844, "learning_rate": 8.472067247284272e-06, "loss": 0.1139, "step": 2217 }, { "epoch": 0.3090643071134954, "grad_norm": 0.10883061587810516, "learning_rate": 8.470370976103171e-06, "loss": 0.1284, "step": 2218 }, { "epoch": 0.3092036508047098, "grad_norm": 0.2998693287372589, "learning_rate": 8.468673933884867e-06, "loss": 0.1311, "step": 2219 }, { "epoch": 0.3093429944959242, "grad_norm": 0.2080339938402176, "learning_rate": 8.466976121006407e-06, "loss": 0.1301, "step": 2220 }, { "epoch": 0.30948233818713855, "grad_norm": 0.15326222777366638, "learning_rate": 8.465277537845004e-06, "loss": 0.1504, "step": 2221 }, { "epoch": 0.309621681878353, "grad_norm": 0.1434650719165802, "learning_rate": 8.463578184778047e-06, "loss": 0.1222, "step": 2222 }, { "epoch": 0.30976102556956736, "grad_norm": 0.1480981707572937, "learning_rate": 8.461878062183092e-06, "loss": 0.1291, "step": 2223 }, { "epoch": 0.30990036926078174, "grad_norm": 0.2109181135892868, "learning_rate": 8.460177170437865e-06, "loss": 0.1346, "step": 2224 }, { "epoch": 0.3100397129519961, "grad_norm": 0.18973682820796967, "learning_rate": 8.458475509920272e-06, "loss": 0.1406, "step": 2225 }, { "epoch": 0.3101790566432105, "grad_norm": 0.24071985483169556, "learning_rate": 8.456773081008376e-06, "loss": 0.1399, "step": 2226 }, { "epoch": 0.31031840033442487, "grad_norm": 0.2703801691532135, "learning_rate": 8.455069884080422e-06, "loss": 0.1307, "step": 2227 }, { "epoch": 0.31045774402563925, "grad_norm": 0.1706540286540985, "learning_rate": 8.45336591951482e-06, "loss": 0.1493, "step": 2228 }, { "epoch": 0.3105970877168536, "grad_norm": 0.08665762096643448, "learning_rate": 8.451661187690154e-06, "loss": 0.0968, "step": 2229 }, { "epoch": 0.310736431408068, "grad_norm": 0.1312989890575409, "learning_rate": 8.449955688985174e-06, "loss": 0.1242, "step": 2230 }, { "epoch": 0.3108757750992824, "grad_norm": 0.24650730192661285, "learning_rate": 8.448249423778802e-06, "loss": 0.1346, "step": 2231 }, { "epoch": 0.31101511879049676, "grad_norm": 0.3118823766708374, "learning_rate": 8.446542392450134e-06, "loss": 0.1792, "step": 2232 }, { "epoch": 0.31115446248171114, "grad_norm": 0.24056175351142883, "learning_rate": 8.444834595378434e-06, "loss": 0.1454, "step": 2233 }, { "epoch": 0.3112938061729255, "grad_norm": 0.25458261370658875, "learning_rate": 8.443126032943132e-06, "loss": 0.1377, "step": 2234 }, { "epoch": 0.3114331498641399, "grad_norm": 0.19800865650177002, "learning_rate": 8.441416705523834e-06, "loss": 0.1436, "step": 2235 }, { "epoch": 0.31157249355535427, "grad_norm": 0.17617766559123993, "learning_rate": 8.439706613500312e-06, "loss": 0.1451, "step": 2236 }, { "epoch": 0.31171183724656865, "grad_norm": 0.29682132601737976, "learning_rate": 8.43799575725251e-06, "loss": 0.1614, "step": 2237 }, { "epoch": 0.311851180937783, "grad_norm": 0.16707715392112732, "learning_rate": 8.436284137160544e-06, "loss": 0.1499, "step": 2238 }, { "epoch": 0.3119905246289974, "grad_norm": 0.20053629577159882, "learning_rate": 8.434571753604693e-06, "loss": 0.1422, "step": 2239 }, { "epoch": 0.3121298683202118, "grad_norm": 0.361014187335968, "learning_rate": 8.432858606965411e-06, "loss": 0.1444, "step": 2240 }, { "epoch": 0.31226921201142616, "grad_norm": 0.2538716793060303, "learning_rate": 8.43114469762332e-06, "loss": 0.1178, "step": 2241 }, { "epoch": 0.3124085557026406, "grad_norm": 0.25972095131874084, "learning_rate": 8.429430025959212e-06, "loss": 0.1469, "step": 2242 }, { "epoch": 0.31254789939385497, "grad_norm": 0.21079498529434204, "learning_rate": 8.427714592354046e-06, "loss": 0.1779, "step": 2243 }, { "epoch": 0.31268724308506934, "grad_norm": 0.14168335497379303, "learning_rate": 8.425998397188955e-06, "loss": 0.1033, "step": 2244 }, { "epoch": 0.3128265867762837, "grad_norm": 0.26757192611694336, "learning_rate": 8.424281440845236e-06, "loss": 0.1426, "step": 2245 }, { "epoch": 0.3129659304674981, "grad_norm": 0.22740311920642853, "learning_rate": 8.42256372370436e-06, "loss": 0.1451, "step": 2246 }, { "epoch": 0.3131052741587125, "grad_norm": 0.21106453239917755, "learning_rate": 8.420845246147961e-06, "loss": 0.132, "step": 2247 }, { "epoch": 0.31324461784992685, "grad_norm": 0.26092278957366943, "learning_rate": 8.41912600855785e-06, "loss": 0.1938, "step": 2248 }, { "epoch": 0.31338396154114123, "grad_norm": 0.19074825942516327, "learning_rate": 8.417406011316e-06, "loss": 0.1295, "step": 2249 }, { "epoch": 0.3135233052323556, "grad_norm": 0.21333865821361542, "learning_rate": 8.415685254804552e-06, "loss": 0.132, "step": 2250 }, { "epoch": 0.31366264892357, "grad_norm": 0.1568218469619751, "learning_rate": 8.413963739405824e-06, "loss": 0.1137, "step": 2251 }, { "epoch": 0.31380199261478436, "grad_norm": 0.2779082953929901, "learning_rate": 8.412241465502294e-06, "loss": 0.1419, "step": 2252 }, { "epoch": 0.31394133630599874, "grad_norm": 0.32384347915649414, "learning_rate": 8.410518433476613e-06, "loss": 0.176, "step": 2253 }, { "epoch": 0.3140806799972131, "grad_norm": 0.22292950749397278, "learning_rate": 8.408794643711601e-06, "loss": 0.1285, "step": 2254 }, { "epoch": 0.3142200236884275, "grad_norm": 0.19513556361198425, "learning_rate": 8.407070096590243e-06, "loss": 0.1084, "step": 2255 }, { "epoch": 0.3143593673796419, "grad_norm": 0.49697020649909973, "learning_rate": 8.405344792495694e-06, "loss": 0.137, "step": 2256 }, { "epoch": 0.31449871107085625, "grad_norm": 0.18501070141792297, "learning_rate": 8.403618731811277e-06, "loss": 0.1055, "step": 2257 }, { "epoch": 0.31463805476207063, "grad_norm": 0.3456658720970154, "learning_rate": 8.401891914920483e-06, "loss": 0.1628, "step": 2258 }, { "epoch": 0.314777398453285, "grad_norm": 0.26356247067451477, "learning_rate": 8.400164342206973e-06, "loss": 0.1557, "step": 2259 }, { "epoch": 0.3149167421444994, "grad_norm": 0.25502729415893555, "learning_rate": 8.398436014054575e-06, "loss": 0.1223, "step": 2260 }, { "epoch": 0.31505608583571376, "grad_norm": 0.4178365468978882, "learning_rate": 8.39670693084728e-06, "loss": 0.1543, "step": 2261 }, { "epoch": 0.3151954295269282, "grad_norm": 0.10840478539466858, "learning_rate": 8.394977092969253e-06, "loss": 0.1158, "step": 2262 }, { "epoch": 0.31533477321814257, "grad_norm": 0.23695267736911774, "learning_rate": 8.393246500804825e-06, "loss": 0.1408, "step": 2263 }, { "epoch": 0.31547411690935695, "grad_norm": 0.3393460512161255, "learning_rate": 8.391515154738495e-06, "loss": 0.1904, "step": 2264 }, { "epoch": 0.3156134606005713, "grad_norm": 0.2844921052455902, "learning_rate": 8.389783055154925e-06, "loss": 0.1359, "step": 2265 }, { "epoch": 0.3157528042917857, "grad_norm": 0.24171312153339386, "learning_rate": 8.388050202438952e-06, "loss": 0.1328, "step": 2266 }, { "epoch": 0.3158921479830001, "grad_norm": 0.1441793441772461, "learning_rate": 8.386316596975574e-06, "loss": 0.1113, "step": 2267 }, { "epoch": 0.31603149167421446, "grad_norm": 0.1952381432056427, "learning_rate": 8.38458223914996e-06, "loss": 0.1386, "step": 2268 }, { "epoch": 0.31617083536542884, "grad_norm": 0.19594113528728485, "learning_rate": 8.38284712934744e-06, "loss": 0.159, "step": 2269 }, { "epoch": 0.3163101790566432, "grad_norm": 0.26932045817375183, "learning_rate": 8.381111267953523e-06, "loss": 0.1487, "step": 2270 }, { "epoch": 0.3164495227478576, "grad_norm": 0.1733291894197464, "learning_rate": 8.379374655353874e-06, "loss": 0.1447, "step": 2271 }, { "epoch": 0.31658886643907197, "grad_norm": 0.2786027789115906, "learning_rate": 8.377637291934329e-06, "loss": 0.1585, "step": 2272 }, { "epoch": 0.31672821013028635, "grad_norm": 0.14321956038475037, "learning_rate": 8.37589917808089e-06, "loss": 0.1179, "step": 2273 }, { "epoch": 0.3168675538215007, "grad_norm": 0.21668651700019836, "learning_rate": 8.374160314179727e-06, "loss": 0.1631, "step": 2274 }, { "epoch": 0.3170068975127151, "grad_norm": 0.17155376076698303, "learning_rate": 8.372420700617176e-06, "loss": 0.1107, "step": 2275 }, { "epoch": 0.3171462412039295, "grad_norm": 0.2909504473209381, "learning_rate": 8.370680337779737e-06, "loss": 0.1445, "step": 2276 }, { "epoch": 0.31728558489514386, "grad_norm": 0.2644071877002716, "learning_rate": 8.368939226054083e-06, "loss": 0.1389, "step": 2277 }, { "epoch": 0.31742492858635823, "grad_norm": 0.1922302395105362, "learning_rate": 8.367197365827047e-06, "loss": 0.1626, "step": 2278 }, { "epoch": 0.3175642722775726, "grad_norm": 0.23222428560256958, "learning_rate": 8.36545475748563e-06, "loss": 0.227, "step": 2279 }, { "epoch": 0.317703615968787, "grad_norm": 0.3677062392234802, "learning_rate": 8.363711401417e-06, "loss": 0.159, "step": 2280 }, { "epoch": 0.31784295966000137, "grad_norm": 0.37094658613204956, "learning_rate": 8.361967298008494e-06, "loss": 0.1422, "step": 2281 }, { "epoch": 0.3179823033512158, "grad_norm": 0.2401321530342102, "learning_rate": 8.360222447647606e-06, "loss": 0.1228, "step": 2282 }, { "epoch": 0.3181216470424302, "grad_norm": 0.1898527890443802, "learning_rate": 8.358476850722007e-06, "loss": 0.1189, "step": 2283 }, { "epoch": 0.31826099073364456, "grad_norm": 0.3686445951461792, "learning_rate": 8.356730507619526e-06, "loss": 0.1457, "step": 2284 }, { "epoch": 0.31840033442485893, "grad_norm": 0.47246602177619934, "learning_rate": 8.354983418728165e-06, "loss": 0.108, "step": 2285 }, { "epoch": 0.3185396781160733, "grad_norm": 0.3513410687446594, "learning_rate": 8.353235584436082e-06, "loss": 0.1666, "step": 2286 }, { "epoch": 0.3186790218072877, "grad_norm": 0.12981140613555908, "learning_rate": 8.351487005131606e-06, "loss": 0.1148, "step": 2287 }, { "epoch": 0.31881836549850207, "grad_norm": 0.16587334871292114, "learning_rate": 8.349737681203234e-06, "loss": 0.1118, "step": 2288 }, { "epoch": 0.31895770918971644, "grad_norm": 0.18409094214439392, "learning_rate": 8.347987613039626e-06, "loss": 0.1543, "step": 2289 }, { "epoch": 0.3190970528809308, "grad_norm": 0.34264445304870605, "learning_rate": 8.346236801029605e-06, "loss": 0.1329, "step": 2290 }, { "epoch": 0.3192363965721452, "grad_norm": 0.22296622395515442, "learning_rate": 8.344485245562165e-06, "loss": 0.149, "step": 2291 }, { "epoch": 0.3193757402633596, "grad_norm": 0.12579205632209778, "learning_rate": 8.342732947026457e-06, "loss": 0.1168, "step": 2292 }, { "epoch": 0.31951508395457395, "grad_norm": 0.1557200700044632, "learning_rate": 8.340979905811805e-06, "loss": 0.1214, "step": 2293 }, { "epoch": 0.31965442764578833, "grad_norm": 0.24469918012619019, "learning_rate": 8.339226122307696e-06, "loss": 0.1362, "step": 2294 }, { "epoch": 0.3197937713370027, "grad_norm": 0.14357362687587738, "learning_rate": 8.337471596903774e-06, "loss": 0.1198, "step": 2295 }, { "epoch": 0.3199331150282171, "grad_norm": 0.12285399436950684, "learning_rate": 8.335716329989863e-06, "loss": 0.1086, "step": 2296 }, { "epoch": 0.32007245871943146, "grad_norm": 0.156715527176857, "learning_rate": 8.333960321955937e-06, "loss": 0.1411, "step": 2297 }, { "epoch": 0.32021180241064584, "grad_norm": 0.20331981778144836, "learning_rate": 8.332203573192143e-06, "loss": 0.1131, "step": 2298 }, { "epoch": 0.3203511461018602, "grad_norm": 0.08480936288833618, "learning_rate": 8.330446084088791e-06, "loss": 0.0901, "step": 2299 }, { "epoch": 0.3204904897930746, "grad_norm": 0.1942809373140335, "learning_rate": 8.328687855036355e-06, "loss": 0.1402, "step": 2300 }, { "epoch": 0.320629833484289, "grad_norm": 0.15488258004188538, "learning_rate": 8.326928886425471e-06, "loss": 0.1236, "step": 2301 }, { "epoch": 0.3207691771755034, "grad_norm": 0.2492803931236267, "learning_rate": 8.325169178646946e-06, "loss": 0.1426, "step": 2302 }, { "epoch": 0.3209085208667178, "grad_norm": 0.23551419377326965, "learning_rate": 8.323408732091743e-06, "loss": 0.1394, "step": 2303 }, { "epoch": 0.32104786455793216, "grad_norm": 0.22159652411937714, "learning_rate": 8.321647547150995e-06, "loss": 0.1376, "step": 2304 }, { "epoch": 0.32118720824914654, "grad_norm": 0.34441709518432617, "learning_rate": 8.319885624215996e-06, "loss": 0.1679, "step": 2305 }, { "epoch": 0.3213265519403609, "grad_norm": 0.1862511783838272, "learning_rate": 8.318122963678206e-06, "loss": 0.1318, "step": 2306 }, { "epoch": 0.3214658956315753, "grad_norm": 0.12024383246898651, "learning_rate": 8.316359565929248e-06, "loss": 0.1005, "step": 2307 }, { "epoch": 0.32160523932278967, "grad_norm": 0.1595359593629837, "learning_rate": 8.314595431360906e-06, "loss": 0.1109, "step": 2308 }, { "epoch": 0.32174458301400405, "grad_norm": 0.06592294573783875, "learning_rate": 8.312830560365136e-06, "loss": 0.0947, "step": 2309 }, { "epoch": 0.3218839267052184, "grad_norm": 0.19248197972774506, "learning_rate": 8.311064953334046e-06, "loss": 0.1447, "step": 2310 }, { "epoch": 0.3220232703964328, "grad_norm": 0.31567859649658203, "learning_rate": 8.309298610659917e-06, "loss": 0.1672, "step": 2311 }, { "epoch": 0.3221626140876472, "grad_norm": 0.25178012251853943, "learning_rate": 8.307531532735188e-06, "loss": 0.1448, "step": 2312 }, { "epoch": 0.32230195777886156, "grad_norm": 0.2325277179479599, "learning_rate": 8.305763719952467e-06, "loss": 0.1331, "step": 2313 }, { "epoch": 0.32244130147007594, "grad_norm": 0.1300540566444397, "learning_rate": 8.303995172704519e-06, "loss": 0.112, "step": 2314 }, { "epoch": 0.3225806451612903, "grad_norm": 0.3100995421409607, "learning_rate": 8.302225891384275e-06, "loss": 0.1452, "step": 2315 }, { "epoch": 0.3227199888525047, "grad_norm": 0.16932371258735657, "learning_rate": 8.300455876384827e-06, "loss": 0.1143, "step": 2316 }, { "epoch": 0.32285933254371907, "grad_norm": 0.30470010638237, "learning_rate": 8.298685128099437e-06, "loss": 0.1457, "step": 2317 }, { "epoch": 0.32299867623493345, "grad_norm": 0.19749630987644196, "learning_rate": 8.29691364692152e-06, "loss": 0.1402, "step": 2318 }, { "epoch": 0.3231380199261478, "grad_norm": 0.18629927933216095, "learning_rate": 8.29514143324466e-06, "loss": 0.1376, "step": 2319 }, { "epoch": 0.3232773636173622, "grad_norm": 0.31796976923942566, "learning_rate": 8.293368487462604e-06, "loss": 0.1609, "step": 2320 }, { "epoch": 0.3234167073085766, "grad_norm": 0.22384944558143616, "learning_rate": 8.29159480996926e-06, "loss": 0.1297, "step": 2321 }, { "epoch": 0.323556050999791, "grad_norm": 0.1333789825439453, "learning_rate": 8.289820401158695e-06, "loss": 0.12, "step": 2322 }, { "epoch": 0.3236953946910054, "grad_norm": 0.18101930618286133, "learning_rate": 8.288045261425146e-06, "loss": 0.1287, "step": 2323 }, { "epoch": 0.32383473838221977, "grad_norm": 0.27032047510147095, "learning_rate": 8.286269391163006e-06, "loss": 0.1353, "step": 2324 }, { "epoch": 0.32397408207343414, "grad_norm": 0.32771408557891846, "learning_rate": 8.284492790766835e-06, "loss": 0.1609, "step": 2325 }, { "epoch": 0.3241134257646485, "grad_norm": 0.2514092028141022, "learning_rate": 8.282715460631354e-06, "loss": 0.1534, "step": 2326 }, { "epoch": 0.3242527694558629, "grad_norm": 0.35378938913345337, "learning_rate": 8.280937401151441e-06, "loss": 0.1888, "step": 2327 }, { "epoch": 0.3243921131470773, "grad_norm": 0.27188825607299805, "learning_rate": 8.279158612722145e-06, "loss": 0.1847, "step": 2328 }, { "epoch": 0.32453145683829165, "grad_norm": 0.2173469066619873, "learning_rate": 8.277379095738668e-06, "loss": 0.1335, "step": 2329 }, { "epoch": 0.32467080052950603, "grad_norm": 0.2840465009212494, "learning_rate": 8.27559885059638e-06, "loss": 0.1173, "step": 2330 }, { "epoch": 0.3248101442207204, "grad_norm": 0.5985223054885864, "learning_rate": 8.273817877690809e-06, "loss": 0.1769, "step": 2331 }, { "epoch": 0.3249494879119348, "grad_norm": 0.09667589515447617, "learning_rate": 8.272036177417649e-06, "loss": 0.1008, "step": 2332 }, { "epoch": 0.32508883160314916, "grad_norm": 0.20750655233860016, "learning_rate": 8.270253750172754e-06, "loss": 0.1413, "step": 2333 }, { "epoch": 0.32522817529436354, "grad_norm": 0.16406646370887756, "learning_rate": 8.268470596352134e-06, "loss": 0.1144, "step": 2334 }, { "epoch": 0.3253675189855779, "grad_norm": 0.26067233085632324, "learning_rate": 8.26668671635197e-06, "loss": 0.1331, "step": 2335 }, { "epoch": 0.3255068626767923, "grad_norm": 0.2783116400241852, "learning_rate": 8.264902110568598e-06, "loss": 0.2057, "step": 2336 }, { "epoch": 0.3256462063680067, "grad_norm": 0.23032313585281372, "learning_rate": 8.263116779398514e-06, "loss": 0.112, "step": 2337 }, { "epoch": 0.32578555005922105, "grad_norm": 0.235463485121727, "learning_rate": 8.261330723238381e-06, "loss": 0.1583, "step": 2338 }, { "epoch": 0.32592489375043543, "grad_norm": 0.15075162053108215, "learning_rate": 8.25954394248502e-06, "loss": 0.1306, "step": 2339 }, { "epoch": 0.3260642374416498, "grad_norm": 0.21266159415245056, "learning_rate": 8.25775643753541e-06, "loss": 0.147, "step": 2340 }, { "epoch": 0.3262035811328642, "grad_norm": 0.22312521934509277, "learning_rate": 8.255968208786694e-06, "loss": 0.1629, "step": 2341 }, { "epoch": 0.3263429248240786, "grad_norm": 0.2216222882270813, "learning_rate": 8.25417925663618e-06, "loss": 0.1163, "step": 2342 }, { "epoch": 0.326482268515293, "grad_norm": 0.18747280538082123, "learning_rate": 8.252389581481328e-06, "loss": 0.1025, "step": 2343 }, { "epoch": 0.3266216122065074, "grad_norm": 0.17210908234119415, "learning_rate": 8.250599183719763e-06, "loss": 0.1346, "step": 2344 }, { "epoch": 0.32676095589772175, "grad_norm": 0.30241772532463074, "learning_rate": 8.248808063749273e-06, "loss": 0.1643, "step": 2345 }, { "epoch": 0.3269002995889361, "grad_norm": 0.1702362298965454, "learning_rate": 8.247016221967802e-06, "loss": 0.13, "step": 2346 }, { "epoch": 0.3270396432801505, "grad_norm": 0.24111638963222504, "learning_rate": 8.245223658773459e-06, "loss": 0.1426, "step": 2347 }, { "epoch": 0.3271789869713649, "grad_norm": 0.18660257756710052, "learning_rate": 8.243430374564507e-06, "loss": 0.1676, "step": 2348 }, { "epoch": 0.32731833066257926, "grad_norm": 0.13784314692020416, "learning_rate": 8.241636369739376e-06, "loss": 0.1245, "step": 2349 }, { "epoch": 0.32745767435379364, "grad_norm": 0.1992398202419281, "learning_rate": 8.23984164469665e-06, "loss": 0.1525, "step": 2350 }, { "epoch": 0.327597018045008, "grad_norm": 0.26184186339378357, "learning_rate": 8.23804619983508e-06, "loss": 0.1421, "step": 2351 }, { "epoch": 0.3277363617362224, "grad_norm": 0.26485565304756165, "learning_rate": 8.236250035553569e-06, "loss": 0.1509, "step": 2352 }, { "epoch": 0.32787570542743677, "grad_norm": 0.1930147111415863, "learning_rate": 8.234453152251183e-06, "loss": 0.1185, "step": 2353 }, { "epoch": 0.32801504911865115, "grad_norm": 0.13638797402381897, "learning_rate": 8.23265555032715e-06, "loss": 0.1283, "step": 2354 }, { "epoch": 0.3281543928098655, "grad_norm": 0.2523273825645447, "learning_rate": 8.23085723018086e-06, "loss": 0.1541, "step": 2355 }, { "epoch": 0.3282937365010799, "grad_norm": 0.18284311890602112, "learning_rate": 8.229058192211851e-06, "loss": 0.1169, "step": 2356 }, { "epoch": 0.3284330801922943, "grad_norm": 0.14726458489894867, "learning_rate": 8.227258436819836e-06, "loss": 0.1461, "step": 2357 }, { "epoch": 0.32857242388350866, "grad_norm": 0.13338644802570343, "learning_rate": 8.225457964404675e-06, "loss": 0.1255, "step": 2358 }, { "epoch": 0.32871176757472303, "grad_norm": 0.1361958384513855, "learning_rate": 8.223656775366393e-06, "loss": 0.1325, "step": 2359 }, { "epoch": 0.3288511112659374, "grad_norm": 0.11878964304924011, "learning_rate": 8.221854870105172e-06, "loss": 0.1236, "step": 2360 }, { "epoch": 0.3289904549571518, "grad_norm": 0.1283678263425827, "learning_rate": 8.220052249021356e-06, "loss": 0.1287, "step": 2361 }, { "epoch": 0.32912979864836617, "grad_norm": 0.12634260952472687, "learning_rate": 8.218248912515443e-06, "loss": 0.1233, "step": 2362 }, { "epoch": 0.3292691423395806, "grad_norm": 0.24748225510120392, "learning_rate": 8.216444860988098e-06, "loss": 0.1585, "step": 2363 }, { "epoch": 0.329408486030795, "grad_norm": 0.26653793454170227, "learning_rate": 8.214640094840136e-06, "loss": 0.1372, "step": 2364 }, { "epoch": 0.32954782972200936, "grad_norm": 0.13675980269908905, "learning_rate": 8.212834614472538e-06, "loss": 0.1384, "step": 2365 }, { "epoch": 0.32968717341322373, "grad_norm": 0.18580760061740875, "learning_rate": 8.211028420286437e-06, "loss": 0.1545, "step": 2366 }, { "epoch": 0.3298265171044381, "grad_norm": 0.12770824134349823, "learning_rate": 8.209221512683132e-06, "loss": 0.1221, "step": 2367 }, { "epoch": 0.3299658607956525, "grad_norm": 0.2737557291984558, "learning_rate": 8.207413892064073e-06, "loss": 0.1671, "step": 2368 }, { "epoch": 0.33010520448686687, "grad_norm": 0.1914772093296051, "learning_rate": 8.205605558830873e-06, "loss": 0.1514, "step": 2369 }, { "epoch": 0.33024454817808124, "grad_norm": 0.2854081094264984, "learning_rate": 8.203796513385307e-06, "loss": 0.1557, "step": 2370 }, { "epoch": 0.3303838918692956, "grad_norm": 0.22312533855438232, "learning_rate": 8.201986756129297e-06, "loss": 0.1055, "step": 2371 }, { "epoch": 0.33052323556051, "grad_norm": 0.19898729026317596, "learning_rate": 8.200176287464931e-06, "loss": 0.1408, "step": 2372 }, { "epoch": 0.3306625792517244, "grad_norm": 0.15295180678367615, "learning_rate": 8.198365107794457e-06, "loss": 0.1294, "step": 2373 }, { "epoch": 0.33080192294293875, "grad_norm": 0.16944268345832825, "learning_rate": 8.196553217520275e-06, "loss": 0.128, "step": 2374 }, { "epoch": 0.33094126663415313, "grad_norm": 0.14021362364292145, "learning_rate": 8.194740617044948e-06, "loss": 0.1419, "step": 2375 }, { "epoch": 0.3310806103253675, "grad_norm": 0.0936959758400917, "learning_rate": 8.192927306771193e-06, "loss": 0.1057, "step": 2376 }, { "epoch": 0.3312199540165819, "grad_norm": 0.14074622094631195, "learning_rate": 8.191113287101884e-06, "loss": 0.12, "step": 2377 }, { "epoch": 0.33135929770779626, "grad_norm": 0.23977495729923248, "learning_rate": 8.18929855844006e-06, "loss": 0.1513, "step": 2378 }, { "epoch": 0.33149864139901064, "grad_norm": 0.1533629596233368, "learning_rate": 8.187483121188908e-06, "loss": 0.1441, "step": 2379 }, { "epoch": 0.331637985090225, "grad_norm": 0.12327343970537186, "learning_rate": 8.185666975751778e-06, "loss": 0.1181, "step": 2380 }, { "epoch": 0.3317773287814394, "grad_norm": 0.1792830526828766, "learning_rate": 8.183850122532174e-06, "loss": 0.115, "step": 2381 }, { "epoch": 0.3319166724726538, "grad_norm": 0.22014549374580383, "learning_rate": 8.182032561933764e-06, "loss": 0.1492, "step": 2382 }, { "epoch": 0.3320560161638682, "grad_norm": 0.1238020658493042, "learning_rate": 8.180214294360365e-06, "loss": 0.1402, "step": 2383 }, { "epoch": 0.3321953598550826, "grad_norm": 0.09474123269319534, "learning_rate": 8.178395320215953e-06, "loss": 0.1134, "step": 2384 }, { "epoch": 0.33233470354629696, "grad_norm": 0.21067465841770172, "learning_rate": 8.176575639904668e-06, "loss": 0.1725, "step": 2385 }, { "epoch": 0.33247404723751134, "grad_norm": 0.22028601169586182, "learning_rate": 8.174755253830797e-06, "loss": 0.1611, "step": 2386 }, { "epoch": 0.3326133909287257, "grad_norm": 0.21229185163974762, "learning_rate": 8.17293416239879e-06, "loss": 0.1534, "step": 2387 }, { "epoch": 0.3327527346199401, "grad_norm": 0.25286635756492615, "learning_rate": 8.171112366013252e-06, "loss": 0.1422, "step": 2388 }, { "epoch": 0.33289207831115447, "grad_norm": 0.22615228593349457, "learning_rate": 8.169289865078942e-06, "loss": 0.1715, "step": 2389 }, { "epoch": 0.33303142200236885, "grad_norm": 0.19394494593143463, "learning_rate": 8.167466660000781e-06, "loss": 0.1714, "step": 2390 }, { "epoch": 0.3331707656935832, "grad_norm": 0.17803694307804108, "learning_rate": 8.165642751183844e-06, "loss": 0.1284, "step": 2391 }, { "epoch": 0.3333101093847976, "grad_norm": 0.38380637764930725, "learning_rate": 8.163818139033359e-06, "loss": 0.1321, "step": 2392 }, { "epoch": 0.333449453076012, "grad_norm": 0.16307243704795837, "learning_rate": 8.161992823954715e-06, "loss": 0.1149, "step": 2393 }, { "epoch": 0.33358879676722636, "grad_norm": 0.1832832247018814, "learning_rate": 8.160166806353455e-06, "loss": 0.1427, "step": 2394 }, { "epoch": 0.33372814045844074, "grad_norm": 0.13393226265907288, "learning_rate": 8.15834008663528e-06, "loss": 0.1395, "step": 2395 }, { "epoch": 0.3338674841496551, "grad_norm": 0.21363495290279388, "learning_rate": 8.156512665206043e-06, "loss": 0.1339, "step": 2396 }, { "epoch": 0.3340068278408695, "grad_norm": 0.13761214911937714, "learning_rate": 8.154684542471754e-06, "loss": 0.1177, "step": 2397 }, { "epoch": 0.33414617153208387, "grad_norm": 0.1141810417175293, "learning_rate": 8.152855718838583e-06, "loss": 0.098, "step": 2398 }, { "epoch": 0.33428551522329825, "grad_norm": 0.25179749727249146, "learning_rate": 8.151026194712854e-06, "loss": 0.1587, "step": 2399 }, { "epoch": 0.3344248589145126, "grad_norm": 0.09925556182861328, "learning_rate": 8.149195970501043e-06, "loss": 0.12, "step": 2400 }, { "epoch": 0.334564202605727, "grad_norm": 0.19086953997612, "learning_rate": 8.147365046609786e-06, "loss": 0.1249, "step": 2401 }, { "epoch": 0.3347035462969414, "grad_norm": 0.1593829095363617, "learning_rate": 8.145533423445869e-06, "loss": 0.1339, "step": 2402 }, { "epoch": 0.3348428899881558, "grad_norm": 0.14071884751319885, "learning_rate": 8.14370110141624e-06, "loss": 0.1227, "step": 2403 }, { "epoch": 0.3349822336793702, "grad_norm": 0.10402972996234894, "learning_rate": 8.141868080927998e-06, "loss": 0.112, "step": 2404 }, { "epoch": 0.33512157737058457, "grad_norm": 0.3422079384326935, "learning_rate": 8.140034362388398e-06, "loss": 0.1785, "step": 2405 }, { "epoch": 0.33526092106179894, "grad_norm": 0.24216164648532867, "learning_rate": 8.13819994620485e-06, "loss": 0.1381, "step": 2406 }, { "epoch": 0.3354002647530133, "grad_norm": 0.17712554335594177, "learning_rate": 8.136364832784923e-06, "loss": 0.1338, "step": 2407 }, { "epoch": 0.3355396084442277, "grad_norm": 0.1812460869550705, "learning_rate": 8.134529022536332e-06, "loss": 0.1597, "step": 2408 }, { "epoch": 0.3356789521354421, "grad_norm": 0.21955780684947968, "learning_rate": 8.132692515866959e-06, "loss": 0.1086, "step": 2409 }, { "epoch": 0.33581829582665645, "grad_norm": 0.2497844249010086, "learning_rate": 8.130855313184824e-06, "loss": 0.1489, "step": 2410 }, { "epoch": 0.33595763951787083, "grad_norm": 0.21966993808746338, "learning_rate": 8.129017414898121e-06, "loss": 0.1258, "step": 2411 }, { "epoch": 0.3360969832090852, "grad_norm": 0.3229609429836273, "learning_rate": 8.127178821415183e-06, "loss": 0.1604, "step": 2412 }, { "epoch": 0.3362363269002996, "grad_norm": 0.19181841611862183, "learning_rate": 8.125339533144507e-06, "loss": 0.1502, "step": 2413 }, { "epoch": 0.33637567059151396, "grad_norm": 0.12778323888778687, "learning_rate": 8.123499550494737e-06, "loss": 0.1059, "step": 2414 }, { "epoch": 0.33651501428272834, "grad_norm": 0.3072018325328827, "learning_rate": 8.12165887387468e-06, "loss": 0.1196, "step": 2415 }, { "epoch": 0.3366543579739427, "grad_norm": 0.3437138795852661, "learning_rate": 8.11981750369329e-06, "loss": 0.1158, "step": 2416 }, { "epoch": 0.3367937016651571, "grad_norm": 0.16669578850269318, "learning_rate": 8.117975440359677e-06, "loss": 0.1409, "step": 2417 }, { "epoch": 0.3369330453563715, "grad_norm": 0.13433074951171875, "learning_rate": 8.116132684283104e-06, "loss": 0.1193, "step": 2418 }, { "epoch": 0.33707238904758585, "grad_norm": 0.1575034111738205, "learning_rate": 8.114289235872993e-06, "loss": 0.1125, "step": 2419 }, { "epoch": 0.33721173273880023, "grad_norm": 0.18716050684452057, "learning_rate": 8.112445095538915e-06, "loss": 0.1416, "step": 2420 }, { "epoch": 0.3373510764300146, "grad_norm": 0.13321231305599213, "learning_rate": 8.110600263690592e-06, "loss": 0.1218, "step": 2421 }, { "epoch": 0.337490420121229, "grad_norm": 0.15759748220443726, "learning_rate": 8.10875474073791e-06, "loss": 0.1487, "step": 2422 }, { "epoch": 0.3376297638124434, "grad_norm": 0.18853221833705902, "learning_rate": 8.106908527090895e-06, "loss": 0.1556, "step": 2423 }, { "epoch": 0.3377691075036578, "grad_norm": 0.131280779838562, "learning_rate": 8.10506162315974e-06, "loss": 0.129, "step": 2424 }, { "epoch": 0.3379084511948722, "grad_norm": 0.22162413597106934, "learning_rate": 8.103214029354783e-06, "loss": 0.1727, "step": 2425 }, { "epoch": 0.33804779488608655, "grad_norm": 0.1666242778301239, "learning_rate": 8.101365746086514e-06, "loss": 0.1221, "step": 2426 }, { "epoch": 0.3381871385773009, "grad_norm": 0.12141281366348267, "learning_rate": 8.099516773765581e-06, "loss": 0.1262, "step": 2427 }, { "epoch": 0.3383264822685153, "grad_norm": 0.12488049268722534, "learning_rate": 8.097667112802784e-06, "loss": 0.1291, "step": 2428 }, { "epoch": 0.3384658259597297, "grad_norm": 0.11902723461389542, "learning_rate": 8.095816763609077e-06, "loss": 0.1355, "step": 2429 }, { "epoch": 0.33860516965094406, "grad_norm": 0.1637689769268036, "learning_rate": 8.093965726595565e-06, "loss": 0.1348, "step": 2430 }, { "epoch": 0.33874451334215844, "grad_norm": 0.20358949899673462, "learning_rate": 8.092114002173503e-06, "loss": 0.129, "step": 2431 }, { "epoch": 0.3388838570333728, "grad_norm": 0.24812185764312744, "learning_rate": 8.090261590754304e-06, "loss": 0.1823, "step": 2432 }, { "epoch": 0.3390232007245872, "grad_norm": 0.21049457788467407, "learning_rate": 8.088408492749534e-06, "loss": 0.1368, "step": 2433 }, { "epoch": 0.33916254441580157, "grad_norm": 0.14892612397670746, "learning_rate": 8.086554708570901e-06, "loss": 0.1279, "step": 2434 }, { "epoch": 0.33930188810701595, "grad_norm": 0.2895677089691162, "learning_rate": 8.084700238630283e-06, "loss": 0.1661, "step": 2435 }, { "epoch": 0.3394412317982303, "grad_norm": 0.22405865788459778, "learning_rate": 8.082845083339698e-06, "loss": 0.1651, "step": 2436 }, { "epoch": 0.3395805754894447, "grad_norm": 0.17590273916721344, "learning_rate": 8.080989243111315e-06, "loss": 0.132, "step": 2437 }, { "epoch": 0.3397199191806591, "grad_norm": 0.20542746782302856, "learning_rate": 8.079132718357465e-06, "loss": 0.1303, "step": 2438 }, { "epoch": 0.33985926287187346, "grad_norm": 0.1668844223022461, "learning_rate": 8.07727550949062e-06, "loss": 0.1288, "step": 2439 }, { "epoch": 0.33999860656308784, "grad_norm": 0.08170565962791443, "learning_rate": 8.075417616923413e-06, "loss": 0.1048, "step": 2440 }, { "epoch": 0.3401379502543022, "grad_norm": 0.2302793562412262, "learning_rate": 8.073559041068626e-06, "loss": 0.1291, "step": 2441 }, { "epoch": 0.3402772939455166, "grad_norm": 0.22780625522136688, "learning_rate": 8.071699782339188e-06, "loss": 0.1161, "step": 2442 }, { "epoch": 0.340416637636731, "grad_norm": 0.20223256945610046, "learning_rate": 8.06983984114819e-06, "loss": 0.158, "step": 2443 }, { "epoch": 0.3405559813279454, "grad_norm": 0.2157791554927826, "learning_rate": 8.067979217908864e-06, "loss": 0.1889, "step": 2444 }, { "epoch": 0.3406953250191598, "grad_norm": 0.20370151102542877, "learning_rate": 8.066117913034597e-06, "loss": 0.1516, "step": 2445 }, { "epoch": 0.34083466871037416, "grad_norm": 0.19670729339122772, "learning_rate": 8.06425592693893e-06, "loss": 0.1505, "step": 2446 }, { "epoch": 0.34097401240158853, "grad_norm": 0.23624901473522186, "learning_rate": 8.062393260035557e-06, "loss": 0.1564, "step": 2447 }, { "epoch": 0.3411133560928029, "grad_norm": 0.254452109336853, "learning_rate": 8.060529912738316e-06, "loss": 0.151, "step": 2448 }, { "epoch": 0.3412526997840173, "grad_norm": 0.46785691380500793, "learning_rate": 8.058665885461201e-06, "loss": 0.1726, "step": 2449 }, { "epoch": 0.34139204347523167, "grad_norm": 0.2586135268211365, "learning_rate": 8.056801178618357e-06, "loss": 0.1268, "step": 2450 }, { "epoch": 0.34153138716644604, "grad_norm": 0.18983633816242218, "learning_rate": 8.05493579262408e-06, "loss": 0.1283, "step": 2451 }, { "epoch": 0.3416707308576604, "grad_norm": 0.17890287935733795, "learning_rate": 8.053069727892813e-06, "loss": 0.1693, "step": 2452 }, { "epoch": 0.3418100745488748, "grad_norm": 0.21018633246421814, "learning_rate": 8.051202984839157e-06, "loss": 0.1431, "step": 2453 }, { "epoch": 0.3419494182400892, "grad_norm": 0.16072086989879608, "learning_rate": 8.049335563877858e-06, "loss": 0.1356, "step": 2454 }, { "epoch": 0.34208876193130355, "grad_norm": 0.21992819011211395, "learning_rate": 8.047467465423813e-06, "loss": 0.1194, "step": 2455 }, { "epoch": 0.34222810562251793, "grad_norm": 0.1805303543806076, "learning_rate": 8.045598689892072e-06, "loss": 0.1407, "step": 2456 }, { "epoch": 0.3423674493137323, "grad_norm": 0.26426592469215393, "learning_rate": 8.043729237697835e-06, "loss": 0.1601, "step": 2457 }, { "epoch": 0.3425067930049467, "grad_norm": 0.07972310483455658, "learning_rate": 8.041859109256452e-06, "loss": 0.0916, "step": 2458 }, { "epoch": 0.34264613669616106, "grad_norm": 0.2014947533607483, "learning_rate": 8.03998830498342e-06, "loss": 0.1469, "step": 2459 }, { "epoch": 0.34278548038737544, "grad_norm": 0.26029255986213684, "learning_rate": 8.038116825294393e-06, "loss": 0.1714, "step": 2460 }, { "epoch": 0.3429248240785898, "grad_norm": 0.16422931849956512, "learning_rate": 8.036244670605166e-06, "loss": 0.1446, "step": 2461 }, { "epoch": 0.3430641677698042, "grad_norm": 0.23630715906620026, "learning_rate": 8.034371841331693e-06, "loss": 0.1752, "step": 2462 }, { "epoch": 0.34320351146101863, "grad_norm": 0.3244646489620209, "learning_rate": 8.032498337890073e-06, "loss": 0.1584, "step": 2463 }, { "epoch": 0.343342855152233, "grad_norm": 0.20205314457416534, "learning_rate": 8.030624160696554e-06, "loss": 0.1634, "step": 2464 }, { "epoch": 0.3434821988434474, "grad_norm": 0.15628758072853088, "learning_rate": 8.02874931016754e-06, "loss": 0.1495, "step": 2465 }, { "epoch": 0.34362154253466176, "grad_norm": 0.21603193879127502, "learning_rate": 8.026873786719574e-06, "loss": 0.1204, "step": 2466 }, { "epoch": 0.34376088622587614, "grad_norm": 0.11483704298734665, "learning_rate": 8.024997590769359e-06, "loss": 0.1156, "step": 2467 }, { "epoch": 0.3439002299170905, "grad_norm": 0.2552553117275238, "learning_rate": 8.02312072273374e-06, "loss": 0.1907, "step": 2468 }, { "epoch": 0.3440395736083049, "grad_norm": 0.33074456453323364, "learning_rate": 8.021243183029715e-06, "loss": 0.1392, "step": 2469 }, { "epoch": 0.34417891729951927, "grad_norm": 0.4551158845424652, "learning_rate": 8.019364972074432e-06, "loss": 0.129, "step": 2470 }, { "epoch": 0.34431826099073365, "grad_norm": 0.11908655613660812, "learning_rate": 8.017486090285185e-06, "loss": 0.1104, "step": 2471 }, { "epoch": 0.344457604681948, "grad_norm": 0.1903996765613556, "learning_rate": 8.01560653807942e-06, "loss": 0.1279, "step": 2472 }, { "epoch": 0.3445969483731624, "grad_norm": 0.24151811003684998, "learning_rate": 8.013726315874729e-06, "loss": 0.1335, "step": 2473 }, { "epoch": 0.3447362920643768, "grad_norm": 0.22585131227970123, "learning_rate": 8.011845424088856e-06, "loss": 0.1399, "step": 2474 }, { "epoch": 0.34487563575559116, "grad_norm": 0.1587325632572174, "learning_rate": 8.009963863139689e-06, "loss": 0.1221, "step": 2475 }, { "epoch": 0.34501497944680554, "grad_norm": 0.2842099666595459, "learning_rate": 8.008081633445272e-06, "loss": 0.1762, "step": 2476 }, { "epoch": 0.3451543231380199, "grad_norm": 0.16949380934238434, "learning_rate": 8.00619873542379e-06, "loss": 0.1153, "step": 2477 }, { "epoch": 0.3452936668292343, "grad_norm": 0.2502078115940094, "learning_rate": 8.004315169493586e-06, "loss": 0.1337, "step": 2478 }, { "epoch": 0.34543301052044867, "grad_norm": 0.1295241266489029, "learning_rate": 8.002430936073137e-06, "loss": 0.1221, "step": 2479 }, { "epoch": 0.34557235421166305, "grad_norm": 0.1598132699728012, "learning_rate": 8.000546035581083e-06, "loss": 0.1378, "step": 2480 }, { "epoch": 0.3457116979028774, "grad_norm": 0.23950360715389252, "learning_rate": 7.998660468436202e-06, "loss": 0.1323, "step": 2481 }, { "epoch": 0.3458510415940918, "grad_norm": 0.20416070520877838, "learning_rate": 7.996774235057425e-06, "loss": 0.1483, "step": 2482 }, { "epoch": 0.34599038528530623, "grad_norm": 0.1524847000837326, "learning_rate": 7.994887335863832e-06, "loss": 0.12, "step": 2483 }, { "epoch": 0.3461297289765206, "grad_norm": 0.17828132212162018, "learning_rate": 7.992999771274646e-06, "loss": 0.1357, "step": 2484 }, { "epoch": 0.346269072667735, "grad_norm": 0.14555715024471283, "learning_rate": 7.991111541709244e-06, "loss": 0.1055, "step": 2485 }, { "epoch": 0.34640841635894937, "grad_norm": 0.2184109389781952, "learning_rate": 7.989222647587146e-06, "loss": 0.1414, "step": 2486 }, { "epoch": 0.34654776005016374, "grad_norm": 0.16054558753967285, "learning_rate": 7.987333089328018e-06, "loss": 0.1344, "step": 2487 }, { "epoch": 0.3466871037413781, "grad_norm": 0.21246452629566193, "learning_rate": 7.985442867351682e-06, "loss": 0.1097, "step": 2488 }, { "epoch": 0.3468264474325925, "grad_norm": 0.29645606875419617, "learning_rate": 7.983551982078097e-06, "loss": 0.1305, "step": 2489 }, { "epoch": 0.3469657911238069, "grad_norm": 0.24257029592990875, "learning_rate": 7.98166043392738e-06, "loss": 0.1264, "step": 2490 }, { "epoch": 0.34710513481502125, "grad_norm": 0.14661681652069092, "learning_rate": 7.979768223319786e-06, "loss": 0.1244, "step": 2491 }, { "epoch": 0.34724447850623563, "grad_norm": 0.17378173768520355, "learning_rate": 7.977875350675721e-06, "loss": 0.1197, "step": 2492 }, { "epoch": 0.34738382219745, "grad_norm": 0.35344135761260986, "learning_rate": 7.975981816415741e-06, "loss": 0.1385, "step": 2493 }, { "epoch": 0.3475231658886644, "grad_norm": 0.39852386713027954, "learning_rate": 7.974087620960543e-06, "loss": 0.182, "step": 2494 }, { "epoch": 0.34766250957987876, "grad_norm": 0.2288758009672165, "learning_rate": 7.972192764730975e-06, "loss": 0.1864, "step": 2495 }, { "epoch": 0.34780185327109314, "grad_norm": 0.2109685242176056, "learning_rate": 7.970297248148033e-06, "loss": 0.1655, "step": 2496 }, { "epoch": 0.3479411969623075, "grad_norm": 0.1707151234149933, "learning_rate": 7.968401071632854e-06, "loss": 0.1114, "step": 2497 }, { "epoch": 0.3480805406535219, "grad_norm": 0.21814227104187012, "learning_rate": 7.966504235606726e-06, "loss": 0.1473, "step": 2498 }, { "epoch": 0.3482198843447363, "grad_norm": 0.19267578423023224, "learning_rate": 7.964606740491085e-06, "loss": 0.132, "step": 2499 }, { "epoch": 0.34835922803595065, "grad_norm": 0.14229995012283325, "learning_rate": 7.962708586707508e-06, "loss": 0.1104, "step": 2500 }, { "epoch": 0.34849857172716503, "grad_norm": 0.2587013244628906, "learning_rate": 7.960809774677722e-06, "loss": 0.139, "step": 2501 }, { "epoch": 0.3486379154183794, "grad_norm": 0.1768626719713211, "learning_rate": 7.958910304823603e-06, "loss": 0.145, "step": 2502 }, { "epoch": 0.34877725910959384, "grad_norm": 0.19059953093528748, "learning_rate": 7.957010177567167e-06, "loss": 0.138, "step": 2503 }, { "epoch": 0.3489166028008082, "grad_norm": 0.24877429008483887, "learning_rate": 7.955109393330577e-06, "loss": 0.1321, "step": 2504 }, { "epoch": 0.3490559464920226, "grad_norm": 0.2709355354309082, "learning_rate": 7.953207952536147e-06, "loss": 0.1329, "step": 2505 }, { "epoch": 0.349195290183237, "grad_norm": 0.12286399304866791, "learning_rate": 7.951305855606333e-06, "loss": 0.1023, "step": 2506 }, { "epoch": 0.34933463387445135, "grad_norm": 0.2802167534828186, "learning_rate": 7.949403102963738e-06, "loss": 0.1558, "step": 2507 }, { "epoch": 0.3494739775656657, "grad_norm": 0.13921597599983215, "learning_rate": 7.947499695031108e-06, "loss": 0.1159, "step": 2508 }, { "epoch": 0.3496133212568801, "grad_norm": 0.15317091345787048, "learning_rate": 7.94559563223134e-06, "loss": 0.1371, "step": 2509 }, { "epoch": 0.3497526649480945, "grad_norm": 0.1699516326189041, "learning_rate": 7.943690914987472e-06, "loss": 0.1262, "step": 2510 }, { "epoch": 0.34989200863930886, "grad_norm": 0.21877345442771912, "learning_rate": 7.941785543722686e-06, "loss": 0.1524, "step": 2511 }, { "epoch": 0.35003135233052324, "grad_norm": 0.20407192409038544, "learning_rate": 7.939879518860316e-06, "loss": 0.1478, "step": 2512 }, { "epoch": 0.3501706960217376, "grad_norm": 0.23891255259513855, "learning_rate": 7.937972840823836e-06, "loss": 0.1617, "step": 2513 }, { "epoch": 0.350310039712952, "grad_norm": 0.30308499932289124, "learning_rate": 7.936065510036863e-06, "loss": 0.2262, "step": 2514 }, { "epoch": 0.35044938340416637, "grad_norm": 0.1607806533575058, "learning_rate": 7.934157526923167e-06, "loss": 0.1294, "step": 2515 }, { "epoch": 0.35058872709538075, "grad_norm": 0.11122056096792221, "learning_rate": 7.932248891906657e-06, "loss": 0.1133, "step": 2516 }, { "epoch": 0.3507280707865951, "grad_norm": 0.1335253119468689, "learning_rate": 7.930339605411387e-06, "loss": 0.1239, "step": 2517 }, { "epoch": 0.3508674144778095, "grad_norm": 0.21724817156791687, "learning_rate": 7.92842966786156e-06, "loss": 0.1394, "step": 2518 }, { "epoch": 0.3510067581690239, "grad_norm": 0.2130798101425171, "learning_rate": 7.926519079681514e-06, "loss": 0.1315, "step": 2519 }, { "epoch": 0.35114610186023826, "grad_norm": 0.22550225257873535, "learning_rate": 7.924607841295744e-06, "loss": 0.199, "step": 2520 }, { "epoch": 0.35128544555145264, "grad_norm": 0.27146801352500916, "learning_rate": 7.92269595312888e-06, "loss": 0.1205, "step": 2521 }, { "epoch": 0.351424789242667, "grad_norm": 0.16483649611473083, "learning_rate": 7.920783415605703e-06, "loss": 0.1099, "step": 2522 }, { "epoch": 0.35156413293388145, "grad_norm": 0.20392410457134247, "learning_rate": 7.918870229151134e-06, "loss": 0.139, "step": 2523 }, { "epoch": 0.3517034766250958, "grad_norm": 0.13506339490413666, "learning_rate": 7.916956394190238e-06, "loss": 0.1148, "step": 2524 }, { "epoch": 0.3518428203163102, "grad_norm": 0.15627646446228027, "learning_rate": 7.915041911148229e-06, "loss": 0.1252, "step": 2525 }, { "epoch": 0.3519821640075246, "grad_norm": 0.19328875839710236, "learning_rate": 7.913126780450455e-06, "loss": 0.1353, "step": 2526 }, { "epoch": 0.35212150769873896, "grad_norm": 0.20773865282535553, "learning_rate": 7.911211002522422e-06, "loss": 0.1338, "step": 2527 }, { "epoch": 0.35226085138995333, "grad_norm": 0.1564660370349884, "learning_rate": 7.909294577789765e-06, "loss": 0.1153, "step": 2528 }, { "epoch": 0.3524001950811677, "grad_norm": 0.1343403309583664, "learning_rate": 7.907377506678274e-06, "loss": 0.105, "step": 2529 }, { "epoch": 0.3525395387723821, "grad_norm": 0.2316172868013382, "learning_rate": 7.905459789613878e-06, "loss": 0.1397, "step": 2530 }, { "epoch": 0.35267888246359647, "grad_norm": 0.2641228437423706, "learning_rate": 7.90354142702265e-06, "loss": 0.1222, "step": 2531 }, { "epoch": 0.35281822615481084, "grad_norm": 0.22919714450836182, "learning_rate": 7.901622419330805e-06, "loss": 0.1247, "step": 2532 }, { "epoch": 0.3529575698460252, "grad_norm": 0.13502027094364166, "learning_rate": 7.899702766964705e-06, "loss": 0.114, "step": 2533 }, { "epoch": 0.3530969135372396, "grad_norm": 0.18714244663715363, "learning_rate": 7.89778247035085e-06, "loss": 0.1327, "step": 2534 }, { "epoch": 0.353236257228454, "grad_norm": 0.1509743481874466, "learning_rate": 7.895861529915889e-06, "loss": 0.1383, "step": 2535 }, { "epoch": 0.35337560091966835, "grad_norm": 0.3059704005718231, "learning_rate": 7.893939946086609e-06, "loss": 0.1318, "step": 2536 }, { "epoch": 0.35351494461088273, "grad_norm": 0.16189970076084137, "learning_rate": 7.892017719289941e-06, "loss": 0.1138, "step": 2537 }, { "epoch": 0.3536542883020971, "grad_norm": 0.17712579667568207, "learning_rate": 7.890094849952964e-06, "loss": 0.1242, "step": 2538 }, { "epoch": 0.3537936319933115, "grad_norm": 0.22993308305740356, "learning_rate": 7.888171338502893e-06, "loss": 0.1477, "step": 2539 }, { "epoch": 0.35393297568452586, "grad_norm": 0.2146657556295395, "learning_rate": 7.886247185367088e-06, "loss": 0.1441, "step": 2540 }, { "epoch": 0.35407231937574024, "grad_norm": 0.18443796038627625, "learning_rate": 7.884322390973053e-06, "loss": 0.1044, "step": 2541 }, { "epoch": 0.3542116630669546, "grad_norm": 0.14796790480613708, "learning_rate": 7.882396955748432e-06, "loss": 0.1202, "step": 2542 }, { "epoch": 0.35435100675816905, "grad_norm": 0.13226059079170227, "learning_rate": 7.880470880121015e-06, "loss": 0.1311, "step": 2543 }, { "epoch": 0.35449035044938343, "grad_norm": 0.24694661796092987, "learning_rate": 7.878544164518731e-06, "loss": 0.1435, "step": 2544 }, { "epoch": 0.3546296941405978, "grad_norm": 0.1265929639339447, "learning_rate": 7.87661680936965e-06, "loss": 0.1158, "step": 2545 }, { "epoch": 0.3547690378318122, "grad_norm": 0.25843262672424316, "learning_rate": 7.87468881510199e-06, "loss": 0.1332, "step": 2546 }, { "epoch": 0.35490838152302656, "grad_norm": 0.23085922002792358, "learning_rate": 7.872760182144104e-06, "loss": 0.1495, "step": 2547 }, { "epoch": 0.35504772521424094, "grad_norm": 0.17148669064044952, "learning_rate": 7.870830910924491e-06, "loss": 0.1129, "step": 2548 }, { "epoch": 0.3551870689054553, "grad_norm": 0.20468787848949432, "learning_rate": 7.868901001871797e-06, "loss": 0.1312, "step": 2549 }, { "epoch": 0.3553264125966697, "grad_norm": 0.3069595396518707, "learning_rate": 7.866970455414793e-06, "loss": 0.1499, "step": 2550 }, { "epoch": 0.35546575628788407, "grad_norm": 0.11392776668071747, "learning_rate": 7.86503927198241e-06, "loss": 0.1069, "step": 2551 }, { "epoch": 0.35560509997909845, "grad_norm": 0.2556957006454468, "learning_rate": 7.863107452003711e-06, "loss": 0.1353, "step": 2552 }, { "epoch": 0.3557444436703128, "grad_norm": 0.3260710537433624, "learning_rate": 7.861174995907901e-06, "loss": 0.1586, "step": 2553 }, { "epoch": 0.3558837873615272, "grad_norm": 0.2227695882320404, "learning_rate": 7.85924190412433e-06, "loss": 0.1216, "step": 2554 }, { "epoch": 0.3560231310527416, "grad_norm": 0.2436976581811905, "learning_rate": 7.857308177082484e-06, "loss": 0.1933, "step": 2555 }, { "epoch": 0.35616247474395596, "grad_norm": 0.2909698188304901, "learning_rate": 7.855373815211995e-06, "loss": 0.1159, "step": 2556 }, { "epoch": 0.35630181843517034, "grad_norm": 0.22368700802326202, "learning_rate": 7.853438818942633e-06, "loss": 0.1102, "step": 2557 }, { "epoch": 0.3564411621263847, "grad_norm": 0.2570492625236511, "learning_rate": 7.851503188704312e-06, "loss": 0.1577, "step": 2558 }, { "epoch": 0.3565805058175991, "grad_norm": 0.26143163442611694, "learning_rate": 7.849566924927082e-06, "loss": 0.1645, "step": 2559 }, { "epoch": 0.35671984950881347, "grad_norm": 0.15046508610248566, "learning_rate": 7.84763002804114e-06, "loss": 0.1335, "step": 2560 }, { "epoch": 0.35685919320002785, "grad_norm": 0.20553293824195862, "learning_rate": 7.845692498476816e-06, "loss": 0.1422, "step": 2561 }, { "epoch": 0.3569985368912422, "grad_norm": 0.18887324631214142, "learning_rate": 7.843754336664589e-06, "loss": 0.134, "step": 2562 }, { "epoch": 0.3571378805824566, "grad_norm": 0.16484971344470978, "learning_rate": 7.84181554303507e-06, "loss": 0.1373, "step": 2563 }, { "epoch": 0.35727722427367103, "grad_norm": 0.29550662636756897, "learning_rate": 7.839876118019019e-06, "loss": 0.1594, "step": 2564 }, { "epoch": 0.3574165679648854, "grad_norm": 0.18217943608760834, "learning_rate": 7.837936062047329e-06, "loss": 0.111, "step": 2565 }, { "epoch": 0.3575559116560998, "grad_norm": 0.18852800130844116, "learning_rate": 7.835995375551038e-06, "loss": 0.1395, "step": 2566 }, { "epoch": 0.35769525534731417, "grad_norm": 0.29083213210105896, "learning_rate": 7.83405405896132e-06, "loss": 0.1529, "step": 2567 }, { "epoch": 0.35783459903852854, "grad_norm": 0.14665456116199493, "learning_rate": 7.832112112709496e-06, "loss": 0.1187, "step": 2568 }, { "epoch": 0.3579739427297429, "grad_norm": 0.09144387394189835, "learning_rate": 7.830169537227015e-06, "loss": 0.1091, "step": 2569 }, { "epoch": 0.3581132864209573, "grad_norm": 0.3071730434894562, "learning_rate": 7.828226332945479e-06, "loss": 0.1509, "step": 2570 }, { "epoch": 0.3582526301121717, "grad_norm": 0.39243870973587036, "learning_rate": 7.82628250029662e-06, "loss": 0.1615, "step": 2571 }, { "epoch": 0.35839197380338605, "grad_norm": 0.24468553066253662, "learning_rate": 7.824338039712316e-06, "loss": 0.2012, "step": 2572 }, { "epoch": 0.35853131749460043, "grad_norm": 0.22735264897346497, "learning_rate": 7.82239295162458e-06, "loss": 0.1371, "step": 2573 }, { "epoch": 0.3586706611858148, "grad_norm": 0.23871122300624847, "learning_rate": 7.820447236465565e-06, "loss": 0.1632, "step": 2574 }, { "epoch": 0.3588100048770292, "grad_norm": 0.25061270594596863, "learning_rate": 7.818500894667566e-06, "loss": 0.1638, "step": 2575 }, { "epoch": 0.35894934856824356, "grad_norm": 0.19867414236068726, "learning_rate": 7.816553926663018e-06, "loss": 0.1355, "step": 2576 }, { "epoch": 0.35908869225945794, "grad_norm": 0.2955053746700287, "learning_rate": 7.81460633288449e-06, "loss": 0.1239, "step": 2577 }, { "epoch": 0.3592280359506723, "grad_norm": 0.20729464292526245, "learning_rate": 7.812658113764691e-06, "loss": 0.1599, "step": 2578 }, { "epoch": 0.3593673796418867, "grad_norm": 0.22908204793930054, "learning_rate": 7.810709269736476e-06, "loss": 0.141, "step": 2579 }, { "epoch": 0.3595067233331011, "grad_norm": 0.2703387439250946, "learning_rate": 7.808759801232829e-06, "loss": 0.1373, "step": 2580 }, { "epoch": 0.35964606702431545, "grad_norm": 0.418262779712677, "learning_rate": 7.80680970868688e-06, "loss": 0.116, "step": 2581 }, { "epoch": 0.35978541071552983, "grad_norm": 0.5442250370979309, "learning_rate": 7.804858992531893e-06, "loss": 0.1603, "step": 2582 }, { "epoch": 0.3599247544067442, "grad_norm": 0.1323395073413849, "learning_rate": 7.802907653201275e-06, "loss": 0.1163, "step": 2583 }, { "epoch": 0.36006409809795864, "grad_norm": 0.3908007740974426, "learning_rate": 7.800955691128568e-06, "loss": 0.1411, "step": 2584 }, { "epoch": 0.360203441789173, "grad_norm": 0.39178895950317383, "learning_rate": 7.799003106747453e-06, "loss": 0.202, "step": 2585 }, { "epoch": 0.3603427854803874, "grad_norm": 0.234373539686203, "learning_rate": 7.79704990049175e-06, "loss": 0.1449, "step": 2586 }, { "epoch": 0.3604821291716018, "grad_norm": 0.1806045025587082, "learning_rate": 7.795096072795418e-06, "loss": 0.117, "step": 2587 }, { "epoch": 0.36062147286281615, "grad_norm": 0.2358265370130539, "learning_rate": 7.793141624092551e-06, "loss": 0.1308, "step": 2588 }, { "epoch": 0.36076081655403053, "grad_norm": 0.4657832086086273, "learning_rate": 7.791186554817383e-06, "loss": 0.1382, "step": 2589 }, { "epoch": 0.3609001602452449, "grad_norm": 0.17123329639434814, "learning_rate": 7.789230865404287e-06, "loss": 0.1115, "step": 2590 }, { "epoch": 0.3610395039364593, "grad_norm": 0.15011313557624817, "learning_rate": 7.787274556287771e-06, "loss": 0.1049, "step": 2591 }, { "epoch": 0.36117884762767366, "grad_norm": 0.1858903169631958, "learning_rate": 7.785317627902484e-06, "loss": 0.1361, "step": 2592 }, { "epoch": 0.36131819131888804, "grad_norm": 0.22152654826641083, "learning_rate": 7.783360080683212e-06, "loss": 0.109, "step": 2593 }, { "epoch": 0.3614575350101024, "grad_norm": 0.27720698714256287, "learning_rate": 7.781401915064873e-06, "loss": 0.1476, "step": 2594 }, { "epoch": 0.3615968787013168, "grad_norm": 0.3028486669063568, "learning_rate": 7.779443131482529e-06, "loss": 0.1347, "step": 2595 }, { "epoch": 0.36173622239253117, "grad_norm": 0.21920335292816162, "learning_rate": 7.777483730371375e-06, "loss": 0.144, "step": 2596 }, { "epoch": 0.36187556608374555, "grad_norm": 0.24427717924118042, "learning_rate": 7.77552371216675e-06, "loss": 0.1223, "step": 2597 }, { "epoch": 0.3620149097749599, "grad_norm": 0.26821139454841614, "learning_rate": 7.773563077304123e-06, "loss": 0.1312, "step": 2598 }, { "epoch": 0.3621542534661743, "grad_norm": 0.16800236701965332, "learning_rate": 7.7716018262191e-06, "loss": 0.0989, "step": 2599 }, { "epoch": 0.3622935971573887, "grad_norm": 0.423529714345932, "learning_rate": 7.769639959347428e-06, "loss": 0.1421, "step": 2600 }, { "epoch": 0.36243294084860306, "grad_norm": 0.25751620531082153, "learning_rate": 7.767677477124988e-06, "loss": 0.1315, "step": 2601 }, { "epoch": 0.36257228453981744, "grad_norm": 0.2100597769021988, "learning_rate": 7.765714379987804e-06, "loss": 0.1763, "step": 2602 }, { "epoch": 0.3627116282310318, "grad_norm": 0.3265955448150635, "learning_rate": 7.763750668372023e-06, "loss": 0.1601, "step": 2603 }, { "epoch": 0.36285097192224625, "grad_norm": 0.23247995972633362, "learning_rate": 7.761786342713941e-06, "loss": 0.139, "step": 2604 }, { "epoch": 0.3629903156134606, "grad_norm": 0.273277223110199, "learning_rate": 7.75982140344999e-06, "loss": 0.1794, "step": 2605 }, { "epoch": 0.363129659304675, "grad_norm": 0.20793099701404572, "learning_rate": 7.757855851016727e-06, "loss": 0.1141, "step": 2606 }, { "epoch": 0.3632690029958894, "grad_norm": 0.2667278051376343, "learning_rate": 7.755889685850858e-06, "loss": 0.129, "step": 2607 }, { "epoch": 0.36340834668710376, "grad_norm": 0.3322872221469879, "learning_rate": 7.75392290838922e-06, "loss": 0.1609, "step": 2608 }, { "epoch": 0.36354769037831813, "grad_norm": 0.16398198902606964, "learning_rate": 7.751955519068783e-06, "loss": 0.1342, "step": 2609 }, { "epoch": 0.3636870340695325, "grad_norm": 0.16268235445022583, "learning_rate": 7.74998751832666e-06, "loss": 0.1257, "step": 2610 }, { "epoch": 0.3638263777607469, "grad_norm": 0.2430034875869751, "learning_rate": 7.748018906600092e-06, "loss": 0.1481, "step": 2611 }, { "epoch": 0.36396572145196127, "grad_norm": 0.22657151520252228, "learning_rate": 7.746049684326462e-06, "loss": 0.1362, "step": 2612 }, { "epoch": 0.36410506514317564, "grad_norm": 0.24056631326675415, "learning_rate": 7.744079851943286e-06, "loss": 0.1303, "step": 2613 }, { "epoch": 0.36424440883439, "grad_norm": 0.16516944766044617, "learning_rate": 7.742109409888213e-06, "loss": 0.1346, "step": 2614 }, { "epoch": 0.3643837525256044, "grad_norm": 0.11540094017982483, "learning_rate": 7.740138358599035e-06, "loss": 0.1143, "step": 2615 }, { "epoch": 0.3645230962168188, "grad_norm": 0.22226153314113617, "learning_rate": 7.73816669851367e-06, "loss": 0.1279, "step": 2616 }, { "epoch": 0.36466243990803315, "grad_norm": 0.16160637140274048, "learning_rate": 7.73619443007018e-06, "loss": 0.1437, "step": 2617 }, { "epoch": 0.36480178359924753, "grad_norm": 0.3011608421802521, "learning_rate": 7.734221553706756e-06, "loss": 0.1258, "step": 2618 }, { "epoch": 0.3649411272904619, "grad_norm": 0.14670342206954956, "learning_rate": 7.732248069861726e-06, "loss": 0.1121, "step": 2619 }, { "epoch": 0.3650804709816763, "grad_norm": 0.24349023401737213, "learning_rate": 7.730273978973552e-06, "loss": 0.1351, "step": 2620 }, { "epoch": 0.36521981467289066, "grad_norm": 0.1729908138513565, "learning_rate": 7.728299281480833e-06, "loss": 0.1296, "step": 2621 }, { "epoch": 0.36535915836410504, "grad_norm": 0.14543204009532928, "learning_rate": 7.726323977822304e-06, "loss": 0.148, "step": 2622 }, { "epoch": 0.3654985020553194, "grad_norm": 0.14521466195583344, "learning_rate": 7.72434806843683e-06, "loss": 0.1076, "step": 2623 }, { "epoch": 0.36563784574653385, "grad_norm": 0.2407229095697403, "learning_rate": 7.72237155376341e-06, "loss": 0.116, "step": 2624 }, { "epoch": 0.36577718943774823, "grad_norm": 0.3803197145462036, "learning_rate": 7.720394434241185e-06, "loss": 0.1502, "step": 2625 }, { "epoch": 0.3659165331289626, "grad_norm": 0.2638128697872162, "learning_rate": 7.718416710309425e-06, "loss": 0.136, "step": 2626 }, { "epoch": 0.366055876820177, "grad_norm": 0.21663326025009155, "learning_rate": 7.716438382407534e-06, "loss": 0.1386, "step": 2627 }, { "epoch": 0.36619522051139136, "grad_norm": 0.3311983346939087, "learning_rate": 7.714459450975052e-06, "loss": 0.1736, "step": 2628 }, { "epoch": 0.36633456420260574, "grad_norm": 0.3009772002696991, "learning_rate": 7.712479916451651e-06, "loss": 0.1187, "step": 2629 }, { "epoch": 0.3664739078938201, "grad_norm": 0.22236676514148712, "learning_rate": 7.710499779277141e-06, "loss": 0.1361, "step": 2630 }, { "epoch": 0.3666132515850345, "grad_norm": 0.14680352807044983, "learning_rate": 7.708519039891462e-06, "loss": 0.128, "step": 2631 }, { "epoch": 0.36675259527624887, "grad_norm": 0.25335320830345154, "learning_rate": 7.70653769873469e-06, "loss": 0.1329, "step": 2632 }, { "epoch": 0.36689193896746325, "grad_norm": 0.3240615725517273, "learning_rate": 7.70455575624703e-06, "loss": 0.1636, "step": 2633 }, { "epoch": 0.3670312826586776, "grad_norm": 0.25330403447151184, "learning_rate": 7.702573212868827e-06, "loss": 0.1664, "step": 2634 }, { "epoch": 0.367170626349892, "grad_norm": 0.11631123721599579, "learning_rate": 7.70059006904056e-06, "loss": 0.0982, "step": 2635 }, { "epoch": 0.3673099700411064, "grad_norm": 0.532241940498352, "learning_rate": 7.698606325202832e-06, "loss": 0.1732, "step": 2636 }, { "epoch": 0.36744931373232076, "grad_norm": 0.1319311410188675, "learning_rate": 7.69662198179639e-06, "loss": 0.1265, "step": 2637 }, { "epoch": 0.36758865742353514, "grad_norm": 0.17134544253349304, "learning_rate": 7.694637039262109e-06, "loss": 0.153, "step": 2638 }, { "epoch": 0.3677280011147495, "grad_norm": 0.11490173637866974, "learning_rate": 7.692651498040996e-06, "loss": 0.1267, "step": 2639 }, { "epoch": 0.3678673448059639, "grad_norm": 0.21387259662151337, "learning_rate": 7.690665358574197e-06, "loss": 0.1452, "step": 2640 }, { "epoch": 0.36800668849717827, "grad_norm": 0.10236100852489471, "learning_rate": 7.688678621302981e-06, "loss": 0.1034, "step": 2641 }, { "epoch": 0.36814603218839265, "grad_norm": 0.15224038064479828, "learning_rate": 7.686691286668761e-06, "loss": 0.1331, "step": 2642 }, { "epoch": 0.368285375879607, "grad_norm": 0.17747411131858826, "learning_rate": 7.684703355113074e-06, "loss": 0.1452, "step": 2643 }, { "epoch": 0.36842471957082146, "grad_norm": 0.12771080434322357, "learning_rate": 7.682714827077595e-06, "loss": 0.1059, "step": 2644 }, { "epoch": 0.36856406326203583, "grad_norm": 0.18082821369171143, "learning_rate": 7.68072570300413e-06, "loss": 0.1184, "step": 2645 }, { "epoch": 0.3687034069532502, "grad_norm": 0.5562177300453186, "learning_rate": 7.678735983334615e-06, "loss": 0.1234, "step": 2646 }, { "epoch": 0.3688427506444646, "grad_norm": 0.14354154467582703, "learning_rate": 7.676745668511121e-06, "loss": 0.1284, "step": 2647 }, { "epoch": 0.36898209433567897, "grad_norm": 0.19152876734733582, "learning_rate": 7.67475475897585e-06, "loss": 0.1355, "step": 2648 }, { "epoch": 0.36912143802689334, "grad_norm": 0.16235211491584778, "learning_rate": 7.672763255171138e-06, "loss": 0.1418, "step": 2649 }, { "epoch": 0.3692607817181077, "grad_norm": 0.20694409310817719, "learning_rate": 7.67077115753945e-06, "loss": 0.1223, "step": 2650 }, { "epoch": 0.3694001254093221, "grad_norm": 0.24360127747058868, "learning_rate": 7.668778466523386e-06, "loss": 0.133, "step": 2651 }, { "epoch": 0.3695394691005365, "grad_norm": 0.19446530938148499, "learning_rate": 7.666785182565676e-06, "loss": 0.135, "step": 2652 }, { "epoch": 0.36967881279175085, "grad_norm": 0.16470025479793549, "learning_rate": 7.664791306109183e-06, "loss": 0.1367, "step": 2653 }, { "epoch": 0.36981815648296523, "grad_norm": 0.17001663148403168, "learning_rate": 7.6627968375969e-06, "loss": 0.1363, "step": 2654 }, { "epoch": 0.3699575001741796, "grad_norm": 0.2687976360321045, "learning_rate": 7.660801777471951e-06, "loss": 0.1786, "step": 2655 }, { "epoch": 0.370096843865394, "grad_norm": 0.13822294771671295, "learning_rate": 7.658806126177596e-06, "loss": 0.1276, "step": 2656 }, { "epoch": 0.37023618755660836, "grad_norm": 0.17397893965244293, "learning_rate": 7.65680988415722e-06, "loss": 0.1487, "step": 2657 }, { "epoch": 0.37037553124782274, "grad_norm": 0.10457558929920197, "learning_rate": 7.654813051854345e-06, "loss": 0.1035, "step": 2658 }, { "epoch": 0.3705148749390371, "grad_norm": 0.26389601826667786, "learning_rate": 7.652815629712616e-06, "loss": 0.1619, "step": 2659 }, { "epoch": 0.3706542186302515, "grad_norm": 0.21718627214431763, "learning_rate": 7.650817618175824e-06, "loss": 0.1692, "step": 2660 }, { "epoch": 0.3707935623214659, "grad_norm": 0.14166104793548584, "learning_rate": 7.648819017687875e-06, "loss": 0.1208, "step": 2661 }, { "epoch": 0.37093290601268025, "grad_norm": 0.2065916210412979, "learning_rate": 7.646819828692813e-06, "loss": 0.1185, "step": 2662 }, { "epoch": 0.37107224970389463, "grad_norm": 0.24196656048297882, "learning_rate": 7.644820051634813e-06, "loss": 0.1262, "step": 2663 }, { "epoch": 0.37121159339510906, "grad_norm": 0.218708798289299, "learning_rate": 7.64281968695818e-06, "loss": 0.1461, "step": 2664 }, { "epoch": 0.37135093708632344, "grad_norm": 0.2081722468137741, "learning_rate": 7.640818735107351e-06, "loss": 0.1116, "step": 2665 }, { "epoch": 0.3714902807775378, "grad_norm": 0.2084190398454666, "learning_rate": 7.638817196526887e-06, "loss": 0.1725, "step": 2666 }, { "epoch": 0.3716296244687522, "grad_norm": 0.17500039935112, "learning_rate": 7.636815071661488e-06, "loss": 0.1366, "step": 2667 }, { "epoch": 0.3717689681599666, "grad_norm": 0.20892639458179474, "learning_rate": 7.634812360955982e-06, "loss": 0.1261, "step": 2668 }, { "epoch": 0.37190831185118095, "grad_norm": 0.3162575364112854, "learning_rate": 7.63280906485532e-06, "loss": 0.1538, "step": 2669 }, { "epoch": 0.37204765554239533, "grad_norm": 0.24672967195510864, "learning_rate": 7.630805183804593e-06, "loss": 0.1359, "step": 2670 }, { "epoch": 0.3721869992336097, "grad_norm": 0.18557684123516083, "learning_rate": 7.628800718249017e-06, "loss": 0.1222, "step": 2671 }, { "epoch": 0.3723263429248241, "grad_norm": 0.15522901713848114, "learning_rate": 7.626795668633938e-06, "loss": 0.1037, "step": 2672 }, { "epoch": 0.37246568661603846, "grad_norm": 0.16100792586803436, "learning_rate": 7.624790035404831e-06, "loss": 0.1347, "step": 2673 }, { "epoch": 0.37260503030725284, "grad_norm": 0.16823825240135193, "learning_rate": 7.622783819007305e-06, "loss": 0.1193, "step": 2674 }, { "epoch": 0.3727443739984672, "grad_norm": 0.22035741806030273, "learning_rate": 7.620777019887091e-06, "loss": 0.1319, "step": 2675 }, { "epoch": 0.3728837176896816, "grad_norm": 0.22659611701965332, "learning_rate": 7.6187696384900585e-06, "loss": 0.1189, "step": 2676 }, { "epoch": 0.37302306138089597, "grad_norm": 0.23208087682724, "learning_rate": 7.616761675262199e-06, "loss": 0.1715, "step": 2677 }, { "epoch": 0.37316240507211035, "grad_norm": 0.14120756089687347, "learning_rate": 7.614753130649638e-06, "loss": 0.1536, "step": 2678 }, { "epoch": 0.3733017487633247, "grad_norm": 0.14669393002986908, "learning_rate": 7.612744005098625e-06, "loss": 0.1189, "step": 2679 }, { "epoch": 0.3734410924545391, "grad_norm": 0.12214616686105728, "learning_rate": 7.6107342990555466e-06, "loss": 0.0988, "step": 2680 }, { "epoch": 0.3735804361457535, "grad_norm": 0.2111162394285202, "learning_rate": 7.60872401296691e-06, "loss": 0.1191, "step": 2681 }, { "epoch": 0.37371977983696786, "grad_norm": 0.1806868016719818, "learning_rate": 7.606713147279356e-06, "loss": 0.16, "step": 2682 }, { "epoch": 0.37385912352818224, "grad_norm": 0.19081048667430878, "learning_rate": 7.604701702439652e-06, "loss": 0.166, "step": 2683 }, { "epoch": 0.37399846721939667, "grad_norm": 0.1510290801525116, "learning_rate": 7.602689678894697e-06, "loss": 0.1344, "step": 2684 }, { "epoch": 0.37413781091061105, "grad_norm": 0.1393859088420868, "learning_rate": 7.6006770770915165e-06, "loss": 0.1115, "step": 2685 }, { "epoch": 0.3742771546018254, "grad_norm": 0.12857399880886078, "learning_rate": 7.598663897477263e-06, "loss": 0.1318, "step": 2686 }, { "epoch": 0.3744164982930398, "grad_norm": 0.1929798722267151, "learning_rate": 7.59665014049922e-06, "loss": 0.1276, "step": 2687 }, { "epoch": 0.3745558419842542, "grad_norm": 0.17395101487636566, "learning_rate": 7.594635806604797e-06, "loss": 0.1323, "step": 2688 }, { "epoch": 0.37469518567546856, "grad_norm": 0.17123153805732727, "learning_rate": 7.592620896241536e-06, "loss": 0.1151, "step": 2689 }, { "epoch": 0.37483452936668293, "grad_norm": 0.16343003511428833, "learning_rate": 7.590605409857103e-06, "loss": 0.1383, "step": 2690 }, { "epoch": 0.3749738730578973, "grad_norm": 0.13347911834716797, "learning_rate": 7.58858934789929e-06, "loss": 0.1288, "step": 2691 }, { "epoch": 0.3751132167491117, "grad_norm": 0.15018080174922943, "learning_rate": 7.586572710816025e-06, "loss": 0.1619, "step": 2692 }, { "epoch": 0.37525256044032607, "grad_norm": 0.22090163826942444, "learning_rate": 7.584555499055355e-06, "loss": 0.1476, "step": 2693 }, { "epoch": 0.37539190413154044, "grad_norm": 0.17106537520885468, "learning_rate": 7.58253771306546e-06, "loss": 0.1259, "step": 2694 }, { "epoch": 0.3755312478227548, "grad_norm": 0.16330772638320923, "learning_rate": 7.5805193532946445e-06, "loss": 0.1143, "step": 2695 }, { "epoch": 0.3756705915139692, "grad_norm": 0.2008281797170639, "learning_rate": 7.578500420191344e-06, "loss": 0.1689, "step": 2696 }, { "epoch": 0.3758099352051836, "grad_norm": 0.1174507811665535, "learning_rate": 7.576480914204118e-06, "loss": 0.1168, "step": 2697 }, { "epoch": 0.37594927889639795, "grad_norm": 0.2149931937456131, "learning_rate": 7.574460835781654e-06, "loss": 0.1458, "step": 2698 }, { "epoch": 0.37608862258761233, "grad_norm": 0.20853818953037262, "learning_rate": 7.572440185372769e-06, "loss": 0.1184, "step": 2699 }, { "epoch": 0.3762279662788267, "grad_norm": 0.1465929001569748, "learning_rate": 7.570418963426405e-06, "loss": 0.1373, "step": 2700 }, { "epoch": 0.3763673099700411, "grad_norm": 0.20230050384998322, "learning_rate": 7.568397170391631e-06, "loss": 0.2139, "step": 2701 }, { "epoch": 0.37650665366125546, "grad_norm": 0.16209940612316132, "learning_rate": 7.566374806717642e-06, "loss": 0.1273, "step": 2702 }, { "epoch": 0.37664599735246984, "grad_norm": 0.15477994084358215, "learning_rate": 7.564351872853763e-06, "loss": 0.1478, "step": 2703 }, { "epoch": 0.3767853410436843, "grad_norm": 0.11533061414957047, "learning_rate": 7.562328369249443e-06, "loss": 0.1303, "step": 2704 }, { "epoch": 0.37692468473489865, "grad_norm": 0.16475261747837067, "learning_rate": 7.560304296354259e-06, "loss": 0.1385, "step": 2705 }, { "epoch": 0.37706402842611303, "grad_norm": 0.15742897987365723, "learning_rate": 7.5582796546179125e-06, "loss": 0.1346, "step": 2706 }, { "epoch": 0.3772033721173274, "grad_norm": 0.13662020862102509, "learning_rate": 7.556254444490232e-06, "loss": 0.131, "step": 2707 }, { "epoch": 0.3773427158085418, "grad_norm": 0.2125358134508133, "learning_rate": 7.554228666421176e-06, "loss": 0.1432, "step": 2708 }, { "epoch": 0.37748205949975616, "grad_norm": 0.09151940047740936, "learning_rate": 7.552202320860823e-06, "loss": 0.111, "step": 2709 }, { "epoch": 0.37762140319097054, "grad_norm": 0.39666885137557983, "learning_rate": 7.550175408259383e-06, "loss": 0.152, "step": 2710 }, { "epoch": 0.3777607468821849, "grad_norm": 0.19172163307666779, "learning_rate": 7.548147929067189e-06, "loss": 0.1312, "step": 2711 }, { "epoch": 0.3779000905733993, "grad_norm": 0.15031270682811737, "learning_rate": 7.546119883734699e-06, "loss": 0.1443, "step": 2712 }, { "epoch": 0.37803943426461367, "grad_norm": 0.37145906686782837, "learning_rate": 7.544091272712501e-06, "loss": 0.1756, "step": 2713 }, { "epoch": 0.37817877795582805, "grad_norm": 0.22986175119876862, "learning_rate": 7.542062096451306e-06, "loss": 0.1417, "step": 2714 }, { "epoch": 0.3783181216470424, "grad_norm": 0.16797523200511932, "learning_rate": 7.540032355401948e-06, "loss": 0.1241, "step": 2715 }, { "epoch": 0.3784574653382568, "grad_norm": 0.31914541125297546, "learning_rate": 7.53800205001539e-06, "loss": 0.1399, "step": 2716 }, { "epoch": 0.3785968090294712, "grad_norm": 0.18359464406967163, "learning_rate": 7.53597118074272e-06, "loss": 0.1249, "step": 2717 }, { "epoch": 0.37873615272068556, "grad_norm": 0.17173057794570923, "learning_rate": 7.5339397480351525e-06, "loss": 0.1313, "step": 2718 }, { "epoch": 0.37887549641189994, "grad_norm": 0.24844811856746674, "learning_rate": 7.531907752344023e-06, "loss": 0.1695, "step": 2719 }, { "epoch": 0.3790148401031143, "grad_norm": 0.19605030119419098, "learning_rate": 7.529875194120795e-06, "loss": 0.168, "step": 2720 }, { "epoch": 0.3791541837943287, "grad_norm": 0.33213678002357483, "learning_rate": 7.527842073817056e-06, "loss": 0.1317, "step": 2721 }, { "epoch": 0.37929352748554307, "grad_norm": 0.1989019215106964, "learning_rate": 7.525808391884521e-06, "loss": 0.1131, "step": 2722 }, { "epoch": 0.37943287117675745, "grad_norm": 0.16850851476192474, "learning_rate": 7.523774148775027e-06, "loss": 0.1197, "step": 2723 }, { "epoch": 0.3795722148679719, "grad_norm": 0.15308794379234314, "learning_rate": 7.521739344940535e-06, "loss": 0.1098, "step": 2724 }, { "epoch": 0.37971155855918626, "grad_norm": 0.17660194635391235, "learning_rate": 7.519703980833133e-06, "loss": 0.1254, "step": 2725 }, { "epoch": 0.37985090225040063, "grad_norm": 0.2832459509372711, "learning_rate": 7.517668056905033e-06, "loss": 0.1483, "step": 2726 }, { "epoch": 0.379990245941615, "grad_norm": 0.28529876470565796, "learning_rate": 7.515631573608568e-06, "loss": 0.1435, "step": 2727 }, { "epoch": 0.3801295896328294, "grad_norm": 0.17695140838623047, "learning_rate": 7.513594531396202e-06, "loss": 0.1158, "step": 2728 }, { "epoch": 0.38026893332404377, "grad_norm": 0.3915596008300781, "learning_rate": 7.511556930720517e-06, "loss": 0.1499, "step": 2729 }, { "epoch": 0.38040827701525814, "grad_norm": 0.20387481153011322, "learning_rate": 7.5095187720342224e-06, "loss": 0.1128, "step": 2730 }, { "epoch": 0.3805476207064725, "grad_norm": 0.22768566012382507, "learning_rate": 7.50748005579015e-06, "loss": 0.1466, "step": 2731 }, { "epoch": 0.3806869643976869, "grad_norm": 0.22572918236255646, "learning_rate": 7.505440782441256e-06, "loss": 0.1385, "step": 2732 }, { "epoch": 0.3808263080889013, "grad_norm": 0.2408112734556198, "learning_rate": 7.503400952440618e-06, "loss": 0.1533, "step": 2733 }, { "epoch": 0.38096565178011566, "grad_norm": 0.2949855923652649, "learning_rate": 7.501360566241444e-06, "loss": 0.1402, "step": 2734 }, { "epoch": 0.38110499547133003, "grad_norm": 0.237672820687294, "learning_rate": 7.499319624297059e-06, "loss": 0.1401, "step": 2735 }, { "epoch": 0.3812443391625444, "grad_norm": 0.1712993085384369, "learning_rate": 7.497278127060914e-06, "loss": 0.1145, "step": 2736 }, { "epoch": 0.3813836828537588, "grad_norm": 0.18000726401805878, "learning_rate": 7.4952360749865825e-06, "loss": 0.15, "step": 2737 }, { "epoch": 0.38152302654497317, "grad_norm": 0.3862842321395874, "learning_rate": 7.493193468527764e-06, "loss": 0.1452, "step": 2738 }, { "epoch": 0.38166237023618754, "grad_norm": 0.39610227942466736, "learning_rate": 7.491150308138275e-06, "loss": 0.1527, "step": 2739 }, { "epoch": 0.3818017139274019, "grad_norm": 0.25842422246932983, "learning_rate": 7.489106594272063e-06, "loss": 0.1653, "step": 2740 }, { "epoch": 0.3819410576186163, "grad_norm": 0.21015065908432007, "learning_rate": 7.487062327383192e-06, "loss": 0.1091, "step": 2741 }, { "epoch": 0.3820804013098307, "grad_norm": 0.1992882639169693, "learning_rate": 7.485017507925853e-06, "loss": 0.1442, "step": 2742 }, { "epoch": 0.38221974500104505, "grad_norm": 0.1777912974357605, "learning_rate": 7.482972136354359e-06, "loss": 0.1306, "step": 2743 }, { "epoch": 0.3823590886922595, "grad_norm": 0.20395325124263763, "learning_rate": 7.480926213123142e-06, "loss": 0.1537, "step": 2744 }, { "epoch": 0.38249843238347386, "grad_norm": 0.17183515429496765, "learning_rate": 7.4788797386867596e-06, "loss": 0.1052, "step": 2745 }, { "epoch": 0.38263777607468824, "grad_norm": 0.22160710394382477, "learning_rate": 7.476832713499896e-06, "loss": 0.1429, "step": 2746 }, { "epoch": 0.3827771197659026, "grad_norm": 0.16273318231105804, "learning_rate": 7.474785138017349e-06, "loss": 0.1345, "step": 2747 }, { "epoch": 0.382916463457117, "grad_norm": 0.24365343153476715, "learning_rate": 7.472737012694045e-06, "loss": 0.1889, "step": 2748 }, { "epoch": 0.3830558071483314, "grad_norm": 0.17747661471366882, "learning_rate": 7.470688337985029e-06, "loss": 0.1187, "step": 2749 }, { "epoch": 0.38319515083954575, "grad_norm": 0.190414160490036, "learning_rate": 7.468639114345473e-06, "loss": 0.1369, "step": 2750 }, { "epoch": 0.38333449453076013, "grad_norm": 0.24494735896587372, "learning_rate": 7.466589342230664e-06, "loss": 0.1629, "step": 2751 }, { "epoch": 0.3834738382219745, "grad_norm": 0.1748003363609314, "learning_rate": 7.464539022096018e-06, "loss": 0.1085, "step": 2752 }, { "epoch": 0.3836131819131889, "grad_norm": 0.4346935451030731, "learning_rate": 7.462488154397067e-06, "loss": 0.1701, "step": 2753 }, { "epoch": 0.38375252560440326, "grad_norm": 0.4748348295688629, "learning_rate": 7.460436739589467e-06, "loss": 0.1825, "step": 2754 }, { "epoch": 0.38389186929561764, "grad_norm": 0.15365630388259888, "learning_rate": 7.458384778128997e-06, "loss": 0.1138, "step": 2755 }, { "epoch": 0.384031212986832, "grad_norm": 0.19692175090312958, "learning_rate": 7.4563322704715556e-06, "loss": 0.1692, "step": 2756 }, { "epoch": 0.3841705566780464, "grad_norm": 0.17689895629882812, "learning_rate": 7.45427921707316e-06, "loss": 0.1223, "step": 2757 }, { "epoch": 0.38430990036926077, "grad_norm": 0.34108829498291016, "learning_rate": 7.452225618389959e-06, "loss": 0.1711, "step": 2758 }, { "epoch": 0.38444924406047515, "grad_norm": 0.13834674656391144, "learning_rate": 7.450171474878207e-06, "loss": 0.1257, "step": 2759 }, { "epoch": 0.3845885877516895, "grad_norm": 0.11614643782377243, "learning_rate": 7.4481167869942934e-06, "loss": 0.148, "step": 2760 }, { "epoch": 0.3847279314429039, "grad_norm": 0.2783724069595337, "learning_rate": 7.446061555194721e-06, "loss": 0.1112, "step": 2761 }, { "epoch": 0.3848672751341183, "grad_norm": 0.29649245738983154, "learning_rate": 7.4440057799361155e-06, "loss": 0.1114, "step": 2762 }, { "epoch": 0.38500661882533266, "grad_norm": 0.1822260618209839, "learning_rate": 7.441949461675223e-06, "loss": 0.123, "step": 2763 }, { "epoch": 0.38514596251654704, "grad_norm": 0.2060813009738922, "learning_rate": 7.439892600868911e-06, "loss": 0.1724, "step": 2764 }, { "epoch": 0.38528530620776147, "grad_norm": 0.12340342253446579, "learning_rate": 7.437835197974167e-06, "loss": 0.1262, "step": 2765 }, { "epoch": 0.38542464989897585, "grad_norm": 0.296153724193573, "learning_rate": 7.435777253448099e-06, "loss": 0.1667, "step": 2766 }, { "epoch": 0.3855639935901902, "grad_norm": 0.34556570649147034, "learning_rate": 7.433718767747934e-06, "loss": 0.1281, "step": 2767 }, { "epoch": 0.3857033372814046, "grad_norm": 0.2517361044883728, "learning_rate": 7.431659741331022e-06, "loss": 0.1082, "step": 2768 }, { "epoch": 0.385842680972619, "grad_norm": 0.2105344980955124, "learning_rate": 7.429600174654832e-06, "loss": 0.1412, "step": 2769 }, { "epoch": 0.38598202466383336, "grad_norm": 0.16146732866764069, "learning_rate": 7.427540068176951e-06, "loss": 0.1224, "step": 2770 }, { "epoch": 0.38612136835504773, "grad_norm": 0.1945921778678894, "learning_rate": 7.4254794223550885e-06, "loss": 0.1469, "step": 2771 }, { "epoch": 0.3862607120462621, "grad_norm": 0.4322197139263153, "learning_rate": 7.423418237647073e-06, "loss": 0.1614, "step": 2772 }, { "epoch": 0.3864000557374765, "grad_norm": 0.19196726381778717, "learning_rate": 7.421356514510853e-06, "loss": 0.116, "step": 2773 }, { "epoch": 0.38653939942869087, "grad_norm": 0.1297260820865631, "learning_rate": 7.419294253404497e-06, "loss": 0.1178, "step": 2774 }, { "epoch": 0.38667874311990524, "grad_norm": 0.16089008748531342, "learning_rate": 7.417231454786189e-06, "loss": 0.1333, "step": 2775 }, { "epoch": 0.3868180868111196, "grad_norm": 0.27012577652931213, "learning_rate": 7.41516811911424e-06, "loss": 0.1498, "step": 2776 }, { "epoch": 0.386957430502334, "grad_norm": 0.32889053225517273, "learning_rate": 7.4131042468470725e-06, "loss": 0.1561, "step": 2777 }, { "epoch": 0.3870967741935484, "grad_norm": 0.166663259267807, "learning_rate": 7.411039838443234e-06, "loss": 0.1357, "step": 2778 }, { "epoch": 0.38723611788476275, "grad_norm": 0.1276610642671585, "learning_rate": 7.4089748943613895e-06, "loss": 0.12, "step": 2779 }, { "epoch": 0.38737546157597713, "grad_norm": 0.21944326162338257, "learning_rate": 7.406909415060321e-06, "loss": 0.1667, "step": 2780 }, { "epoch": 0.3875148052671915, "grad_norm": 0.1519434005022049, "learning_rate": 7.404843400998931e-06, "loss": 0.144, "step": 2781 }, { "epoch": 0.3876541489584059, "grad_norm": 0.20105217397212982, "learning_rate": 7.4027768526362395e-06, "loss": 0.1414, "step": 2782 }, { "epoch": 0.38779349264962026, "grad_norm": 0.19346962869167328, "learning_rate": 7.4007097704313894e-06, "loss": 0.1416, "step": 2783 }, { "epoch": 0.38793283634083464, "grad_norm": 0.3200342059135437, "learning_rate": 7.398642154843637e-06, "loss": 0.1589, "step": 2784 }, { "epoch": 0.3880721800320491, "grad_norm": 0.13605166971683502, "learning_rate": 7.39657400633236e-06, "loss": 0.1009, "step": 2785 }, { "epoch": 0.38821152372326345, "grad_norm": 0.18485629558563232, "learning_rate": 7.394505325357053e-06, "loss": 0.1002, "step": 2786 }, { "epoch": 0.38835086741447783, "grad_norm": 0.17511023581027985, "learning_rate": 7.392436112377331e-06, "loss": 0.1195, "step": 2787 }, { "epoch": 0.3884902111056922, "grad_norm": 0.25965145230293274, "learning_rate": 7.390366367852923e-06, "loss": 0.1365, "step": 2788 }, { "epoch": 0.3886295547969066, "grad_norm": 0.14335061609745026, "learning_rate": 7.388296092243683e-06, "loss": 0.1126, "step": 2789 }, { "epoch": 0.38876889848812096, "grad_norm": 0.11652189493179321, "learning_rate": 7.386225286009576e-06, "loss": 0.1076, "step": 2790 }, { "epoch": 0.38890824217933534, "grad_norm": 0.1637805700302124, "learning_rate": 7.384153949610689e-06, "loss": 0.1162, "step": 2791 }, { "epoch": 0.3890475858705497, "grad_norm": 0.13808660209178925, "learning_rate": 7.382082083507226e-06, "loss": 0.1125, "step": 2792 }, { "epoch": 0.3891869295617641, "grad_norm": 0.17537923157215118, "learning_rate": 7.380009688159507e-06, "loss": 0.1365, "step": 2793 }, { "epoch": 0.38932627325297847, "grad_norm": 0.2224077731370926, "learning_rate": 7.377936764027973e-06, "loss": 0.1474, "step": 2794 }, { "epoch": 0.38946561694419285, "grad_norm": 0.16411715745925903, "learning_rate": 7.375863311573179e-06, "loss": 0.1242, "step": 2795 }, { "epoch": 0.3896049606354072, "grad_norm": 0.2033901810646057, "learning_rate": 7.373789331255799e-06, "loss": 0.1569, "step": 2796 }, { "epoch": 0.3897443043266216, "grad_norm": 0.26812607049942017, "learning_rate": 7.371714823536624e-06, "loss": 0.1704, "step": 2797 }, { "epoch": 0.389883648017836, "grad_norm": 0.1595817357301712, "learning_rate": 7.369639788876561e-06, "loss": 0.1452, "step": 2798 }, { "epoch": 0.39002299170905036, "grad_norm": 0.20041248202323914, "learning_rate": 7.367564227736639e-06, "loss": 0.1456, "step": 2799 }, { "epoch": 0.39016233540026474, "grad_norm": 0.16134417057037354, "learning_rate": 7.365488140577997e-06, "loss": 0.1244, "step": 2800 }, { "epoch": 0.3903016790914791, "grad_norm": 0.20222578942775726, "learning_rate": 7.3634115278618955e-06, "loss": 0.2105, "step": 2801 }, { "epoch": 0.3904410227826935, "grad_norm": 0.14835506677627563, "learning_rate": 7.36133439004971e-06, "loss": 0.1346, "step": 2802 }, { "epoch": 0.39058036647390787, "grad_norm": 0.2588559687137604, "learning_rate": 7.3592567276029336e-06, "loss": 0.1446, "step": 2803 }, { "epoch": 0.39071971016512225, "grad_norm": 0.12110699713230133, "learning_rate": 7.357178540983174e-06, "loss": 0.1021, "step": 2804 }, { "epoch": 0.3908590538563367, "grad_norm": 0.1381690353155136, "learning_rate": 7.355099830652159e-06, "loss": 0.1373, "step": 2805 }, { "epoch": 0.39099839754755106, "grad_norm": 0.16369594633579254, "learning_rate": 7.353020597071729e-06, "loss": 0.1213, "step": 2806 }, { "epoch": 0.39113774123876544, "grad_norm": 0.18176355957984924, "learning_rate": 7.350940840703842e-06, "loss": 0.1561, "step": 2807 }, { "epoch": 0.3912770849299798, "grad_norm": 0.14439474046230316, "learning_rate": 7.348860562010574e-06, "loss": 0.1316, "step": 2808 }, { "epoch": 0.3914164286211942, "grad_norm": 0.2636304497718811, "learning_rate": 7.346779761454113e-06, "loss": 0.1952, "step": 2809 }, { "epoch": 0.39155577231240857, "grad_norm": 0.30587613582611084, "learning_rate": 7.3446984394967705e-06, "loss": 0.1811, "step": 2810 }, { "epoch": 0.39169511600362295, "grad_norm": 0.21395984292030334, "learning_rate": 7.342616596600961e-06, "loss": 0.1505, "step": 2811 }, { "epoch": 0.3918344596948373, "grad_norm": 0.22702494263648987, "learning_rate": 7.3405342332292286e-06, "loss": 0.1255, "step": 2812 }, { "epoch": 0.3919738033860517, "grad_norm": 0.20488551259040833, "learning_rate": 7.338451349844225e-06, "loss": 0.1513, "step": 2813 }, { "epoch": 0.3921131470772661, "grad_norm": 0.24854695796966553, "learning_rate": 7.336367946908718e-06, "loss": 0.1552, "step": 2814 }, { "epoch": 0.39225249076848046, "grad_norm": 0.15754930675029755, "learning_rate": 7.334284024885595e-06, "loss": 0.1293, "step": 2815 }, { "epoch": 0.39239183445969483, "grad_norm": 0.32911911606788635, "learning_rate": 7.332199584237854e-06, "loss": 0.1447, "step": 2816 }, { "epoch": 0.3925311781509092, "grad_norm": 0.16353839635849, "learning_rate": 7.330114625428609e-06, "loss": 0.1329, "step": 2817 }, { "epoch": 0.3926705218421236, "grad_norm": 0.1399279534816742, "learning_rate": 7.328029148921093e-06, "loss": 0.1106, "step": 2818 }, { "epoch": 0.39280986553333797, "grad_norm": 0.13339181244373322, "learning_rate": 7.32594315517865e-06, "loss": 0.1156, "step": 2819 }, { "epoch": 0.39294920922455234, "grad_norm": 0.22063492238521576, "learning_rate": 7.32385664466474e-06, "loss": 0.1457, "step": 2820 }, { "epoch": 0.3930885529157667, "grad_norm": 0.10113904625177383, "learning_rate": 7.321769617842937e-06, "loss": 0.1162, "step": 2821 }, { "epoch": 0.3932278966069811, "grad_norm": 0.1424262672662735, "learning_rate": 7.319682075176932e-06, "loss": 0.1698, "step": 2822 }, { "epoch": 0.3933672402981955, "grad_norm": 0.13269829750061035, "learning_rate": 7.317594017130529e-06, "loss": 0.1509, "step": 2823 }, { "epoch": 0.39350658398940985, "grad_norm": 0.20017611980438232, "learning_rate": 7.3155054441676485e-06, "loss": 0.123, "step": 2824 }, { "epoch": 0.3936459276806243, "grad_norm": 0.17934516072273254, "learning_rate": 7.313416356752321e-06, "loss": 0.1323, "step": 2825 }, { "epoch": 0.39378527137183866, "grad_norm": 0.16302934288978577, "learning_rate": 7.311326755348697e-06, "loss": 0.187, "step": 2826 }, { "epoch": 0.39392461506305304, "grad_norm": 0.22664488852024078, "learning_rate": 7.309236640421033e-06, "loss": 0.1766, "step": 2827 }, { "epoch": 0.3940639587542674, "grad_norm": 0.17765139043331146, "learning_rate": 7.30714601243371e-06, "loss": 0.1691, "step": 2828 }, { "epoch": 0.3942033024454818, "grad_norm": 0.3376133441925049, "learning_rate": 7.305054871851217e-06, "loss": 0.1719, "step": 2829 }, { "epoch": 0.3943426461366962, "grad_norm": 0.15614384412765503, "learning_rate": 7.302963219138156e-06, "loss": 0.1406, "step": 2830 }, { "epoch": 0.39448198982791055, "grad_norm": 0.3659631907939911, "learning_rate": 7.3008710547592465e-06, "loss": 0.1733, "step": 2831 }, { "epoch": 0.39462133351912493, "grad_norm": 0.18369756639003754, "learning_rate": 7.298778379179317e-06, "loss": 0.1387, "step": 2832 }, { "epoch": 0.3947606772103393, "grad_norm": 0.1575116217136383, "learning_rate": 7.296685192863313e-06, "loss": 0.1333, "step": 2833 }, { "epoch": 0.3949000209015537, "grad_norm": 0.33748406171798706, "learning_rate": 7.2945914962762954e-06, "loss": 0.1511, "step": 2834 }, { "epoch": 0.39503936459276806, "grad_norm": 0.26121795177459717, "learning_rate": 7.292497289883432e-06, "loss": 0.1668, "step": 2835 }, { "epoch": 0.39517870828398244, "grad_norm": 0.24910761415958405, "learning_rate": 7.29040257415001e-06, "loss": 0.1309, "step": 2836 }, { "epoch": 0.3953180519751968, "grad_norm": 0.1637762039899826, "learning_rate": 7.288307349541427e-06, "loss": 0.1227, "step": 2837 }, { "epoch": 0.3954573956664112, "grad_norm": 0.2714988589286804, "learning_rate": 7.286211616523193e-06, "loss": 0.1184, "step": 2838 }, { "epoch": 0.39559673935762557, "grad_norm": 0.32752493023872375, "learning_rate": 7.284115375560934e-06, "loss": 0.1577, "step": 2839 }, { "epoch": 0.39573608304883995, "grad_norm": 0.12560662627220154, "learning_rate": 7.282018627120386e-06, "loss": 0.113, "step": 2840 }, { "epoch": 0.3958754267400543, "grad_norm": 0.16081230342388153, "learning_rate": 7.279921371667397e-06, "loss": 0.1124, "step": 2841 }, { "epoch": 0.3960147704312687, "grad_norm": 0.24636685848236084, "learning_rate": 7.2778236096679325e-06, "loss": 0.1365, "step": 2842 }, { "epoch": 0.3961541141224831, "grad_norm": 0.24062742292881012, "learning_rate": 7.275725341588064e-06, "loss": 0.1404, "step": 2843 }, { "epoch": 0.39629345781369746, "grad_norm": 0.1147652342915535, "learning_rate": 7.27362656789398e-06, "loss": 0.0979, "step": 2844 }, { "epoch": 0.3964328015049119, "grad_norm": 0.12112388759851456, "learning_rate": 7.2715272890519815e-06, "loss": 0.1285, "step": 2845 }, { "epoch": 0.39657214519612627, "grad_norm": 0.3388013541698456, "learning_rate": 7.2694275055284795e-06, "loss": 0.1532, "step": 2846 }, { "epoch": 0.39671148888734065, "grad_norm": 0.40543413162231445, "learning_rate": 7.267327217789998e-06, "loss": 0.1532, "step": 2847 }, { "epoch": 0.396850832578555, "grad_norm": 0.26131340861320496, "learning_rate": 7.26522642630317e-06, "loss": 0.1481, "step": 2848 }, { "epoch": 0.3969901762697694, "grad_norm": 0.1946236491203308, "learning_rate": 7.263125131534749e-06, "loss": 0.1258, "step": 2849 }, { "epoch": 0.3971295199609838, "grad_norm": 0.25478124618530273, "learning_rate": 7.26102333395159e-06, "loss": 0.1252, "step": 2850 }, { "epoch": 0.39726886365219816, "grad_norm": 0.21682138741016388, "learning_rate": 7.2589210340206675e-06, "loss": 0.1291, "step": 2851 }, { "epoch": 0.39740820734341253, "grad_norm": 0.16271629929542542, "learning_rate": 7.256818232209062e-06, "loss": 0.1353, "step": 2852 }, { "epoch": 0.3975475510346269, "grad_norm": 0.13877138495445251, "learning_rate": 7.25471492898397e-06, "loss": 0.1443, "step": 2853 }, { "epoch": 0.3976868947258413, "grad_norm": 0.18104355037212372, "learning_rate": 7.2526111248126976e-06, "loss": 0.1752, "step": 2854 }, { "epoch": 0.39782623841705567, "grad_norm": 0.265982061624527, "learning_rate": 7.250506820162661e-06, "loss": 0.1477, "step": 2855 }, { "epoch": 0.39796558210827004, "grad_norm": 0.24046474695205688, "learning_rate": 7.248402015501388e-06, "loss": 0.1305, "step": 2856 }, { "epoch": 0.3981049257994844, "grad_norm": 0.1032278835773468, "learning_rate": 7.246296711296519e-06, "loss": 0.1225, "step": 2857 }, { "epoch": 0.3982442694906988, "grad_norm": 0.1710490584373474, "learning_rate": 7.244190908015805e-06, "loss": 0.1217, "step": 2858 }, { "epoch": 0.3983836131819132, "grad_norm": 0.1883491575717926, "learning_rate": 7.2420846061271065e-06, "loss": 0.1547, "step": 2859 }, { "epoch": 0.39852295687312755, "grad_norm": 0.11662416160106659, "learning_rate": 7.239977806098398e-06, "loss": 0.1131, "step": 2860 }, { "epoch": 0.39866230056434193, "grad_norm": 0.2829379141330719, "learning_rate": 7.237870508397757e-06, "loss": 0.1324, "step": 2861 }, { "epoch": 0.3988016442555563, "grad_norm": 0.13095325231552124, "learning_rate": 7.235762713493384e-06, "loss": 0.1229, "step": 2862 }, { "epoch": 0.3989409879467707, "grad_norm": 0.1832575500011444, "learning_rate": 7.2336544218535776e-06, "loss": 0.144, "step": 2863 }, { "epoch": 0.39908033163798506, "grad_norm": 0.17613740265369415, "learning_rate": 7.231545633946755e-06, "loss": 0.1281, "step": 2864 }, { "epoch": 0.3992196753291995, "grad_norm": 0.45076557993888855, "learning_rate": 7.229436350241439e-06, "loss": 0.1555, "step": 2865 }, { "epoch": 0.3993590190204139, "grad_norm": 0.11891768872737885, "learning_rate": 7.2273265712062646e-06, "loss": 0.1078, "step": 2866 }, { "epoch": 0.39949836271162825, "grad_norm": 0.19581691920757294, "learning_rate": 7.225216297309977e-06, "loss": 0.1264, "step": 2867 }, { "epoch": 0.39963770640284263, "grad_norm": 0.30535653233528137, "learning_rate": 7.22310552902143e-06, "loss": 0.1334, "step": 2868 }, { "epoch": 0.399777050094057, "grad_norm": 0.14994283020496368, "learning_rate": 7.220994266809591e-06, "loss": 0.1093, "step": 2869 }, { "epoch": 0.3999163937852714, "grad_norm": 0.1358235478401184, "learning_rate": 7.21888251114353e-06, "loss": 0.1219, "step": 2870 }, { "epoch": 0.40005573747648576, "grad_norm": 0.1825864613056183, "learning_rate": 7.2167702624924345e-06, "loss": 0.1425, "step": 2871 }, { "epoch": 0.40019508116770014, "grad_norm": 0.2906436324119568, "learning_rate": 7.2146575213255945e-06, "loss": 0.1602, "step": 2872 }, { "epoch": 0.4003344248589145, "grad_norm": 0.20729312300682068, "learning_rate": 7.212544288112415e-06, "loss": 0.1139, "step": 2873 }, { "epoch": 0.4004737685501289, "grad_norm": 0.08537215739488602, "learning_rate": 7.21043056332241e-06, "loss": 0.1005, "step": 2874 }, { "epoch": 0.40061311224134327, "grad_norm": 0.2745418846607208, "learning_rate": 7.208316347425197e-06, "loss": 0.162, "step": 2875 }, { "epoch": 0.40075245593255765, "grad_norm": 0.1639496386051178, "learning_rate": 7.206201640890509e-06, "loss": 0.154, "step": 2876 }, { "epoch": 0.400891799623772, "grad_norm": 0.23212425410747528, "learning_rate": 7.204086444188184e-06, "loss": 0.1339, "step": 2877 }, { "epoch": 0.4010311433149864, "grad_norm": 0.1366678923368454, "learning_rate": 7.201970757788172e-06, "loss": 0.1171, "step": 2878 }, { "epoch": 0.4011704870062008, "grad_norm": 0.2469577193260193, "learning_rate": 7.199854582160529e-06, "loss": 0.1441, "step": 2879 }, { "epoch": 0.40130983069741516, "grad_norm": 0.20171840488910675, "learning_rate": 7.197737917775422e-06, "loss": 0.1487, "step": 2880 }, { "epoch": 0.40144917438862954, "grad_norm": 0.2997787892818451, "learning_rate": 7.1956207651031254e-06, "loss": 0.1507, "step": 2881 }, { "epoch": 0.4015885180798439, "grad_norm": 0.17462724447250366, "learning_rate": 7.193503124614021e-06, "loss": 0.1625, "step": 2882 }, { "epoch": 0.4017278617710583, "grad_norm": 0.2037411630153656, "learning_rate": 7.191384996778601e-06, "loss": 0.1381, "step": 2883 }, { "epoch": 0.40186720546227267, "grad_norm": 0.14419987797737122, "learning_rate": 7.189266382067464e-06, "loss": 0.1405, "step": 2884 }, { "epoch": 0.4020065491534871, "grad_norm": 0.1221531555056572, "learning_rate": 7.1871472809513185e-06, "loss": 0.118, "step": 2885 }, { "epoch": 0.4021458928447015, "grad_norm": 0.25351372361183167, "learning_rate": 7.185027693900982e-06, "loss": 0.1469, "step": 2886 }, { "epoch": 0.40228523653591586, "grad_norm": 0.1947154849767685, "learning_rate": 7.182907621387376e-06, "loss": 0.1544, "step": 2887 }, { "epoch": 0.40242458022713024, "grad_norm": 0.099117211997509, "learning_rate": 7.180787063881534e-06, "loss": 0.1011, "step": 2888 }, { "epoch": 0.4025639239183446, "grad_norm": 0.16486285626888275, "learning_rate": 7.178666021854593e-06, "loss": 0.1303, "step": 2889 }, { "epoch": 0.402703267609559, "grad_norm": 0.205363467335701, "learning_rate": 7.176544495777804e-06, "loss": 0.1186, "step": 2890 }, { "epoch": 0.40284261130077337, "grad_norm": 0.32025280594825745, "learning_rate": 7.174422486122517e-06, "loss": 0.1347, "step": 2891 }, { "epoch": 0.40298195499198775, "grad_norm": 0.20640358328819275, "learning_rate": 7.1722999933602e-06, "loss": 0.1214, "step": 2892 }, { "epoch": 0.4031212986832021, "grad_norm": 0.32689741253852844, "learning_rate": 7.170177017962415e-06, "loss": 0.1613, "step": 2893 }, { "epoch": 0.4032606423744165, "grad_norm": 0.25072720646858215, "learning_rate": 7.168053560400845e-06, "loss": 0.1305, "step": 2894 }, { "epoch": 0.4033999860656309, "grad_norm": 0.2586536407470703, "learning_rate": 7.16592962114727e-06, "loss": 0.1828, "step": 2895 }, { "epoch": 0.40353932975684526, "grad_norm": 0.15736651420593262, "learning_rate": 7.163805200673584e-06, "loss": 0.1358, "step": 2896 }, { "epoch": 0.40367867344805963, "grad_norm": 0.37686121463775635, "learning_rate": 7.161680299451782e-06, "loss": 0.1696, "step": 2897 }, { "epoch": 0.403818017139274, "grad_norm": 0.27584174275398254, "learning_rate": 7.159554917953968e-06, "loss": 0.1141, "step": 2898 }, { "epoch": 0.4039573608304884, "grad_norm": 0.1993250995874405, "learning_rate": 7.157429056652357e-06, "loss": 0.1656, "step": 2899 }, { "epoch": 0.40409670452170277, "grad_norm": 0.17704622447490692, "learning_rate": 7.155302716019263e-06, "loss": 0.0989, "step": 2900 }, { "epoch": 0.40423604821291714, "grad_norm": 0.2424338012933731, "learning_rate": 7.153175896527112e-06, "loss": 0.1338, "step": 2901 }, { "epoch": 0.4043753919041315, "grad_norm": 0.1251702457666397, "learning_rate": 7.151048598648436e-06, "loss": 0.1022, "step": 2902 }, { "epoch": 0.4045147355953459, "grad_norm": 0.1929388791322708, "learning_rate": 7.148920822855869e-06, "loss": 0.1304, "step": 2903 }, { "epoch": 0.4046540792865603, "grad_norm": 0.15176498889923096, "learning_rate": 7.146792569622157e-06, "loss": 0.1477, "step": 2904 }, { "epoch": 0.4047934229777747, "grad_norm": 0.1996036022901535, "learning_rate": 7.144663839420147e-06, "loss": 0.1573, "step": 2905 }, { "epoch": 0.4049327666689891, "grad_norm": 0.15615049004554749, "learning_rate": 7.142534632722797e-06, "loss": 0.1269, "step": 2906 }, { "epoch": 0.40507211036020346, "grad_norm": 0.2184285670518875, "learning_rate": 7.140404950003164e-06, "loss": 0.1346, "step": 2907 }, { "epoch": 0.40521145405141784, "grad_norm": 0.204367995262146, "learning_rate": 7.138274791734421e-06, "loss": 0.161, "step": 2908 }, { "epoch": 0.4053507977426322, "grad_norm": 0.1485452502965927, "learning_rate": 7.136144158389834e-06, "loss": 0.128, "step": 2909 }, { "epoch": 0.4054901414338466, "grad_norm": 0.16174201667308807, "learning_rate": 7.134013050442785e-06, "loss": 0.1194, "step": 2910 }, { "epoch": 0.405629485125061, "grad_norm": 0.27521848678588867, "learning_rate": 7.1318814683667555e-06, "loss": 0.1746, "step": 2911 }, { "epoch": 0.40576882881627535, "grad_norm": 0.19123761355876923, "learning_rate": 7.129749412635337e-06, "loss": 0.1175, "step": 2912 }, { "epoch": 0.40590817250748973, "grad_norm": 0.12413518130779266, "learning_rate": 7.1276168837222215e-06, "loss": 0.1125, "step": 2913 }, { "epoch": 0.4060475161987041, "grad_norm": 0.2733069956302643, "learning_rate": 7.125483882101208e-06, "loss": 0.1426, "step": 2914 }, { "epoch": 0.4061868598899185, "grad_norm": 0.15719880163669586, "learning_rate": 7.123350408246203e-06, "loss": 0.1301, "step": 2915 }, { "epoch": 0.40632620358113286, "grad_norm": 0.16974672675132751, "learning_rate": 7.121216462631213e-06, "loss": 0.1365, "step": 2916 }, { "epoch": 0.40646554727234724, "grad_norm": 0.11944865435361862, "learning_rate": 7.1190820457303535e-06, "loss": 0.0926, "step": 2917 }, { "epoch": 0.4066048909635616, "grad_norm": 0.2313021719455719, "learning_rate": 7.116947158017842e-06, "loss": 0.1565, "step": 2918 }, { "epoch": 0.406744234654776, "grad_norm": 0.21925829350948334, "learning_rate": 7.114811799968005e-06, "loss": 0.1504, "step": 2919 }, { "epoch": 0.40688357834599037, "grad_norm": 0.14721672236919403, "learning_rate": 7.1126759720552665e-06, "loss": 0.1327, "step": 2920 }, { "epoch": 0.40702292203720475, "grad_norm": 0.11755885183811188, "learning_rate": 7.11053967475416e-06, "loss": 0.1115, "step": 2921 }, { "epoch": 0.4071622657284191, "grad_norm": 0.2556135356426239, "learning_rate": 7.108402908539323e-06, "loss": 0.1385, "step": 2922 }, { "epoch": 0.4073016094196335, "grad_norm": 0.14779259264469147, "learning_rate": 7.106265673885494e-06, "loss": 0.1082, "step": 2923 }, { "epoch": 0.4074409531108479, "grad_norm": 0.13360798358917236, "learning_rate": 7.104127971267521e-06, "loss": 0.1182, "step": 2924 }, { "epoch": 0.4075802968020623, "grad_norm": 0.1377730667591095, "learning_rate": 7.10198980116035e-06, "loss": 0.1222, "step": 2925 }, { "epoch": 0.4077196404932767, "grad_norm": 0.16112034022808075, "learning_rate": 7.099851164039035e-06, "loss": 0.1374, "step": 2926 }, { "epoch": 0.40785898418449107, "grad_norm": 0.1645137369632721, "learning_rate": 7.0977120603787296e-06, "loss": 0.1204, "step": 2927 }, { "epoch": 0.40799832787570545, "grad_norm": 0.38201162219047546, "learning_rate": 7.095572490654698e-06, "loss": 0.1918, "step": 2928 }, { "epoch": 0.4081376715669198, "grad_norm": 0.21130768954753876, "learning_rate": 7.0934324553423015e-06, "loss": 0.1311, "step": 2929 }, { "epoch": 0.4082770152581342, "grad_norm": 0.3418940007686615, "learning_rate": 7.091291954917007e-06, "loss": 0.1616, "step": 2930 }, { "epoch": 0.4084163589493486, "grad_norm": 0.34987175464630127, "learning_rate": 7.089150989854385e-06, "loss": 0.172, "step": 2931 }, { "epoch": 0.40855570264056296, "grad_norm": 0.21722136437892914, "learning_rate": 7.0870095606301095e-06, "loss": 0.1293, "step": 2932 }, { "epoch": 0.40869504633177733, "grad_norm": 0.2694452404975891, "learning_rate": 7.084867667719957e-06, "loss": 0.1631, "step": 2933 }, { "epoch": 0.4088343900229917, "grad_norm": 0.1821424514055252, "learning_rate": 7.082725311599808e-06, "loss": 0.1535, "step": 2934 }, { "epoch": 0.4089737337142061, "grad_norm": 0.1443936675786972, "learning_rate": 7.080582492745642e-06, "loss": 0.1084, "step": 2935 }, { "epoch": 0.40911307740542047, "grad_norm": 0.19014276564121246, "learning_rate": 7.0784392116335475e-06, "loss": 0.1485, "step": 2936 }, { "epoch": 0.40925242109663484, "grad_norm": 0.21948117017745972, "learning_rate": 7.076295468739711e-06, "loss": 0.1593, "step": 2937 }, { "epoch": 0.4093917647878492, "grad_norm": 0.2190231829881668, "learning_rate": 7.074151264540425e-06, "loss": 0.1923, "step": 2938 }, { "epoch": 0.4095311084790636, "grad_norm": 0.17250195145606995, "learning_rate": 7.0720065995120815e-06, "loss": 0.1429, "step": 2939 }, { "epoch": 0.409670452170278, "grad_norm": 0.20157621800899506, "learning_rate": 7.069861474131176e-06, "loss": 0.1098, "step": 2940 }, { "epoch": 0.40980979586149235, "grad_norm": 0.1130887120962143, "learning_rate": 7.067715888874307e-06, "loss": 0.1236, "step": 2941 }, { "epoch": 0.40994913955270673, "grad_norm": 0.20335890352725983, "learning_rate": 7.065569844218175e-06, "loss": 0.1274, "step": 2942 }, { "epoch": 0.4100884832439211, "grad_norm": 0.1637859344482422, "learning_rate": 7.0634233406395806e-06, "loss": 0.1056, "step": 2943 }, { "epoch": 0.4102278269351355, "grad_norm": 0.24218663573265076, "learning_rate": 7.061276378615428e-06, "loss": 0.146, "step": 2944 }, { "epoch": 0.4103671706263499, "grad_norm": 0.2201913595199585, "learning_rate": 7.059128958622725e-06, "loss": 0.1549, "step": 2945 }, { "epoch": 0.4105065143175643, "grad_norm": 0.2586732506752014, "learning_rate": 7.056981081138578e-06, "loss": 0.163, "step": 2946 }, { "epoch": 0.4106458580087787, "grad_norm": 0.2051210105419159, "learning_rate": 7.054832746640196e-06, "loss": 0.1479, "step": 2947 }, { "epoch": 0.41078520169999305, "grad_norm": 0.23226703703403473, "learning_rate": 7.05268395560489e-06, "loss": 0.1543, "step": 2948 }, { "epoch": 0.41092454539120743, "grad_norm": 0.33606255054473877, "learning_rate": 7.050534708510073e-06, "loss": 0.1381, "step": 2949 }, { "epoch": 0.4110638890824218, "grad_norm": 0.15262837707996368, "learning_rate": 7.048385005833258e-06, "loss": 0.1087, "step": 2950 }, { "epoch": 0.4112032327736362, "grad_norm": 0.28736332058906555, "learning_rate": 7.04623484805206e-06, "loss": 0.1347, "step": 2951 }, { "epoch": 0.41134257646485056, "grad_norm": 0.20095038414001465, "learning_rate": 7.044084235644196e-06, "loss": 0.1205, "step": 2952 }, { "epoch": 0.41148192015606494, "grad_norm": 0.1502094715833664, "learning_rate": 7.041933169087482e-06, "loss": 0.1334, "step": 2953 }, { "epoch": 0.4116212638472793, "grad_norm": 0.26446452736854553, "learning_rate": 7.039781648859836e-06, "loss": 0.1547, "step": 2954 }, { "epoch": 0.4117606075384937, "grad_norm": 0.14132487773895264, "learning_rate": 7.037629675439276e-06, "loss": 0.1133, "step": 2955 }, { "epoch": 0.4118999512297081, "grad_norm": 0.46311822533607483, "learning_rate": 7.035477249303923e-06, "loss": 0.1538, "step": 2956 }, { "epoch": 0.41203929492092245, "grad_norm": 0.10927531123161316, "learning_rate": 7.033324370931993e-06, "loss": 0.1166, "step": 2957 }, { "epoch": 0.4121786386121368, "grad_norm": 0.15923020243644714, "learning_rate": 7.031171040801813e-06, "loss": 0.1277, "step": 2958 }, { "epoch": 0.4123179823033512, "grad_norm": 0.28749316930770874, "learning_rate": 7.029017259391797e-06, "loss": 0.1253, "step": 2959 }, { "epoch": 0.4124573259945656, "grad_norm": 0.19986186921596527, "learning_rate": 7.026863027180472e-06, "loss": 0.1526, "step": 2960 }, { "epoch": 0.41259666968577996, "grad_norm": 0.32821419835090637, "learning_rate": 7.024708344646455e-06, "loss": 0.1459, "step": 2961 }, { "epoch": 0.41273601337699434, "grad_norm": 0.22058281302452087, "learning_rate": 7.022553212268469e-06, "loss": 0.1441, "step": 2962 }, { "epoch": 0.4128753570682087, "grad_norm": 0.21306639909744263, "learning_rate": 7.020397630525336e-06, "loss": 0.133, "step": 2963 }, { "epoch": 0.4130147007594231, "grad_norm": 0.40256068110466003, "learning_rate": 7.018241599895974e-06, "loss": 0.1383, "step": 2964 }, { "epoch": 0.41315404445063747, "grad_norm": 0.20920555293560028, "learning_rate": 7.016085120859406e-06, "loss": 0.1417, "step": 2965 }, { "epoch": 0.4132933881418519, "grad_norm": 0.20189301669597626, "learning_rate": 7.013928193894753e-06, "loss": 0.1306, "step": 2966 }, { "epoch": 0.4134327318330663, "grad_norm": 0.2474035620689392, "learning_rate": 7.011770819481234e-06, "loss": 0.1429, "step": 2967 }, { "epoch": 0.41357207552428066, "grad_norm": 0.2138330489397049, "learning_rate": 7.0096129980981674e-06, "loss": 0.1264, "step": 2968 }, { "epoch": 0.41371141921549504, "grad_norm": 0.28859615325927734, "learning_rate": 7.0074547302249755e-06, "loss": 0.1451, "step": 2969 }, { "epoch": 0.4138507629067094, "grad_norm": 0.16099759936332703, "learning_rate": 7.005296016341171e-06, "loss": 0.0993, "step": 2970 }, { "epoch": 0.4139901065979238, "grad_norm": 0.21821044385433197, "learning_rate": 7.003136856926374e-06, "loss": 0.1251, "step": 2971 }, { "epoch": 0.41412945028913817, "grad_norm": 0.41147878766059875, "learning_rate": 7.0009772524603e-06, "loss": 0.1273, "step": 2972 }, { "epoch": 0.41426879398035255, "grad_norm": 0.2713273763656616, "learning_rate": 6.998817203422763e-06, "loss": 0.1413, "step": 2973 }, { "epoch": 0.4144081376715669, "grad_norm": 0.16212521493434906, "learning_rate": 6.996656710293679e-06, "loss": 0.1474, "step": 2974 }, { "epoch": 0.4145474813627813, "grad_norm": 0.17218437790870667, "learning_rate": 6.994495773553056e-06, "loss": 0.1371, "step": 2975 }, { "epoch": 0.4146868250539957, "grad_norm": 0.20975400507450104, "learning_rate": 6.992334393681008e-06, "loss": 0.1277, "step": 2976 }, { "epoch": 0.41482616874521006, "grad_norm": 0.09330683201551437, "learning_rate": 6.990172571157744e-06, "loss": 0.1024, "step": 2977 }, { "epoch": 0.41496551243642443, "grad_norm": 0.16154642403125763, "learning_rate": 6.988010306463571e-06, "loss": 0.1529, "step": 2978 }, { "epoch": 0.4151048561276388, "grad_norm": 0.1559843122959137, "learning_rate": 6.985847600078894e-06, "loss": 0.1665, "step": 2979 }, { "epoch": 0.4152441998188532, "grad_norm": 0.18881598114967346, "learning_rate": 6.98368445248422e-06, "loss": 0.138, "step": 2980 }, { "epoch": 0.41538354351006757, "grad_norm": 0.12747515738010406, "learning_rate": 6.981520864160147e-06, "loss": 0.1143, "step": 2981 }, { "epoch": 0.41552288720128194, "grad_norm": 0.2141481190919876, "learning_rate": 6.979356835587377e-06, "loss": 0.1335, "step": 2982 }, { "epoch": 0.4156622308924963, "grad_norm": 0.19386635720729828, "learning_rate": 6.977192367246709e-06, "loss": 0.1229, "step": 2983 }, { "epoch": 0.4158015745837107, "grad_norm": 0.19948609173297882, "learning_rate": 6.9750274596190344e-06, "loss": 0.1264, "step": 2984 }, { "epoch": 0.4159409182749251, "grad_norm": 0.1811331808567047, "learning_rate": 6.972862113185353e-06, "loss": 0.1434, "step": 2985 }, { "epoch": 0.4160802619661395, "grad_norm": 0.26845037937164307, "learning_rate": 6.970696328426749e-06, "loss": 0.136, "step": 2986 }, { "epoch": 0.4162196056573539, "grad_norm": 0.21596714854240417, "learning_rate": 6.968530105824413e-06, "loss": 0.1273, "step": 2987 }, { "epoch": 0.41635894934856826, "grad_norm": 0.11739884316921234, "learning_rate": 6.966363445859629e-06, "loss": 0.1012, "step": 2988 }, { "epoch": 0.41649829303978264, "grad_norm": 0.16593296825885773, "learning_rate": 6.96419634901378e-06, "loss": 0.1105, "step": 2989 }, { "epoch": 0.416637636730997, "grad_norm": 0.14141778647899628, "learning_rate": 6.962028815768347e-06, "loss": 0.1306, "step": 2990 }, { "epoch": 0.4167769804222114, "grad_norm": 0.15131376683712006, "learning_rate": 6.959860846604903e-06, "loss": 0.1264, "step": 2991 }, { "epoch": 0.4169163241134258, "grad_norm": 0.13378950953483582, "learning_rate": 6.957692442005126e-06, "loss": 0.138, "step": 2992 }, { "epoch": 0.41705566780464015, "grad_norm": 0.18610386550426483, "learning_rate": 6.95552360245078e-06, "loss": 0.1485, "step": 2993 }, { "epoch": 0.41719501149585453, "grad_norm": 0.1389809101819992, "learning_rate": 6.953354328423737e-06, "loss": 0.1094, "step": 2994 }, { "epoch": 0.4173343551870689, "grad_norm": 0.22015447914600372, "learning_rate": 6.951184620405958e-06, "loss": 0.1458, "step": 2995 }, { "epoch": 0.4174736988782833, "grad_norm": 0.1997324675321579, "learning_rate": 6.949014478879502e-06, "loss": 0.16, "step": 2996 }, { "epoch": 0.41761304256949766, "grad_norm": 0.287946879863739, "learning_rate": 6.946843904326527e-06, "loss": 0.1526, "step": 2997 }, { "epoch": 0.41775238626071204, "grad_norm": 0.1843378096818924, "learning_rate": 6.944672897229282e-06, "loss": 0.1287, "step": 2998 }, { "epoch": 0.4178917299519264, "grad_norm": 0.12001081556081772, "learning_rate": 6.942501458070117e-06, "loss": 0.1319, "step": 2999 }, { "epoch": 0.4180310736431408, "grad_norm": 0.15017972886562347, "learning_rate": 6.940329587331477e-06, "loss": 0.1041, "step": 3000 }, { "epoch": 0.41817041733435517, "grad_norm": 0.09574652463197708, "learning_rate": 6.938157285495901e-06, "loss": 0.1156, "step": 3001 }, { "epoch": 0.41830976102556955, "grad_norm": 0.09169705957174301, "learning_rate": 6.935984553046025e-06, "loss": 0.0924, "step": 3002 }, { "epoch": 0.4184491047167839, "grad_norm": 0.10053680837154388, "learning_rate": 6.93381139046458e-06, "loss": 0.1066, "step": 3003 }, { "epoch": 0.4185884484079983, "grad_norm": 0.19946618378162384, "learning_rate": 6.931637798234394e-06, "loss": 0.1153, "step": 3004 }, { "epoch": 0.4187277920992127, "grad_norm": 0.1622433066368103, "learning_rate": 6.929463776838389e-06, "loss": 0.1312, "step": 3005 }, { "epoch": 0.4188671357904271, "grad_norm": 0.22562290728092194, "learning_rate": 6.927289326759585e-06, "loss": 0.1377, "step": 3006 }, { "epoch": 0.4190064794816415, "grad_norm": 0.1255679875612259, "learning_rate": 6.925114448481089e-06, "loss": 0.1341, "step": 3007 }, { "epoch": 0.41914582317285587, "grad_norm": 0.16698966920375824, "learning_rate": 6.922939142486118e-06, "loss": 0.1461, "step": 3008 }, { "epoch": 0.41928516686407025, "grad_norm": 0.17918092012405396, "learning_rate": 6.9207634092579686e-06, "loss": 0.1402, "step": 3009 }, { "epoch": 0.4194245105552846, "grad_norm": 0.3390943109989166, "learning_rate": 6.9185872492800434e-06, "loss": 0.1287, "step": 3010 }, { "epoch": 0.419563854246499, "grad_norm": 0.22429195046424866, "learning_rate": 6.916410663035832e-06, "loss": 0.1509, "step": 3011 }, { "epoch": 0.4197031979377134, "grad_norm": 0.2847416400909424, "learning_rate": 6.9142336510089235e-06, "loss": 0.128, "step": 3012 }, { "epoch": 0.41984254162892776, "grad_norm": 0.14156293869018555, "learning_rate": 6.912056213683001e-06, "loss": 0.1364, "step": 3013 }, { "epoch": 0.41998188532014213, "grad_norm": 0.1201397106051445, "learning_rate": 6.909878351541841e-06, "loss": 0.1112, "step": 3014 }, { "epoch": 0.4201212290113565, "grad_norm": 0.16091319918632507, "learning_rate": 6.907700065069315e-06, "loss": 0.1255, "step": 3015 }, { "epoch": 0.4202605727025709, "grad_norm": 0.2543691098690033, "learning_rate": 6.905521354749387e-06, "loss": 0.1324, "step": 3016 }, { "epoch": 0.42039991639378527, "grad_norm": 0.14165829122066498, "learning_rate": 6.90334222106612e-06, "loss": 0.116, "step": 3017 }, { "epoch": 0.42053926008499964, "grad_norm": 0.18268899619579315, "learning_rate": 6.901162664503662e-06, "loss": 0.1188, "step": 3018 }, { "epoch": 0.420678603776214, "grad_norm": 0.2519872188568115, "learning_rate": 6.898982685546267e-06, "loss": 0.1219, "step": 3019 }, { "epoch": 0.4208179474674284, "grad_norm": 0.2747870683670044, "learning_rate": 6.896802284678273e-06, "loss": 0.1584, "step": 3020 }, { "epoch": 0.4209572911586428, "grad_norm": 0.229951411485672, "learning_rate": 6.894621462384116e-06, "loss": 0.1347, "step": 3021 }, { "epoch": 0.42109663484985715, "grad_norm": 0.14787493646144867, "learning_rate": 6.8924402191483245e-06, "loss": 0.115, "step": 3022 }, { "epoch": 0.42123597854107153, "grad_norm": 0.1319732815027237, "learning_rate": 6.890258555455521e-06, "loss": 0.13, "step": 3023 }, { "epoch": 0.4213753222322859, "grad_norm": 0.26045241951942444, "learning_rate": 6.888076471790423e-06, "loss": 0.1984, "step": 3024 }, { "epoch": 0.4215146659235003, "grad_norm": 0.19368131458759308, "learning_rate": 6.8858939686378376e-06, "loss": 0.157, "step": 3025 }, { "epoch": 0.4216540096147147, "grad_norm": 0.3847731649875641, "learning_rate": 6.8837110464826685e-06, "loss": 0.1552, "step": 3026 }, { "epoch": 0.4217933533059291, "grad_norm": 0.1566760092973709, "learning_rate": 6.881527705809912e-06, "loss": 0.1436, "step": 3027 }, { "epoch": 0.4219326969971435, "grad_norm": 0.18504740297794342, "learning_rate": 6.879343947104653e-06, "loss": 0.1507, "step": 3028 }, { "epoch": 0.42207204068835785, "grad_norm": 0.15457619726657867, "learning_rate": 6.8771597708520766e-06, "loss": 0.1302, "step": 3029 }, { "epoch": 0.42221138437957223, "grad_norm": 0.22426792979240417, "learning_rate": 6.874975177537455e-06, "loss": 0.1318, "step": 3030 }, { "epoch": 0.4223507280707866, "grad_norm": 0.2972908318042755, "learning_rate": 6.872790167646155e-06, "loss": 0.1726, "step": 3031 }, { "epoch": 0.422490071762001, "grad_norm": 0.1588592678308487, "learning_rate": 6.870604741663638e-06, "loss": 0.1372, "step": 3032 }, { "epoch": 0.42262941545321536, "grad_norm": 0.11000800877809525, "learning_rate": 6.868418900075452e-06, "loss": 0.1233, "step": 3033 }, { "epoch": 0.42276875914442974, "grad_norm": 0.11487546563148499, "learning_rate": 6.866232643367243e-06, "loss": 0.1159, "step": 3034 }, { "epoch": 0.4229081028356441, "grad_norm": 0.13089925050735474, "learning_rate": 6.864045972024749e-06, "loss": 0.1092, "step": 3035 }, { "epoch": 0.4230474465268585, "grad_norm": 0.1624763160943985, "learning_rate": 6.861858886533796e-06, "loss": 0.1264, "step": 3036 }, { "epoch": 0.4231867902180729, "grad_norm": 0.19917252659797668, "learning_rate": 6.859671387380307e-06, "loss": 0.1641, "step": 3037 }, { "epoch": 0.42332613390928725, "grad_norm": 0.16426856815814972, "learning_rate": 6.85748347505029e-06, "loss": 0.1093, "step": 3038 }, { "epoch": 0.4234654776005016, "grad_norm": 0.16863290965557098, "learning_rate": 6.855295150029853e-06, "loss": 0.1254, "step": 3039 }, { "epoch": 0.423604821291716, "grad_norm": 0.09509000927209854, "learning_rate": 6.853106412805192e-06, "loss": 0.1098, "step": 3040 }, { "epoch": 0.4237441649829304, "grad_norm": 0.15374745428562164, "learning_rate": 6.850917263862591e-06, "loss": 0.1437, "step": 3041 }, { "epoch": 0.42388350867414476, "grad_norm": 0.1677982658147812, "learning_rate": 6.848727703688432e-06, "loss": 0.1322, "step": 3042 }, { "epoch": 0.42402285236535914, "grad_norm": 0.14527639746665955, "learning_rate": 6.846537732769185e-06, "loss": 0.1442, "step": 3043 }, { "epoch": 0.4241621960565735, "grad_norm": 0.22356222569942474, "learning_rate": 6.8443473515914105e-06, "loss": 0.1459, "step": 3044 }, { "epoch": 0.4243015397477879, "grad_norm": 0.22787539660930634, "learning_rate": 6.842156560641762e-06, "loss": 0.1134, "step": 3045 }, { "epoch": 0.4244408834390023, "grad_norm": 0.14847098290920258, "learning_rate": 6.839965360406983e-06, "loss": 0.1158, "step": 3046 }, { "epoch": 0.4245802271302167, "grad_norm": 0.1867104023694992, "learning_rate": 6.837773751373908e-06, "loss": 0.1161, "step": 3047 }, { "epoch": 0.4247195708214311, "grad_norm": 0.37160834670066833, "learning_rate": 6.835581734029462e-06, "loss": 0.1497, "step": 3048 }, { "epoch": 0.42485891451264546, "grad_norm": 0.261395663022995, "learning_rate": 6.833389308860662e-06, "loss": 0.128, "step": 3049 }, { "epoch": 0.42499825820385984, "grad_norm": 0.17648519575595856, "learning_rate": 6.831196476354615e-06, "loss": 0.1295, "step": 3050 }, { "epoch": 0.4251376018950742, "grad_norm": 0.31438198685646057, "learning_rate": 6.829003236998517e-06, "loss": 0.1575, "step": 3051 }, { "epoch": 0.4252769455862886, "grad_norm": 0.183246910572052, "learning_rate": 6.8268095912796574e-06, "loss": 0.1106, "step": 3052 }, { "epoch": 0.42541628927750297, "grad_norm": 0.13832691311836243, "learning_rate": 6.824615539685413e-06, "loss": 0.1184, "step": 3053 }, { "epoch": 0.42555563296871735, "grad_norm": 0.22614195942878723, "learning_rate": 6.822421082703253e-06, "loss": 0.1574, "step": 3054 }, { "epoch": 0.4256949766599317, "grad_norm": 0.368586003780365, "learning_rate": 6.820226220820733e-06, "loss": 0.1304, "step": 3055 }, { "epoch": 0.4258343203511461, "grad_norm": 0.22224482893943787, "learning_rate": 6.818030954525505e-06, "loss": 0.1373, "step": 3056 }, { "epoch": 0.4259736640423605, "grad_norm": 0.1800996959209442, "learning_rate": 6.815835284305304e-06, "loss": 0.1541, "step": 3057 }, { "epoch": 0.42611300773357486, "grad_norm": 0.17929446697235107, "learning_rate": 6.8136392106479624e-06, "loss": 0.1368, "step": 3058 }, { "epoch": 0.42625235142478923, "grad_norm": 0.22148728370666504, "learning_rate": 6.81144273404139e-06, "loss": 0.1177, "step": 3059 }, { "epoch": 0.4263916951160036, "grad_norm": 0.17293313145637512, "learning_rate": 6.8092458549736e-06, "loss": 0.1322, "step": 3060 }, { "epoch": 0.426531038807218, "grad_norm": 0.16974391043186188, "learning_rate": 6.807048573932687e-06, "loss": 0.1267, "step": 3061 }, { "epoch": 0.42667038249843237, "grad_norm": 0.28035768866539, "learning_rate": 6.8048508914068355e-06, "loss": 0.1588, "step": 3062 }, { "epoch": 0.42680972618964674, "grad_norm": 0.1713661551475525, "learning_rate": 6.802652807884322e-06, "loss": 0.1568, "step": 3063 }, { "epoch": 0.4269490698808611, "grad_norm": 0.20611871778964996, "learning_rate": 6.80045432385351e-06, "loss": 0.1151, "step": 3064 }, { "epoch": 0.4270884135720755, "grad_norm": 0.12200687825679779, "learning_rate": 6.798255439802852e-06, "loss": 0.1224, "step": 3065 }, { "epoch": 0.42722775726328993, "grad_norm": 0.14617742598056793, "learning_rate": 6.796056156220892e-06, "loss": 0.1186, "step": 3066 }, { "epoch": 0.4273671009545043, "grad_norm": 0.2094227373600006, "learning_rate": 6.793856473596256e-06, "loss": 0.1425, "step": 3067 }, { "epoch": 0.4275064446457187, "grad_norm": 0.1333494335412979, "learning_rate": 6.791656392417666e-06, "loss": 0.1288, "step": 3068 }, { "epoch": 0.42764578833693306, "grad_norm": 0.11671163886785507, "learning_rate": 6.789455913173933e-06, "loss": 0.0864, "step": 3069 }, { "epoch": 0.42778513202814744, "grad_norm": 0.19901835918426514, "learning_rate": 6.787255036353947e-06, "loss": 0.1382, "step": 3070 }, { "epoch": 0.4279244757193618, "grad_norm": 0.15976816415786743, "learning_rate": 6.785053762446696e-06, "loss": 0.1373, "step": 3071 }, { "epoch": 0.4280638194105762, "grad_norm": 0.1290450394153595, "learning_rate": 6.782852091941254e-06, "loss": 0.1279, "step": 3072 }, { "epoch": 0.4282031631017906, "grad_norm": 0.2664297819137573, "learning_rate": 6.780650025326778e-06, "loss": 0.1601, "step": 3073 }, { "epoch": 0.42834250679300495, "grad_norm": 0.17345954477787018, "learning_rate": 6.778447563092523e-06, "loss": 0.1319, "step": 3074 }, { "epoch": 0.42848185048421933, "grad_norm": 0.15402373671531677, "learning_rate": 6.776244705727818e-06, "loss": 0.1523, "step": 3075 }, { "epoch": 0.4286211941754337, "grad_norm": 0.3346590995788574, "learning_rate": 6.774041453722093e-06, "loss": 0.2097, "step": 3076 }, { "epoch": 0.4287605378666481, "grad_norm": 0.12801529467105865, "learning_rate": 6.771837807564861e-06, "loss": 0.1413, "step": 3077 }, { "epoch": 0.42889988155786246, "grad_norm": 0.14243224263191223, "learning_rate": 6.769633767745718e-06, "loss": 0.1289, "step": 3078 }, { "epoch": 0.42903922524907684, "grad_norm": 0.19968926906585693, "learning_rate": 6.767429334754354e-06, "loss": 0.1414, "step": 3079 }, { "epoch": 0.4291785689402912, "grad_norm": 0.1533532440662384, "learning_rate": 6.7652245090805426e-06, "loss": 0.1529, "step": 3080 }, { "epoch": 0.4293179126315056, "grad_norm": 0.2304064929485321, "learning_rate": 6.763019291214146e-06, "loss": 0.1365, "step": 3081 }, { "epoch": 0.42945725632271997, "grad_norm": 0.1785271316766739, "learning_rate": 6.760813681645114e-06, "loss": 0.1259, "step": 3082 }, { "epoch": 0.42959660001393435, "grad_norm": 0.35384202003479004, "learning_rate": 6.758607680863481e-06, "loss": 0.1431, "step": 3083 }, { "epoch": 0.4297359437051487, "grad_norm": 0.23814673721790314, "learning_rate": 6.756401289359371e-06, "loss": 0.1282, "step": 3084 }, { "epoch": 0.4298752873963631, "grad_norm": 0.167812317609787, "learning_rate": 6.754194507622995e-06, "loss": 0.1345, "step": 3085 }, { "epoch": 0.43001463108757754, "grad_norm": 0.2355198860168457, "learning_rate": 6.7519873361446475e-06, "loss": 0.1273, "step": 3086 }, { "epoch": 0.4301539747787919, "grad_norm": 0.269479900598526, "learning_rate": 6.7497797754147134e-06, "loss": 0.1302, "step": 3087 }, { "epoch": 0.4302933184700063, "grad_norm": 0.2426455020904541, "learning_rate": 6.74757182592366e-06, "loss": 0.1157, "step": 3088 }, { "epoch": 0.43043266216122067, "grad_norm": 0.21633130311965942, "learning_rate": 6.7453634881620445e-06, "loss": 0.1434, "step": 3089 }, { "epoch": 0.43057200585243505, "grad_norm": 0.18545222282409668, "learning_rate": 6.743154762620511e-06, "loss": 0.1391, "step": 3090 }, { "epoch": 0.4307113495436494, "grad_norm": 0.1399315744638443, "learning_rate": 6.740945649789784e-06, "loss": 0.1253, "step": 3091 }, { "epoch": 0.4308506932348638, "grad_norm": 0.20994608104228973, "learning_rate": 6.738736150160681e-06, "loss": 0.1516, "step": 3092 }, { "epoch": 0.4309900369260782, "grad_norm": 0.20530052483081818, "learning_rate": 6.736526264224101e-06, "loss": 0.1291, "step": 3093 }, { "epoch": 0.43112938061729256, "grad_norm": 0.18164826929569244, "learning_rate": 6.734315992471032e-06, "loss": 0.1027, "step": 3094 }, { "epoch": 0.43126872430850693, "grad_norm": 0.2038748562335968, "learning_rate": 6.7321053353925446e-06, "loss": 0.1264, "step": 3095 }, { "epoch": 0.4314080679997213, "grad_norm": 0.16326050460338593, "learning_rate": 6.729894293479795e-06, "loss": 0.1458, "step": 3096 }, { "epoch": 0.4315474116909357, "grad_norm": 0.11036693304777145, "learning_rate": 6.727682867224028e-06, "loss": 0.1046, "step": 3097 }, { "epoch": 0.43168675538215007, "grad_norm": 0.16420963406562805, "learning_rate": 6.725471057116573e-06, "loss": 0.1325, "step": 3098 }, { "epoch": 0.43182609907336444, "grad_norm": 0.22513674199581146, "learning_rate": 6.723258863648841e-06, "loss": 0.1587, "step": 3099 }, { "epoch": 0.4319654427645788, "grad_norm": 0.30424875020980835, "learning_rate": 6.72104628731233e-06, "loss": 0.1616, "step": 3100 }, { "epoch": 0.4321047864557932, "grad_norm": 0.16780665516853333, "learning_rate": 6.718833328598629e-06, "loss": 0.1419, "step": 3101 }, { "epoch": 0.4322441301470076, "grad_norm": 0.1567767709493637, "learning_rate": 6.716619987999404e-06, "loss": 0.1119, "step": 3102 }, { "epoch": 0.43238347383822195, "grad_norm": 0.15707024931907654, "learning_rate": 6.714406266006408e-06, "loss": 0.1262, "step": 3103 }, { "epoch": 0.43252281752943633, "grad_norm": 0.2737908661365509, "learning_rate": 6.712192163111481e-06, "loss": 0.1785, "step": 3104 }, { "epoch": 0.4326621612206507, "grad_norm": 0.17449726164340973, "learning_rate": 6.709977679806543e-06, "loss": 0.1291, "step": 3105 }, { "epoch": 0.43280150491186514, "grad_norm": 0.17272408306598663, "learning_rate": 6.707762816583608e-06, "loss": 0.1444, "step": 3106 }, { "epoch": 0.4329408486030795, "grad_norm": 0.1448345184326172, "learning_rate": 6.705547573934759e-06, "loss": 0.1311, "step": 3107 }, { "epoch": 0.4330801922942939, "grad_norm": 0.22135412693023682, "learning_rate": 6.703331952352181e-06, "loss": 0.1168, "step": 3108 }, { "epoch": 0.4332195359855083, "grad_norm": 0.3637744188308716, "learning_rate": 6.70111595232813e-06, "loss": 0.1558, "step": 3109 }, { "epoch": 0.43335887967672265, "grad_norm": 0.1819816678762436, "learning_rate": 6.6988995743549516e-06, "loss": 0.1285, "step": 3110 }, { "epoch": 0.43349822336793703, "grad_norm": 0.14297427237033844, "learning_rate": 6.696682818925074e-06, "loss": 0.1096, "step": 3111 }, { "epoch": 0.4336375670591514, "grad_norm": 0.19032905995845795, "learning_rate": 6.694465686531011e-06, "loss": 0.1227, "step": 3112 }, { "epoch": 0.4337769107503658, "grad_norm": 0.4542831480503082, "learning_rate": 6.692248177665357e-06, "loss": 0.1555, "step": 3113 }, { "epoch": 0.43391625444158016, "grad_norm": 0.1937568485736847, "learning_rate": 6.690030292820792e-06, "loss": 0.0974, "step": 3114 }, { "epoch": 0.43405559813279454, "grad_norm": 0.4179820716381073, "learning_rate": 6.687812032490081e-06, "loss": 0.1475, "step": 3115 }, { "epoch": 0.4341949418240089, "grad_norm": 0.14523938298225403, "learning_rate": 6.685593397166069e-06, "loss": 0.1216, "step": 3116 }, { "epoch": 0.4343342855152233, "grad_norm": 0.21894574165344238, "learning_rate": 6.683374387341688e-06, "loss": 0.1058, "step": 3117 }, { "epoch": 0.4344736292064377, "grad_norm": 0.286627858877182, "learning_rate": 6.681155003509949e-06, "loss": 0.1253, "step": 3118 }, { "epoch": 0.43461297289765205, "grad_norm": 0.4105263650417328, "learning_rate": 6.67893524616395e-06, "loss": 0.1636, "step": 3119 }, { "epoch": 0.4347523165888664, "grad_norm": 0.2660905122756958, "learning_rate": 6.67671511579687e-06, "loss": 0.1382, "step": 3120 }, { "epoch": 0.4348916602800808, "grad_norm": 0.22316141426563263, "learning_rate": 6.67449461290197e-06, "loss": 0.1489, "step": 3121 }, { "epoch": 0.4350310039712952, "grad_norm": 0.2432800680398941, "learning_rate": 6.6722737379726e-06, "loss": 0.1061, "step": 3122 }, { "epoch": 0.43517034766250956, "grad_norm": 0.2524970769882202, "learning_rate": 6.670052491502182e-06, "loss": 0.1514, "step": 3123 }, { "epoch": 0.43530969135372394, "grad_norm": 0.18729178607463837, "learning_rate": 6.667830873984228e-06, "loss": 0.146, "step": 3124 }, { "epoch": 0.4354490350449383, "grad_norm": 0.16247500479221344, "learning_rate": 6.66560888591233e-06, "loss": 0.1433, "step": 3125 }, { "epoch": 0.43558837873615275, "grad_norm": 0.14575420320034027, "learning_rate": 6.663386527780166e-06, "loss": 0.1568, "step": 3126 }, { "epoch": 0.4357277224273671, "grad_norm": 0.1663914918899536, "learning_rate": 6.66116380008149e-06, "loss": 0.1229, "step": 3127 }, { "epoch": 0.4358670661185815, "grad_norm": 0.2992907762527466, "learning_rate": 6.6589407033101435e-06, "loss": 0.187, "step": 3128 }, { "epoch": 0.4360064098097959, "grad_norm": 0.1283617615699768, "learning_rate": 6.656717237960047e-06, "loss": 0.1138, "step": 3129 }, { "epoch": 0.43614575350101026, "grad_norm": 0.1527666449546814, "learning_rate": 6.654493404525204e-06, "loss": 0.1278, "step": 3130 }, { "epoch": 0.43628509719222464, "grad_norm": 0.15057092905044556, "learning_rate": 6.652269203499699e-06, "loss": 0.1359, "step": 3131 }, { "epoch": 0.436424440883439, "grad_norm": 0.1276874989271164, "learning_rate": 6.650044635377698e-06, "loss": 0.1199, "step": 3132 }, { "epoch": 0.4365637845746534, "grad_norm": 0.32442015409469604, "learning_rate": 6.64781970065345e-06, "loss": 0.1292, "step": 3133 }, { "epoch": 0.43670312826586777, "grad_norm": 0.10399647057056427, "learning_rate": 6.645594399821286e-06, "loss": 0.0967, "step": 3134 }, { "epoch": 0.43684247195708215, "grad_norm": 0.07677431404590607, "learning_rate": 6.6433687333756165e-06, "loss": 0.1073, "step": 3135 }, { "epoch": 0.4369818156482965, "grad_norm": 0.12170721590518951, "learning_rate": 6.641142701810932e-06, "loss": 0.0981, "step": 3136 }, { "epoch": 0.4371211593395109, "grad_norm": 0.15994662046432495, "learning_rate": 6.638916305621807e-06, "loss": 0.1301, "step": 3137 }, { "epoch": 0.4372605030307253, "grad_norm": 0.23478558659553528, "learning_rate": 6.636689545302898e-06, "loss": 0.147, "step": 3138 }, { "epoch": 0.43739984672193966, "grad_norm": 0.16150356829166412, "learning_rate": 6.634462421348935e-06, "loss": 0.144, "step": 3139 }, { "epoch": 0.43753919041315403, "grad_norm": 0.3156040906906128, "learning_rate": 6.63223493425474e-06, "loss": 0.1713, "step": 3140 }, { "epoch": 0.4376785341043684, "grad_norm": 0.2142297923564911, "learning_rate": 6.630007084515205e-06, "loss": 0.1224, "step": 3141 }, { "epoch": 0.4378178777955828, "grad_norm": 0.13902057707309723, "learning_rate": 6.627778872625311e-06, "loss": 0.1204, "step": 3142 }, { "epoch": 0.43795722148679717, "grad_norm": 0.28208911418914795, "learning_rate": 6.625550299080115e-06, "loss": 0.1308, "step": 3143 }, { "epoch": 0.43809656517801154, "grad_norm": 0.0836385041475296, "learning_rate": 6.6233213643747525e-06, "loss": 0.0879, "step": 3144 }, { "epoch": 0.4382359088692259, "grad_norm": 0.12254872173070908, "learning_rate": 6.621092069004445e-06, "loss": 0.135, "step": 3145 }, { "epoch": 0.4383752525604403, "grad_norm": 0.13548098504543304, "learning_rate": 6.618862413464491e-06, "loss": 0.1144, "step": 3146 }, { "epoch": 0.43851459625165473, "grad_norm": 0.33637890219688416, "learning_rate": 6.616632398250266e-06, "loss": 0.1521, "step": 3147 }, { "epoch": 0.4386539399428691, "grad_norm": 0.1381320357322693, "learning_rate": 6.614402023857231e-06, "loss": 0.1174, "step": 3148 }, { "epoch": 0.4387932836340835, "grad_norm": 0.11223619431257248, "learning_rate": 6.612171290780925e-06, "loss": 0.1124, "step": 3149 }, { "epoch": 0.43893262732529786, "grad_norm": 0.14760133624076843, "learning_rate": 6.6099401995169635e-06, "loss": 0.1232, "step": 3150 }, { "epoch": 0.43907197101651224, "grad_norm": 0.2936754822731018, "learning_rate": 6.607708750561046e-06, "loss": 0.1444, "step": 3151 }, { "epoch": 0.4392113147077266, "grad_norm": 0.1691160798072815, "learning_rate": 6.605476944408948e-06, "loss": 0.1409, "step": 3152 }, { "epoch": 0.439350658398941, "grad_norm": 0.3399117887020111, "learning_rate": 6.603244781556527e-06, "loss": 0.1744, "step": 3153 }, { "epoch": 0.4394900020901554, "grad_norm": 0.1384054720401764, "learning_rate": 6.601012262499718e-06, "loss": 0.1278, "step": 3154 }, { "epoch": 0.43962934578136975, "grad_norm": 0.18980196118354797, "learning_rate": 6.598779387734535e-06, "loss": 0.157, "step": 3155 }, { "epoch": 0.43976868947258413, "grad_norm": 0.19681502878665924, "learning_rate": 6.596546157757075e-06, "loss": 0.1382, "step": 3156 }, { "epoch": 0.4399080331637985, "grad_norm": 0.1502661108970642, "learning_rate": 6.594312573063506e-06, "loss": 0.1487, "step": 3157 }, { "epoch": 0.4400473768550129, "grad_norm": 0.1571062058210373, "learning_rate": 6.592078634150084e-06, "loss": 0.1372, "step": 3158 }, { "epoch": 0.44018672054622726, "grad_norm": 0.10936282575130463, "learning_rate": 6.589844341513137e-06, "loss": 0.1205, "step": 3159 }, { "epoch": 0.44032606423744164, "grad_norm": 0.10101167857646942, "learning_rate": 6.587609695649073e-06, "loss": 0.1148, "step": 3160 }, { "epoch": 0.440465407928656, "grad_norm": 0.16974858939647675, "learning_rate": 6.585374697054382e-06, "loss": 0.1569, "step": 3161 }, { "epoch": 0.4406047516198704, "grad_norm": 0.24061962962150574, "learning_rate": 6.583139346225627e-06, "loss": 0.1209, "step": 3162 }, { "epoch": 0.44074409531108477, "grad_norm": 0.12812326848506927, "learning_rate": 6.580903643659453e-06, "loss": 0.1196, "step": 3163 }, { "epoch": 0.44088343900229915, "grad_norm": 0.14221321046352386, "learning_rate": 6.578667589852583e-06, "loss": 0.129, "step": 3164 }, { "epoch": 0.4410227826935135, "grad_norm": 0.15541553497314453, "learning_rate": 6.576431185301815e-06, "loss": 0.1425, "step": 3165 }, { "epoch": 0.4411621263847279, "grad_norm": 0.16001121699810028, "learning_rate": 6.574194430504027e-06, "loss": 0.1233, "step": 3166 }, { "epoch": 0.44130147007594234, "grad_norm": 0.22327320277690887, "learning_rate": 6.571957325956178e-06, "loss": 0.1726, "step": 3167 }, { "epoch": 0.4414408137671567, "grad_norm": 0.17504824697971344, "learning_rate": 6.569719872155299e-06, "loss": 0.1393, "step": 3168 }, { "epoch": 0.4415801574583711, "grad_norm": 0.1398535817861557, "learning_rate": 6.567482069598503e-06, "loss": 0.1403, "step": 3169 }, { "epoch": 0.44171950114958547, "grad_norm": 0.28090766072273254, "learning_rate": 6.565243918782975e-06, "loss": 0.1521, "step": 3170 }, { "epoch": 0.44185884484079985, "grad_norm": 0.13865123689174652, "learning_rate": 6.563005420205984e-06, "loss": 0.1074, "step": 3171 }, { "epoch": 0.4419981885320142, "grad_norm": 0.1354800909757614, "learning_rate": 6.560766574364874e-06, "loss": 0.1243, "step": 3172 }, { "epoch": 0.4421375322232286, "grad_norm": 0.18059693276882172, "learning_rate": 6.558527381757063e-06, "loss": 0.1541, "step": 3173 }, { "epoch": 0.442276875914443, "grad_norm": 0.14844807982444763, "learning_rate": 6.55628784288005e-06, "loss": 0.1415, "step": 3174 }, { "epoch": 0.44241621960565736, "grad_norm": 0.16252973675727844, "learning_rate": 6.5540479582314085e-06, "loss": 0.1251, "step": 3175 }, { "epoch": 0.44255556329687173, "grad_norm": 0.10280066728591919, "learning_rate": 6.55180772830879e-06, "loss": 0.1042, "step": 3176 }, { "epoch": 0.4426949069880861, "grad_norm": 0.13284097611904144, "learning_rate": 6.5495671536099235e-06, "loss": 0.1149, "step": 3177 }, { "epoch": 0.4428342506793005, "grad_norm": 0.1321290284395218, "learning_rate": 6.5473262346326125e-06, "loss": 0.1197, "step": 3178 }, { "epoch": 0.44297359437051487, "grad_norm": 0.2157050520181656, "learning_rate": 6.545084971874738e-06, "loss": 0.1489, "step": 3179 }, { "epoch": 0.44311293806172924, "grad_norm": 0.19520863890647888, "learning_rate": 6.542843365834257e-06, "loss": 0.1614, "step": 3180 }, { "epoch": 0.4432522817529436, "grad_norm": 0.199344664812088, "learning_rate": 6.540601417009205e-06, "loss": 0.1022, "step": 3181 }, { "epoch": 0.443391625444158, "grad_norm": 0.17459839582443237, "learning_rate": 6.538359125897691e-06, "loss": 0.1387, "step": 3182 }, { "epoch": 0.4435309691353724, "grad_norm": 0.16271750628948212, "learning_rate": 6.536116492997899e-06, "loss": 0.1413, "step": 3183 }, { "epoch": 0.44367031282658675, "grad_norm": 0.1592579036951065, "learning_rate": 6.5338735188080916e-06, "loss": 0.1526, "step": 3184 }, { "epoch": 0.44380965651780113, "grad_norm": 0.24836577475070953, "learning_rate": 6.53163020382661e-06, "loss": 0.1502, "step": 3185 }, { "epoch": 0.4439490002090155, "grad_norm": 0.1944848746061325, "learning_rate": 6.529386548551864e-06, "loss": 0.1314, "step": 3186 }, { "epoch": 0.44408834390022994, "grad_norm": 0.11355461180210114, "learning_rate": 6.5271425534823415e-06, "loss": 0.0986, "step": 3187 }, { "epoch": 0.4442276875914443, "grad_norm": 0.0938311442732811, "learning_rate": 6.524898219116612e-06, "loss": 0.1106, "step": 3188 }, { "epoch": 0.4443670312826587, "grad_norm": 0.2226783186197281, "learning_rate": 6.522653545953309e-06, "loss": 0.1323, "step": 3189 }, { "epoch": 0.4445063749738731, "grad_norm": 0.11901045590639114, "learning_rate": 6.520408534491154e-06, "loss": 0.1066, "step": 3190 }, { "epoch": 0.44464571866508745, "grad_norm": 0.178849458694458, "learning_rate": 6.518163185228932e-06, "loss": 0.1388, "step": 3191 }, { "epoch": 0.44478506235630183, "grad_norm": 0.25223293900489807, "learning_rate": 6.515917498665511e-06, "loss": 0.1832, "step": 3192 }, { "epoch": 0.4449244060475162, "grad_norm": 0.2638988196849823, "learning_rate": 6.51367147529983e-06, "loss": 0.1148, "step": 3193 }, { "epoch": 0.4450637497387306, "grad_norm": 0.19738714396953583, "learning_rate": 6.511425115630906e-06, "loss": 0.1213, "step": 3194 }, { "epoch": 0.44520309342994496, "grad_norm": 0.16604283452033997, "learning_rate": 6.509178420157828e-06, "loss": 0.1129, "step": 3195 }, { "epoch": 0.44534243712115934, "grad_norm": 0.1953386813402176, "learning_rate": 6.506931389379759e-06, "loss": 0.1387, "step": 3196 }, { "epoch": 0.4454817808123737, "grad_norm": 0.22469967603683472, "learning_rate": 6.50468402379594e-06, "loss": 0.1445, "step": 3197 }, { "epoch": 0.4456211245035881, "grad_norm": 0.233474999666214, "learning_rate": 6.502436323905683e-06, "loss": 0.1577, "step": 3198 }, { "epoch": 0.4457604681948025, "grad_norm": 0.16659189760684967, "learning_rate": 6.500188290208377e-06, "loss": 0.1116, "step": 3199 }, { "epoch": 0.44589981188601685, "grad_norm": 0.1397882103919983, "learning_rate": 6.49793992320348e-06, "loss": 0.1382, "step": 3200 }, { "epoch": 0.44603915557723123, "grad_norm": 0.2806037664413452, "learning_rate": 6.495691223390534e-06, "loss": 0.1271, "step": 3201 }, { "epoch": 0.4461784992684456, "grad_norm": 0.10804932564496994, "learning_rate": 6.4934421912691445e-06, "loss": 0.1176, "step": 3202 }, { "epoch": 0.44631784295966, "grad_norm": 0.15859892964363098, "learning_rate": 6.4911928273389946e-06, "loss": 0.1687, "step": 3203 }, { "epoch": 0.44645718665087436, "grad_norm": 0.10387219488620758, "learning_rate": 6.488943132099845e-06, "loss": 0.1104, "step": 3204 }, { "epoch": 0.44659653034208874, "grad_norm": 0.23420436680316925, "learning_rate": 6.486693106051523e-06, "loss": 0.1733, "step": 3205 }, { "epoch": 0.4467358740333031, "grad_norm": 0.12187747657299042, "learning_rate": 6.484442749693935e-06, "loss": 0.1268, "step": 3206 }, { "epoch": 0.44687521772451755, "grad_norm": 0.2260984629392624, "learning_rate": 6.482192063527058e-06, "loss": 0.1369, "step": 3207 }, { "epoch": 0.4470145614157319, "grad_norm": 0.1589089334011078, "learning_rate": 6.479941048050944e-06, "loss": 0.1416, "step": 3208 }, { "epoch": 0.4471539051069463, "grad_norm": 0.10014448314905167, "learning_rate": 6.477689703765717e-06, "loss": 0.1353, "step": 3209 }, { "epoch": 0.4472932487981607, "grad_norm": 0.11694809049367905, "learning_rate": 6.475438031171574e-06, "loss": 0.1039, "step": 3210 }, { "epoch": 0.44743259248937506, "grad_norm": 0.11254328489303589, "learning_rate": 6.4731860307687845e-06, "loss": 0.1327, "step": 3211 }, { "epoch": 0.44757193618058944, "grad_norm": 0.23772016167640686, "learning_rate": 6.470933703057693e-06, "loss": 0.1372, "step": 3212 }, { "epoch": 0.4477112798718038, "grad_norm": 0.16105899214744568, "learning_rate": 6.468681048538715e-06, "loss": 0.1129, "step": 3213 }, { "epoch": 0.4478506235630182, "grad_norm": 0.19982244074344635, "learning_rate": 6.4664280677123385e-06, "loss": 0.1795, "step": 3214 }, { "epoch": 0.44798996725423257, "grad_norm": 0.10810845345258713, "learning_rate": 6.464174761079124e-06, "loss": 0.118, "step": 3215 }, { "epoch": 0.44812931094544695, "grad_norm": 0.20746327936649323, "learning_rate": 6.461921129139704e-06, "loss": 0.1667, "step": 3216 }, { "epoch": 0.4482686546366613, "grad_norm": 0.1270713061094284, "learning_rate": 6.459667172394788e-06, "loss": 0.1101, "step": 3217 }, { "epoch": 0.4484079983278757, "grad_norm": 0.21280142664909363, "learning_rate": 6.4574128913451495e-06, "loss": 0.1833, "step": 3218 }, { "epoch": 0.4485473420190901, "grad_norm": 0.34720247983932495, "learning_rate": 6.455158286491641e-06, "loss": 0.2116, "step": 3219 }, { "epoch": 0.44868668571030446, "grad_norm": 0.15756389498710632, "learning_rate": 6.452903358335182e-06, "loss": 0.1297, "step": 3220 }, { "epoch": 0.44882602940151883, "grad_norm": 0.33073070645332336, "learning_rate": 6.450648107376767e-06, "loss": 0.2186, "step": 3221 }, { "epoch": 0.4489653730927332, "grad_norm": 0.14817583560943604, "learning_rate": 6.4483925341174625e-06, "loss": 0.1128, "step": 3222 }, { "epoch": 0.4491047167839476, "grad_norm": 0.2986081838607788, "learning_rate": 6.4461366390584025e-06, "loss": 0.1357, "step": 3223 }, { "epoch": 0.44924406047516197, "grad_norm": 0.1506604254245758, "learning_rate": 6.443880422700799e-06, "loss": 0.1402, "step": 3224 }, { "epoch": 0.44938340416637634, "grad_norm": 0.18996131420135498, "learning_rate": 6.441623885545929e-06, "loss": 0.113, "step": 3225 }, { "epoch": 0.4495227478575907, "grad_norm": 0.2749135196208954, "learning_rate": 6.439367028095145e-06, "loss": 0.1427, "step": 3226 }, { "epoch": 0.44966209154880515, "grad_norm": 0.15692585706710815, "learning_rate": 6.437109850849868e-06, "loss": 0.1281, "step": 3227 }, { "epoch": 0.44980143524001953, "grad_norm": 0.2186313271522522, "learning_rate": 6.434852354311592e-06, "loss": 0.1618, "step": 3228 }, { "epoch": 0.4499407789312339, "grad_norm": 0.21636012196540833, "learning_rate": 6.432594538981881e-06, "loss": 0.1485, "step": 3229 }, { "epoch": 0.4500801226224483, "grad_norm": 0.20476952195167542, "learning_rate": 6.430336405362371e-06, "loss": 0.1571, "step": 3230 }, { "epoch": 0.45021946631366266, "grad_norm": 0.256698876619339, "learning_rate": 6.428077953954766e-06, "loss": 0.134, "step": 3231 }, { "epoch": 0.45035881000487704, "grad_norm": 0.13581086695194244, "learning_rate": 6.425819185260842e-06, "loss": 0.1238, "step": 3232 }, { "epoch": 0.4504981536960914, "grad_norm": 0.3012063205242157, "learning_rate": 6.42356009978245e-06, "loss": 0.1915, "step": 3233 }, { "epoch": 0.4506374973873058, "grad_norm": 0.18102411925792694, "learning_rate": 6.421300698021502e-06, "loss": 0.1243, "step": 3234 }, { "epoch": 0.4507768410785202, "grad_norm": 0.1623261570930481, "learning_rate": 6.419040980479989e-06, "loss": 0.1274, "step": 3235 }, { "epoch": 0.45091618476973455, "grad_norm": 0.16351671516895294, "learning_rate": 6.416780947659967e-06, "loss": 0.1438, "step": 3236 }, { "epoch": 0.45105552846094893, "grad_norm": 0.5277675986289978, "learning_rate": 6.4145206000635626e-06, "loss": 0.1799, "step": 3237 }, { "epoch": 0.4511948721521633, "grad_norm": 0.1260432004928589, "learning_rate": 6.412259938192978e-06, "loss": 0.1062, "step": 3238 }, { "epoch": 0.4513342158433777, "grad_norm": 0.26408588886260986, "learning_rate": 6.4099989625504756e-06, "loss": 0.1539, "step": 3239 }, { "epoch": 0.45147355953459206, "grad_norm": 0.1447526514530182, "learning_rate": 6.4077376736383954e-06, "loss": 0.123, "step": 3240 }, { "epoch": 0.45161290322580644, "grad_norm": 0.291225790977478, "learning_rate": 6.405476071959142e-06, "loss": 0.1674, "step": 3241 }, { "epoch": 0.4517522469170208, "grad_norm": 0.1876283884048462, "learning_rate": 6.403214158015194e-06, "loss": 0.1338, "step": 3242 }, { "epoch": 0.4518915906082352, "grad_norm": 0.2181694656610489, "learning_rate": 6.400951932309097e-06, "loss": 0.1435, "step": 3243 }, { "epoch": 0.45203093429944957, "grad_norm": 0.16985490918159485, "learning_rate": 6.3986893953434625e-06, "loss": 0.1238, "step": 3244 }, { "epoch": 0.45217027799066395, "grad_norm": 0.25200313329696655, "learning_rate": 6.396426547620979e-06, "loss": 0.1402, "step": 3245 }, { "epoch": 0.4523096216818783, "grad_norm": 0.1951931118965149, "learning_rate": 6.394163389644397e-06, "loss": 0.1376, "step": 3246 }, { "epoch": 0.45244896537309276, "grad_norm": 0.14712505042552948, "learning_rate": 6.391899921916538e-06, "loss": 0.1322, "step": 3247 }, { "epoch": 0.45258830906430714, "grad_norm": 0.28999030590057373, "learning_rate": 6.389636144940294e-06, "loss": 0.1258, "step": 3248 }, { "epoch": 0.4527276527555215, "grad_norm": 0.16692449152469635, "learning_rate": 6.387372059218626e-06, "loss": 0.1145, "step": 3249 }, { "epoch": 0.4528669964467359, "grad_norm": 0.16448307037353516, "learning_rate": 6.38510766525456e-06, "loss": 0.162, "step": 3250 }, { "epoch": 0.45300634013795027, "grad_norm": 0.16455039381980896, "learning_rate": 6.382842963551193e-06, "loss": 0.1197, "step": 3251 }, { "epoch": 0.45314568382916465, "grad_norm": 0.23756764829158783, "learning_rate": 6.380577954611691e-06, "loss": 0.1327, "step": 3252 }, { "epoch": 0.453285027520379, "grad_norm": 0.13708841800689697, "learning_rate": 6.378312638939286e-06, "loss": 0.1253, "step": 3253 }, { "epoch": 0.4534243712115934, "grad_norm": 0.4298514723777771, "learning_rate": 6.3760470170372815e-06, "loss": 0.1939, "step": 3254 }, { "epoch": 0.4535637149028078, "grad_norm": 0.13418303430080414, "learning_rate": 6.373781089409043e-06, "loss": 0.1127, "step": 3255 }, { "epoch": 0.45370305859402216, "grad_norm": 0.17134308815002441, "learning_rate": 6.371514856558013e-06, "loss": 0.1353, "step": 3256 }, { "epoch": 0.45384240228523653, "grad_norm": 0.25277695059776306, "learning_rate": 6.369248318987692e-06, "loss": 0.1801, "step": 3257 }, { "epoch": 0.4539817459764509, "grad_norm": 0.20955361425876617, "learning_rate": 6.3669814772016555e-06, "loss": 0.1344, "step": 3258 }, { "epoch": 0.4541210896676653, "grad_norm": 0.1976916491985321, "learning_rate": 6.3647143317035445e-06, "loss": 0.123, "step": 3259 }, { "epoch": 0.45426043335887967, "grad_norm": 0.21705274283885956, "learning_rate": 6.362446882997064e-06, "loss": 0.1376, "step": 3260 }, { "epoch": 0.45439977705009404, "grad_norm": 0.14750711619853973, "learning_rate": 6.360179131585993e-06, "loss": 0.1227, "step": 3261 }, { "epoch": 0.4545391207413084, "grad_norm": 0.25485292077064514, "learning_rate": 6.357911077974173e-06, "loss": 0.1384, "step": 3262 }, { "epoch": 0.4546784644325228, "grad_norm": 0.22049055993556976, "learning_rate": 6.355642722665512e-06, "loss": 0.1372, "step": 3263 }, { "epoch": 0.4548178081237372, "grad_norm": 0.3097513020038605, "learning_rate": 6.353374066163988e-06, "loss": 0.1613, "step": 3264 }, { "epoch": 0.45495715181495155, "grad_norm": 0.3529205024242401, "learning_rate": 6.351105108973644e-06, "loss": 0.1915, "step": 3265 }, { "epoch": 0.45509649550616593, "grad_norm": 0.23625819385051727, "learning_rate": 6.34883585159859e-06, "loss": 0.1324, "step": 3266 }, { "epoch": 0.45523583919738037, "grad_norm": 0.1530512422323227, "learning_rate": 6.346566294543008e-06, "loss": 0.1242, "step": 3267 }, { "epoch": 0.45537518288859474, "grad_norm": 0.20142009854316711, "learning_rate": 6.344296438311134e-06, "loss": 0.1906, "step": 3268 }, { "epoch": 0.4555145265798091, "grad_norm": 0.2114061415195465, "learning_rate": 6.342026283407286e-06, "loss": 0.1498, "step": 3269 }, { "epoch": 0.4556538702710235, "grad_norm": 0.20078985393047333, "learning_rate": 6.339755830335834e-06, "loss": 0.12, "step": 3270 }, { "epoch": 0.4557932139622379, "grad_norm": 0.18368464708328247, "learning_rate": 6.337485079601224e-06, "loss": 0.1017, "step": 3271 }, { "epoch": 0.45593255765345225, "grad_norm": 0.291920006275177, "learning_rate": 6.335214031707966e-06, "loss": 0.1309, "step": 3272 }, { "epoch": 0.45607190134466663, "grad_norm": 0.2976363003253937, "learning_rate": 6.332942687160632e-06, "loss": 0.1448, "step": 3273 }, { "epoch": 0.456211245035881, "grad_norm": 0.26682111620903015, "learning_rate": 6.3306710464638645e-06, "loss": 0.1765, "step": 3274 }, { "epoch": 0.4563505887270954, "grad_norm": 0.14264047145843506, "learning_rate": 6.328399110122371e-06, "loss": 0.1458, "step": 3275 }, { "epoch": 0.45648993241830976, "grad_norm": 0.11889311671257019, "learning_rate": 6.3261268786409225e-06, "loss": 0.127, "step": 3276 }, { "epoch": 0.45662927610952414, "grad_norm": 0.11054948717355728, "learning_rate": 6.323854352524359e-06, "loss": 0.1166, "step": 3277 }, { "epoch": 0.4567686198007385, "grad_norm": 0.16746924817562103, "learning_rate": 6.321581532277581e-06, "loss": 0.1522, "step": 3278 }, { "epoch": 0.4569079634919529, "grad_norm": 0.21515530347824097, "learning_rate": 6.319308418405559e-06, "loss": 0.1544, "step": 3279 }, { "epoch": 0.4570473071831673, "grad_norm": 0.17249557375907898, "learning_rate": 6.317035011413327e-06, "loss": 0.1371, "step": 3280 }, { "epoch": 0.45718665087438165, "grad_norm": 0.18045377731323242, "learning_rate": 6.314761311805983e-06, "loss": 0.1224, "step": 3281 }, { "epoch": 0.45732599456559603, "grad_norm": 0.2033880203962326, "learning_rate": 6.312487320088693e-06, "loss": 0.1439, "step": 3282 }, { "epoch": 0.4574653382568104, "grad_norm": 0.13437709212303162, "learning_rate": 6.3102130367666855e-06, "loss": 0.1483, "step": 3283 }, { "epoch": 0.4576046819480248, "grad_norm": 0.17908070981502533, "learning_rate": 6.307938462345253e-06, "loss": 0.1746, "step": 3284 }, { "epoch": 0.45774402563923916, "grad_norm": 0.24949093163013458, "learning_rate": 6.305663597329756e-06, "loss": 0.174, "step": 3285 }, { "epoch": 0.45788336933045354, "grad_norm": 0.2558707892894745, "learning_rate": 6.303388442225616e-06, "loss": 0.1608, "step": 3286 }, { "epoch": 0.45802271302166797, "grad_norm": 0.11487262696027756, "learning_rate": 6.30111299753832e-06, "loss": 0.1123, "step": 3287 }, { "epoch": 0.45816205671288235, "grad_norm": 0.13300736248493195, "learning_rate": 6.298837263773423e-06, "loss": 0.1025, "step": 3288 }, { "epoch": 0.4583014004040967, "grad_norm": 0.14497162401676178, "learning_rate": 6.2965612414365365e-06, "loss": 0.1165, "step": 3289 }, { "epoch": 0.4584407440953111, "grad_norm": 0.0841308981180191, "learning_rate": 6.294284931033344e-06, "loss": 0.0965, "step": 3290 }, { "epoch": 0.4585800877865255, "grad_norm": 0.25386694073677063, "learning_rate": 6.292008333069589e-06, "loss": 0.1474, "step": 3291 }, { "epoch": 0.45871943147773986, "grad_norm": 0.13579890131950378, "learning_rate": 6.289731448051079e-06, "loss": 0.1345, "step": 3292 }, { "epoch": 0.45885877516895424, "grad_norm": 0.20814983546733856, "learning_rate": 6.287454276483687e-06, "loss": 0.1337, "step": 3293 }, { "epoch": 0.4589981188601686, "grad_norm": 0.22002843022346497, "learning_rate": 6.2851768188733485e-06, "loss": 0.1474, "step": 3294 }, { "epoch": 0.459137462551383, "grad_norm": 0.2601630389690399, "learning_rate": 6.282899075726061e-06, "loss": 0.1251, "step": 3295 }, { "epoch": 0.45927680624259737, "grad_norm": 0.12278737127780914, "learning_rate": 6.280621047547888e-06, "loss": 0.1156, "step": 3296 }, { "epoch": 0.45941614993381175, "grad_norm": 0.2866038680076599, "learning_rate": 6.278342734844955e-06, "loss": 0.1425, "step": 3297 }, { "epoch": 0.4595554936250261, "grad_norm": 0.26422634720802307, "learning_rate": 6.276064138123453e-06, "loss": 0.1321, "step": 3298 }, { "epoch": 0.4596948373162405, "grad_norm": 0.16841620206832886, "learning_rate": 6.27378525788963e-06, "loss": 0.1398, "step": 3299 }, { "epoch": 0.4598341810074549, "grad_norm": 0.12601648271083832, "learning_rate": 6.271506094649804e-06, "loss": 0.1339, "step": 3300 }, { "epoch": 0.45997352469866926, "grad_norm": 0.16070561110973358, "learning_rate": 6.269226648910356e-06, "loss": 0.1175, "step": 3301 }, { "epoch": 0.46011286838988363, "grad_norm": 0.16739791631698608, "learning_rate": 6.266946921177721e-06, "loss": 0.1588, "step": 3302 }, { "epoch": 0.460252212081098, "grad_norm": 0.19202779233455658, "learning_rate": 6.264666911958404e-06, "loss": 0.1519, "step": 3303 }, { "epoch": 0.4603915557723124, "grad_norm": 0.1341973841190338, "learning_rate": 6.262386621758975e-06, "loss": 0.1083, "step": 3304 }, { "epoch": 0.46053089946352677, "grad_norm": 0.2388978898525238, "learning_rate": 6.2601060510860565e-06, "loss": 0.1457, "step": 3305 }, { "epoch": 0.46067024315474114, "grad_norm": 0.16637100279331207, "learning_rate": 6.2578252004463436e-06, "loss": 0.1513, "step": 3306 }, { "epoch": 0.4608095868459556, "grad_norm": 0.18851950764656067, "learning_rate": 6.255544070346588e-06, "loss": 0.1139, "step": 3307 }, { "epoch": 0.46094893053716995, "grad_norm": 0.14182370901107788, "learning_rate": 6.2532626612936035e-06, "loss": 0.1411, "step": 3308 }, { "epoch": 0.46108827422838433, "grad_norm": 0.17772969603538513, "learning_rate": 6.250980973794268e-06, "loss": 0.1388, "step": 3309 }, { "epoch": 0.4612276179195987, "grad_norm": 0.22948870062828064, "learning_rate": 6.248699008355522e-06, "loss": 0.1688, "step": 3310 }, { "epoch": 0.4613669616108131, "grad_norm": 0.16563570499420166, "learning_rate": 6.2464167654843645e-06, "loss": 0.1204, "step": 3311 }, { "epoch": 0.46150630530202746, "grad_norm": 0.29703009128570557, "learning_rate": 6.2441342456878565e-06, "loss": 0.1556, "step": 3312 }, { "epoch": 0.46164564899324184, "grad_norm": 0.1892867535352707, "learning_rate": 6.2418514494731245e-06, "loss": 0.1167, "step": 3313 }, { "epoch": 0.4617849926844562, "grad_norm": 0.2869168817996979, "learning_rate": 6.239568377347352e-06, "loss": 0.1512, "step": 3314 }, { "epoch": 0.4619243363756706, "grad_norm": 0.09453748166561127, "learning_rate": 6.237285029817786e-06, "loss": 0.1115, "step": 3315 }, { "epoch": 0.462063680066885, "grad_norm": 0.17260205745697021, "learning_rate": 6.235001407391732e-06, "loss": 0.1383, "step": 3316 }, { "epoch": 0.46220302375809935, "grad_norm": 0.15045489370822906, "learning_rate": 6.232717510576563e-06, "loss": 0.1269, "step": 3317 }, { "epoch": 0.46234236744931373, "grad_norm": 0.30751940608024597, "learning_rate": 6.230433339879706e-06, "loss": 0.1616, "step": 3318 }, { "epoch": 0.4624817111405281, "grad_norm": 0.1989762932062149, "learning_rate": 6.228148895808652e-06, "loss": 0.1228, "step": 3319 }, { "epoch": 0.4626210548317425, "grad_norm": 0.23544441163539886, "learning_rate": 6.225864178870954e-06, "loss": 0.1367, "step": 3320 }, { "epoch": 0.46276039852295686, "grad_norm": 0.18352572619915009, "learning_rate": 6.22357918957422e-06, "loss": 0.1055, "step": 3321 }, { "epoch": 0.46289974221417124, "grad_norm": 0.17263270914554596, "learning_rate": 6.221293928426128e-06, "loss": 0.1232, "step": 3322 }, { "epoch": 0.4630390859053856, "grad_norm": 0.2093302458524704, "learning_rate": 6.219008395934405e-06, "loss": 0.1582, "step": 3323 }, { "epoch": 0.4631784295966, "grad_norm": 0.1758924424648285, "learning_rate": 6.216722592606847e-06, "loss": 0.1353, "step": 3324 }, { "epoch": 0.46331777328781437, "grad_norm": 0.24146918952465057, "learning_rate": 6.214436518951308e-06, "loss": 0.0991, "step": 3325 }, { "epoch": 0.46345711697902875, "grad_norm": 0.654606282711029, "learning_rate": 6.212150175475701e-06, "loss": 0.1871, "step": 3326 }, { "epoch": 0.4635964606702432, "grad_norm": 0.1400870382785797, "learning_rate": 6.209863562687998e-06, "loss": 0.1502, "step": 3327 }, { "epoch": 0.46373580436145756, "grad_norm": 0.17828631401062012, "learning_rate": 6.207576681096233e-06, "loss": 0.1225, "step": 3328 }, { "epoch": 0.46387514805267194, "grad_norm": 0.23287925124168396, "learning_rate": 6.2052895312085e-06, "loss": 0.1282, "step": 3329 }, { "epoch": 0.4640144917438863, "grad_norm": 0.2737099528312683, "learning_rate": 6.203002113532949e-06, "loss": 0.1182, "step": 3330 }, { "epoch": 0.4641538354351007, "grad_norm": 0.31693360209465027, "learning_rate": 6.200714428577794e-06, "loss": 0.1263, "step": 3331 }, { "epoch": 0.46429317912631507, "grad_norm": 0.2761116921901703, "learning_rate": 6.198426476851305e-06, "loss": 0.1833, "step": 3332 }, { "epoch": 0.46443252281752945, "grad_norm": 0.07989468425512314, "learning_rate": 6.196138258861815e-06, "loss": 0.1149, "step": 3333 }, { "epoch": 0.4645718665087438, "grad_norm": 0.19951431453227997, "learning_rate": 6.193849775117709e-06, "loss": 0.1181, "step": 3334 }, { "epoch": 0.4647112101999582, "grad_norm": 0.3162323236465454, "learning_rate": 6.191561026127444e-06, "loss": 0.1171, "step": 3335 }, { "epoch": 0.4648505538911726, "grad_norm": 0.25147172808647156, "learning_rate": 6.18927201239952e-06, "loss": 0.1507, "step": 3336 }, { "epoch": 0.46498989758238696, "grad_norm": 0.1657368242740631, "learning_rate": 6.186982734442505e-06, "loss": 0.1338, "step": 3337 }, { "epoch": 0.46512924127360133, "grad_norm": 0.22206725180149078, "learning_rate": 6.184693192765028e-06, "loss": 0.1694, "step": 3338 }, { "epoch": 0.4652685849648157, "grad_norm": 0.1119595393538475, "learning_rate": 6.1824033878757685e-06, "loss": 0.1112, "step": 3339 }, { "epoch": 0.4654079286560301, "grad_norm": 0.1870494782924652, "learning_rate": 6.180113320283473e-06, "loss": 0.1377, "step": 3340 }, { "epoch": 0.46554727234724447, "grad_norm": 0.15955142676830292, "learning_rate": 6.177822990496939e-06, "loss": 0.147, "step": 3341 }, { "epoch": 0.46568661603845884, "grad_norm": 0.12810292840003967, "learning_rate": 6.175532399025027e-06, "loss": 0.1289, "step": 3342 }, { "epoch": 0.4658259597296732, "grad_norm": 0.18825647234916687, "learning_rate": 6.173241546376654e-06, "loss": 0.1372, "step": 3343 }, { "epoch": 0.4659653034208876, "grad_norm": 0.2289009392261505, "learning_rate": 6.170950433060795e-06, "loss": 0.1801, "step": 3344 }, { "epoch": 0.466104647112102, "grad_norm": 0.1456526219844818, "learning_rate": 6.168659059586483e-06, "loss": 0.1298, "step": 3345 }, { "epoch": 0.46624399080331635, "grad_norm": 0.1477270871400833, "learning_rate": 6.166367426462808e-06, "loss": 0.131, "step": 3346 }, { "epoch": 0.46638333449453073, "grad_norm": 0.09514472633600235, "learning_rate": 6.16407553419892e-06, "loss": 0.1032, "step": 3347 }, { "epoch": 0.46652267818574517, "grad_norm": 0.16746631264686584, "learning_rate": 6.161783383304024e-06, "loss": 0.1381, "step": 3348 }, { "epoch": 0.46666202187695954, "grad_norm": 0.16996827721595764, "learning_rate": 6.159490974287386e-06, "loss": 0.1368, "step": 3349 }, { "epoch": 0.4668013655681739, "grad_norm": 0.19449900090694427, "learning_rate": 6.157198307658323e-06, "loss": 0.1312, "step": 3350 }, { "epoch": 0.4669407092593883, "grad_norm": 0.22975611686706543, "learning_rate": 6.154905383926218e-06, "loss": 0.1383, "step": 3351 }, { "epoch": 0.4670800529506027, "grad_norm": 0.26595228910446167, "learning_rate": 6.152612203600502e-06, "loss": 0.1474, "step": 3352 }, { "epoch": 0.46721939664181705, "grad_norm": 0.13087749481201172, "learning_rate": 6.150318767190668e-06, "loss": 0.1094, "step": 3353 }, { "epoch": 0.46735874033303143, "grad_norm": 0.20100447535514832, "learning_rate": 6.148025075206268e-06, "loss": 0.1297, "step": 3354 }, { "epoch": 0.4674980840242458, "grad_norm": 0.1521243304014206, "learning_rate": 6.145731128156904e-06, "loss": 0.1085, "step": 3355 }, { "epoch": 0.4676374277154602, "grad_norm": 0.15718375146389008, "learning_rate": 6.143436926552242e-06, "loss": 0.132, "step": 3356 }, { "epoch": 0.46777677140667456, "grad_norm": 0.22069089114665985, "learning_rate": 6.141142470902001e-06, "loss": 0.1413, "step": 3357 }, { "epoch": 0.46791611509788894, "grad_norm": 0.12480304390192032, "learning_rate": 6.138847761715955e-06, "loss": 0.1228, "step": 3358 }, { "epoch": 0.4680554587891033, "grad_norm": 0.16711093485355377, "learning_rate": 6.1365527995039366e-06, "loss": 0.1309, "step": 3359 }, { "epoch": 0.4681948024803177, "grad_norm": 0.07931802421808243, "learning_rate": 6.134257584775833e-06, "loss": 0.1078, "step": 3360 }, { "epoch": 0.4683341461715321, "grad_norm": 0.16224370896816254, "learning_rate": 6.131962118041591e-06, "loss": 0.1195, "step": 3361 }, { "epoch": 0.46847348986274645, "grad_norm": 0.2090865671634674, "learning_rate": 6.129666399811209e-06, "loss": 0.1187, "step": 3362 }, { "epoch": 0.46861283355396083, "grad_norm": 0.33492204546928406, "learning_rate": 6.127370430594745e-06, "loss": 0.1495, "step": 3363 }, { "epoch": 0.4687521772451752, "grad_norm": 0.14810998737812042, "learning_rate": 6.125074210902307e-06, "loss": 0.1261, "step": 3364 }, { "epoch": 0.4688915209363896, "grad_norm": 0.2511131465435028, "learning_rate": 6.122777741244067e-06, "loss": 0.1798, "step": 3365 }, { "epoch": 0.46903086462760396, "grad_norm": 0.2727276682853699, "learning_rate": 6.120481022130245e-06, "loss": 0.1527, "step": 3366 }, { "epoch": 0.46917020831881834, "grad_norm": 0.12740576267242432, "learning_rate": 6.118184054071124e-06, "loss": 0.1154, "step": 3367 }, { "epoch": 0.46930955201003277, "grad_norm": 0.12023366242647171, "learning_rate": 6.115886837577031e-06, "loss": 0.1048, "step": 3368 }, { "epoch": 0.46944889570124715, "grad_norm": 0.252539724111557, "learning_rate": 6.113589373158361e-06, "loss": 0.1405, "step": 3369 }, { "epoch": 0.4695882393924615, "grad_norm": 0.17627671360969543, "learning_rate": 6.111291661325556e-06, "loss": 0.1358, "step": 3370 }, { "epoch": 0.4697275830836759, "grad_norm": 0.1905210167169571, "learning_rate": 6.108993702589114e-06, "loss": 0.1251, "step": 3371 }, { "epoch": 0.4698669267748903, "grad_norm": 0.13104566931724548, "learning_rate": 6.106695497459591e-06, "loss": 0.1125, "step": 3372 }, { "epoch": 0.47000627046610466, "grad_norm": 0.16789554059505463, "learning_rate": 6.104397046447593e-06, "loss": 0.1276, "step": 3373 }, { "epoch": 0.47014561415731904, "grad_norm": 0.1157066598534584, "learning_rate": 6.102098350063786e-06, "loss": 0.1344, "step": 3374 }, { "epoch": 0.4702849578485334, "grad_norm": 0.1070687472820282, "learning_rate": 6.099799408818889e-06, "loss": 0.1247, "step": 3375 }, { "epoch": 0.4704243015397478, "grad_norm": 0.1967388093471527, "learning_rate": 6.097500223223669e-06, "loss": 0.147, "step": 3376 }, { "epoch": 0.47056364523096217, "grad_norm": 0.1120259016752243, "learning_rate": 6.095200793788958e-06, "loss": 0.1099, "step": 3377 }, { "epoch": 0.47070298892217655, "grad_norm": 0.23060274124145508, "learning_rate": 6.092901121025634e-06, "loss": 0.115, "step": 3378 }, { "epoch": 0.4708423326133909, "grad_norm": 0.13485819101333618, "learning_rate": 6.090601205444632e-06, "loss": 0.1369, "step": 3379 }, { "epoch": 0.4709816763046053, "grad_norm": 0.19028010964393616, "learning_rate": 6.088301047556942e-06, "loss": 0.1564, "step": 3380 }, { "epoch": 0.4711210199958197, "grad_norm": 0.29268503189086914, "learning_rate": 6.086000647873604e-06, "loss": 0.1476, "step": 3381 }, { "epoch": 0.47126036368703406, "grad_norm": 0.16061106324195862, "learning_rate": 6.083700006905715e-06, "loss": 0.1211, "step": 3382 }, { "epoch": 0.47139970737824843, "grad_norm": 0.08534707129001617, "learning_rate": 6.081399125164429e-06, "loss": 0.1182, "step": 3383 }, { "epoch": 0.4715390510694628, "grad_norm": 0.2167179435491562, "learning_rate": 6.079098003160943e-06, "loss": 0.1497, "step": 3384 }, { "epoch": 0.4716783947606772, "grad_norm": 0.09792459011077881, "learning_rate": 6.076796641406518e-06, "loss": 0.1038, "step": 3385 }, { "epoch": 0.47181773845189157, "grad_norm": 0.2696021795272827, "learning_rate": 6.074495040412465e-06, "loss": 0.1487, "step": 3386 }, { "epoch": 0.47195708214310594, "grad_norm": 0.19045355916023254, "learning_rate": 6.072193200690142e-06, "loss": 0.1327, "step": 3387 }, { "epoch": 0.4720964258343204, "grad_norm": 0.11103473603725433, "learning_rate": 6.069891122750971e-06, "loss": 0.1197, "step": 3388 }, { "epoch": 0.47223576952553475, "grad_norm": 0.2139395922422409, "learning_rate": 6.067588807106416e-06, "loss": 0.1245, "step": 3389 }, { "epoch": 0.47237511321674913, "grad_norm": 0.16040076315402985, "learning_rate": 6.0652862542680034e-06, "loss": 0.1404, "step": 3390 }, { "epoch": 0.4725144569079635, "grad_norm": 0.26169267296791077, "learning_rate": 6.062983464747305e-06, "loss": 0.1602, "step": 3391 }, { "epoch": 0.4726538005991779, "grad_norm": 0.1430189311504364, "learning_rate": 6.06068043905595e-06, "loss": 0.0944, "step": 3392 }, { "epoch": 0.47279314429039226, "grad_norm": 0.19397306442260742, "learning_rate": 6.0583771777056166e-06, "loss": 0.1337, "step": 3393 }, { "epoch": 0.47293248798160664, "grad_norm": 0.12712959945201874, "learning_rate": 6.056073681208038e-06, "loss": 0.1369, "step": 3394 }, { "epoch": 0.473071831672821, "grad_norm": 0.15996025502681732, "learning_rate": 6.053769950074997e-06, "loss": 0.1356, "step": 3395 }, { "epoch": 0.4732111753640354, "grad_norm": 0.23531660437583923, "learning_rate": 6.051465984818332e-06, "loss": 0.1356, "step": 3396 }, { "epoch": 0.4733505190552498, "grad_norm": 0.253754585981369, "learning_rate": 6.049161785949931e-06, "loss": 0.1357, "step": 3397 }, { "epoch": 0.47348986274646415, "grad_norm": 0.1448870152235031, "learning_rate": 6.046857353981732e-06, "loss": 0.1355, "step": 3398 }, { "epoch": 0.47362920643767853, "grad_norm": 0.1910003125667572, "learning_rate": 6.044552689425731e-06, "loss": 0.145, "step": 3399 }, { "epoch": 0.4737685501288929, "grad_norm": 0.345491498708725, "learning_rate": 6.042247792793968e-06, "loss": 0.2098, "step": 3400 }, { "epoch": 0.4739078938201073, "grad_norm": 0.157235786318779, "learning_rate": 6.0399426645985424e-06, "loss": 0.1104, "step": 3401 }, { "epoch": 0.47404723751132166, "grad_norm": 0.14583556354045868, "learning_rate": 6.037637305351599e-06, "loss": 0.1399, "step": 3402 }, { "epoch": 0.47418658120253604, "grad_norm": 0.30839771032333374, "learning_rate": 6.035331715565333e-06, "loss": 0.1978, "step": 3403 }, { "epoch": 0.4743259248937504, "grad_norm": 0.13280850648880005, "learning_rate": 6.033025895752002e-06, "loss": 0.1267, "step": 3404 }, { "epoch": 0.4744652685849648, "grad_norm": 0.12655384838581085, "learning_rate": 6.030719846423897e-06, "loss": 0.1063, "step": 3405 }, { "epoch": 0.47460461227617917, "grad_norm": 0.2200043797492981, "learning_rate": 6.028413568093375e-06, "loss": 0.1227, "step": 3406 }, { "epoch": 0.47474395596739355, "grad_norm": 0.41157519817352295, "learning_rate": 6.026107061272838e-06, "loss": 0.1814, "step": 3407 }, { "epoch": 0.474883299658608, "grad_norm": 0.1128225177526474, "learning_rate": 6.023800326474738e-06, "loss": 0.1251, "step": 3408 }, { "epoch": 0.47502264334982236, "grad_norm": 0.13166743516921997, "learning_rate": 6.0214933642115794e-06, "loss": 0.1257, "step": 3409 }, { "epoch": 0.47516198704103674, "grad_norm": 0.11185501515865326, "learning_rate": 6.019186174995916e-06, "loss": 0.1059, "step": 3410 }, { "epoch": 0.4753013307322511, "grad_norm": 0.1679621785879135, "learning_rate": 6.016878759340352e-06, "loss": 0.1411, "step": 3411 }, { "epoch": 0.4754406744234655, "grad_norm": 0.119534432888031, "learning_rate": 6.014571117757545e-06, "loss": 0.1142, "step": 3412 }, { "epoch": 0.47558001811467987, "grad_norm": 0.2126813381910324, "learning_rate": 6.012263250760199e-06, "loss": 0.1322, "step": 3413 }, { "epoch": 0.47571936180589425, "grad_norm": 0.16385751962661743, "learning_rate": 6.009955158861066e-06, "loss": 0.1565, "step": 3414 }, { "epoch": 0.4758587054971086, "grad_norm": 0.1266603171825409, "learning_rate": 6.007646842572959e-06, "loss": 0.1143, "step": 3415 }, { "epoch": 0.475998049188323, "grad_norm": 0.1170651838183403, "learning_rate": 6.005338302408724e-06, "loss": 0.1185, "step": 3416 }, { "epoch": 0.4761373928795374, "grad_norm": 0.16774976253509521, "learning_rate": 6.0030295388812736e-06, "loss": 0.1386, "step": 3417 }, { "epoch": 0.47627673657075176, "grad_norm": 0.11561767011880875, "learning_rate": 6.000720552503557e-06, "loss": 0.1152, "step": 3418 }, { "epoch": 0.47641608026196614, "grad_norm": 0.20481808483600616, "learning_rate": 5.998411343788582e-06, "loss": 0.1454, "step": 3419 }, { "epoch": 0.4765554239531805, "grad_norm": 0.1350640505552292, "learning_rate": 5.996101913249402e-06, "loss": 0.116, "step": 3420 }, { "epoch": 0.4766947676443949, "grad_norm": 0.09782880544662476, "learning_rate": 5.993792261399115e-06, "loss": 0.0911, "step": 3421 }, { "epoch": 0.47683411133560927, "grad_norm": 0.2240537852048874, "learning_rate": 5.991482388750878e-06, "loss": 0.1187, "step": 3422 }, { "epoch": 0.47697345502682365, "grad_norm": 0.19483615458011627, "learning_rate": 5.989172295817889e-06, "loss": 0.1349, "step": 3423 }, { "epoch": 0.477112798718038, "grad_norm": 0.2305227518081665, "learning_rate": 5.9868619831134e-06, "loss": 0.1572, "step": 3424 }, { "epoch": 0.4772521424092524, "grad_norm": 0.18845878541469574, "learning_rate": 5.984551451150709e-06, "loss": 0.1551, "step": 3425 }, { "epoch": 0.4773914861004668, "grad_norm": 0.18412330746650696, "learning_rate": 5.9822407004431625e-06, "loss": 0.1354, "step": 3426 }, { "epoch": 0.47753082979168116, "grad_norm": 0.18863849341869354, "learning_rate": 5.979929731504158e-06, "loss": 0.1561, "step": 3427 }, { "epoch": 0.4776701734828956, "grad_norm": 0.26210281252861023, "learning_rate": 5.977618544847139e-06, "loss": 0.2118, "step": 3428 }, { "epoch": 0.47780951717410997, "grad_norm": 0.11213861405849457, "learning_rate": 5.975307140985599e-06, "loss": 0.1175, "step": 3429 }, { "epoch": 0.47794886086532434, "grad_norm": 0.13701266050338745, "learning_rate": 5.972995520433078e-06, "loss": 0.1494, "step": 3430 }, { "epoch": 0.4780882045565387, "grad_norm": 0.12305961549282074, "learning_rate": 5.970683683703168e-06, "loss": 0.1268, "step": 3431 }, { "epoch": 0.4782275482477531, "grad_norm": 0.10548906773328781, "learning_rate": 5.968371631309502e-06, "loss": 0.1086, "step": 3432 }, { "epoch": 0.4783668919389675, "grad_norm": 0.15683339536190033, "learning_rate": 5.966059363765771e-06, "loss": 0.1598, "step": 3433 }, { "epoch": 0.47850623563018185, "grad_norm": 0.1303861141204834, "learning_rate": 5.9637468815857016e-06, "loss": 0.1277, "step": 3434 }, { "epoch": 0.47864557932139623, "grad_norm": 0.11648806184530258, "learning_rate": 5.961434185283079e-06, "loss": 0.1098, "step": 3435 }, { "epoch": 0.4787849230126106, "grad_norm": 0.23329800367355347, "learning_rate": 5.959121275371732e-06, "loss": 0.1438, "step": 3436 }, { "epoch": 0.478924266703825, "grad_norm": 0.25445300340652466, "learning_rate": 5.956808152365532e-06, "loss": 0.142, "step": 3437 }, { "epoch": 0.47906361039503936, "grad_norm": 0.13322603702545166, "learning_rate": 5.954494816778408e-06, "loss": 0.1357, "step": 3438 }, { "epoch": 0.47920295408625374, "grad_norm": 0.13580451905727386, "learning_rate": 5.952181269124324e-06, "loss": 0.1556, "step": 3439 }, { "epoch": 0.4793422977774681, "grad_norm": 0.17357321083545685, "learning_rate": 5.949867509917303e-06, "loss": 0.1763, "step": 3440 }, { "epoch": 0.4794816414686825, "grad_norm": 0.14922170341014862, "learning_rate": 5.9475535396714055e-06, "loss": 0.1319, "step": 3441 }, { "epoch": 0.4796209851598969, "grad_norm": 0.11198395490646362, "learning_rate": 5.945239358900746e-06, "loss": 0.1193, "step": 3442 }, { "epoch": 0.47976032885111125, "grad_norm": 0.14136576652526855, "learning_rate": 5.94292496811948e-06, "loss": 0.1364, "step": 3443 }, { "epoch": 0.47989967254232563, "grad_norm": 0.13770829141139984, "learning_rate": 5.940610367841815e-06, "loss": 0.1191, "step": 3444 }, { "epoch": 0.48003901623354, "grad_norm": 0.2142518311738968, "learning_rate": 5.938295558581999e-06, "loss": 0.1243, "step": 3445 }, { "epoch": 0.4801783599247544, "grad_norm": 0.21157050132751465, "learning_rate": 5.935980540854332e-06, "loss": 0.1842, "step": 3446 }, { "epoch": 0.48031770361596876, "grad_norm": 0.13482841849327087, "learning_rate": 5.933665315173158e-06, "loss": 0.1111, "step": 3447 }, { "epoch": 0.4804570473071832, "grad_norm": 0.13313980400562286, "learning_rate": 5.931349882052866e-06, "loss": 0.1272, "step": 3448 }, { "epoch": 0.48059639099839757, "grad_norm": 0.2973596751689911, "learning_rate": 5.929034242007895e-06, "loss": 0.1494, "step": 3449 }, { "epoch": 0.48073573468961195, "grad_norm": 0.1587332934141159, "learning_rate": 5.926718395552723e-06, "loss": 0.1413, "step": 3450 }, { "epoch": 0.4808750783808263, "grad_norm": 0.15093858540058136, "learning_rate": 5.924402343201883e-06, "loss": 0.1397, "step": 3451 }, { "epoch": 0.4810144220720407, "grad_norm": 0.18785890936851501, "learning_rate": 5.922086085469947e-06, "loss": 0.1325, "step": 3452 }, { "epoch": 0.4811537657632551, "grad_norm": 0.19225941598415375, "learning_rate": 5.919769622871533e-06, "loss": 0.1194, "step": 3453 }, { "epoch": 0.48129310945446946, "grad_norm": 0.17269547283649445, "learning_rate": 5.917452955921309e-06, "loss": 0.1218, "step": 3454 }, { "epoch": 0.48143245314568384, "grad_norm": 0.3141980469226837, "learning_rate": 5.915136085133983e-06, "loss": 0.1679, "step": 3455 }, { "epoch": 0.4815717968368982, "grad_norm": 0.280619353055954, "learning_rate": 5.9128190110243115e-06, "loss": 0.1411, "step": 3456 }, { "epoch": 0.4817111405281126, "grad_norm": 0.1386594921350479, "learning_rate": 5.910501734107097e-06, "loss": 0.1154, "step": 3457 }, { "epoch": 0.48185048421932697, "grad_norm": 0.23088784515857697, "learning_rate": 5.908184254897183e-06, "loss": 0.1236, "step": 3458 }, { "epoch": 0.48198982791054135, "grad_norm": 0.28648239374160767, "learning_rate": 5.905866573909462e-06, "loss": 0.152, "step": 3459 }, { "epoch": 0.4821291716017557, "grad_norm": 0.23177115619182587, "learning_rate": 5.9035486916588705e-06, "loss": 0.1264, "step": 3460 }, { "epoch": 0.4822685152929701, "grad_norm": 0.15862776339054108, "learning_rate": 5.901230608660386e-06, "loss": 0.1375, "step": 3461 }, { "epoch": 0.4824078589841845, "grad_norm": 0.14765559136867523, "learning_rate": 5.898912325429038e-06, "loss": 0.1337, "step": 3462 }, { "epoch": 0.48254720267539886, "grad_norm": 0.4261264204978943, "learning_rate": 5.896593842479893e-06, "loss": 0.1507, "step": 3463 }, { "epoch": 0.48268654636661323, "grad_norm": 0.16491621732711792, "learning_rate": 5.8942751603280645e-06, "loss": 0.129, "step": 3464 }, { "epoch": 0.4828258900578276, "grad_norm": 0.24379263818264008, "learning_rate": 5.891956279488715e-06, "loss": 0.1228, "step": 3465 }, { "epoch": 0.482965233749042, "grad_norm": 0.2698400914669037, "learning_rate": 5.889637200477041e-06, "loss": 0.1254, "step": 3466 }, { "epoch": 0.48310457744025637, "grad_norm": 0.21491576731204987, "learning_rate": 5.887317923808294e-06, "loss": 0.1503, "step": 3467 }, { "epoch": 0.4832439211314708, "grad_norm": 0.14756467938423157, "learning_rate": 5.88499844999776e-06, "loss": 0.1389, "step": 3468 }, { "epoch": 0.4833832648226852, "grad_norm": 0.18194018304347992, "learning_rate": 5.882678779560776e-06, "loss": 0.1442, "step": 3469 }, { "epoch": 0.48352260851389955, "grad_norm": 0.30962857604026794, "learning_rate": 5.880358913012722e-06, "loss": 0.1408, "step": 3470 }, { "epoch": 0.48366195220511393, "grad_norm": 0.2053878754377365, "learning_rate": 5.878038850869012e-06, "loss": 0.1243, "step": 3471 }, { "epoch": 0.4838012958963283, "grad_norm": 0.18000558018684387, "learning_rate": 5.875718593645118e-06, "loss": 0.1107, "step": 3472 }, { "epoch": 0.4839406395875427, "grad_norm": 0.14113099873065948, "learning_rate": 5.873398141856545e-06, "loss": 0.1153, "step": 3473 }, { "epoch": 0.48407998327875706, "grad_norm": 0.14940594136714935, "learning_rate": 5.871077496018844e-06, "loss": 0.1382, "step": 3474 }, { "epoch": 0.48421932696997144, "grad_norm": 0.16471806168556213, "learning_rate": 5.868756656647611e-06, "loss": 0.1264, "step": 3475 }, { "epoch": 0.4843586706611858, "grad_norm": 0.15364761650562286, "learning_rate": 5.866435624258483e-06, "loss": 0.1339, "step": 3476 }, { "epoch": 0.4844980143524002, "grad_norm": 0.11486202478408813, "learning_rate": 5.86411439936714e-06, "loss": 0.1148, "step": 3477 }, { "epoch": 0.4846373580436146, "grad_norm": 0.1675747185945511, "learning_rate": 5.861792982489306e-06, "loss": 0.13, "step": 3478 }, { "epoch": 0.48477670173482895, "grad_norm": 0.16530382633209229, "learning_rate": 5.8594713741407465e-06, "loss": 0.1527, "step": 3479 }, { "epoch": 0.48491604542604333, "grad_norm": 0.238743394613266, "learning_rate": 5.857149574837269e-06, "loss": 0.1442, "step": 3480 }, { "epoch": 0.4850553891172577, "grad_norm": 0.12745137512683868, "learning_rate": 5.854827585094725e-06, "loss": 0.1267, "step": 3481 }, { "epoch": 0.4851947328084721, "grad_norm": 0.14382661879062653, "learning_rate": 5.852505405429007e-06, "loss": 0.1161, "step": 3482 }, { "epoch": 0.48533407649968646, "grad_norm": 0.12465420365333557, "learning_rate": 5.850183036356054e-06, "loss": 0.1313, "step": 3483 }, { "epoch": 0.48547342019090084, "grad_norm": 0.27253201603889465, "learning_rate": 5.847860478391838e-06, "loss": 0.1403, "step": 3484 }, { "epoch": 0.4856127638821152, "grad_norm": 0.16415418684482574, "learning_rate": 5.845537732052381e-06, "loss": 0.0931, "step": 3485 }, { "epoch": 0.4857521075733296, "grad_norm": 0.23777294158935547, "learning_rate": 5.8432147978537444e-06, "loss": 0.1901, "step": 3486 }, { "epoch": 0.48589145126454397, "grad_norm": 0.14197829365730286, "learning_rate": 5.840891676312029e-06, "loss": 0.1192, "step": 3487 }, { "epoch": 0.4860307949557584, "grad_norm": 0.25621941685676575, "learning_rate": 5.838568367943383e-06, "loss": 0.133, "step": 3488 }, { "epoch": 0.4861701386469728, "grad_norm": 0.14425848424434662, "learning_rate": 5.836244873263989e-06, "loss": 0.1119, "step": 3489 }, { "epoch": 0.48630948233818716, "grad_norm": 0.16664469242095947, "learning_rate": 5.8339211927900776e-06, "loss": 0.1285, "step": 3490 }, { "epoch": 0.48644882602940154, "grad_norm": 0.1821020543575287, "learning_rate": 5.831597327037914e-06, "loss": 0.1416, "step": 3491 }, { "epoch": 0.4865881697206159, "grad_norm": 0.30329418182373047, "learning_rate": 5.829273276523811e-06, "loss": 0.1815, "step": 3492 }, { "epoch": 0.4867275134118303, "grad_norm": 0.1604434847831726, "learning_rate": 5.82694904176412e-06, "loss": 0.1159, "step": 3493 }, { "epoch": 0.48686685710304467, "grad_norm": 0.22364303469657898, "learning_rate": 5.82462462327523e-06, "loss": 0.1204, "step": 3494 }, { "epoch": 0.48700620079425905, "grad_norm": 0.2587149739265442, "learning_rate": 5.822300021573574e-06, "loss": 0.181, "step": 3495 }, { "epoch": 0.4871455444854734, "grad_norm": 0.17569178342819214, "learning_rate": 5.819975237175629e-06, "loss": 0.1288, "step": 3496 }, { "epoch": 0.4872848881766878, "grad_norm": 0.13809044659137726, "learning_rate": 5.817650270597906e-06, "loss": 0.1189, "step": 3497 }, { "epoch": 0.4874242318679022, "grad_norm": 0.262813001871109, "learning_rate": 5.815325122356959e-06, "loss": 0.1414, "step": 3498 }, { "epoch": 0.48756357555911656, "grad_norm": 0.48350149393081665, "learning_rate": 5.8129997929693845e-06, "loss": 0.1799, "step": 3499 }, { "epoch": 0.48770291925033094, "grad_norm": 0.14634832739830017, "learning_rate": 5.810674282951817e-06, "loss": 0.1085, "step": 3500 }, { "epoch": 0.4878422629415453, "grad_norm": 0.159870907664299, "learning_rate": 5.808348592820932e-06, "loss": 0.125, "step": 3501 }, { "epoch": 0.4879816066327597, "grad_norm": 0.2128797173500061, "learning_rate": 5.806022723093445e-06, "loss": 0.1445, "step": 3502 }, { "epoch": 0.48812095032397407, "grad_norm": 0.2658242881298065, "learning_rate": 5.80369667428611e-06, "loss": 0.1386, "step": 3503 }, { "epoch": 0.48826029401518845, "grad_norm": 0.24384081363677979, "learning_rate": 5.801370446915724e-06, "loss": 0.131, "step": 3504 }, { "epoch": 0.4883996377064028, "grad_norm": 0.11936026066541672, "learning_rate": 5.799044041499119e-06, "loss": 0.1235, "step": 3505 }, { "epoch": 0.4885389813976172, "grad_norm": 0.260445237159729, "learning_rate": 5.7967174585531705e-06, "loss": 0.1274, "step": 3506 }, { "epoch": 0.4886783250888316, "grad_norm": 0.32483744621276855, "learning_rate": 5.794390698594793e-06, "loss": 0.1303, "step": 3507 }, { "epoch": 0.488817668780046, "grad_norm": 0.20499864220619202, "learning_rate": 5.792063762140938e-06, "loss": 0.1241, "step": 3508 }, { "epoch": 0.4889570124712604, "grad_norm": 0.3295113444328308, "learning_rate": 5.789736649708598e-06, "loss": 0.1553, "step": 3509 }, { "epoch": 0.48909635616247477, "grad_norm": 0.2001582831144333, "learning_rate": 5.787409361814805e-06, "loss": 0.1473, "step": 3510 }, { "epoch": 0.48923569985368914, "grad_norm": 0.14934878051280975, "learning_rate": 5.785081898976627e-06, "loss": 0.1199, "step": 3511 }, { "epoch": 0.4893750435449035, "grad_norm": 0.23940548300743103, "learning_rate": 5.782754261711177e-06, "loss": 0.1369, "step": 3512 }, { "epoch": 0.4895143872361179, "grad_norm": 0.192873015999794, "learning_rate": 5.7804264505356e-06, "loss": 0.1333, "step": 3513 }, { "epoch": 0.4896537309273323, "grad_norm": 0.1351386308670044, "learning_rate": 5.778098465967082e-06, "loss": 0.1406, "step": 3514 }, { "epoch": 0.48979307461854665, "grad_norm": 0.4239400029182434, "learning_rate": 5.7757703085228515e-06, "loss": 0.1562, "step": 3515 }, { "epoch": 0.48993241830976103, "grad_norm": 0.1635677069425583, "learning_rate": 5.773441978720167e-06, "loss": 0.1335, "step": 3516 }, { "epoch": 0.4900717620009754, "grad_norm": 0.1181398332118988, "learning_rate": 5.771113477076335e-06, "loss": 0.1395, "step": 3517 }, { "epoch": 0.4902111056921898, "grad_norm": 0.2854607403278351, "learning_rate": 5.7687848041086905e-06, "loss": 0.1707, "step": 3518 }, { "epoch": 0.49035044938340416, "grad_norm": 0.14043819904327393, "learning_rate": 5.766455960334616e-06, "loss": 0.1145, "step": 3519 }, { "epoch": 0.49048979307461854, "grad_norm": 0.11847855150699615, "learning_rate": 5.764126946271526e-06, "loss": 0.109, "step": 3520 }, { "epoch": 0.4906291367658329, "grad_norm": 0.10463044047355652, "learning_rate": 5.761797762436872e-06, "loss": 0.1109, "step": 3521 }, { "epoch": 0.4907684804570473, "grad_norm": 0.11306805908679962, "learning_rate": 5.759468409348149e-06, "loss": 0.1025, "step": 3522 }, { "epoch": 0.4909078241482617, "grad_norm": 0.11473090946674347, "learning_rate": 5.757138887522884e-06, "loss": 0.1214, "step": 3523 }, { "epoch": 0.49104716783947605, "grad_norm": 0.1328452229499817, "learning_rate": 5.754809197478644e-06, "loss": 0.1327, "step": 3524 }, { "epoch": 0.49118651153069043, "grad_norm": 0.2169794738292694, "learning_rate": 5.752479339733033e-06, "loss": 0.1744, "step": 3525 }, { "epoch": 0.4913258552219048, "grad_norm": 0.3402409553527832, "learning_rate": 5.750149314803691e-06, "loss": 0.1777, "step": 3526 }, { "epoch": 0.4914651989131192, "grad_norm": 0.2573500871658325, "learning_rate": 5.747819123208299e-06, "loss": 0.1814, "step": 3527 }, { "epoch": 0.4916045426043336, "grad_norm": 0.13973075151443481, "learning_rate": 5.7454887654645706e-06, "loss": 0.156, "step": 3528 }, { "epoch": 0.491743886295548, "grad_norm": 0.13540779054164886, "learning_rate": 5.7431582420902576e-06, "loss": 0.1626, "step": 3529 }, { "epoch": 0.49188322998676237, "grad_norm": 0.20008644461631775, "learning_rate": 5.740827553603149e-06, "loss": 0.1432, "step": 3530 }, { "epoch": 0.49202257367797675, "grad_norm": 0.14059418439865112, "learning_rate": 5.738496700521073e-06, "loss": 0.1189, "step": 3531 }, { "epoch": 0.4921619173691911, "grad_norm": 0.30083081126213074, "learning_rate": 5.736165683361889e-06, "loss": 0.1448, "step": 3532 }, { "epoch": 0.4923012610604055, "grad_norm": 0.18059633672237396, "learning_rate": 5.7338345026434995e-06, "loss": 0.132, "step": 3533 }, { "epoch": 0.4924406047516199, "grad_norm": 0.2855188548564911, "learning_rate": 5.731503158883835e-06, "loss": 0.1365, "step": 3534 }, { "epoch": 0.49257994844283426, "grad_norm": 0.15688684582710266, "learning_rate": 5.729171652600869e-06, "loss": 0.1574, "step": 3535 }, { "epoch": 0.49271929213404864, "grad_norm": 0.13850493729114532, "learning_rate": 5.726839984312611e-06, "loss": 0.124, "step": 3536 }, { "epoch": 0.492858635825263, "grad_norm": 0.17265504598617554, "learning_rate": 5.724508154537101e-06, "loss": 0.12, "step": 3537 }, { "epoch": 0.4929979795164774, "grad_norm": 0.1786041408777237, "learning_rate": 5.72217616379242e-06, "loss": 0.1422, "step": 3538 }, { "epoch": 0.49313732320769177, "grad_norm": 0.09754381328821182, "learning_rate": 5.719844012596683e-06, "loss": 0.1077, "step": 3539 }, { "epoch": 0.49327666689890615, "grad_norm": 0.18587234616279602, "learning_rate": 5.7175117014680415e-06, "loss": 0.1386, "step": 3540 }, { "epoch": 0.4934160105901205, "grad_norm": 0.20907145738601685, "learning_rate": 5.71517923092468e-06, "loss": 0.1778, "step": 3541 }, { "epoch": 0.4935553542813349, "grad_norm": 0.19511868059635162, "learning_rate": 5.712846601484822e-06, "loss": 0.194, "step": 3542 }, { "epoch": 0.4936946979725493, "grad_norm": 0.2540413439273834, "learning_rate": 5.710513813666722e-06, "loss": 0.1302, "step": 3543 }, { "epoch": 0.49383404166376366, "grad_norm": 0.1444428414106369, "learning_rate": 5.708180867988676e-06, "loss": 0.1422, "step": 3544 }, { "epoch": 0.49397338535497803, "grad_norm": 0.1120552197098732, "learning_rate": 5.705847764969008e-06, "loss": 0.1144, "step": 3545 }, { "epoch": 0.4941127290461924, "grad_norm": 0.30012521147727966, "learning_rate": 5.703514505126081e-06, "loss": 0.1638, "step": 3546 }, { "epoch": 0.4942520727374068, "grad_norm": 0.18999770283699036, "learning_rate": 5.701181088978295e-06, "loss": 0.1219, "step": 3547 }, { "epoch": 0.49439141642862117, "grad_norm": 0.15559126436710358, "learning_rate": 5.698847517044076e-06, "loss": 0.144, "step": 3548 }, { "epoch": 0.4945307601198356, "grad_norm": 0.1975560486316681, "learning_rate": 5.696513789841897e-06, "loss": 0.1612, "step": 3549 }, { "epoch": 0.49467010381105, "grad_norm": 0.17980404198169708, "learning_rate": 5.6941799078902525e-06, "loss": 0.1377, "step": 3550 }, { "epoch": 0.49480944750226435, "grad_norm": 0.2726992666721344, "learning_rate": 5.691845871707682e-06, "loss": 0.1342, "step": 3551 }, { "epoch": 0.49494879119347873, "grad_norm": 0.13296593725681305, "learning_rate": 5.689511681812755e-06, "loss": 0.1076, "step": 3552 }, { "epoch": 0.4950881348846931, "grad_norm": 0.12699508666992188, "learning_rate": 5.687177338724073e-06, "loss": 0.1148, "step": 3553 }, { "epoch": 0.4952274785759075, "grad_norm": 0.4270729422569275, "learning_rate": 5.684842842960276e-06, "loss": 0.2264, "step": 3554 }, { "epoch": 0.49536682226712186, "grad_norm": 0.2855929136276245, "learning_rate": 5.682508195040032e-06, "loss": 0.1768, "step": 3555 }, { "epoch": 0.49550616595833624, "grad_norm": 0.38371357321739197, "learning_rate": 5.68017339548205e-06, "loss": 0.1699, "step": 3556 }, { "epoch": 0.4956455096495506, "grad_norm": 0.15440157055854797, "learning_rate": 5.6778384448050694e-06, "loss": 0.1349, "step": 3557 }, { "epoch": 0.495784853340765, "grad_norm": 0.12442585080862045, "learning_rate": 5.675503343527861e-06, "loss": 0.134, "step": 3558 }, { "epoch": 0.4959241970319794, "grad_norm": 0.14737465977668762, "learning_rate": 5.673168092169231e-06, "loss": 0.1176, "step": 3559 }, { "epoch": 0.49606354072319375, "grad_norm": 0.17397546768188477, "learning_rate": 5.670832691248021e-06, "loss": 0.1234, "step": 3560 }, { "epoch": 0.49620288441440813, "grad_norm": 0.1284254789352417, "learning_rate": 5.668497141283101e-06, "loss": 0.1027, "step": 3561 }, { "epoch": 0.4963422281056225, "grad_norm": 0.16579855978488922, "learning_rate": 5.66616144279338e-06, "loss": 0.1381, "step": 3562 }, { "epoch": 0.4964815717968369, "grad_norm": 0.20845873653888702, "learning_rate": 5.663825596297794e-06, "loss": 0.1527, "step": 3563 }, { "epoch": 0.49662091548805126, "grad_norm": 0.23730644583702087, "learning_rate": 5.661489602315314e-06, "loss": 0.1579, "step": 3564 }, { "epoch": 0.49676025917926564, "grad_norm": 0.1677922010421753, "learning_rate": 5.6591534613649505e-06, "loss": 0.1079, "step": 3565 }, { "epoch": 0.49689960287048, "grad_norm": 0.19271603226661682, "learning_rate": 5.656817173965733e-06, "loss": 0.1256, "step": 3566 }, { "epoch": 0.4970389465616944, "grad_norm": 0.10827665776014328, "learning_rate": 5.6544807406367365e-06, "loss": 0.0998, "step": 3567 }, { "epoch": 0.49717829025290877, "grad_norm": 0.1808168739080429, "learning_rate": 5.6521441618970605e-06, "loss": 0.1292, "step": 3568 }, { "epoch": 0.4973176339441232, "grad_norm": 0.14228582382202148, "learning_rate": 5.649807438265842e-06, "loss": 0.1501, "step": 3569 }, { "epoch": 0.4974569776353376, "grad_norm": 0.1402050256729126, "learning_rate": 5.647470570262246e-06, "loss": 0.1468, "step": 3570 }, { "epoch": 0.49759632132655196, "grad_norm": 0.20337913930416107, "learning_rate": 5.64513355840547e-06, "loss": 0.1366, "step": 3571 }, { "epoch": 0.49773566501776634, "grad_norm": 0.15939295291900635, "learning_rate": 5.642796403214747e-06, "loss": 0.1205, "step": 3572 }, { "epoch": 0.4978750087089807, "grad_norm": 0.15095724165439606, "learning_rate": 5.640459105209337e-06, "loss": 0.1028, "step": 3573 }, { "epoch": 0.4980143524001951, "grad_norm": 0.2813248336315155, "learning_rate": 5.638121664908537e-06, "loss": 0.1568, "step": 3574 }, { "epoch": 0.49815369609140947, "grad_norm": 0.15100309252738953, "learning_rate": 5.635784082831671e-06, "loss": 0.1236, "step": 3575 }, { "epoch": 0.49829303978262385, "grad_norm": 0.24629874527454376, "learning_rate": 5.633446359498098e-06, "loss": 0.1384, "step": 3576 }, { "epoch": 0.4984323834738382, "grad_norm": 0.2308843582868576, "learning_rate": 5.6311084954272055e-06, "loss": 0.1479, "step": 3577 }, { "epoch": 0.4985717271650526, "grad_norm": 0.10758781433105469, "learning_rate": 5.628770491138414e-06, "loss": 0.1121, "step": 3578 }, { "epoch": 0.498711070856267, "grad_norm": 0.24236635863780975, "learning_rate": 5.626432347151173e-06, "loss": 0.1412, "step": 3579 }, { "epoch": 0.49885041454748136, "grad_norm": 0.2475147396326065, "learning_rate": 5.624094063984967e-06, "loss": 0.14, "step": 3580 }, { "epoch": 0.49898975823869574, "grad_norm": 0.1749931126832962, "learning_rate": 5.621755642159309e-06, "loss": 0.1211, "step": 3581 }, { "epoch": 0.4991291019299101, "grad_norm": 0.14229841530323029, "learning_rate": 5.61941708219374e-06, "loss": 0.1471, "step": 3582 }, { "epoch": 0.4992684456211245, "grad_norm": 0.16007976233959198, "learning_rate": 5.617078384607839e-06, "loss": 0.1181, "step": 3583 }, { "epoch": 0.49940778931233887, "grad_norm": 0.19291739165782928, "learning_rate": 5.614739549921208e-06, "loss": 0.1562, "step": 3584 }, { "epoch": 0.49954713300355325, "grad_norm": 0.13009048998355865, "learning_rate": 5.612400578653484e-06, "loss": 0.1255, "step": 3585 }, { "epoch": 0.4996864766947676, "grad_norm": 0.3110722005367279, "learning_rate": 5.610061471324335e-06, "loss": 0.1378, "step": 3586 }, { "epoch": 0.499825820385982, "grad_norm": 0.14610706269741058, "learning_rate": 5.607722228453452e-06, "loss": 0.1277, "step": 3587 }, { "epoch": 0.4999651640771964, "grad_norm": 0.16857828199863434, "learning_rate": 5.605382850560565e-06, "loss": 0.1229, "step": 3588 }, { "epoch": 0.5001045077684108, "grad_norm": 0.3253779411315918, "learning_rate": 5.6030433381654305e-06, "loss": 0.1433, "step": 3589 }, { "epoch": 0.5002438514596251, "grad_norm": 0.2539801001548767, "learning_rate": 5.600703691787833e-06, "loss": 0.1288, "step": 3590 }, { "epoch": 0.5003831951508395, "grad_norm": 0.22692172229290009, "learning_rate": 5.598363911947591e-06, "loss": 0.1713, "step": 3591 }, { "epoch": 0.5005225388420539, "grad_norm": 0.25987112522125244, "learning_rate": 5.596023999164547e-06, "loss": 0.1269, "step": 3592 }, { "epoch": 0.5006618825332683, "grad_norm": 0.20547173917293549, "learning_rate": 5.593683953958579e-06, "loss": 0.1351, "step": 3593 }, { "epoch": 0.5008012262244826, "grad_norm": 0.16652068495750427, "learning_rate": 5.591343776849591e-06, "loss": 0.1289, "step": 3594 }, { "epoch": 0.500940569915697, "grad_norm": 0.16857220232486725, "learning_rate": 5.5890034683575145e-06, "loss": 0.1251, "step": 3595 }, { "epoch": 0.5010799136069114, "grad_norm": 0.16234412789344788, "learning_rate": 5.586663029002314e-06, "loss": 0.1205, "step": 3596 }, { "epoch": 0.5012192572981258, "grad_norm": 0.34118402004241943, "learning_rate": 5.584322459303984e-06, "loss": 0.1261, "step": 3597 }, { "epoch": 0.5013586009893402, "grad_norm": 0.19465990364551544, "learning_rate": 5.581981759782543e-06, "loss": 0.0824, "step": 3598 }, { "epoch": 0.5014979446805546, "grad_norm": 0.14893725514411926, "learning_rate": 5.579640930958043e-06, "loss": 0.1328, "step": 3599 }, { "epoch": 0.501637288371769, "grad_norm": 0.11078134179115295, "learning_rate": 5.57729997335056e-06, "loss": 0.1154, "step": 3600 }, { "epoch": 0.5017766320629834, "grad_norm": 0.40035760402679443, "learning_rate": 5.5749588874802055e-06, "loss": 0.1868, "step": 3601 }, { "epoch": 0.5019159757541978, "grad_norm": 0.16425278782844543, "learning_rate": 5.572617673867111e-06, "loss": 0.1299, "step": 3602 }, { "epoch": 0.5020553194454122, "grad_norm": 0.15987616777420044, "learning_rate": 5.570276333031441e-06, "loss": 0.1202, "step": 3603 }, { "epoch": 0.5021946631366265, "grad_norm": 0.1320316344499588, "learning_rate": 5.567934865493392e-06, "loss": 0.1172, "step": 3604 }, { "epoch": 0.5023340068278409, "grad_norm": 0.17358139157295227, "learning_rate": 5.5655932717731805e-06, "loss": 0.1334, "step": 3605 }, { "epoch": 0.5024733505190553, "grad_norm": 0.3999723792076111, "learning_rate": 5.563251552391058e-06, "loss": 0.1536, "step": 3606 }, { "epoch": 0.5026126942102697, "grad_norm": 0.20092065632343292, "learning_rate": 5.560909707867299e-06, "loss": 0.1457, "step": 3607 }, { "epoch": 0.502752037901484, "grad_norm": 0.16737323999404907, "learning_rate": 5.558567738722208e-06, "loss": 0.1216, "step": 3608 }, { "epoch": 0.5028913815926984, "grad_norm": 0.16040730476379395, "learning_rate": 5.556225645476119e-06, "loss": 0.111, "step": 3609 }, { "epoch": 0.5030307252839128, "grad_norm": 0.29198428988456726, "learning_rate": 5.55388342864939e-06, "loss": 0.1479, "step": 3610 }, { "epoch": 0.5031700689751272, "grad_norm": 0.20217223465442657, "learning_rate": 5.5515410887624085e-06, "loss": 0.1213, "step": 3611 }, { "epoch": 0.5033094126663415, "grad_norm": 0.16957813501358032, "learning_rate": 5.549198626335589e-06, "loss": 0.1391, "step": 3612 }, { "epoch": 0.5034487563575559, "grad_norm": 0.15495918691158295, "learning_rate": 5.546856041889374e-06, "loss": 0.1455, "step": 3613 }, { "epoch": 0.5035881000487703, "grad_norm": 0.2342723160982132, "learning_rate": 5.544513335944228e-06, "loss": 0.1541, "step": 3614 }, { "epoch": 0.5037274437399847, "grad_norm": 0.29284581542015076, "learning_rate": 5.542170509020655e-06, "loss": 0.15, "step": 3615 }, { "epoch": 0.5038667874311991, "grad_norm": 0.18282349407672882, "learning_rate": 5.539827561639169e-06, "loss": 0.1444, "step": 3616 }, { "epoch": 0.5040061311224134, "grad_norm": 0.23352880775928497, "learning_rate": 5.537484494320324e-06, "loss": 0.1412, "step": 3617 }, { "epoch": 0.5041454748136278, "grad_norm": 0.08883483707904816, "learning_rate": 5.535141307584697e-06, "loss": 0.118, "step": 3618 }, { "epoch": 0.5042848185048422, "grad_norm": 0.19007055461406708, "learning_rate": 5.532798001952888e-06, "loss": 0.1152, "step": 3619 }, { "epoch": 0.5044241621960566, "grad_norm": 0.12655115127563477, "learning_rate": 5.530454577945529e-06, "loss": 0.1109, "step": 3620 }, { "epoch": 0.504563505887271, "grad_norm": 0.19461189210414886, "learning_rate": 5.52811103608327e-06, "loss": 0.1245, "step": 3621 }, { "epoch": 0.5047028495784853, "grad_norm": 0.2020706683397293, "learning_rate": 5.525767376886797e-06, "loss": 0.1278, "step": 3622 }, { "epoch": 0.5048421932696997, "grad_norm": 0.18497690558433533, "learning_rate": 5.523423600876816e-06, "loss": 0.1757, "step": 3623 }, { "epoch": 0.5049815369609141, "grad_norm": 0.16757532954216003, "learning_rate": 5.521079708574062e-06, "loss": 0.1315, "step": 3624 }, { "epoch": 0.5051208806521285, "grad_norm": 0.25135481357574463, "learning_rate": 5.5187357004992926e-06, "loss": 0.1233, "step": 3625 }, { "epoch": 0.5052602243433428, "grad_norm": 0.14432939887046814, "learning_rate": 5.516391577173293e-06, "loss": 0.1199, "step": 3626 }, { "epoch": 0.5053995680345572, "grad_norm": 0.1866653710603714, "learning_rate": 5.514047339116874e-06, "loss": 0.136, "step": 3627 }, { "epoch": 0.5055389117257716, "grad_norm": 0.10943882912397385, "learning_rate": 5.511702986850873e-06, "loss": 0.1141, "step": 3628 }, { "epoch": 0.505678255416986, "grad_norm": 0.1118234246969223, "learning_rate": 5.509358520896151e-06, "loss": 0.1366, "step": 3629 }, { "epoch": 0.5058175991082003, "grad_norm": 0.159036323428154, "learning_rate": 5.507013941773593e-06, "loss": 0.1202, "step": 3630 }, { "epoch": 0.5059569427994147, "grad_norm": 0.1937943398952484, "learning_rate": 5.504669250004116e-06, "loss": 0.1244, "step": 3631 }, { "epoch": 0.5060962864906291, "grad_norm": 0.12963944673538208, "learning_rate": 5.502324446108649e-06, "loss": 0.1267, "step": 3632 }, { "epoch": 0.5062356301818435, "grad_norm": 0.14162424206733704, "learning_rate": 5.49997953060816e-06, "loss": 0.1227, "step": 3633 }, { "epoch": 0.5063749738730579, "grad_norm": 0.1250815987586975, "learning_rate": 5.497634504023634e-06, "loss": 0.0974, "step": 3634 }, { "epoch": 0.5065143175642722, "grad_norm": 0.32761847972869873, "learning_rate": 5.495289366876083e-06, "loss": 0.1377, "step": 3635 }, { "epoch": 0.5066536612554866, "grad_norm": 0.1541571468114853, "learning_rate": 5.492944119686544e-06, "loss": 0.15, "step": 3636 }, { "epoch": 0.506793004946701, "grad_norm": 0.1457635462284088, "learning_rate": 5.4905987629760724e-06, "loss": 0.1405, "step": 3637 }, { "epoch": 0.5069323486379154, "grad_norm": 0.15552185475826263, "learning_rate": 5.488253297265757e-06, "loss": 0.1144, "step": 3638 }, { "epoch": 0.5070716923291299, "grad_norm": 0.18211783468723297, "learning_rate": 5.485907723076708e-06, "loss": 0.1168, "step": 3639 }, { "epoch": 0.5072110360203442, "grad_norm": 0.18865874409675598, "learning_rate": 5.483562040930055e-06, "loss": 0.1376, "step": 3640 }, { "epoch": 0.5073503797115586, "grad_norm": 0.21661540865898132, "learning_rate": 5.481216251346956e-06, "loss": 0.1393, "step": 3641 }, { "epoch": 0.507489723402773, "grad_norm": 0.1794067621231079, "learning_rate": 5.478870354848593e-06, "loss": 0.1641, "step": 3642 }, { "epoch": 0.5076290670939874, "grad_norm": 0.31011295318603516, "learning_rate": 5.47652435195617e-06, "loss": 0.1485, "step": 3643 }, { "epoch": 0.5077684107852017, "grad_norm": 0.17551551759243011, "learning_rate": 5.4741782431909144e-06, "loss": 0.113, "step": 3644 }, { "epoch": 0.5079077544764161, "grad_norm": 0.15674062073230743, "learning_rate": 5.471832029074079e-06, "loss": 0.1417, "step": 3645 }, { "epoch": 0.5080470981676305, "grad_norm": 0.10473240911960602, "learning_rate": 5.469485710126938e-06, "loss": 0.1009, "step": 3646 }, { "epoch": 0.5081864418588449, "grad_norm": 0.1293591409921646, "learning_rate": 5.467139286870794e-06, "loss": 0.1442, "step": 3647 }, { "epoch": 0.5083257855500593, "grad_norm": 0.1919606626033783, "learning_rate": 5.464792759826962e-06, "loss": 0.1741, "step": 3648 }, { "epoch": 0.5084651292412736, "grad_norm": 0.21807852387428284, "learning_rate": 5.462446129516793e-06, "loss": 0.1914, "step": 3649 }, { "epoch": 0.508604472932488, "grad_norm": 0.0838003009557724, "learning_rate": 5.460099396461649e-06, "loss": 0.1026, "step": 3650 }, { "epoch": 0.5087438166237024, "grad_norm": 0.14652512967586517, "learning_rate": 5.457752561182924e-06, "loss": 0.1565, "step": 3651 }, { "epoch": 0.5088831603149168, "grad_norm": 0.1066829264163971, "learning_rate": 5.455405624202032e-06, "loss": 0.1015, "step": 3652 }, { "epoch": 0.5090225040061311, "grad_norm": 0.24790741503238678, "learning_rate": 5.453058586040406e-06, "loss": 0.1444, "step": 3653 }, { "epoch": 0.5091618476973455, "grad_norm": 0.15378080308437347, "learning_rate": 5.450711447219507e-06, "loss": 0.1366, "step": 3654 }, { "epoch": 0.5093011913885599, "grad_norm": 0.07865368574857712, "learning_rate": 5.448364208260813e-06, "loss": 0.1112, "step": 3655 }, { "epoch": 0.5094405350797743, "grad_norm": 0.16512690484523773, "learning_rate": 5.446016869685829e-06, "loss": 0.1006, "step": 3656 }, { "epoch": 0.5095798787709886, "grad_norm": 0.1418343037366867, "learning_rate": 5.44366943201608e-06, "loss": 0.1137, "step": 3657 }, { "epoch": 0.509719222462203, "grad_norm": 0.18789857625961304, "learning_rate": 5.441321895773112e-06, "loss": 0.1178, "step": 3658 }, { "epoch": 0.5098585661534174, "grad_norm": 0.14510446786880493, "learning_rate": 5.438974261478494e-06, "loss": 0.1278, "step": 3659 }, { "epoch": 0.5099979098446318, "grad_norm": 0.13630257546901703, "learning_rate": 5.436626529653817e-06, "loss": 0.1302, "step": 3660 }, { "epoch": 0.5101372535358462, "grad_norm": 0.16441750526428223, "learning_rate": 5.434278700820693e-06, "loss": 0.1343, "step": 3661 }, { "epoch": 0.5102765972270605, "grad_norm": 0.1695556789636612, "learning_rate": 5.431930775500756e-06, "loss": 0.1261, "step": 3662 }, { "epoch": 0.5104159409182749, "grad_norm": 0.2021275758743286, "learning_rate": 5.429582754215664e-06, "loss": 0.1199, "step": 3663 }, { "epoch": 0.5105552846094893, "grad_norm": 0.27075082063674927, "learning_rate": 5.4272346374870885e-06, "loss": 0.1752, "step": 3664 }, { "epoch": 0.5106946283007037, "grad_norm": 0.1248660758137703, "learning_rate": 5.424886425836734e-06, "loss": 0.1285, "step": 3665 }, { "epoch": 0.510833971991918, "grad_norm": 0.35762879252433777, "learning_rate": 5.4225381197863135e-06, "loss": 0.1341, "step": 3666 }, { "epoch": 0.5109733156831324, "grad_norm": 0.15909592807292938, "learning_rate": 5.420189719857571e-06, "loss": 0.1081, "step": 3667 }, { "epoch": 0.5111126593743468, "grad_norm": 0.1667933464050293, "learning_rate": 5.417841226572263e-06, "loss": 0.1379, "step": 3668 }, { "epoch": 0.5112520030655612, "grad_norm": 0.1232602447271347, "learning_rate": 5.415492640452177e-06, "loss": 0.1161, "step": 3669 }, { "epoch": 0.5113913467567756, "grad_norm": 0.19538095593452454, "learning_rate": 5.4131439620191115e-06, "loss": 0.0978, "step": 3670 }, { "epoch": 0.5115306904479899, "grad_norm": 0.36730054020881653, "learning_rate": 5.4107951917948896e-06, "loss": 0.1739, "step": 3671 }, { "epoch": 0.5116700341392043, "grad_norm": 0.11831597238779068, "learning_rate": 5.408446330301355e-06, "loss": 0.1214, "step": 3672 }, { "epoch": 0.5118093778304187, "grad_norm": 0.1002749651670456, "learning_rate": 5.40609737806037e-06, "loss": 0.1124, "step": 3673 }, { "epoch": 0.5119487215216331, "grad_norm": 0.17111124098300934, "learning_rate": 5.403748335593819e-06, "loss": 0.1125, "step": 3674 }, { "epoch": 0.5120880652128474, "grad_norm": 0.1410379707813263, "learning_rate": 5.4013992034236065e-06, "loss": 0.1077, "step": 3675 }, { "epoch": 0.5122274089040618, "grad_norm": 0.12108222395181656, "learning_rate": 5.3990499820716545e-06, "loss": 0.1278, "step": 3676 }, { "epoch": 0.5123667525952762, "grad_norm": 0.19666996598243713, "learning_rate": 5.396700672059907e-06, "loss": 0.1559, "step": 3677 }, { "epoch": 0.5125060962864906, "grad_norm": 0.11784319579601288, "learning_rate": 5.394351273910327e-06, "loss": 0.1238, "step": 3678 }, { "epoch": 0.5126454399777051, "grad_norm": 0.17994418740272522, "learning_rate": 5.392001788144897e-06, "loss": 0.1484, "step": 3679 }, { "epoch": 0.5127847836689194, "grad_norm": 0.1736072152853012, "learning_rate": 5.389652215285618e-06, "loss": 0.1318, "step": 3680 }, { "epoch": 0.5129241273601338, "grad_norm": 0.12157990783452988, "learning_rate": 5.387302555854516e-06, "loss": 0.1483, "step": 3681 }, { "epoch": 0.5130634710513482, "grad_norm": 0.1497231423854828, "learning_rate": 5.384952810373625e-06, "loss": 0.1233, "step": 3682 }, { "epoch": 0.5132028147425626, "grad_norm": 0.268271803855896, "learning_rate": 5.382602979365009e-06, "loss": 0.117, "step": 3683 }, { "epoch": 0.513342158433777, "grad_norm": 0.15818867087364197, "learning_rate": 5.380253063350747e-06, "loss": 0.1145, "step": 3684 }, { "epoch": 0.5134815021249913, "grad_norm": 0.22551776468753815, "learning_rate": 5.377903062852935e-06, "loss": 0.1273, "step": 3685 }, { "epoch": 0.5136208458162057, "grad_norm": 0.11910475790500641, "learning_rate": 5.375552978393691e-06, "loss": 0.126, "step": 3686 }, { "epoch": 0.5137601895074201, "grad_norm": 0.19955195486545563, "learning_rate": 5.373202810495149e-06, "loss": 0.143, "step": 3687 }, { "epoch": 0.5138995331986345, "grad_norm": 0.11285372823476791, "learning_rate": 5.370852559679461e-06, "loss": 0.1017, "step": 3688 }, { "epoch": 0.5140388768898488, "grad_norm": 0.15986360609531403, "learning_rate": 5.368502226468803e-06, "loss": 0.1229, "step": 3689 }, { "epoch": 0.5141782205810632, "grad_norm": 0.17528150975704193, "learning_rate": 5.366151811385363e-06, "loss": 0.1524, "step": 3690 }, { "epoch": 0.5143175642722776, "grad_norm": 0.15228579938411713, "learning_rate": 5.363801314951349e-06, "loss": 0.1546, "step": 3691 }, { "epoch": 0.514456907963492, "grad_norm": 0.14171916246414185, "learning_rate": 5.361450737688989e-06, "loss": 0.1259, "step": 3692 }, { "epoch": 0.5145962516547063, "grad_norm": 0.14685267210006714, "learning_rate": 5.359100080120527e-06, "loss": 0.1015, "step": 3693 }, { "epoch": 0.5147355953459207, "grad_norm": 0.16889320313930511, "learning_rate": 5.356749342768226e-06, "loss": 0.1675, "step": 3694 }, { "epoch": 0.5148749390371351, "grad_norm": 0.21658658981323242, "learning_rate": 5.354398526154365e-06, "loss": 0.1192, "step": 3695 }, { "epoch": 0.5150142827283495, "grad_norm": 0.16900816559791565, "learning_rate": 5.352047630801242e-06, "loss": 0.129, "step": 3696 }, { "epoch": 0.5151536264195639, "grad_norm": 0.26329362392425537, "learning_rate": 5.349696657231176e-06, "loss": 0.1434, "step": 3697 }, { "epoch": 0.5152929701107782, "grad_norm": 0.2353244423866272, "learning_rate": 5.347345605966493e-06, "loss": 0.1685, "step": 3698 }, { "epoch": 0.5154323138019926, "grad_norm": 0.14986088871955872, "learning_rate": 5.344994477529548e-06, "loss": 0.1094, "step": 3699 }, { "epoch": 0.515571657493207, "grad_norm": 0.2544260025024414, "learning_rate": 5.342643272442706e-06, "loss": 0.145, "step": 3700 }, { "epoch": 0.5157110011844214, "grad_norm": 0.19314733147621155, "learning_rate": 5.340291991228352e-06, "loss": 0.1522, "step": 3701 }, { "epoch": 0.5158503448756357, "grad_norm": 0.11180569976568222, "learning_rate": 5.337940634408888e-06, "loss": 0.1108, "step": 3702 }, { "epoch": 0.5159896885668501, "grad_norm": 0.16755364835262299, "learning_rate": 5.335589202506727e-06, "loss": 0.1131, "step": 3703 }, { "epoch": 0.5161290322580645, "grad_norm": 0.2741885483264923, "learning_rate": 5.333237696044309e-06, "loss": 0.1617, "step": 3704 }, { "epoch": 0.5162683759492789, "grad_norm": 0.13478878140449524, "learning_rate": 5.330886115544081e-06, "loss": 0.1287, "step": 3705 }, { "epoch": 0.5164077196404933, "grad_norm": 0.29998674988746643, "learning_rate": 5.328534461528515e-06, "loss": 0.1166, "step": 3706 }, { "epoch": 0.5165470633317076, "grad_norm": 0.5842090249061584, "learning_rate": 5.326182734520091e-06, "loss": 0.1669, "step": 3707 }, { "epoch": 0.516686407022922, "grad_norm": 0.36157912015914917, "learning_rate": 5.32383093504131e-06, "loss": 0.184, "step": 3708 }, { "epoch": 0.5168257507141364, "grad_norm": 0.3248327076435089, "learning_rate": 5.32147906361469e-06, "loss": 0.1388, "step": 3709 }, { "epoch": 0.5169650944053508, "grad_norm": 0.27788349986076355, "learning_rate": 5.31912712076276e-06, "loss": 0.152, "step": 3710 }, { "epoch": 0.5171044380965651, "grad_norm": 0.2275935411453247, "learning_rate": 5.316775107008069e-06, "loss": 0.1255, "step": 3711 }, { "epoch": 0.5172437817877795, "grad_norm": 0.27094370126724243, "learning_rate": 5.314423022873181e-06, "loss": 0.1291, "step": 3712 }, { "epoch": 0.5173831254789939, "grad_norm": 0.22037732601165771, "learning_rate": 5.312070868880678e-06, "loss": 0.1261, "step": 3713 }, { "epoch": 0.5175224691702083, "grad_norm": 0.1938849687576294, "learning_rate": 5.3097186455531506e-06, "loss": 0.1429, "step": 3714 }, { "epoch": 0.5176618128614227, "grad_norm": 0.21306638419628143, "learning_rate": 5.307366353413214e-06, "loss": 0.123, "step": 3715 }, { "epoch": 0.517801156552637, "grad_norm": 0.1782802790403366, "learning_rate": 5.305013992983487e-06, "loss": 0.137, "step": 3716 }, { "epoch": 0.5179405002438514, "grad_norm": 0.20963704586029053, "learning_rate": 5.302661564786617e-06, "loss": 0.1314, "step": 3717 }, { "epoch": 0.5180798439350658, "grad_norm": 0.14569132030010223, "learning_rate": 5.300309069345257e-06, "loss": 0.114, "step": 3718 }, { "epoch": 0.5182191876262803, "grad_norm": 0.15275561809539795, "learning_rate": 5.297956507182077e-06, "loss": 0.1094, "step": 3719 }, { "epoch": 0.5183585313174947, "grad_norm": 0.2822737991809845, "learning_rate": 5.295603878819764e-06, "loss": 0.154, "step": 3720 }, { "epoch": 0.518497875008709, "grad_norm": 0.19834014773368835, "learning_rate": 5.2932511847810175e-06, "loss": 0.1405, "step": 3721 }, { "epoch": 0.5186372186999234, "grad_norm": 0.20737771689891815, "learning_rate": 5.290898425588553e-06, "loss": 0.1317, "step": 3722 }, { "epoch": 0.5187765623911378, "grad_norm": 0.26307839155197144, "learning_rate": 5.2885456017651e-06, "loss": 0.1402, "step": 3723 }, { "epoch": 0.5189159060823522, "grad_norm": 0.15004487335681915, "learning_rate": 5.286192713833402e-06, "loss": 0.1617, "step": 3724 }, { "epoch": 0.5190552497735665, "grad_norm": 0.1632804423570633, "learning_rate": 5.283839762316217e-06, "loss": 0.1189, "step": 3725 }, { "epoch": 0.5191945934647809, "grad_norm": 0.14773696660995483, "learning_rate": 5.281486747736316e-06, "loss": 0.1161, "step": 3726 }, { "epoch": 0.5193339371559953, "grad_norm": 0.17850619554519653, "learning_rate": 5.279133670616488e-06, "loss": 0.1475, "step": 3727 }, { "epoch": 0.5194732808472097, "grad_norm": 0.13858482241630554, "learning_rate": 5.276780531479528e-06, "loss": 0.1512, "step": 3728 }, { "epoch": 0.519612624538424, "grad_norm": 0.15704113245010376, "learning_rate": 5.274427330848257e-06, "loss": 0.1444, "step": 3729 }, { "epoch": 0.5197519682296384, "grad_norm": 0.12237545847892761, "learning_rate": 5.2720740692454944e-06, "loss": 0.1229, "step": 3730 }, { "epoch": 0.5198913119208528, "grad_norm": 0.1584734469652176, "learning_rate": 5.269720747194088e-06, "loss": 0.1477, "step": 3731 }, { "epoch": 0.5200306556120672, "grad_norm": 0.3762796223163605, "learning_rate": 5.267367365216887e-06, "loss": 0.1773, "step": 3732 }, { "epoch": 0.5201699993032816, "grad_norm": 0.19485263526439667, "learning_rate": 5.265013923836763e-06, "loss": 0.1447, "step": 3733 }, { "epoch": 0.5203093429944959, "grad_norm": 0.2330644279718399, "learning_rate": 5.262660423576595e-06, "loss": 0.1277, "step": 3734 }, { "epoch": 0.5204486866857103, "grad_norm": 0.09526348114013672, "learning_rate": 5.260306864959278e-06, "loss": 0.1024, "step": 3735 }, { "epoch": 0.5205880303769247, "grad_norm": 0.25163349509239197, "learning_rate": 5.25795324850772e-06, "loss": 0.1305, "step": 3736 }, { "epoch": 0.5207273740681391, "grad_norm": 0.20404841005802155, "learning_rate": 5.255599574744836e-06, "loss": 0.1464, "step": 3737 }, { "epoch": 0.5208667177593534, "grad_norm": 0.23481470346450806, "learning_rate": 5.253245844193564e-06, "loss": 0.1458, "step": 3738 }, { "epoch": 0.5210060614505678, "grad_norm": 0.1185222715139389, "learning_rate": 5.250892057376848e-06, "loss": 0.1116, "step": 3739 }, { "epoch": 0.5211454051417822, "grad_norm": 0.16359864175319672, "learning_rate": 5.248538214817642e-06, "loss": 0.1298, "step": 3740 }, { "epoch": 0.5212847488329966, "grad_norm": 0.20110216736793518, "learning_rate": 5.246184317038922e-06, "loss": 0.1421, "step": 3741 }, { "epoch": 0.521424092524211, "grad_norm": 0.24751663208007812, "learning_rate": 5.243830364563665e-06, "loss": 0.1249, "step": 3742 }, { "epoch": 0.5215634362154253, "grad_norm": 0.14255505800247192, "learning_rate": 5.241476357914869e-06, "loss": 0.1111, "step": 3743 }, { "epoch": 0.5217027799066397, "grad_norm": 0.14497917890548706, "learning_rate": 5.239122297615539e-06, "loss": 0.123, "step": 3744 }, { "epoch": 0.5218421235978541, "grad_norm": 0.2126566469669342, "learning_rate": 5.236768184188693e-06, "loss": 0.1631, "step": 3745 }, { "epoch": 0.5219814672890685, "grad_norm": 0.21692876517772675, "learning_rate": 5.234414018157361e-06, "loss": 0.1408, "step": 3746 }, { "epoch": 0.5221208109802828, "grad_norm": 0.17364074289798737, "learning_rate": 5.232059800044589e-06, "loss": 0.1469, "step": 3747 }, { "epoch": 0.5222601546714972, "grad_norm": 0.17251156270503998, "learning_rate": 5.229705530373424e-06, "loss": 0.1465, "step": 3748 }, { "epoch": 0.5223994983627116, "grad_norm": 0.1272074580192566, "learning_rate": 5.2273512096669364e-06, "loss": 0.1235, "step": 3749 }, { "epoch": 0.522538842053926, "grad_norm": 0.16411764919757843, "learning_rate": 5.2249968384482e-06, "loss": 0.1252, "step": 3750 }, { "epoch": 0.5226781857451404, "grad_norm": 0.20275825262069702, "learning_rate": 5.222642417240305e-06, "loss": 0.1497, "step": 3751 }, { "epoch": 0.5228175294363547, "grad_norm": 0.16925044357776642, "learning_rate": 5.220287946566347e-06, "loss": 0.1487, "step": 3752 }, { "epoch": 0.5229568731275691, "grad_norm": 0.1443253755569458, "learning_rate": 5.2179334269494345e-06, "loss": 0.1209, "step": 3753 }, { "epoch": 0.5230962168187835, "grad_norm": 0.19272688031196594, "learning_rate": 5.215578858912691e-06, "loss": 0.1413, "step": 3754 }, { "epoch": 0.5232355605099979, "grad_norm": 0.2626553773880005, "learning_rate": 5.213224242979247e-06, "loss": 0.1324, "step": 3755 }, { "epoch": 0.5233749042012122, "grad_norm": 0.16824652254581451, "learning_rate": 5.2108695796722446e-06, "loss": 0.1225, "step": 3756 }, { "epoch": 0.5235142478924266, "grad_norm": 0.13574832677841187, "learning_rate": 5.208514869514835e-06, "loss": 0.1179, "step": 3757 }, { "epoch": 0.523653591583641, "grad_norm": 0.15064142644405365, "learning_rate": 5.206160113030182e-06, "loss": 0.1336, "step": 3758 }, { "epoch": 0.5237929352748554, "grad_norm": 0.39461153745651245, "learning_rate": 5.203805310741459e-06, "loss": 0.1415, "step": 3759 }, { "epoch": 0.5239322789660699, "grad_norm": 0.21218493580818176, "learning_rate": 5.201450463171849e-06, "loss": 0.1393, "step": 3760 }, { "epoch": 0.5240716226572842, "grad_norm": 0.21658079326152802, "learning_rate": 5.199095570844546e-06, "loss": 0.1198, "step": 3761 }, { "epoch": 0.5242109663484986, "grad_norm": 0.1673465520143509, "learning_rate": 5.19674063428275e-06, "loss": 0.1316, "step": 3762 }, { "epoch": 0.524350310039713, "grad_norm": 0.19912612438201904, "learning_rate": 5.1943856540096795e-06, "loss": 0.1237, "step": 3763 }, { "epoch": 0.5244896537309274, "grad_norm": 0.25126025080680847, "learning_rate": 5.192030630548552e-06, "loss": 0.1769, "step": 3764 }, { "epoch": 0.5246289974221418, "grad_norm": 0.14055998623371124, "learning_rate": 5.1896755644226046e-06, "loss": 0.1267, "step": 3765 }, { "epoch": 0.5247683411133561, "grad_norm": 0.1891988217830658, "learning_rate": 5.1873204561550764e-06, "loss": 0.1095, "step": 3766 }, { "epoch": 0.5249076848045705, "grad_norm": 0.17774802446365356, "learning_rate": 5.18496530626922e-06, "loss": 0.1146, "step": 3767 }, { "epoch": 0.5250470284957849, "grad_norm": 0.1470150649547577, "learning_rate": 5.182610115288296e-06, "loss": 0.1261, "step": 3768 }, { "epoch": 0.5251863721869993, "grad_norm": 0.15520530939102173, "learning_rate": 5.180254883735571e-06, "loss": 0.1186, "step": 3769 }, { "epoch": 0.5253257158782136, "grad_norm": 0.17716871201992035, "learning_rate": 5.1778996121343274e-06, "loss": 0.1227, "step": 3770 }, { "epoch": 0.525465059569428, "grad_norm": 0.19692550599575043, "learning_rate": 5.175544301007852e-06, "loss": 0.1556, "step": 3771 }, { "epoch": 0.5256044032606424, "grad_norm": 0.2153237909078598, "learning_rate": 5.173188950879441e-06, "loss": 0.1426, "step": 3772 }, { "epoch": 0.5257437469518568, "grad_norm": 0.13195271790027618, "learning_rate": 5.170833562272398e-06, "loss": 0.1033, "step": 3773 }, { "epoch": 0.5258830906430711, "grad_norm": 0.13673460483551025, "learning_rate": 5.168478135710038e-06, "loss": 0.1354, "step": 3774 }, { "epoch": 0.5260224343342855, "grad_norm": 0.18850401043891907, "learning_rate": 5.166122671715683e-06, "loss": 0.1699, "step": 3775 }, { "epoch": 0.5261617780254999, "grad_norm": 0.21620166301727295, "learning_rate": 5.163767170812663e-06, "loss": 0.1497, "step": 3776 }, { "epoch": 0.5263011217167143, "grad_norm": 0.36943623423576355, "learning_rate": 5.1614116335243155e-06, "loss": 0.1569, "step": 3777 }, { "epoch": 0.5264404654079287, "grad_norm": 0.1735323965549469, "learning_rate": 5.1590560603739885e-06, "loss": 0.1359, "step": 3778 }, { "epoch": 0.526579809099143, "grad_norm": 0.18517078459262848, "learning_rate": 5.156700451885037e-06, "loss": 0.1555, "step": 3779 }, { "epoch": 0.5267191527903574, "grad_norm": 0.14274795353412628, "learning_rate": 5.154344808580821e-06, "loss": 0.1128, "step": 3780 }, { "epoch": 0.5268584964815718, "grad_norm": 0.1602649837732315, "learning_rate": 5.151989130984715e-06, "loss": 0.1705, "step": 3781 }, { "epoch": 0.5269978401727862, "grad_norm": 0.17927470803260803, "learning_rate": 5.149633419620092e-06, "loss": 0.1421, "step": 3782 }, { "epoch": 0.5271371838640005, "grad_norm": 0.13839159905910492, "learning_rate": 5.147277675010339e-06, "loss": 0.1136, "step": 3783 }, { "epoch": 0.5272765275552149, "grad_norm": 0.2395663857460022, "learning_rate": 5.144921897678851e-06, "loss": 0.1237, "step": 3784 }, { "epoch": 0.5274158712464293, "grad_norm": 0.15071967244148254, "learning_rate": 5.142566088149024e-06, "loss": 0.1549, "step": 3785 }, { "epoch": 0.5275552149376437, "grad_norm": 0.12094993889331818, "learning_rate": 5.1402102469442686e-06, "loss": 0.1328, "step": 3786 }, { "epoch": 0.5276945586288581, "grad_norm": 0.14788421988487244, "learning_rate": 5.137854374587996e-06, "loss": 0.1145, "step": 3787 }, { "epoch": 0.5278339023200724, "grad_norm": 0.12381824851036072, "learning_rate": 5.135498471603629e-06, "loss": 0.106, "step": 3788 }, { "epoch": 0.5279732460112868, "grad_norm": 0.1521851271390915, "learning_rate": 5.133142538514596e-06, "loss": 0.1342, "step": 3789 }, { "epoch": 0.5281125897025012, "grad_norm": 0.1798226684331894, "learning_rate": 5.130786575844329e-06, "loss": 0.1289, "step": 3790 }, { "epoch": 0.5282519333937156, "grad_norm": 0.12638941407203674, "learning_rate": 5.128430584116273e-06, "loss": 0.1149, "step": 3791 }, { "epoch": 0.52839127708493, "grad_norm": 0.10833004862070084, "learning_rate": 5.126074563853872e-06, "loss": 0.1102, "step": 3792 }, { "epoch": 0.5285306207761443, "grad_norm": 0.11556274443864822, "learning_rate": 5.123718515580581e-06, "loss": 0.1098, "step": 3793 }, { "epoch": 0.5286699644673587, "grad_norm": 0.11583209037780762, "learning_rate": 5.1213624398198606e-06, "loss": 0.1205, "step": 3794 }, { "epoch": 0.5288093081585731, "grad_norm": 0.1470421999692917, "learning_rate": 5.119006337095178e-06, "loss": 0.129, "step": 3795 }, { "epoch": 0.5289486518497875, "grad_norm": 0.16855667531490326, "learning_rate": 5.1166502079300015e-06, "loss": 0.182, "step": 3796 }, { "epoch": 0.5290879955410018, "grad_norm": 0.3016617000102997, "learning_rate": 5.114294052847814e-06, "loss": 0.1898, "step": 3797 }, { "epoch": 0.5292273392322162, "grad_norm": 0.18285351991653442, "learning_rate": 5.111937872372097e-06, "loss": 0.1677, "step": 3798 }, { "epoch": 0.5293666829234306, "grad_norm": 0.17475683987140656, "learning_rate": 5.109581667026341e-06, "loss": 0.1648, "step": 3799 }, { "epoch": 0.5295060266146451, "grad_norm": 0.22880646586418152, "learning_rate": 5.107225437334039e-06, "loss": 0.1463, "step": 3800 }, { "epoch": 0.5296453703058595, "grad_norm": 0.184454083442688, "learning_rate": 5.1048691838186935e-06, "loss": 0.1201, "step": 3801 }, { "epoch": 0.5297847139970738, "grad_norm": 0.14215031266212463, "learning_rate": 5.102512907003812e-06, "loss": 0.1314, "step": 3802 }, { "epoch": 0.5299240576882882, "grad_norm": 0.1358967125415802, "learning_rate": 5.100156607412899e-06, "loss": 0.137, "step": 3803 }, { "epoch": 0.5300634013795026, "grad_norm": 0.18068064749240875, "learning_rate": 5.097800285569476e-06, "loss": 0.1369, "step": 3804 }, { "epoch": 0.530202745070717, "grad_norm": 0.11567666381597519, "learning_rate": 5.095443941997062e-06, "loss": 0.1146, "step": 3805 }, { "epoch": 0.5303420887619313, "grad_norm": 0.12456755340099335, "learning_rate": 5.093087577219183e-06, "loss": 0.1191, "step": 3806 }, { "epoch": 0.5304814324531457, "grad_norm": 0.11840630322694778, "learning_rate": 5.090731191759371e-06, "loss": 0.1026, "step": 3807 }, { "epoch": 0.5306207761443601, "grad_norm": 0.14085406064987183, "learning_rate": 5.088374786141159e-06, "loss": 0.1091, "step": 3808 }, { "epoch": 0.5307601198355745, "grad_norm": 0.11465581506490707, "learning_rate": 5.086018360888087e-06, "loss": 0.1232, "step": 3809 }, { "epoch": 0.5308994635267889, "grad_norm": 0.1794787496328354, "learning_rate": 5.083661916523699e-06, "loss": 0.1448, "step": 3810 }, { "epoch": 0.5310388072180032, "grad_norm": 0.22352075576782227, "learning_rate": 5.081305453571543e-06, "loss": 0.1252, "step": 3811 }, { "epoch": 0.5311781509092176, "grad_norm": 0.17990805208683014, "learning_rate": 5.07894897255517e-06, "loss": 0.1393, "step": 3812 }, { "epoch": 0.531317494600432, "grad_norm": 0.21350662410259247, "learning_rate": 5.076592473998141e-06, "loss": 0.1139, "step": 3813 }, { "epoch": 0.5314568382916464, "grad_norm": 0.10078813135623932, "learning_rate": 5.07423595842401e-06, "loss": 0.121, "step": 3814 }, { "epoch": 0.5315961819828607, "grad_norm": 0.1336137354373932, "learning_rate": 5.071879426356345e-06, "loss": 0.1056, "step": 3815 }, { "epoch": 0.5317355256740751, "grad_norm": 0.17094941437244415, "learning_rate": 5.069522878318712e-06, "loss": 0.1607, "step": 3816 }, { "epoch": 0.5318748693652895, "grad_norm": 0.16973508894443512, "learning_rate": 5.067166314834684e-06, "loss": 0.1503, "step": 3817 }, { "epoch": 0.5320142130565039, "grad_norm": 0.2668301463127136, "learning_rate": 5.064809736427835e-06, "loss": 0.1972, "step": 3818 }, { "epoch": 0.5321535567477182, "grad_norm": 0.26225021481513977, "learning_rate": 5.062453143621739e-06, "loss": 0.1473, "step": 3819 }, { "epoch": 0.5322929004389326, "grad_norm": 0.1554911732673645, "learning_rate": 5.060096536939982e-06, "loss": 0.1346, "step": 3820 }, { "epoch": 0.532432244130147, "grad_norm": 0.1581868678331375, "learning_rate": 5.057739916906147e-06, "loss": 0.1233, "step": 3821 }, { "epoch": 0.5325715878213614, "grad_norm": 0.25016549229621887, "learning_rate": 5.05538328404382e-06, "loss": 0.1384, "step": 3822 }, { "epoch": 0.5327109315125758, "grad_norm": 0.19349735975265503, "learning_rate": 5.053026638876591e-06, "loss": 0.1405, "step": 3823 }, { "epoch": 0.5328502752037901, "grad_norm": 0.24018490314483643, "learning_rate": 5.050669981928056e-06, "loss": 0.1621, "step": 3824 }, { "epoch": 0.5329896188950045, "grad_norm": 0.18488220870494843, "learning_rate": 5.048313313721806e-06, "loss": 0.1395, "step": 3825 }, { "epoch": 0.5331289625862189, "grad_norm": 0.25427335500717163, "learning_rate": 5.04595663478144e-06, "loss": 0.1772, "step": 3826 }, { "epoch": 0.5332683062774333, "grad_norm": 0.26202288269996643, "learning_rate": 5.0435999456305605e-06, "loss": 0.1327, "step": 3827 }, { "epoch": 0.5334076499686476, "grad_norm": 0.1899571716785431, "learning_rate": 5.0412432467927674e-06, "loss": 0.1264, "step": 3828 }, { "epoch": 0.533546993659862, "grad_norm": 0.1859656274318695, "learning_rate": 5.038886538791668e-06, "loss": 0.1147, "step": 3829 }, { "epoch": 0.5336863373510764, "grad_norm": 0.1489161252975464, "learning_rate": 5.036529822150865e-06, "loss": 0.1259, "step": 3830 }, { "epoch": 0.5338256810422908, "grad_norm": 0.08756230026483536, "learning_rate": 5.034173097393973e-06, "loss": 0.0964, "step": 3831 }, { "epoch": 0.5339650247335052, "grad_norm": 0.17495886981487274, "learning_rate": 5.031816365044595e-06, "loss": 0.1154, "step": 3832 }, { "epoch": 0.5341043684247195, "grad_norm": 0.26278769969940186, "learning_rate": 5.02945962562635e-06, "loss": 0.1344, "step": 3833 }, { "epoch": 0.5342437121159339, "grad_norm": 0.1720660924911499, "learning_rate": 5.027102879662847e-06, "loss": 0.119, "step": 3834 }, { "epoch": 0.5343830558071483, "grad_norm": 0.19000329077243805, "learning_rate": 5.024746127677703e-06, "loss": 0.1564, "step": 3835 }, { "epoch": 0.5345223994983627, "grad_norm": 0.20102541148662567, "learning_rate": 5.022389370194536e-06, "loss": 0.154, "step": 3836 }, { "epoch": 0.534661743189577, "grad_norm": 0.12187948822975159, "learning_rate": 5.020032607736961e-06, "loss": 0.107, "step": 3837 }, { "epoch": 0.5348010868807914, "grad_norm": 0.2614387273788452, "learning_rate": 5.017675840828597e-06, "loss": 0.1152, "step": 3838 }, { "epoch": 0.5349404305720058, "grad_norm": 0.2379898875951767, "learning_rate": 5.015319069993066e-06, "loss": 0.1668, "step": 3839 }, { "epoch": 0.5350797742632203, "grad_norm": 0.10142805427312851, "learning_rate": 5.012962295753988e-06, "loss": 0.1119, "step": 3840 }, { "epoch": 0.5352191179544347, "grad_norm": 0.16390401124954224, "learning_rate": 5.010605518634982e-06, "loss": 0.1151, "step": 3841 }, { "epoch": 0.535358461645649, "grad_norm": 0.34505385160446167, "learning_rate": 5.008248739159674e-06, "loss": 0.1915, "step": 3842 }, { "epoch": 0.5354978053368634, "grad_norm": 0.2743937075138092, "learning_rate": 5.005891957851683e-06, "loss": 0.142, "step": 3843 }, { "epoch": 0.5356371490280778, "grad_norm": 0.21814998984336853, "learning_rate": 5.003535175234633e-06, "loss": 0.1257, "step": 3844 }, { "epoch": 0.5357764927192922, "grad_norm": 0.1282457858324051, "learning_rate": 5.001178391832149e-06, "loss": 0.1234, "step": 3845 }, { "epoch": 0.5359158364105066, "grad_norm": 0.12617486715316772, "learning_rate": 4.998821608167853e-06, "loss": 0.1168, "step": 3846 }, { "epoch": 0.5360551801017209, "grad_norm": 0.15371054410934448, "learning_rate": 4.996464824765369e-06, "loss": 0.1215, "step": 3847 }, { "epoch": 0.5361945237929353, "grad_norm": 0.7270401120185852, "learning_rate": 4.994108042148318e-06, "loss": 0.1623, "step": 3848 }, { "epoch": 0.5363338674841497, "grad_norm": 0.14382587373256683, "learning_rate": 4.991751260840328e-06, "loss": 0.1055, "step": 3849 }, { "epoch": 0.5364732111753641, "grad_norm": 0.1614401489496231, "learning_rate": 4.9893944813650185e-06, "loss": 0.125, "step": 3850 }, { "epoch": 0.5366125548665784, "grad_norm": 0.18571534752845764, "learning_rate": 4.987037704246015e-06, "loss": 0.0979, "step": 3851 }, { "epoch": 0.5367518985577928, "grad_norm": 0.34341058135032654, "learning_rate": 4.984680930006936e-06, "loss": 0.1189, "step": 3852 }, { "epoch": 0.5368912422490072, "grad_norm": 0.10460875183343887, "learning_rate": 4.982324159171404e-06, "loss": 0.1091, "step": 3853 }, { "epoch": 0.5370305859402216, "grad_norm": 0.2197696566581726, "learning_rate": 4.979967392263041e-06, "loss": 0.1357, "step": 3854 }, { "epoch": 0.537169929631436, "grad_norm": 0.20890924334526062, "learning_rate": 4.977610629805465e-06, "loss": 0.1145, "step": 3855 }, { "epoch": 0.5373092733226503, "grad_norm": 0.20825611054897308, "learning_rate": 4.975253872322297e-06, "loss": 0.1547, "step": 3856 }, { "epoch": 0.5374486170138647, "grad_norm": 0.27047377824783325, "learning_rate": 4.972897120337155e-06, "loss": 0.1528, "step": 3857 }, { "epoch": 0.5375879607050791, "grad_norm": 0.36444684863090515, "learning_rate": 4.970540374373653e-06, "loss": 0.1551, "step": 3858 }, { "epoch": 0.5377273043962935, "grad_norm": 0.21814538538455963, "learning_rate": 4.9681836349554064e-06, "loss": 0.1524, "step": 3859 }, { "epoch": 0.5378666480875078, "grad_norm": 0.14351637661457062, "learning_rate": 4.965826902606029e-06, "loss": 0.1185, "step": 3860 }, { "epoch": 0.5380059917787222, "grad_norm": 0.23828169703483582, "learning_rate": 4.963470177849135e-06, "loss": 0.1364, "step": 3861 }, { "epoch": 0.5381453354699366, "grad_norm": 0.282906174659729, "learning_rate": 4.961113461208335e-06, "loss": 0.1337, "step": 3862 }, { "epoch": 0.538284679161151, "grad_norm": 0.1592186987400055, "learning_rate": 4.958756753207234e-06, "loss": 0.1419, "step": 3863 }, { "epoch": 0.5384240228523653, "grad_norm": 0.15406613051891327, "learning_rate": 4.956400054369441e-06, "loss": 0.1372, "step": 3864 }, { "epoch": 0.5385633665435797, "grad_norm": 0.08143582195043564, "learning_rate": 4.954043365218561e-06, "loss": 0.0961, "step": 3865 }, { "epoch": 0.5387027102347941, "grad_norm": 0.22473855316638947, "learning_rate": 4.951686686278195e-06, "loss": 0.1457, "step": 3866 }, { "epoch": 0.5388420539260085, "grad_norm": 0.22886136174201965, "learning_rate": 4.949330018071947e-06, "loss": 0.1436, "step": 3867 }, { "epoch": 0.5389813976172229, "grad_norm": 0.16726398468017578, "learning_rate": 4.946973361123411e-06, "loss": 0.1193, "step": 3868 }, { "epoch": 0.5391207413084372, "grad_norm": 0.2828342914581299, "learning_rate": 4.9446167159561814e-06, "loss": 0.1464, "step": 3869 }, { "epoch": 0.5392600849996516, "grad_norm": 0.11433780938386917, "learning_rate": 4.942260083093854e-06, "loss": 0.15, "step": 3870 }, { "epoch": 0.539399428690866, "grad_norm": 0.1220400482416153, "learning_rate": 4.939903463060018e-06, "loss": 0.1336, "step": 3871 }, { "epoch": 0.5395387723820804, "grad_norm": 0.15540926158428192, "learning_rate": 4.937546856378263e-06, "loss": 0.1259, "step": 3872 }, { "epoch": 0.5396781160732947, "grad_norm": 0.17916008830070496, "learning_rate": 4.935190263572168e-06, "loss": 0.1183, "step": 3873 }, { "epoch": 0.5398174597645091, "grad_norm": 0.1937001794576645, "learning_rate": 4.932833685165318e-06, "loss": 0.1489, "step": 3874 }, { "epoch": 0.5399568034557235, "grad_norm": 0.1507619321346283, "learning_rate": 4.930477121681289e-06, "loss": 0.0959, "step": 3875 }, { "epoch": 0.5400961471469379, "grad_norm": 0.09793160855770111, "learning_rate": 4.9281205736436555e-06, "loss": 0.0975, "step": 3876 }, { "epoch": 0.5402354908381523, "grad_norm": 0.20604674518108368, "learning_rate": 4.925764041575991e-06, "loss": 0.1416, "step": 3877 }, { "epoch": 0.5403748345293666, "grad_norm": 0.20914925634860992, "learning_rate": 4.9234075260018615e-06, "loss": 0.1497, "step": 3878 }, { "epoch": 0.540514178220581, "grad_norm": 0.352590948343277, "learning_rate": 4.921051027444831e-06, "loss": 0.1763, "step": 3879 }, { "epoch": 0.5406535219117955, "grad_norm": 0.14155764877796173, "learning_rate": 4.918694546428458e-06, "loss": 0.1517, "step": 3880 }, { "epoch": 0.5407928656030099, "grad_norm": 0.14029797911643982, "learning_rate": 4.916338083476303e-06, "loss": 0.1365, "step": 3881 }, { "epoch": 0.5409322092942243, "grad_norm": 0.1500246822834015, "learning_rate": 4.913981639111914e-06, "loss": 0.1243, "step": 3882 }, { "epoch": 0.5410715529854386, "grad_norm": 0.26562225818634033, "learning_rate": 4.9116252138588435e-06, "loss": 0.1471, "step": 3883 }, { "epoch": 0.541210896676653, "grad_norm": 0.2636946141719818, "learning_rate": 4.90926880824063e-06, "loss": 0.1798, "step": 3884 }, { "epoch": 0.5413502403678674, "grad_norm": 0.175410658121109, "learning_rate": 4.906912422780818e-06, "loss": 0.1163, "step": 3885 }, { "epoch": 0.5414895840590818, "grad_norm": 0.3152373731136322, "learning_rate": 4.904556058002939e-06, "loss": 0.1486, "step": 3886 }, { "epoch": 0.5416289277502961, "grad_norm": 0.27945905923843384, "learning_rate": 4.902199714430525e-06, "loss": 0.1248, "step": 3887 }, { "epoch": 0.5417682714415105, "grad_norm": 0.2380257397890091, "learning_rate": 4.899843392587104e-06, "loss": 0.1276, "step": 3888 }, { "epoch": 0.5419076151327249, "grad_norm": 0.1512509286403656, "learning_rate": 4.8974870929961915e-06, "loss": 0.1293, "step": 3889 }, { "epoch": 0.5420469588239393, "grad_norm": 0.15134720504283905, "learning_rate": 4.895130816181307e-06, "loss": 0.1237, "step": 3890 }, { "epoch": 0.5421863025151537, "grad_norm": 0.17498035728931427, "learning_rate": 4.8927745626659625e-06, "loss": 0.1326, "step": 3891 }, { "epoch": 0.542325646206368, "grad_norm": 0.20290887355804443, "learning_rate": 4.89041833297366e-06, "loss": 0.1231, "step": 3892 }, { "epoch": 0.5424649898975824, "grad_norm": 0.18876983225345612, "learning_rate": 4.888062127627904e-06, "loss": 0.1542, "step": 3893 }, { "epoch": 0.5426043335887968, "grad_norm": 0.20150183141231537, "learning_rate": 4.885705947152187e-06, "loss": 0.129, "step": 3894 }, { "epoch": 0.5427436772800112, "grad_norm": 0.17139644920825958, "learning_rate": 4.883349792069999e-06, "loss": 0.1281, "step": 3895 }, { "epoch": 0.5428830209712255, "grad_norm": 0.1023486778140068, "learning_rate": 4.880993662904824e-06, "loss": 0.1104, "step": 3896 }, { "epoch": 0.5430223646624399, "grad_norm": 0.18921515345573425, "learning_rate": 4.87863756018014e-06, "loss": 0.134, "step": 3897 }, { "epoch": 0.5431617083536543, "grad_norm": 0.515184223651886, "learning_rate": 4.87628148441942e-06, "loss": 0.1314, "step": 3898 }, { "epoch": 0.5433010520448687, "grad_norm": 0.10511498898267746, "learning_rate": 4.8739254361461305e-06, "loss": 0.107, "step": 3899 }, { "epoch": 0.543440395736083, "grad_norm": 0.23576541244983673, "learning_rate": 4.871569415883729e-06, "loss": 0.1614, "step": 3900 }, { "epoch": 0.5435797394272974, "grad_norm": 0.16629931330680847, "learning_rate": 4.869213424155671e-06, "loss": 0.1385, "step": 3901 }, { "epoch": 0.5437190831185118, "grad_norm": 0.2340085506439209, "learning_rate": 4.8668574614854055e-06, "loss": 0.1232, "step": 3902 }, { "epoch": 0.5438584268097262, "grad_norm": 0.1698063611984253, "learning_rate": 4.864501528396371e-06, "loss": 0.1201, "step": 3903 }, { "epoch": 0.5439977705009406, "grad_norm": 0.08110645413398743, "learning_rate": 4.862145625412006e-06, "loss": 0.1054, "step": 3904 }, { "epoch": 0.5441371141921549, "grad_norm": 0.16881084442138672, "learning_rate": 4.859789753055734e-06, "loss": 0.1089, "step": 3905 }, { "epoch": 0.5442764578833693, "grad_norm": 0.17103759944438934, "learning_rate": 4.857433911850977e-06, "loss": 0.1304, "step": 3906 }, { "epoch": 0.5444158015745837, "grad_norm": 0.20794160664081573, "learning_rate": 4.8550781023211516e-06, "loss": 0.1751, "step": 3907 }, { "epoch": 0.5445551452657981, "grad_norm": 0.20043644309043884, "learning_rate": 4.852722324989661e-06, "loss": 0.1566, "step": 3908 }, { "epoch": 0.5446944889570124, "grad_norm": 0.14523786306381226, "learning_rate": 4.85036658037991e-06, "loss": 0.1428, "step": 3909 }, { "epoch": 0.5448338326482268, "grad_norm": 0.14379358291625977, "learning_rate": 4.848010869015288e-06, "loss": 0.1448, "step": 3910 }, { "epoch": 0.5449731763394412, "grad_norm": 0.20370900630950928, "learning_rate": 4.84565519141918e-06, "loss": 0.1353, "step": 3911 }, { "epoch": 0.5451125200306556, "grad_norm": 0.14837029576301575, "learning_rate": 4.843299548114964e-06, "loss": 0.1184, "step": 3912 }, { "epoch": 0.54525186372187, "grad_norm": 0.14038947224617004, "learning_rate": 4.840943939626012e-06, "loss": 0.139, "step": 3913 }, { "epoch": 0.5453912074130843, "grad_norm": 0.13215935230255127, "learning_rate": 4.838588366475685e-06, "loss": 0.1312, "step": 3914 }, { "epoch": 0.5455305511042987, "grad_norm": 0.1798163652420044, "learning_rate": 4.83623282918734e-06, "loss": 0.1066, "step": 3915 }, { "epoch": 0.5456698947955131, "grad_norm": 0.14898745715618134, "learning_rate": 4.833877328284319e-06, "loss": 0.1388, "step": 3916 }, { "epoch": 0.5458092384867275, "grad_norm": 0.18092671036720276, "learning_rate": 4.831521864289964e-06, "loss": 0.1465, "step": 3917 }, { "epoch": 0.5459485821779418, "grad_norm": 0.2978741228580475, "learning_rate": 4.829166437727603e-06, "loss": 0.1548, "step": 3918 }, { "epoch": 0.5460879258691562, "grad_norm": 0.137820303440094, "learning_rate": 4.82681104912056e-06, "loss": 0.166, "step": 3919 }, { "epoch": 0.5462272695603707, "grad_norm": 0.24990811944007874, "learning_rate": 4.82445569899215e-06, "loss": 0.1486, "step": 3920 }, { "epoch": 0.5463666132515851, "grad_norm": 0.19379082322120667, "learning_rate": 4.822100387865673e-06, "loss": 0.1248, "step": 3921 }, { "epoch": 0.5465059569427995, "grad_norm": 0.22537581622600555, "learning_rate": 4.8197451162644305e-06, "loss": 0.1349, "step": 3922 }, { "epoch": 0.5466453006340138, "grad_norm": 0.14563210308551788, "learning_rate": 4.817389884711706e-06, "loss": 0.1244, "step": 3923 }, { "epoch": 0.5467846443252282, "grad_norm": 0.15736545622348785, "learning_rate": 4.815034693730781e-06, "loss": 0.1581, "step": 3924 }, { "epoch": 0.5469239880164426, "grad_norm": 0.27037104964256287, "learning_rate": 4.812679543844924e-06, "loss": 0.156, "step": 3925 }, { "epoch": 0.547063331707657, "grad_norm": 0.11371428519487381, "learning_rate": 4.810324435577397e-06, "loss": 0.1198, "step": 3926 }, { "epoch": 0.5472026753988714, "grad_norm": 0.12247440963983536, "learning_rate": 4.807969369451449e-06, "loss": 0.1137, "step": 3927 }, { "epoch": 0.5473420190900857, "grad_norm": 0.3715260326862335, "learning_rate": 4.805614345990322e-06, "loss": 0.1823, "step": 3928 }, { "epoch": 0.5474813627813001, "grad_norm": 0.16929790377616882, "learning_rate": 4.803259365717251e-06, "loss": 0.1219, "step": 3929 }, { "epoch": 0.5476207064725145, "grad_norm": 0.1124860867857933, "learning_rate": 4.800904429155458e-06, "loss": 0.0981, "step": 3930 }, { "epoch": 0.5477600501637289, "grad_norm": 0.1390918642282486, "learning_rate": 4.7985495368281534e-06, "loss": 0.1113, "step": 3931 }, { "epoch": 0.5478993938549432, "grad_norm": 0.17664307355880737, "learning_rate": 4.796194689258542e-06, "loss": 0.1416, "step": 3932 }, { "epoch": 0.5480387375461576, "grad_norm": 0.16853134334087372, "learning_rate": 4.793839886969819e-06, "loss": 0.1301, "step": 3933 }, { "epoch": 0.548178081237372, "grad_norm": 0.16166825592517853, "learning_rate": 4.791485130485167e-06, "loss": 0.1417, "step": 3934 }, { "epoch": 0.5483174249285864, "grad_norm": 0.1629984825849533, "learning_rate": 4.789130420327756e-06, "loss": 0.1184, "step": 3935 }, { "epoch": 0.5484567686198007, "grad_norm": 0.22560635209083557, "learning_rate": 4.786775757020755e-06, "loss": 0.1273, "step": 3936 }, { "epoch": 0.5485961123110151, "grad_norm": 0.20513896644115448, "learning_rate": 4.784421141087311e-06, "loss": 0.1463, "step": 3937 }, { "epoch": 0.5487354560022295, "grad_norm": 0.1405578851699829, "learning_rate": 4.782066573050567e-06, "loss": 0.1188, "step": 3938 }, { "epoch": 0.5488747996934439, "grad_norm": 0.258179247379303, "learning_rate": 4.779712053433655e-06, "loss": 0.1199, "step": 3939 }, { "epoch": 0.5490141433846583, "grad_norm": 0.15338951349258423, "learning_rate": 4.777357582759696e-06, "loss": 0.1101, "step": 3940 }, { "epoch": 0.5491534870758726, "grad_norm": 0.15036259591579437, "learning_rate": 4.7750031615518e-06, "loss": 0.1188, "step": 3941 }, { "epoch": 0.549292830767087, "grad_norm": 0.14865002036094666, "learning_rate": 4.772648790333065e-06, "loss": 0.1375, "step": 3942 }, { "epoch": 0.5494321744583014, "grad_norm": 0.18849480152130127, "learning_rate": 4.7702944696265766e-06, "loss": 0.1118, "step": 3943 }, { "epoch": 0.5495715181495158, "grad_norm": 0.1798589676618576, "learning_rate": 4.767940199955413e-06, "loss": 0.153, "step": 3944 }, { "epoch": 0.5497108618407301, "grad_norm": 0.17472976446151733, "learning_rate": 4.765585981842639e-06, "loss": 0.1132, "step": 3945 }, { "epoch": 0.5498502055319445, "grad_norm": 0.11023211479187012, "learning_rate": 4.76323181581131e-06, "loss": 0.0906, "step": 3946 }, { "epoch": 0.5499895492231589, "grad_norm": 0.08912818133831024, "learning_rate": 4.760877702384464e-06, "loss": 0.0905, "step": 3947 }, { "epoch": 0.5501288929143733, "grad_norm": 0.23800258338451385, "learning_rate": 4.758523642085133e-06, "loss": 0.1311, "step": 3948 }, { "epoch": 0.5502682366055877, "grad_norm": 0.15319404006004333, "learning_rate": 4.756169635436336e-06, "loss": 0.1228, "step": 3949 }, { "epoch": 0.550407580296802, "grad_norm": 0.11981511861085892, "learning_rate": 4.75381568296108e-06, "loss": 0.1072, "step": 3950 }, { "epoch": 0.5505469239880164, "grad_norm": 0.3069796562194824, "learning_rate": 4.751461785182358e-06, "loss": 0.1388, "step": 3951 }, { "epoch": 0.5506862676792308, "grad_norm": 0.21595314145088196, "learning_rate": 4.7491079426231556e-06, "loss": 0.147, "step": 3952 }, { "epoch": 0.5508256113704452, "grad_norm": 0.17458899319171906, "learning_rate": 4.746754155806437e-06, "loss": 0.1278, "step": 3953 }, { "epoch": 0.5509649550616595, "grad_norm": 0.23134003579616547, "learning_rate": 4.744400425255165e-06, "loss": 0.1449, "step": 3954 }, { "epoch": 0.5511042987528739, "grad_norm": 0.12294303625822067, "learning_rate": 4.7420467514922815e-06, "loss": 0.1199, "step": 3955 }, { "epoch": 0.5512436424440883, "grad_norm": 0.180484801530838, "learning_rate": 4.739693135040722e-06, "loss": 0.1191, "step": 3956 }, { "epoch": 0.5513829861353027, "grad_norm": 0.12875999510288239, "learning_rate": 4.737339576423406e-06, "loss": 0.092, "step": 3957 }, { "epoch": 0.551522329826517, "grad_norm": 0.18465085327625275, "learning_rate": 4.734986076163238e-06, "loss": 0.1768, "step": 3958 }, { "epoch": 0.5516616735177314, "grad_norm": 0.12039000540971756, "learning_rate": 4.732632634783114e-06, "loss": 0.1223, "step": 3959 }, { "epoch": 0.5518010172089458, "grad_norm": 0.229115292429924, "learning_rate": 4.730279252805914e-06, "loss": 0.1389, "step": 3960 }, { "epoch": 0.5519403609001603, "grad_norm": 0.18026074767112732, "learning_rate": 4.727925930754506e-06, "loss": 0.1486, "step": 3961 }, { "epoch": 0.5520797045913747, "grad_norm": 0.17059630155563354, "learning_rate": 4.725572669151747e-06, "loss": 0.1315, "step": 3962 }, { "epoch": 0.552219048282589, "grad_norm": 0.136863112449646, "learning_rate": 4.723219468520474e-06, "loss": 0.1339, "step": 3963 }, { "epoch": 0.5523583919738034, "grad_norm": 0.1892027109861374, "learning_rate": 4.720866329383514e-06, "loss": 0.1349, "step": 3964 }, { "epoch": 0.5524977356650178, "grad_norm": 0.14126384258270264, "learning_rate": 4.718513252263685e-06, "loss": 0.1074, "step": 3965 }, { "epoch": 0.5526370793562322, "grad_norm": 0.13204772770404816, "learning_rate": 4.716160237683785e-06, "loss": 0.1312, "step": 3966 }, { "epoch": 0.5527764230474466, "grad_norm": 0.11141040921211243, "learning_rate": 4.7138072861666e-06, "loss": 0.1079, "step": 3967 }, { "epoch": 0.5529157667386609, "grad_norm": 0.1962149441242218, "learning_rate": 4.711454398234902e-06, "loss": 0.1273, "step": 3968 }, { "epoch": 0.5530551104298753, "grad_norm": 0.18047119677066803, "learning_rate": 4.7091015744114475e-06, "loss": 0.1298, "step": 3969 }, { "epoch": 0.5531944541210897, "grad_norm": 0.16534937918186188, "learning_rate": 4.706748815218984e-06, "loss": 0.1138, "step": 3970 }, { "epoch": 0.5533337978123041, "grad_norm": 0.3049725294113159, "learning_rate": 4.704396121180237e-06, "loss": 0.1526, "step": 3971 }, { "epoch": 0.5534731415035185, "grad_norm": 0.19477520883083344, "learning_rate": 4.702043492817924e-06, "loss": 0.1292, "step": 3972 }, { "epoch": 0.5536124851947328, "grad_norm": 0.09567081928253174, "learning_rate": 4.6996909306547455e-06, "loss": 0.0993, "step": 3973 }, { "epoch": 0.5537518288859472, "grad_norm": 0.1519699990749359, "learning_rate": 4.697338435213385e-06, "loss": 0.1113, "step": 3974 }, { "epoch": 0.5538911725771616, "grad_norm": 0.17156165838241577, "learning_rate": 4.694986007016514e-06, "loss": 0.1333, "step": 3975 }, { "epoch": 0.554030516268376, "grad_norm": 0.2432863563299179, "learning_rate": 4.692633646586788e-06, "loss": 0.1619, "step": 3976 }, { "epoch": 0.5541698599595903, "grad_norm": 0.2185806781053543, "learning_rate": 4.690281354446849e-06, "loss": 0.1472, "step": 3977 }, { "epoch": 0.5543092036508047, "grad_norm": 0.19809776544570923, "learning_rate": 4.6879291311193244e-06, "loss": 0.1725, "step": 3978 }, { "epoch": 0.5544485473420191, "grad_norm": 0.17421719431877136, "learning_rate": 4.68557697712682e-06, "loss": 0.1592, "step": 3979 }, { "epoch": 0.5545878910332335, "grad_norm": 0.1352337896823883, "learning_rate": 4.683224892991932e-06, "loss": 0.1059, "step": 3980 }, { "epoch": 0.5547272347244478, "grad_norm": 0.15207523107528687, "learning_rate": 4.680872879237242e-06, "loss": 0.1529, "step": 3981 }, { "epoch": 0.5548665784156622, "grad_norm": 0.12921848893165588, "learning_rate": 4.678520936385313e-06, "loss": 0.1414, "step": 3982 }, { "epoch": 0.5550059221068766, "grad_norm": 0.14392682909965515, "learning_rate": 4.676169064958692e-06, "loss": 0.1096, "step": 3983 }, { "epoch": 0.555145265798091, "grad_norm": 0.14685781300067902, "learning_rate": 4.6738172654799105e-06, "loss": 0.1065, "step": 3984 }, { "epoch": 0.5552846094893054, "grad_norm": 0.10273278504610062, "learning_rate": 4.671465538471487e-06, "loss": 0.119, "step": 3985 }, { "epoch": 0.5554239531805197, "grad_norm": 0.10214895755052567, "learning_rate": 4.66911388445592e-06, "loss": 0.1018, "step": 3986 }, { "epoch": 0.5555632968717341, "grad_norm": 0.1102248951792717, "learning_rate": 4.666762303955692e-06, "loss": 0.1015, "step": 3987 }, { "epoch": 0.5557026405629485, "grad_norm": 0.13357309997081757, "learning_rate": 4.664410797493275e-06, "loss": 0.1279, "step": 3988 }, { "epoch": 0.5558419842541629, "grad_norm": 0.19092267751693726, "learning_rate": 4.662059365591115e-06, "loss": 0.1738, "step": 3989 }, { "epoch": 0.5559813279453772, "grad_norm": 0.18553031980991364, "learning_rate": 4.6597080087716494e-06, "loss": 0.1363, "step": 3990 }, { "epoch": 0.5561206716365916, "grad_norm": 0.16080082952976227, "learning_rate": 4.657356727557295e-06, "loss": 0.1168, "step": 3991 }, { "epoch": 0.556260015327806, "grad_norm": 0.2641857862472534, "learning_rate": 4.655005522470453e-06, "loss": 0.1573, "step": 3992 }, { "epoch": 0.5563993590190204, "grad_norm": 0.09673983603715897, "learning_rate": 4.652654394033508e-06, "loss": 0.1044, "step": 3993 }, { "epoch": 0.5565387027102348, "grad_norm": 0.12248989939689636, "learning_rate": 4.650303342768827e-06, "loss": 0.1259, "step": 3994 }, { "epoch": 0.5566780464014491, "grad_norm": 0.15159998834133148, "learning_rate": 4.6479523691987585e-06, "loss": 0.1273, "step": 3995 }, { "epoch": 0.5568173900926635, "grad_norm": 0.3828209936618805, "learning_rate": 4.645601473845636e-06, "loss": 0.1486, "step": 3996 }, { "epoch": 0.5569567337838779, "grad_norm": 0.3591316342353821, "learning_rate": 4.6432506572317754e-06, "loss": 0.1519, "step": 3997 }, { "epoch": 0.5570960774750923, "grad_norm": 0.18554429709911346, "learning_rate": 4.6408999198794744e-06, "loss": 0.1388, "step": 3998 }, { "epoch": 0.5572354211663066, "grad_norm": 0.1859561949968338, "learning_rate": 4.6385492623110135e-06, "loss": 0.1537, "step": 3999 }, { "epoch": 0.557374764857521, "grad_norm": 0.09215746074914932, "learning_rate": 4.636198685048653e-06, "loss": 0.1088, "step": 4000 }, { "epoch": 0.5575141085487355, "grad_norm": 0.18383872509002686, "learning_rate": 4.633848188614639e-06, "loss": 0.1442, "step": 4001 }, { "epoch": 0.5576534522399499, "grad_norm": 0.1646018773317337, "learning_rate": 4.631497773531199e-06, "loss": 0.1284, "step": 4002 }, { "epoch": 0.5577927959311643, "grad_norm": 0.11336896568536758, "learning_rate": 4.629147440320539e-06, "loss": 0.1301, "step": 4003 }, { "epoch": 0.5579321396223786, "grad_norm": 0.24405068159103394, "learning_rate": 4.626797189504855e-06, "loss": 0.1262, "step": 4004 }, { "epoch": 0.558071483313593, "grad_norm": 0.0821252092719078, "learning_rate": 4.624447021606311e-06, "loss": 0.0903, "step": 4005 }, { "epoch": 0.5582108270048074, "grad_norm": 0.1524919718503952, "learning_rate": 4.6220969371470665e-06, "loss": 0.1436, "step": 4006 }, { "epoch": 0.5583501706960218, "grad_norm": 0.10177060961723328, "learning_rate": 4.619746936649254e-06, "loss": 0.0998, "step": 4007 }, { "epoch": 0.5584895143872362, "grad_norm": 0.15875668823719025, "learning_rate": 4.617397020634991e-06, "loss": 0.1386, "step": 4008 }, { "epoch": 0.5586288580784505, "grad_norm": 0.16542385518550873, "learning_rate": 4.615047189626376e-06, "loss": 0.1202, "step": 4009 }, { "epoch": 0.5587682017696649, "grad_norm": 0.08767358958721161, "learning_rate": 4.612697444145487e-06, "loss": 0.0884, "step": 4010 }, { "epoch": 0.5589075454608793, "grad_norm": 0.1042068600654602, "learning_rate": 4.610347784714383e-06, "loss": 0.111, "step": 4011 }, { "epoch": 0.5590468891520937, "grad_norm": 0.12254679203033447, "learning_rate": 4.6079982118551045e-06, "loss": 0.1171, "step": 4012 }, { "epoch": 0.559186232843308, "grad_norm": 0.1768549233675003, "learning_rate": 4.605648726089674e-06, "loss": 0.1532, "step": 4013 }, { "epoch": 0.5593255765345224, "grad_norm": 0.11942578107118607, "learning_rate": 4.603299327940094e-06, "loss": 0.1091, "step": 4014 }, { "epoch": 0.5594649202257368, "grad_norm": 0.1557246446609497, "learning_rate": 4.600950017928348e-06, "loss": 0.126, "step": 4015 }, { "epoch": 0.5596042639169512, "grad_norm": 0.1648760586977005, "learning_rate": 4.598600796576395e-06, "loss": 0.1254, "step": 4016 }, { "epoch": 0.5597436076081655, "grad_norm": 0.30658113956451416, "learning_rate": 4.596251664406182e-06, "loss": 0.1381, "step": 4017 }, { "epoch": 0.5598829512993799, "grad_norm": 0.18834109604358673, "learning_rate": 4.593902621939632e-06, "loss": 0.1382, "step": 4018 }, { "epoch": 0.5600222949905943, "grad_norm": 0.187435120344162, "learning_rate": 4.591553669698646e-06, "loss": 0.1397, "step": 4019 }, { "epoch": 0.5601616386818087, "grad_norm": 0.1446254402399063, "learning_rate": 4.589204808205113e-06, "loss": 0.1188, "step": 4020 }, { "epoch": 0.5603009823730231, "grad_norm": 0.151625394821167, "learning_rate": 4.58685603798089e-06, "loss": 0.1523, "step": 4021 }, { "epoch": 0.5604403260642374, "grad_norm": 0.16067586839199066, "learning_rate": 4.5845073595478245e-06, "loss": 0.1105, "step": 4022 }, { "epoch": 0.5605796697554518, "grad_norm": 0.18048077821731567, "learning_rate": 4.5821587734277374e-06, "loss": 0.1356, "step": 4023 }, { "epoch": 0.5607190134466662, "grad_norm": 0.11781925708055496, "learning_rate": 4.57981028014243e-06, "loss": 0.1172, "step": 4024 }, { "epoch": 0.5608583571378806, "grad_norm": 0.3246220350265503, "learning_rate": 4.577461880213688e-06, "loss": 0.152, "step": 4025 }, { "epoch": 0.560997700829095, "grad_norm": 0.22901126742362976, "learning_rate": 4.575113574163269e-06, "loss": 0.1965, "step": 4026 }, { "epoch": 0.5611370445203093, "grad_norm": 0.17257720232009888, "learning_rate": 4.572765362512912e-06, "loss": 0.1432, "step": 4027 }, { "epoch": 0.5612763882115237, "grad_norm": 0.14492720365524292, "learning_rate": 4.570417245784337e-06, "loss": 0.1131, "step": 4028 }, { "epoch": 0.5614157319027381, "grad_norm": 0.24086737632751465, "learning_rate": 4.568069224499244e-06, "loss": 0.1322, "step": 4029 }, { "epoch": 0.5615550755939525, "grad_norm": 0.10151997953653336, "learning_rate": 4.565721299179308e-06, "loss": 0.1089, "step": 4030 }, { "epoch": 0.5616944192851668, "grad_norm": 0.11268268525600433, "learning_rate": 4.563373470346186e-06, "loss": 0.1175, "step": 4031 }, { "epoch": 0.5618337629763812, "grad_norm": 0.19765160977840424, "learning_rate": 4.561025738521508e-06, "loss": 0.1479, "step": 4032 }, { "epoch": 0.5619731066675956, "grad_norm": 0.30117708444595337, "learning_rate": 4.55867810422689e-06, "loss": 0.1645, "step": 4033 }, { "epoch": 0.56211245035881, "grad_norm": 0.20835468173027039, "learning_rate": 4.5563305679839214e-06, "loss": 0.1339, "step": 4034 }, { "epoch": 0.5622517940500243, "grad_norm": 0.24713432788848877, "learning_rate": 4.553983130314171e-06, "loss": 0.1612, "step": 4035 }, { "epoch": 0.5623911377412387, "grad_norm": 0.22982148826122284, "learning_rate": 4.551635791739188e-06, "loss": 0.1341, "step": 4036 }, { "epoch": 0.5625304814324531, "grad_norm": 0.13793544471263885, "learning_rate": 4.549288552780494e-06, "loss": 0.1095, "step": 4037 }, { "epoch": 0.5626698251236675, "grad_norm": 0.20701901614665985, "learning_rate": 4.546941413959595e-06, "loss": 0.1807, "step": 4038 }, { "epoch": 0.5628091688148819, "grad_norm": 0.4364398419857025, "learning_rate": 4.544594375797969e-06, "loss": 0.1502, "step": 4039 }, { "epoch": 0.5629485125060962, "grad_norm": 0.14326605200767517, "learning_rate": 4.542247438817076e-06, "loss": 0.1239, "step": 4040 }, { "epoch": 0.5630878561973107, "grad_norm": 0.17358693480491638, "learning_rate": 4.539900603538352e-06, "loss": 0.1287, "step": 4041 }, { "epoch": 0.5632271998885251, "grad_norm": 0.1348208487033844, "learning_rate": 4.53755387048321e-06, "loss": 0.1109, "step": 4042 }, { "epoch": 0.5633665435797395, "grad_norm": 0.4283924400806427, "learning_rate": 4.53520724017304e-06, "loss": 0.124, "step": 4043 }, { "epoch": 0.5635058872709539, "grad_norm": 0.17859743535518646, "learning_rate": 4.532860713129208e-06, "loss": 0.1187, "step": 4044 }, { "epoch": 0.5636452309621682, "grad_norm": 0.13133969902992249, "learning_rate": 4.530514289873062e-06, "loss": 0.1061, "step": 4045 }, { "epoch": 0.5637845746533826, "grad_norm": 0.27714619040489197, "learning_rate": 4.528167970925922e-06, "loss": 0.1216, "step": 4046 }, { "epoch": 0.563923918344597, "grad_norm": 0.17900823056697845, "learning_rate": 4.525821756809088e-06, "loss": 0.1383, "step": 4047 }, { "epoch": 0.5640632620358114, "grad_norm": 0.22564487159252167, "learning_rate": 4.523475648043832e-06, "loss": 0.1438, "step": 4048 }, { "epoch": 0.5642026057270257, "grad_norm": 0.13264478743076324, "learning_rate": 4.5211296451514085e-06, "loss": 0.0949, "step": 4049 }, { "epoch": 0.5643419494182401, "grad_norm": 0.1702674925327301, "learning_rate": 4.518783748653045e-06, "loss": 0.1371, "step": 4050 }, { "epoch": 0.5644812931094545, "grad_norm": 0.155312642455101, "learning_rate": 4.516437959069946e-06, "loss": 0.1181, "step": 4051 }, { "epoch": 0.5646206368006689, "grad_norm": 0.36420950293540955, "learning_rate": 4.514092276923295e-06, "loss": 0.1488, "step": 4052 }, { "epoch": 0.5647599804918833, "grad_norm": 0.25342583656311035, "learning_rate": 4.5117467027342435e-06, "loss": 0.1728, "step": 4053 }, { "epoch": 0.5648993241830976, "grad_norm": 0.2430289089679718, "learning_rate": 4.509401237023928e-06, "loss": 0.1563, "step": 4054 }, { "epoch": 0.565038667874312, "grad_norm": 0.31256818771362305, "learning_rate": 4.507055880313458e-06, "loss": 0.1236, "step": 4055 }, { "epoch": 0.5651780115655264, "grad_norm": 0.28640520572662354, "learning_rate": 4.504710633123917e-06, "loss": 0.1383, "step": 4056 }, { "epoch": 0.5653173552567408, "grad_norm": 0.2697027325630188, "learning_rate": 4.502365495976367e-06, "loss": 0.1401, "step": 4057 }, { "epoch": 0.5654566989479551, "grad_norm": 0.15245148539543152, "learning_rate": 4.5000204693918405e-06, "loss": 0.1153, "step": 4058 }, { "epoch": 0.5655960426391695, "grad_norm": 0.2046864628791809, "learning_rate": 4.497675553891352e-06, "loss": 0.1277, "step": 4059 }, { "epoch": 0.5657353863303839, "grad_norm": 0.3693530857563019, "learning_rate": 4.495330749995887e-06, "loss": 0.1366, "step": 4060 }, { "epoch": 0.5658747300215983, "grad_norm": 0.14406892657279968, "learning_rate": 4.492986058226407e-06, "loss": 0.142, "step": 4061 }, { "epoch": 0.5660140737128126, "grad_norm": 0.1551835685968399, "learning_rate": 4.490641479103851e-06, "loss": 0.15, "step": 4062 }, { "epoch": 0.566153417404027, "grad_norm": 0.24953407049179077, "learning_rate": 4.4882970131491286e-06, "loss": 0.1362, "step": 4063 }, { "epoch": 0.5662927610952414, "grad_norm": 0.2662874758243561, "learning_rate": 4.485952660883126e-06, "loss": 0.1619, "step": 4064 }, { "epoch": 0.5664321047864558, "grad_norm": 0.19556790590286255, "learning_rate": 4.483608422826708e-06, "loss": 0.1526, "step": 4065 }, { "epoch": 0.5665714484776702, "grad_norm": 0.18311496078968048, "learning_rate": 4.481264299500709e-06, "loss": 0.157, "step": 4066 }, { "epoch": 0.5667107921688845, "grad_norm": 0.24867384135723114, "learning_rate": 4.478920291425939e-06, "loss": 0.158, "step": 4067 }, { "epoch": 0.5668501358600989, "grad_norm": 0.1738162785768509, "learning_rate": 4.476576399123187e-06, "loss": 0.1392, "step": 4068 }, { "epoch": 0.5669894795513133, "grad_norm": 0.25907209515571594, "learning_rate": 4.474232623113204e-06, "loss": 0.1195, "step": 4069 }, { "epoch": 0.5671288232425277, "grad_norm": 0.15903231501579285, "learning_rate": 4.471888963916732e-06, "loss": 0.1417, "step": 4070 }, { "epoch": 0.567268166933742, "grad_norm": 0.1462508738040924, "learning_rate": 4.4695454220544735e-06, "loss": 0.1492, "step": 4071 }, { "epoch": 0.5674075106249564, "grad_norm": 0.12060706317424774, "learning_rate": 4.467201998047112e-06, "loss": 0.1072, "step": 4072 }, { "epoch": 0.5675468543161708, "grad_norm": 0.13369354605674744, "learning_rate": 4.464858692415304e-06, "loss": 0.1126, "step": 4073 }, { "epoch": 0.5676861980073852, "grad_norm": 0.14035223424434662, "learning_rate": 4.462515505679677e-06, "loss": 0.1226, "step": 4074 }, { "epoch": 0.5678255416985996, "grad_norm": 0.17432469129562378, "learning_rate": 4.460172438360832e-06, "loss": 0.1512, "step": 4075 }, { "epoch": 0.5679648853898139, "grad_norm": 0.13321912288665771, "learning_rate": 4.457829490979347e-06, "loss": 0.1427, "step": 4076 }, { "epoch": 0.5681042290810283, "grad_norm": 0.17250312864780426, "learning_rate": 4.455486664055772e-06, "loss": 0.1307, "step": 4077 }, { "epoch": 0.5682435727722427, "grad_norm": 0.19806554913520813, "learning_rate": 4.4531439581106295e-06, "loss": 0.1324, "step": 4078 }, { "epoch": 0.5683829164634571, "grad_norm": 0.12044713646173477, "learning_rate": 4.450801373664413e-06, "loss": 0.1064, "step": 4079 }, { "epoch": 0.5685222601546714, "grad_norm": 0.15881580114364624, "learning_rate": 4.448458911237593e-06, "loss": 0.1176, "step": 4080 }, { "epoch": 0.5686616038458859, "grad_norm": 0.2722937762737274, "learning_rate": 4.446116571350611e-06, "loss": 0.1661, "step": 4081 }, { "epoch": 0.5688009475371003, "grad_norm": 0.14388473331928253, "learning_rate": 4.443774354523883e-06, "loss": 0.1144, "step": 4082 }, { "epoch": 0.5689402912283147, "grad_norm": 0.12067573517560959, "learning_rate": 4.441432261277794e-06, "loss": 0.1143, "step": 4083 }, { "epoch": 0.5690796349195291, "grad_norm": 0.17553329467773438, "learning_rate": 4.4390902921327025e-06, "loss": 0.1571, "step": 4084 }, { "epoch": 0.5692189786107434, "grad_norm": 0.18425817787647247, "learning_rate": 4.436748447608944e-06, "loss": 0.1299, "step": 4085 }, { "epoch": 0.5693583223019578, "grad_norm": 0.12016800045967102, "learning_rate": 4.43440672822682e-06, "loss": 0.111, "step": 4086 }, { "epoch": 0.5694976659931722, "grad_norm": 0.09299242496490479, "learning_rate": 4.432065134506608e-06, "loss": 0.0955, "step": 4087 }, { "epoch": 0.5696370096843866, "grad_norm": 0.1744958758354187, "learning_rate": 4.429723666968559e-06, "loss": 0.1324, "step": 4088 }, { "epoch": 0.569776353375601, "grad_norm": 0.19302576780319214, "learning_rate": 4.427382326132892e-06, "loss": 0.1265, "step": 4089 }, { "epoch": 0.5699156970668153, "grad_norm": 0.11499957740306854, "learning_rate": 4.425041112519797e-06, "loss": 0.1047, "step": 4090 }, { "epoch": 0.5700550407580297, "grad_norm": 0.4687086343765259, "learning_rate": 4.42270002664944e-06, "loss": 0.1673, "step": 4091 }, { "epoch": 0.5701943844492441, "grad_norm": 0.144057959318161, "learning_rate": 4.4203590690419575e-06, "loss": 0.1163, "step": 4092 }, { "epoch": 0.5703337281404585, "grad_norm": 0.25890010595321655, "learning_rate": 4.418018240217457e-06, "loss": 0.1253, "step": 4093 }, { "epoch": 0.5704730718316728, "grad_norm": 0.24689142405986786, "learning_rate": 4.415677540696017e-06, "loss": 0.1429, "step": 4094 }, { "epoch": 0.5706124155228872, "grad_norm": 0.14255937933921814, "learning_rate": 4.413336970997687e-06, "loss": 0.104, "step": 4095 }, { "epoch": 0.5707517592141016, "grad_norm": 0.18402379751205444, "learning_rate": 4.410996531642487e-06, "loss": 0.1124, "step": 4096 }, { "epoch": 0.570891102905316, "grad_norm": 0.16128046810626984, "learning_rate": 4.408656223150412e-06, "loss": 0.1253, "step": 4097 }, { "epoch": 0.5710304465965303, "grad_norm": 0.14992262423038483, "learning_rate": 4.406316046041423e-06, "loss": 0.1666, "step": 4098 }, { "epoch": 0.5711697902877447, "grad_norm": 0.14325439929962158, "learning_rate": 4.4039760008354556e-06, "loss": 0.1185, "step": 4099 }, { "epoch": 0.5713091339789591, "grad_norm": 0.15336725115776062, "learning_rate": 4.401636088052411e-06, "loss": 0.1168, "step": 4100 }, { "epoch": 0.5714484776701735, "grad_norm": 0.19616490602493286, "learning_rate": 4.399296308212168e-06, "loss": 0.1309, "step": 4101 }, { "epoch": 0.5715878213613879, "grad_norm": 0.1470308154821396, "learning_rate": 4.396956661834571e-06, "loss": 0.1189, "step": 4102 }, { "epoch": 0.5717271650526022, "grad_norm": 0.1400749385356903, "learning_rate": 4.394617149439435e-06, "loss": 0.1256, "step": 4103 }, { "epoch": 0.5718665087438166, "grad_norm": 0.15495418012142181, "learning_rate": 4.392277771546549e-06, "loss": 0.1319, "step": 4104 }, { "epoch": 0.572005852435031, "grad_norm": 0.2136242389678955, "learning_rate": 4.389938528675668e-06, "loss": 0.1561, "step": 4105 }, { "epoch": 0.5721451961262454, "grad_norm": 0.10832103341817856, "learning_rate": 4.387599421346517e-06, "loss": 0.1224, "step": 4106 }, { "epoch": 0.5722845398174597, "grad_norm": 0.12675555050373077, "learning_rate": 4.385260450078793e-06, "loss": 0.11, "step": 4107 }, { "epoch": 0.5724238835086741, "grad_norm": 0.13275590538978577, "learning_rate": 4.382921615392162e-06, "loss": 0.1328, "step": 4108 }, { "epoch": 0.5725632271998885, "grad_norm": 0.24124769866466522, "learning_rate": 4.38058291780626e-06, "loss": 0.1515, "step": 4109 }, { "epoch": 0.5727025708911029, "grad_norm": 0.08726596087217331, "learning_rate": 4.378244357840694e-06, "loss": 0.1076, "step": 4110 }, { "epoch": 0.5728419145823173, "grad_norm": 0.18357767164707184, "learning_rate": 4.375905936015035e-06, "loss": 0.142, "step": 4111 }, { "epoch": 0.5729812582735316, "grad_norm": 0.09764313697814941, "learning_rate": 4.373567652848828e-06, "loss": 0.1024, "step": 4112 }, { "epoch": 0.573120601964746, "grad_norm": 0.09550351649522781, "learning_rate": 4.371229508861588e-06, "loss": 0.1241, "step": 4113 }, { "epoch": 0.5732599456559604, "grad_norm": 0.16950714588165283, "learning_rate": 4.368891504572796e-06, "loss": 0.149, "step": 4114 }, { "epoch": 0.5733992893471748, "grad_norm": 0.1092088595032692, "learning_rate": 4.3665536405019045e-06, "loss": 0.1124, "step": 4115 }, { "epoch": 0.5735386330383891, "grad_norm": 0.2618410587310791, "learning_rate": 4.36421591716833e-06, "loss": 0.1603, "step": 4116 }, { "epoch": 0.5736779767296035, "grad_norm": 0.40197938680648804, "learning_rate": 4.361878335091464e-06, "loss": 0.1273, "step": 4117 }, { "epoch": 0.5738173204208179, "grad_norm": 0.14552728831768036, "learning_rate": 4.3595408947906644e-06, "loss": 0.1316, "step": 4118 }, { "epoch": 0.5739566641120323, "grad_norm": 0.1273612380027771, "learning_rate": 4.357203596785254e-06, "loss": 0.1021, "step": 4119 }, { "epoch": 0.5740960078032467, "grad_norm": 0.18383492529392242, "learning_rate": 4.3548664415945326e-06, "loss": 0.1575, "step": 4120 }, { "epoch": 0.5742353514944611, "grad_norm": 0.324393093585968, "learning_rate": 4.3525294297377566e-06, "loss": 0.1576, "step": 4121 }, { "epoch": 0.5743746951856755, "grad_norm": 0.13882021605968475, "learning_rate": 4.35019256173416e-06, "loss": 0.1267, "step": 4122 }, { "epoch": 0.5745140388768899, "grad_norm": 0.17347204685211182, "learning_rate": 4.34785583810294e-06, "loss": 0.1217, "step": 4123 }, { "epoch": 0.5746533825681043, "grad_norm": 0.1538739800453186, "learning_rate": 4.345519259363264e-06, "loss": 0.1056, "step": 4124 }, { "epoch": 0.5747927262593187, "grad_norm": 0.13685345649719238, "learning_rate": 4.343182826034268e-06, "loss": 0.1112, "step": 4125 }, { "epoch": 0.574932069950533, "grad_norm": 0.17151173949241638, "learning_rate": 4.340846538635053e-06, "loss": 0.1089, "step": 4126 }, { "epoch": 0.5750714136417474, "grad_norm": 0.1819237768650055, "learning_rate": 4.338510397684687e-06, "loss": 0.1055, "step": 4127 }, { "epoch": 0.5752107573329618, "grad_norm": 0.16248860955238342, "learning_rate": 4.336174403702208e-06, "loss": 0.1268, "step": 4128 }, { "epoch": 0.5753501010241762, "grad_norm": 0.19246630370616913, "learning_rate": 4.333838557206623e-06, "loss": 0.1211, "step": 4129 }, { "epoch": 0.5754894447153905, "grad_norm": 0.2504200041294098, "learning_rate": 4.3315028587169e-06, "loss": 0.1589, "step": 4130 }, { "epoch": 0.5756287884066049, "grad_norm": 0.10593544691801071, "learning_rate": 4.329167308751982e-06, "loss": 0.1109, "step": 4131 }, { "epoch": 0.5757681320978193, "grad_norm": 0.11696050316095352, "learning_rate": 4.3268319078307695e-06, "loss": 0.1158, "step": 4132 }, { "epoch": 0.5759074757890337, "grad_norm": 0.15908895432949066, "learning_rate": 4.324496656472141e-06, "loss": 0.1336, "step": 4133 }, { "epoch": 0.576046819480248, "grad_norm": 0.1211305633187294, "learning_rate": 4.322161555194932e-06, "loss": 0.1126, "step": 4134 }, { "epoch": 0.5761861631714624, "grad_norm": 0.12936371564865112, "learning_rate": 4.31982660451795e-06, "loss": 0.1178, "step": 4135 }, { "epoch": 0.5763255068626768, "grad_norm": 0.22055011987686157, "learning_rate": 4.3174918049599705e-06, "loss": 0.1231, "step": 4136 }, { "epoch": 0.5764648505538912, "grad_norm": 0.10163840651512146, "learning_rate": 4.315157157039727e-06, "loss": 0.1014, "step": 4137 }, { "epoch": 0.5766041942451056, "grad_norm": 0.1438245326280594, "learning_rate": 4.312822661275929e-06, "loss": 0.1211, "step": 4138 }, { "epoch": 0.5767435379363199, "grad_norm": 0.19666066765785217, "learning_rate": 4.310488318187247e-06, "loss": 0.14, "step": 4139 }, { "epoch": 0.5768828816275343, "grad_norm": 0.17048677802085876, "learning_rate": 4.308154128292318e-06, "loss": 0.1141, "step": 4140 }, { "epoch": 0.5770222253187487, "grad_norm": 0.13644501566886902, "learning_rate": 4.305820092109748e-06, "loss": 0.1146, "step": 4141 }, { "epoch": 0.5771615690099631, "grad_norm": 0.24658775329589844, "learning_rate": 4.303486210158106e-06, "loss": 0.1775, "step": 4142 }, { "epoch": 0.5773009127011774, "grad_norm": 0.27274399995803833, "learning_rate": 4.301152482955926e-06, "loss": 0.1666, "step": 4143 }, { "epoch": 0.5774402563923918, "grad_norm": 0.13916300237178802, "learning_rate": 4.298818911021707e-06, "loss": 0.1124, "step": 4144 }, { "epoch": 0.5775796000836062, "grad_norm": 0.12540897727012634, "learning_rate": 4.296485494873919e-06, "loss": 0.1044, "step": 4145 }, { "epoch": 0.5777189437748206, "grad_norm": 0.18484213948249817, "learning_rate": 4.294152235030993e-06, "loss": 0.1068, "step": 4146 }, { "epoch": 0.577858287466035, "grad_norm": 0.10581894218921661, "learning_rate": 4.291819132011327e-06, "loss": 0.1065, "step": 4147 }, { "epoch": 0.5779976311572493, "grad_norm": 0.19258373975753784, "learning_rate": 4.2894861863332785e-06, "loss": 0.1452, "step": 4148 }, { "epoch": 0.5781369748484637, "grad_norm": 0.2222920060157776, "learning_rate": 4.28715339851518e-06, "loss": 0.1727, "step": 4149 }, { "epoch": 0.5782763185396781, "grad_norm": 0.17203187942504883, "learning_rate": 4.284820769075322e-06, "loss": 0.1477, "step": 4150 }, { "epoch": 0.5784156622308925, "grad_norm": 0.15647265315055847, "learning_rate": 4.282488298531959e-06, "loss": 0.0918, "step": 4151 }, { "epoch": 0.5785550059221068, "grad_norm": 0.15849444270133972, "learning_rate": 4.28015598740332e-06, "loss": 0.1241, "step": 4152 }, { "epoch": 0.5786943496133212, "grad_norm": 0.15332220494747162, "learning_rate": 4.277823836207581e-06, "loss": 0.1166, "step": 4153 }, { "epoch": 0.5788336933045356, "grad_norm": 0.17836405336856842, "learning_rate": 4.275491845462901e-06, "loss": 0.1366, "step": 4154 }, { "epoch": 0.57897303699575, "grad_norm": 0.16546589136123657, "learning_rate": 4.27316001568739e-06, "loss": 0.146, "step": 4155 }, { "epoch": 0.5791123806869644, "grad_norm": 0.17289143800735474, "learning_rate": 4.270828347399131e-06, "loss": 0.1546, "step": 4156 }, { "epoch": 0.5792517243781787, "grad_norm": 0.1526181995868683, "learning_rate": 4.268496841116166e-06, "loss": 0.1555, "step": 4157 }, { "epoch": 0.5793910680693931, "grad_norm": 0.09929165244102478, "learning_rate": 4.266165497356503e-06, "loss": 0.1113, "step": 4158 }, { "epoch": 0.5795304117606075, "grad_norm": 0.161477193236351, "learning_rate": 4.2638343166381115e-06, "loss": 0.1288, "step": 4159 }, { "epoch": 0.5796697554518219, "grad_norm": 0.1921394020318985, "learning_rate": 4.261503299478928e-06, "loss": 0.1081, "step": 4160 }, { "epoch": 0.5798090991430362, "grad_norm": 0.10496401786804199, "learning_rate": 4.259172446396851e-06, "loss": 0.1055, "step": 4161 }, { "epoch": 0.5799484428342507, "grad_norm": 0.1603691577911377, "learning_rate": 4.256841757909744e-06, "loss": 0.1244, "step": 4162 }, { "epoch": 0.5800877865254651, "grad_norm": 0.17411574721336365, "learning_rate": 4.254511234535432e-06, "loss": 0.1319, "step": 4163 }, { "epoch": 0.5802271302166795, "grad_norm": 0.12904243171215057, "learning_rate": 4.2521808767917024e-06, "loss": 0.1355, "step": 4164 }, { "epoch": 0.5803664739078939, "grad_norm": 0.1341603547334671, "learning_rate": 4.2498506851963095e-06, "loss": 0.1291, "step": 4165 }, { "epoch": 0.5805058175991082, "grad_norm": 0.11220335960388184, "learning_rate": 4.247520660266969e-06, "loss": 0.1288, "step": 4166 }, { "epoch": 0.5806451612903226, "grad_norm": 0.1391015648841858, "learning_rate": 4.245190802521356e-06, "loss": 0.1461, "step": 4167 }, { "epoch": 0.580784504981537, "grad_norm": 0.13807623088359833, "learning_rate": 4.2428611124771184e-06, "loss": 0.1396, "step": 4168 }, { "epoch": 0.5809238486727514, "grad_norm": 0.21379531919956207, "learning_rate": 4.240531590651853e-06, "loss": 0.1307, "step": 4169 }, { "epoch": 0.5810631923639658, "grad_norm": 0.12971659004688263, "learning_rate": 4.238202237563129e-06, "loss": 0.1331, "step": 4170 }, { "epoch": 0.5812025360551801, "grad_norm": 0.14016377925872803, "learning_rate": 4.235873053728475e-06, "loss": 0.1255, "step": 4171 }, { "epoch": 0.5813418797463945, "grad_norm": 0.15544064342975616, "learning_rate": 4.233544039665385e-06, "loss": 0.1524, "step": 4172 }, { "epoch": 0.5814812234376089, "grad_norm": 0.1445273905992508, "learning_rate": 4.231215195891311e-06, "loss": 0.1275, "step": 4173 }, { "epoch": 0.5816205671288233, "grad_norm": 0.2718923091888428, "learning_rate": 4.228886522923668e-06, "loss": 0.1468, "step": 4174 }, { "epoch": 0.5817599108200376, "grad_norm": 0.09764406085014343, "learning_rate": 4.2265580212798355e-06, "loss": 0.1134, "step": 4175 }, { "epoch": 0.581899254511252, "grad_norm": 0.1537209451198578, "learning_rate": 4.224229691477151e-06, "loss": 0.1493, "step": 4176 }, { "epoch": 0.5820385982024664, "grad_norm": 0.16912858188152313, "learning_rate": 4.221901534032918e-06, "loss": 0.1335, "step": 4177 }, { "epoch": 0.5821779418936808, "grad_norm": 0.12886416912078857, "learning_rate": 4.219573549464403e-06, "loss": 0.1281, "step": 4178 }, { "epoch": 0.5823172855848952, "grad_norm": 0.07943876832723618, "learning_rate": 4.217245738288825e-06, "loss": 0.0969, "step": 4179 }, { "epoch": 0.5824566292761095, "grad_norm": 0.16658027470111847, "learning_rate": 4.2149181010233734e-06, "loss": 0.1334, "step": 4180 }, { "epoch": 0.5825959729673239, "grad_norm": 0.1326170265674591, "learning_rate": 4.212590638185196e-06, "loss": 0.1011, "step": 4181 }, { "epoch": 0.5827353166585383, "grad_norm": 0.12731482088565826, "learning_rate": 4.2102633502914035e-06, "loss": 0.1029, "step": 4182 }, { "epoch": 0.5828746603497527, "grad_norm": 0.1586771160364151, "learning_rate": 4.2079362378590625e-06, "loss": 0.121, "step": 4183 }, { "epoch": 0.583014004040967, "grad_norm": 0.1717168539762497, "learning_rate": 4.2056093014052085e-06, "loss": 0.1382, "step": 4184 }, { "epoch": 0.5831533477321814, "grad_norm": 0.11491516977548599, "learning_rate": 4.20328254144683e-06, "loss": 0.0978, "step": 4185 }, { "epoch": 0.5832926914233958, "grad_norm": 0.2094271183013916, "learning_rate": 4.2009559585008826e-06, "loss": 0.1409, "step": 4186 }, { "epoch": 0.5834320351146102, "grad_norm": 0.11345230787992477, "learning_rate": 4.198629553084277e-06, "loss": 0.1235, "step": 4187 }, { "epoch": 0.5835713788058245, "grad_norm": 0.12911799550056458, "learning_rate": 4.1963033257138904e-06, "loss": 0.1138, "step": 4188 }, { "epoch": 0.5837107224970389, "grad_norm": 0.15943537652492523, "learning_rate": 4.193977276906557e-06, "loss": 0.1304, "step": 4189 }, { "epoch": 0.5838500661882533, "grad_norm": 0.0867990180850029, "learning_rate": 4.191651407179069e-06, "loss": 0.1008, "step": 4190 }, { "epoch": 0.5839894098794677, "grad_norm": 0.24193885922431946, "learning_rate": 4.189325717048185e-06, "loss": 0.156, "step": 4191 }, { "epoch": 0.5841287535706821, "grad_norm": 0.21246443688869476, "learning_rate": 4.187000207030616e-06, "loss": 0.1352, "step": 4192 }, { "epoch": 0.5842680972618964, "grad_norm": 0.2960437834262848, "learning_rate": 4.184674877643042e-06, "loss": 0.1709, "step": 4193 }, { "epoch": 0.5844074409531108, "grad_norm": 0.153218075633049, "learning_rate": 4.182349729402097e-06, "loss": 0.1409, "step": 4194 }, { "epoch": 0.5845467846443252, "grad_norm": 0.1682654768228531, "learning_rate": 4.180024762824374e-06, "loss": 0.1627, "step": 4195 }, { "epoch": 0.5846861283355396, "grad_norm": 0.22671067714691162, "learning_rate": 4.177699978426426e-06, "loss": 0.1282, "step": 4196 }, { "epoch": 0.584825472026754, "grad_norm": 0.2818920314311981, "learning_rate": 4.175375376724772e-06, "loss": 0.1235, "step": 4197 }, { "epoch": 0.5849648157179683, "grad_norm": 0.14652426540851593, "learning_rate": 4.173050958235882e-06, "loss": 0.1201, "step": 4198 }, { "epoch": 0.5851041594091827, "grad_norm": 0.14355561137199402, "learning_rate": 4.170726723476189e-06, "loss": 0.1341, "step": 4199 }, { "epoch": 0.5852435031003971, "grad_norm": 0.14022938907146454, "learning_rate": 4.168402672962086e-06, "loss": 0.1158, "step": 4200 }, { "epoch": 0.5853828467916115, "grad_norm": 0.13792890310287476, "learning_rate": 4.166078807209924e-06, "loss": 0.1087, "step": 4201 }, { "epoch": 0.5855221904828259, "grad_norm": 0.15530875325202942, "learning_rate": 4.163755126736011e-06, "loss": 0.1328, "step": 4202 }, { "epoch": 0.5856615341740403, "grad_norm": 0.16599784791469574, "learning_rate": 4.1614316320566174e-06, "loss": 0.1294, "step": 4203 }, { "epoch": 0.5858008778652547, "grad_norm": 0.3050483763217926, "learning_rate": 4.159108323687971e-06, "loss": 0.1465, "step": 4204 }, { "epoch": 0.5859402215564691, "grad_norm": 0.15325629711151123, "learning_rate": 4.156785202146257e-06, "loss": 0.144, "step": 4205 }, { "epoch": 0.5860795652476835, "grad_norm": 0.11655028164386749, "learning_rate": 4.154462267947621e-06, "loss": 0.1173, "step": 4206 }, { "epoch": 0.5862189089388978, "grad_norm": 0.16149325668811798, "learning_rate": 4.152139521608164e-06, "loss": 0.1225, "step": 4207 }, { "epoch": 0.5863582526301122, "grad_norm": 0.1329786777496338, "learning_rate": 4.149816963643947e-06, "loss": 0.1118, "step": 4208 }, { "epoch": 0.5864975963213266, "grad_norm": 0.20140495896339417, "learning_rate": 4.147494594570992e-06, "loss": 0.1184, "step": 4209 }, { "epoch": 0.586636940012541, "grad_norm": 0.18051378428936005, "learning_rate": 4.1451724149052764e-06, "loss": 0.1429, "step": 4210 }, { "epoch": 0.5867762837037553, "grad_norm": 0.09150006622076035, "learning_rate": 4.1428504251627335e-06, "loss": 0.1095, "step": 4211 }, { "epoch": 0.5869156273949697, "grad_norm": 0.16993603110313416, "learning_rate": 4.140528625859254e-06, "loss": 0.1154, "step": 4212 }, { "epoch": 0.5870549710861841, "grad_norm": 0.15275518596172333, "learning_rate": 4.138207017510696e-06, "loss": 0.1152, "step": 4213 }, { "epoch": 0.5871943147773985, "grad_norm": 0.20114947855472565, "learning_rate": 4.1358856006328614e-06, "loss": 0.1424, "step": 4214 }, { "epoch": 0.5873336584686129, "grad_norm": 0.20144307613372803, "learning_rate": 4.1335643757415195e-06, "loss": 0.1587, "step": 4215 }, { "epoch": 0.5874730021598272, "grad_norm": 0.11902589350938797, "learning_rate": 4.131243343352391e-06, "loss": 0.0975, "step": 4216 }, { "epoch": 0.5876123458510416, "grad_norm": 0.23852498829364777, "learning_rate": 4.128922503981158e-06, "loss": 0.123, "step": 4217 }, { "epoch": 0.587751689542256, "grad_norm": 0.19589237868785858, "learning_rate": 4.126601858143457e-06, "loss": 0.1428, "step": 4218 }, { "epoch": 0.5878910332334704, "grad_norm": 0.13606981933116913, "learning_rate": 4.124281406354883e-06, "loss": 0.1154, "step": 4219 }, { "epoch": 0.5880303769246847, "grad_norm": 0.19673947989940643, "learning_rate": 4.121961149130989e-06, "loss": 0.1486, "step": 4220 }, { "epoch": 0.5881697206158991, "grad_norm": 0.23950190842151642, "learning_rate": 4.119641086987282e-06, "loss": 0.1364, "step": 4221 }, { "epoch": 0.5883090643071135, "grad_norm": 0.11556488275527954, "learning_rate": 4.1173212204392245e-06, "loss": 0.1059, "step": 4222 }, { "epoch": 0.5884484079983279, "grad_norm": 0.09173863381147385, "learning_rate": 4.115001550002241e-06, "loss": 0.1023, "step": 4223 }, { "epoch": 0.5885877516895422, "grad_norm": 0.21074438095092773, "learning_rate": 4.1126820761917075e-06, "loss": 0.1135, "step": 4224 }, { "epoch": 0.5887270953807566, "grad_norm": 0.2163611799478531, "learning_rate": 4.11036279952296e-06, "loss": 0.1328, "step": 4225 }, { "epoch": 0.588866439071971, "grad_norm": 0.21737077832221985, "learning_rate": 4.108043720511287e-06, "loss": 0.1256, "step": 4226 }, { "epoch": 0.5890057827631854, "grad_norm": 0.14963126182556152, "learning_rate": 4.105724839671936e-06, "loss": 0.1086, "step": 4227 }, { "epoch": 0.5891451264543998, "grad_norm": 0.13313992321491241, "learning_rate": 4.103406157520108e-06, "loss": 0.1009, "step": 4228 }, { "epoch": 0.5892844701456141, "grad_norm": 0.11642465740442276, "learning_rate": 4.101087674570963e-06, "loss": 0.124, "step": 4229 }, { "epoch": 0.5894238138368285, "grad_norm": 0.3870823383331299, "learning_rate": 4.0987693913396145e-06, "loss": 0.1834, "step": 4230 }, { "epoch": 0.5895631575280429, "grad_norm": 0.15783017873764038, "learning_rate": 4.096451308341132e-06, "loss": 0.1261, "step": 4231 }, { "epoch": 0.5897025012192573, "grad_norm": 0.16922925412654877, "learning_rate": 4.094133426090539e-06, "loss": 0.1285, "step": 4232 }, { "epoch": 0.5898418449104716, "grad_norm": 0.1494169533252716, "learning_rate": 4.091815745102818e-06, "loss": 0.106, "step": 4233 }, { "epoch": 0.589981188601686, "grad_norm": 0.18152621388435364, "learning_rate": 4.089498265892905e-06, "loss": 0.1332, "step": 4234 }, { "epoch": 0.5901205322929004, "grad_norm": 0.22758102416992188, "learning_rate": 4.0871809889756884e-06, "loss": 0.0938, "step": 4235 }, { "epoch": 0.5902598759841148, "grad_norm": 0.2003236562013626, "learning_rate": 4.084863914866018e-06, "loss": 0.1529, "step": 4236 }, { "epoch": 0.5903992196753292, "grad_norm": 0.15553756058216095, "learning_rate": 4.082547044078693e-06, "loss": 0.127, "step": 4237 }, { "epoch": 0.5905385633665435, "grad_norm": 0.15083777904510498, "learning_rate": 4.0802303771284685e-06, "loss": 0.1252, "step": 4238 }, { "epoch": 0.5906779070577579, "grad_norm": 0.2319016456604004, "learning_rate": 4.0779139145300536e-06, "loss": 0.1168, "step": 4239 }, { "epoch": 0.5908172507489723, "grad_norm": 0.20223042368888855, "learning_rate": 4.075597656798117e-06, "loss": 0.1515, "step": 4240 }, { "epoch": 0.5909565944401867, "grad_norm": 0.20020964741706848, "learning_rate": 4.073281604447277e-06, "loss": 0.1142, "step": 4241 }, { "epoch": 0.5910959381314012, "grad_norm": 0.18929843604564667, "learning_rate": 4.0709657579921075e-06, "loss": 0.1471, "step": 4242 }, { "epoch": 0.5912352818226155, "grad_norm": 0.21078510582447052, "learning_rate": 4.068650117947135e-06, "loss": 0.1483, "step": 4243 }, { "epoch": 0.5913746255138299, "grad_norm": 0.14263483881950378, "learning_rate": 4.0663346848268435e-06, "loss": 0.1335, "step": 4244 }, { "epoch": 0.5915139692050443, "grad_norm": 0.37847262620925903, "learning_rate": 4.064019459145669e-06, "loss": 0.134, "step": 4245 }, { "epoch": 0.5916533128962587, "grad_norm": 0.2799068093299866, "learning_rate": 4.061704441418002e-06, "loss": 0.1446, "step": 4246 }, { "epoch": 0.591792656587473, "grad_norm": 0.1620243638753891, "learning_rate": 4.059389632158189e-06, "loss": 0.1272, "step": 4247 }, { "epoch": 0.5919320002786874, "grad_norm": 0.19909608364105225, "learning_rate": 4.057075031880521e-06, "loss": 0.1253, "step": 4248 }, { "epoch": 0.5920713439699018, "grad_norm": 0.17814737558364868, "learning_rate": 4.054760641099256e-06, "loss": 0.1257, "step": 4249 }, { "epoch": 0.5922106876611162, "grad_norm": 0.15605734288692474, "learning_rate": 4.052446460328595e-06, "loss": 0.13, "step": 4250 }, { "epoch": 0.5923500313523306, "grad_norm": 0.20567677915096283, "learning_rate": 4.050132490082698e-06, "loss": 0.1445, "step": 4251 }, { "epoch": 0.5924893750435449, "grad_norm": 0.13837683200836182, "learning_rate": 4.0478187308756775e-06, "loss": 0.1124, "step": 4252 }, { "epoch": 0.5926287187347593, "grad_norm": 0.22123748064041138, "learning_rate": 4.045505183221594e-06, "loss": 0.1548, "step": 4253 }, { "epoch": 0.5927680624259737, "grad_norm": 0.13614501059055328, "learning_rate": 4.043191847634469e-06, "loss": 0.1158, "step": 4254 }, { "epoch": 0.5929074061171881, "grad_norm": 0.2630317807197571, "learning_rate": 4.040878724628269e-06, "loss": 0.153, "step": 4255 }, { "epoch": 0.5930467498084024, "grad_norm": 0.10817182809114456, "learning_rate": 4.038565814716921e-06, "loss": 0.123, "step": 4256 }, { "epoch": 0.5931860934996168, "grad_norm": 0.19792672991752625, "learning_rate": 4.036253118414299e-06, "loss": 0.1625, "step": 4257 }, { "epoch": 0.5933254371908312, "grad_norm": 0.29456228017807007, "learning_rate": 4.033940636234233e-06, "loss": 0.1264, "step": 4258 }, { "epoch": 0.5934647808820456, "grad_norm": 0.204203799366951, "learning_rate": 4.0316283686905e-06, "loss": 0.1326, "step": 4259 }, { "epoch": 0.59360412457326, "grad_norm": 0.24546250700950623, "learning_rate": 4.029316316296834e-06, "loss": 0.163, "step": 4260 }, { "epoch": 0.5937434682644743, "grad_norm": 0.1525445133447647, "learning_rate": 4.027004479566923e-06, "loss": 0.124, "step": 4261 }, { "epoch": 0.5938828119556887, "grad_norm": 0.18488822877407074, "learning_rate": 4.024692859014403e-06, "loss": 0.1399, "step": 4262 }, { "epoch": 0.5940221556469031, "grad_norm": 0.14541564881801605, "learning_rate": 4.022381455152863e-06, "loss": 0.1118, "step": 4263 }, { "epoch": 0.5941614993381175, "grad_norm": 0.31428202986717224, "learning_rate": 4.020070268495844e-06, "loss": 0.1763, "step": 4264 }, { "epoch": 0.5943008430293318, "grad_norm": 0.21875664591789246, "learning_rate": 4.017759299556838e-06, "loss": 0.1419, "step": 4265 }, { "epoch": 0.5944401867205462, "grad_norm": 0.1504283845424652, "learning_rate": 4.015448548849293e-06, "loss": 0.1402, "step": 4266 }, { "epoch": 0.5945795304117606, "grad_norm": 0.13496489822864532, "learning_rate": 4.0131380168866e-06, "loss": 0.1353, "step": 4267 }, { "epoch": 0.594718874102975, "grad_norm": 0.07810497283935547, "learning_rate": 4.010827704182113e-06, "loss": 0.0939, "step": 4268 }, { "epoch": 0.5948582177941893, "grad_norm": 0.11954120546579361, "learning_rate": 4.0085176112491245e-06, "loss": 0.1276, "step": 4269 }, { "epoch": 0.5949975614854037, "grad_norm": 0.10950357466936111, "learning_rate": 4.006207738600887e-06, "loss": 0.1108, "step": 4270 }, { "epoch": 0.5951369051766181, "grad_norm": 0.11530250310897827, "learning_rate": 4.0038980867506e-06, "loss": 0.1168, "step": 4271 }, { "epoch": 0.5952762488678325, "grad_norm": 0.12548936903476715, "learning_rate": 4.001588656211418e-06, "loss": 0.1323, "step": 4272 }, { "epoch": 0.5954155925590469, "grad_norm": 0.12019386887550354, "learning_rate": 3.999279447496444e-06, "loss": 0.1204, "step": 4273 }, { "epoch": 0.5955549362502612, "grad_norm": 0.08279507607221603, "learning_rate": 3.996970461118729e-06, "loss": 0.1072, "step": 4274 }, { "epoch": 0.5956942799414756, "grad_norm": 0.13267481327056885, "learning_rate": 3.994661697591278e-06, "loss": 0.1366, "step": 4275 }, { "epoch": 0.59583362363269, "grad_norm": 0.17548996210098267, "learning_rate": 3.992353157427044e-06, "loss": 0.1368, "step": 4276 }, { "epoch": 0.5959729673239044, "grad_norm": 0.1566266119480133, "learning_rate": 3.990044841138934e-06, "loss": 0.1291, "step": 4277 }, { "epoch": 0.5961123110151187, "grad_norm": 0.14588594436645508, "learning_rate": 3.987736749239804e-06, "loss": 0.1633, "step": 4278 }, { "epoch": 0.5962516547063331, "grad_norm": 0.17875318229198456, "learning_rate": 3.985428882242458e-06, "loss": 0.1538, "step": 4279 }, { "epoch": 0.5963909983975475, "grad_norm": 0.2537069618701935, "learning_rate": 3.983121240659649e-06, "loss": 0.1591, "step": 4280 }, { "epoch": 0.5965303420887619, "grad_norm": 0.13925643265247345, "learning_rate": 3.980813825004086e-06, "loss": 0.1334, "step": 4281 }, { "epoch": 0.5966696857799764, "grad_norm": 0.1830863654613495, "learning_rate": 3.978506635788423e-06, "loss": 0.1288, "step": 4282 }, { "epoch": 0.5968090294711907, "grad_norm": 0.1428317278623581, "learning_rate": 3.976199673525263e-06, "loss": 0.144, "step": 4283 }, { "epoch": 0.5969483731624051, "grad_norm": 0.12170638889074326, "learning_rate": 3.973892938727164e-06, "loss": 0.1174, "step": 4284 }, { "epoch": 0.5970877168536195, "grad_norm": 0.13720670342445374, "learning_rate": 3.971586431906627e-06, "loss": 0.1213, "step": 4285 }, { "epoch": 0.5972270605448339, "grad_norm": 0.12103616446256638, "learning_rate": 3.969280153576105e-06, "loss": 0.125, "step": 4286 }, { "epoch": 0.5973664042360483, "grad_norm": 0.1885569840669632, "learning_rate": 3.966974104248001e-06, "loss": 0.1552, "step": 4287 }, { "epoch": 0.5975057479272626, "grad_norm": 0.14133763313293457, "learning_rate": 3.964668284434666e-06, "loss": 0.15, "step": 4288 }, { "epoch": 0.597645091618477, "grad_norm": 0.10447778552770615, "learning_rate": 3.962362694648404e-06, "loss": 0.1164, "step": 4289 }, { "epoch": 0.5977844353096914, "grad_norm": 0.1157258003950119, "learning_rate": 3.960057335401459e-06, "loss": 0.1213, "step": 4290 }, { "epoch": 0.5979237790009058, "grad_norm": 0.21730567514896393, "learning_rate": 3.9577522072060336e-06, "loss": 0.1384, "step": 4291 }, { "epoch": 0.5980631226921201, "grad_norm": 0.19034181535243988, "learning_rate": 3.95544731057427e-06, "loss": 0.1562, "step": 4292 }, { "epoch": 0.5982024663833345, "grad_norm": 0.13138940930366516, "learning_rate": 3.953142646018269e-06, "loss": 0.1087, "step": 4293 }, { "epoch": 0.5983418100745489, "grad_norm": 0.1396118700504303, "learning_rate": 3.95083821405007e-06, "loss": 0.117, "step": 4294 }, { "epoch": 0.5984811537657633, "grad_norm": 0.1348079890012741, "learning_rate": 3.948534015181671e-06, "loss": 0.1516, "step": 4295 }, { "epoch": 0.5986204974569777, "grad_norm": 0.15959355235099792, "learning_rate": 3.946230049925004e-06, "loss": 0.1527, "step": 4296 }, { "epoch": 0.598759841148192, "grad_norm": 0.19105781614780426, "learning_rate": 3.9439263187919635e-06, "loss": 0.1237, "step": 4297 }, { "epoch": 0.5988991848394064, "grad_norm": 0.10915183275938034, "learning_rate": 3.941622822294385e-06, "loss": 0.1177, "step": 4298 }, { "epoch": 0.5990385285306208, "grad_norm": 0.11044751107692719, "learning_rate": 3.939319560944051e-06, "loss": 0.1182, "step": 4299 }, { "epoch": 0.5991778722218352, "grad_norm": 0.07475240528583527, "learning_rate": 3.937016535252696e-06, "loss": 0.0906, "step": 4300 }, { "epoch": 0.5993172159130495, "grad_norm": 0.2450461983680725, "learning_rate": 3.934713745731998e-06, "loss": 0.1203, "step": 4301 }, { "epoch": 0.5994565596042639, "grad_norm": 0.28259825706481934, "learning_rate": 3.932411192893586e-06, "loss": 0.141, "step": 4302 }, { "epoch": 0.5995959032954783, "grad_norm": 0.08261166512966156, "learning_rate": 3.93010887724903e-06, "loss": 0.0947, "step": 4303 }, { "epoch": 0.5997352469866927, "grad_norm": 0.1697700470685959, "learning_rate": 3.927806799309859e-06, "loss": 0.1284, "step": 4304 }, { "epoch": 0.599874590677907, "grad_norm": 0.14999298751354218, "learning_rate": 3.925504959587538e-06, "loss": 0.1, "step": 4305 }, { "epoch": 0.6000139343691214, "grad_norm": 0.3357866108417511, "learning_rate": 3.9232033585934835e-06, "loss": 0.1256, "step": 4306 }, { "epoch": 0.6001532780603358, "grad_norm": 0.1626581847667694, "learning_rate": 3.920901996839059e-06, "loss": 0.1217, "step": 4307 }, { "epoch": 0.6002926217515502, "grad_norm": 0.21047811210155487, "learning_rate": 3.918600874835573e-06, "loss": 0.1471, "step": 4308 }, { "epoch": 0.6004319654427646, "grad_norm": 0.14026802778244019, "learning_rate": 3.916299993094285e-06, "loss": 0.1161, "step": 4309 }, { "epoch": 0.6005713091339789, "grad_norm": 0.235953226685524, "learning_rate": 3.913999352126399e-06, "loss": 0.1066, "step": 4310 }, { "epoch": 0.6007106528251933, "grad_norm": 0.21419523656368256, "learning_rate": 3.9116989524430615e-06, "loss": 0.1587, "step": 4311 }, { "epoch": 0.6008499965164077, "grad_norm": 0.2021835893392563, "learning_rate": 3.90939879455537e-06, "loss": 0.1257, "step": 4312 }, { "epoch": 0.6009893402076221, "grad_norm": 0.1940818578004837, "learning_rate": 3.907098878974367e-06, "loss": 0.1095, "step": 4313 }, { "epoch": 0.6011286838988364, "grad_norm": 0.1370704621076584, "learning_rate": 3.9047992062110435e-06, "loss": 0.1437, "step": 4314 }, { "epoch": 0.6012680275900508, "grad_norm": 0.139576718211174, "learning_rate": 3.902499776776331e-06, "loss": 0.1223, "step": 4315 }, { "epoch": 0.6014073712812652, "grad_norm": 0.22035634517669678, "learning_rate": 3.900200591181114e-06, "loss": 0.1401, "step": 4316 }, { "epoch": 0.6015467149724796, "grad_norm": 0.1916629523038864, "learning_rate": 3.897901649936215e-06, "loss": 0.1008, "step": 4317 }, { "epoch": 0.601686058663694, "grad_norm": 0.1085834875702858, "learning_rate": 3.895602953552408e-06, "loss": 0.1082, "step": 4318 }, { "epoch": 0.6018254023549083, "grad_norm": 0.17580708861351013, "learning_rate": 3.8933045025404105e-06, "loss": 0.1326, "step": 4319 }, { "epoch": 0.6019647460461227, "grad_norm": 0.11323322355747223, "learning_rate": 3.891006297410887e-06, "loss": 0.1068, "step": 4320 }, { "epoch": 0.6021040897373371, "grad_norm": 0.1284841150045395, "learning_rate": 3.888708338674447e-06, "loss": 0.1211, "step": 4321 }, { "epoch": 0.6022434334285516, "grad_norm": 0.1422480046749115, "learning_rate": 3.8864106268416416e-06, "loss": 0.1222, "step": 4322 }, { "epoch": 0.602382777119766, "grad_norm": 0.16361550986766815, "learning_rate": 3.884113162422971e-06, "loss": 0.1029, "step": 4323 }, { "epoch": 0.6025221208109803, "grad_norm": 0.21553897857666016, "learning_rate": 3.881815945928879e-06, "loss": 0.1229, "step": 4324 }, { "epoch": 0.6026614645021947, "grad_norm": 0.20124076306819916, "learning_rate": 3.879518977869755e-06, "loss": 0.157, "step": 4325 }, { "epoch": 0.6028008081934091, "grad_norm": 0.11446461826562881, "learning_rate": 3.8772222587559345e-06, "loss": 0.1182, "step": 4326 }, { "epoch": 0.6029401518846235, "grad_norm": 0.15026599168777466, "learning_rate": 3.874925789097695e-06, "loss": 0.1305, "step": 4327 }, { "epoch": 0.6030794955758378, "grad_norm": 0.32949012517929077, "learning_rate": 3.872629569405257e-06, "loss": 0.1095, "step": 4328 }, { "epoch": 0.6032188392670522, "grad_norm": 0.19304126501083374, "learning_rate": 3.870333600188792e-06, "loss": 0.1339, "step": 4329 }, { "epoch": 0.6033581829582666, "grad_norm": 0.18154646456241608, "learning_rate": 3.86803788195841e-06, "loss": 0.1423, "step": 4330 }, { "epoch": 0.603497526649481, "grad_norm": 0.23023289442062378, "learning_rate": 3.865742415224169e-06, "loss": 0.139, "step": 4331 }, { "epoch": 0.6036368703406954, "grad_norm": 0.332530677318573, "learning_rate": 3.863447200496065e-06, "loss": 0.1661, "step": 4332 }, { "epoch": 0.6037762140319097, "grad_norm": 0.3054477572441101, "learning_rate": 3.8611522382840476e-06, "loss": 0.1446, "step": 4333 }, { "epoch": 0.6039155577231241, "grad_norm": 0.1615372896194458, "learning_rate": 3.858857529098001e-06, "loss": 0.1395, "step": 4334 }, { "epoch": 0.6040549014143385, "grad_norm": 0.3283761441707611, "learning_rate": 3.8565630734477575e-06, "loss": 0.1385, "step": 4335 }, { "epoch": 0.6041942451055529, "grad_norm": 0.15782010555267334, "learning_rate": 3.854268871843096e-06, "loss": 0.1404, "step": 4336 }, { "epoch": 0.6043335887967672, "grad_norm": 0.198979914188385, "learning_rate": 3.851974924793734e-06, "loss": 0.1438, "step": 4337 }, { "epoch": 0.6044729324879816, "grad_norm": 0.21504665911197662, "learning_rate": 3.8496812328093335e-06, "loss": 0.1404, "step": 4338 }, { "epoch": 0.604612276179196, "grad_norm": 0.1737816333770752, "learning_rate": 3.8473877963995e-06, "loss": 0.1183, "step": 4339 }, { "epoch": 0.6047516198704104, "grad_norm": 0.08537336438894272, "learning_rate": 3.845094616073783e-06, "loss": 0.1016, "step": 4340 }, { "epoch": 0.6048909635616248, "grad_norm": 0.05992095544934273, "learning_rate": 3.8428016923416775e-06, "loss": 0.0947, "step": 4341 }, { "epoch": 0.6050303072528391, "grad_norm": 0.14182037115097046, "learning_rate": 3.840509025712616e-06, "loss": 0.1186, "step": 4342 }, { "epoch": 0.6051696509440535, "grad_norm": 0.1927037090063095, "learning_rate": 3.838216616695977e-06, "loss": 0.1665, "step": 4343 }, { "epoch": 0.6053089946352679, "grad_norm": 0.13594050705432892, "learning_rate": 3.835924465801081e-06, "loss": 0.1128, "step": 4344 }, { "epoch": 0.6054483383264823, "grad_norm": 0.2716216444969177, "learning_rate": 3.833632573537193e-06, "loss": 0.1654, "step": 4345 }, { "epoch": 0.6055876820176966, "grad_norm": 0.10056901723146439, "learning_rate": 3.831340940413519e-06, "loss": 0.1294, "step": 4346 }, { "epoch": 0.605727025708911, "grad_norm": 0.13285264372825623, "learning_rate": 3.8290495669392085e-06, "loss": 0.1193, "step": 4347 }, { "epoch": 0.6058663694001254, "grad_norm": 0.17926277220249176, "learning_rate": 3.826758453623348e-06, "loss": 0.1746, "step": 4348 }, { "epoch": 0.6060057130913398, "grad_norm": 0.25602802634239197, "learning_rate": 3.8244676009749745e-06, "loss": 0.156, "step": 4349 }, { "epoch": 0.6061450567825541, "grad_norm": 0.14121519029140472, "learning_rate": 3.8221770095030625e-06, "loss": 0.1096, "step": 4350 }, { "epoch": 0.6062844004737685, "grad_norm": 0.2320110946893692, "learning_rate": 3.819886679716528e-06, "loss": 0.1445, "step": 4351 }, { "epoch": 0.6064237441649829, "grad_norm": 0.10012206435203552, "learning_rate": 3.8175966121242314e-06, "loss": 0.1018, "step": 4352 }, { "epoch": 0.6065630878561973, "grad_norm": 0.1668437123298645, "learning_rate": 3.815306807234974e-06, "loss": 0.1618, "step": 4353 }, { "epoch": 0.6067024315474117, "grad_norm": 0.13453622162342072, "learning_rate": 3.8130172655574963e-06, "loss": 0.1314, "step": 4354 }, { "epoch": 0.606841775238626, "grad_norm": 0.12191693484783173, "learning_rate": 3.810727987600482e-06, "loss": 0.1275, "step": 4355 }, { "epoch": 0.6069811189298404, "grad_norm": 0.15527194738388062, "learning_rate": 3.808438973872558e-06, "loss": 0.1151, "step": 4356 }, { "epoch": 0.6071204626210548, "grad_norm": 0.1620427370071411, "learning_rate": 3.80615022488229e-06, "loss": 0.1183, "step": 4357 }, { "epoch": 0.6072598063122692, "grad_norm": 0.1495034396648407, "learning_rate": 3.8038617411381876e-06, "loss": 0.1304, "step": 4358 }, { "epoch": 0.6073991500034835, "grad_norm": 0.17272727191448212, "learning_rate": 3.8015735231486974e-06, "loss": 0.1558, "step": 4359 }, { "epoch": 0.6075384936946979, "grad_norm": 0.18711654841899872, "learning_rate": 3.799285571422208e-06, "loss": 0.1328, "step": 4360 }, { "epoch": 0.6076778373859123, "grad_norm": 0.21242335438728333, "learning_rate": 3.7969978864670527e-06, "loss": 0.1183, "step": 4361 }, { "epoch": 0.6078171810771267, "grad_norm": 0.1503937691450119, "learning_rate": 3.794710468791502e-06, "loss": 0.1313, "step": 4362 }, { "epoch": 0.6079565247683412, "grad_norm": 0.16277578473091125, "learning_rate": 3.7924233189037697e-06, "loss": 0.0959, "step": 4363 }, { "epoch": 0.6080958684595555, "grad_norm": 0.14202001690864563, "learning_rate": 3.7901364373120036e-06, "loss": 0.1271, "step": 4364 }, { "epoch": 0.6082352121507699, "grad_norm": 0.19129399955272675, "learning_rate": 3.787849824524301e-06, "loss": 0.1336, "step": 4365 }, { "epoch": 0.6083745558419843, "grad_norm": 0.17695605754852295, "learning_rate": 3.7855634810486936e-06, "loss": 0.1114, "step": 4366 }, { "epoch": 0.6085138995331987, "grad_norm": 0.17757846415042877, "learning_rate": 3.7832774073931535e-06, "loss": 0.1348, "step": 4367 }, { "epoch": 0.608653243224413, "grad_norm": 0.17446132004261017, "learning_rate": 3.780991604065598e-06, "loss": 0.1323, "step": 4368 }, { "epoch": 0.6087925869156274, "grad_norm": 0.18022486567497253, "learning_rate": 3.778706071573875e-06, "loss": 0.1282, "step": 4369 }, { "epoch": 0.6089319306068418, "grad_norm": 0.2302403301000595, "learning_rate": 3.776420810425781e-06, "loss": 0.1727, "step": 4370 }, { "epoch": 0.6090712742980562, "grad_norm": 0.23508627712726593, "learning_rate": 3.774135821129047e-06, "loss": 0.1207, "step": 4371 }, { "epoch": 0.6092106179892706, "grad_norm": 0.14100702106952667, "learning_rate": 3.771851104191348e-06, "loss": 0.1003, "step": 4372 }, { "epoch": 0.6093499616804849, "grad_norm": 0.16879719495773315, "learning_rate": 3.7695666601202944e-06, "loss": 0.145, "step": 4373 }, { "epoch": 0.6094893053716993, "grad_norm": 0.10462018102407455, "learning_rate": 3.7672824894234388e-06, "loss": 0.1184, "step": 4374 }, { "epoch": 0.6096286490629137, "grad_norm": 0.11923114955425262, "learning_rate": 3.7649985926082695e-06, "loss": 0.131, "step": 4375 }, { "epoch": 0.6097679927541281, "grad_norm": 0.17369617521762848, "learning_rate": 3.762714970182216e-06, "loss": 0.1344, "step": 4376 }, { "epoch": 0.6099073364453425, "grad_norm": 0.16216544806957245, "learning_rate": 3.76043162265265e-06, "loss": 0.116, "step": 4377 }, { "epoch": 0.6100466801365568, "grad_norm": 0.2983247637748718, "learning_rate": 3.758148550526877e-06, "loss": 0.1412, "step": 4378 }, { "epoch": 0.6101860238277712, "grad_norm": 0.1043354794383049, "learning_rate": 3.7558657543121456e-06, "loss": 0.0969, "step": 4379 }, { "epoch": 0.6103253675189856, "grad_norm": 0.14634844660758972, "learning_rate": 3.7535832345156376e-06, "loss": 0.1178, "step": 4380 }, { "epoch": 0.6104647112102, "grad_norm": 0.44975677132606506, "learning_rate": 3.7513009916444797e-06, "loss": 0.1925, "step": 4381 }, { "epoch": 0.6106040549014143, "grad_norm": 0.15751302242279053, "learning_rate": 3.7490190262057322e-06, "loss": 0.1267, "step": 4382 }, { "epoch": 0.6107433985926287, "grad_norm": 0.10846684128046036, "learning_rate": 3.7467373387063973e-06, "loss": 0.1079, "step": 4383 }, { "epoch": 0.6108827422838431, "grad_norm": 0.18189144134521484, "learning_rate": 3.7444559296534144e-06, "loss": 0.1341, "step": 4384 }, { "epoch": 0.6110220859750575, "grad_norm": 0.2839486002922058, "learning_rate": 3.7421747995536585e-06, "loss": 0.1585, "step": 4385 }, { "epoch": 0.6111614296662718, "grad_norm": 0.24778695404529572, "learning_rate": 3.739893948913945e-06, "loss": 0.1361, "step": 4386 }, { "epoch": 0.6113007733574862, "grad_norm": 0.19148410856723785, "learning_rate": 3.7376133782410275e-06, "loss": 0.1486, "step": 4387 }, { "epoch": 0.6114401170487006, "grad_norm": 0.12646149098873138, "learning_rate": 3.7353330880415963e-06, "loss": 0.1082, "step": 4388 }, { "epoch": 0.611579460739915, "grad_norm": 0.19380688667297363, "learning_rate": 3.7330530788222807e-06, "loss": 0.1422, "step": 4389 }, { "epoch": 0.6117188044311294, "grad_norm": 0.16354387998580933, "learning_rate": 3.730773351089647e-06, "loss": 0.099, "step": 4390 }, { "epoch": 0.6118581481223437, "grad_norm": 0.18047770857810974, "learning_rate": 3.7284939053501966e-06, "loss": 0.1178, "step": 4391 }, { "epoch": 0.6119974918135581, "grad_norm": 0.24228432774543762, "learning_rate": 3.7262147421103713e-06, "loss": 0.1284, "step": 4392 }, { "epoch": 0.6121368355047725, "grad_norm": 0.1675819605588913, "learning_rate": 3.723935861876549e-06, "loss": 0.1418, "step": 4393 }, { "epoch": 0.6122761791959869, "grad_norm": 0.1308342069387436, "learning_rate": 3.7216572651550453e-06, "loss": 0.1126, "step": 4394 }, { "epoch": 0.6124155228872012, "grad_norm": 0.1079743430018425, "learning_rate": 3.7193789524521146e-06, "loss": 0.0926, "step": 4395 }, { "epoch": 0.6125548665784156, "grad_norm": 0.11875308305025101, "learning_rate": 3.717100924273941e-06, "loss": 0.1101, "step": 4396 }, { "epoch": 0.61269421026963, "grad_norm": 0.17393623292446136, "learning_rate": 3.714823181126653e-06, "loss": 0.1411, "step": 4397 }, { "epoch": 0.6128335539608444, "grad_norm": 0.2487453669309616, "learning_rate": 3.7125457235163144e-06, "loss": 0.1368, "step": 4398 }, { "epoch": 0.6129728976520588, "grad_norm": 0.23057202994823456, "learning_rate": 3.710268551948921e-06, "loss": 0.1417, "step": 4399 }, { "epoch": 0.6131122413432731, "grad_norm": 0.20459389686584473, "learning_rate": 3.7079916669304127e-06, "loss": 0.1345, "step": 4400 }, { "epoch": 0.6132515850344875, "grad_norm": 0.17863543331623077, "learning_rate": 3.7057150689666577e-06, "loss": 0.1193, "step": 4401 }, { "epoch": 0.6133909287257019, "grad_norm": 0.12350375205278397, "learning_rate": 3.7034387585634656e-06, "loss": 0.0975, "step": 4402 }, { "epoch": 0.6135302724169164, "grad_norm": 0.160102978348732, "learning_rate": 3.701162736226579e-06, "loss": 0.1294, "step": 4403 }, { "epoch": 0.6136696161081308, "grad_norm": 0.09834218770265579, "learning_rate": 3.6988870024616807e-06, "loss": 0.1163, "step": 4404 }, { "epoch": 0.6138089597993451, "grad_norm": 0.12409201264381409, "learning_rate": 3.6966115577743865e-06, "loss": 0.1536, "step": 4405 }, { "epoch": 0.6139483034905595, "grad_norm": 0.11669593304395676, "learning_rate": 3.6943364026702466e-06, "loss": 0.1188, "step": 4406 }, { "epoch": 0.6140876471817739, "grad_norm": 0.12680979073047638, "learning_rate": 3.6920615376547487e-06, "loss": 0.1266, "step": 4407 }, { "epoch": 0.6142269908729883, "grad_norm": 0.3735344707965851, "learning_rate": 3.6897869632333157e-06, "loss": 0.1934, "step": 4408 }, { "epoch": 0.6143663345642026, "grad_norm": 0.11221999675035477, "learning_rate": 3.687512679911307e-06, "loss": 0.1191, "step": 4409 }, { "epoch": 0.614505678255417, "grad_norm": 0.1028902679681778, "learning_rate": 3.685238688194016e-06, "loss": 0.1127, "step": 4410 }, { "epoch": 0.6146450219466314, "grad_norm": 0.15105774998664856, "learning_rate": 3.682964988586675e-06, "loss": 0.1236, "step": 4411 }, { "epoch": 0.6147843656378458, "grad_norm": 0.1923595815896988, "learning_rate": 3.6806915815944422e-06, "loss": 0.1734, "step": 4412 }, { "epoch": 0.6149237093290602, "grad_norm": 0.1554013043642044, "learning_rate": 3.6784184677224204e-06, "loss": 0.0991, "step": 4413 }, { "epoch": 0.6150630530202745, "grad_norm": 0.16209430992603302, "learning_rate": 3.676145647475643e-06, "loss": 0.1105, "step": 4414 }, { "epoch": 0.6152023967114889, "grad_norm": 0.20808856189250946, "learning_rate": 3.673873121359077e-06, "loss": 0.136, "step": 4415 }, { "epoch": 0.6153417404027033, "grad_norm": 0.12345068156719208, "learning_rate": 3.6716008898776306e-06, "loss": 0.1237, "step": 4416 }, { "epoch": 0.6154810840939177, "grad_norm": 0.11196300387382507, "learning_rate": 3.669328953536137e-06, "loss": 0.1127, "step": 4417 }, { "epoch": 0.615620427785132, "grad_norm": 0.12801145017147064, "learning_rate": 3.6670573128393704e-06, "loss": 0.141, "step": 4418 }, { "epoch": 0.6157597714763464, "grad_norm": 0.15932121872901917, "learning_rate": 3.664785968292036e-06, "loss": 0.1228, "step": 4419 }, { "epoch": 0.6158991151675608, "grad_norm": 0.12888821959495544, "learning_rate": 3.662514920398777e-06, "loss": 0.1291, "step": 4420 }, { "epoch": 0.6160384588587752, "grad_norm": 0.13710322976112366, "learning_rate": 3.6602441696641684e-06, "loss": 0.1215, "step": 4421 }, { "epoch": 0.6161778025499896, "grad_norm": 0.15985748171806335, "learning_rate": 3.6579737165927176e-06, "loss": 0.1598, "step": 4422 }, { "epoch": 0.6163171462412039, "grad_norm": 0.17385238409042358, "learning_rate": 3.655703561688867e-06, "loss": 0.1351, "step": 4423 }, { "epoch": 0.6164564899324183, "grad_norm": 0.14216020703315735, "learning_rate": 3.653433705456994e-06, "loss": 0.144, "step": 4424 }, { "epoch": 0.6165958336236327, "grad_norm": 0.14270348846912384, "learning_rate": 3.651164148401409e-06, "loss": 0.1256, "step": 4425 }, { "epoch": 0.6167351773148471, "grad_norm": 0.1341039091348648, "learning_rate": 3.648894891026358e-06, "loss": 0.1104, "step": 4426 }, { "epoch": 0.6168745210060614, "grad_norm": 0.2255815714597702, "learning_rate": 3.646625933836015e-06, "loss": 0.1388, "step": 4427 }, { "epoch": 0.6170138646972758, "grad_norm": 0.08524800837039948, "learning_rate": 3.64435727733449e-06, "loss": 0.1025, "step": 4428 }, { "epoch": 0.6171532083884902, "grad_norm": 0.19048212468624115, "learning_rate": 3.6420889220258295e-06, "loss": 0.1761, "step": 4429 }, { "epoch": 0.6172925520797046, "grad_norm": 0.13640595972537994, "learning_rate": 3.639820868414008e-06, "loss": 0.1183, "step": 4430 }, { "epoch": 0.617431895770919, "grad_norm": 0.12850046157836914, "learning_rate": 3.6375531170029356e-06, "loss": 0.1371, "step": 4431 }, { "epoch": 0.6175712394621333, "grad_norm": 0.21443340182304382, "learning_rate": 3.6352856682964576e-06, "loss": 0.1379, "step": 4432 }, { "epoch": 0.6177105831533477, "grad_norm": 0.1000034287571907, "learning_rate": 3.633018522798346e-06, "loss": 0.1102, "step": 4433 }, { "epoch": 0.6178499268445621, "grad_norm": 0.21170160174369812, "learning_rate": 3.6307516810123095e-06, "loss": 0.1181, "step": 4434 }, { "epoch": 0.6179892705357765, "grad_norm": 0.13434064388275146, "learning_rate": 3.6284851434419886e-06, "loss": 0.1031, "step": 4435 }, { "epoch": 0.6181286142269908, "grad_norm": 0.1371869444847107, "learning_rate": 3.6262189105909574e-06, "loss": 0.1097, "step": 4436 }, { "epoch": 0.6182679579182052, "grad_norm": 0.1975993514060974, "learning_rate": 3.6239529829627214e-06, "loss": 0.1665, "step": 4437 }, { "epoch": 0.6184073016094196, "grad_norm": 0.20237423479557037, "learning_rate": 3.6216873610607155e-06, "loss": 0.1515, "step": 4438 }, { "epoch": 0.618546645300634, "grad_norm": 0.12222549319267273, "learning_rate": 3.61942204538831e-06, "loss": 0.1228, "step": 4439 }, { "epoch": 0.6186859889918483, "grad_norm": 0.14637120068073273, "learning_rate": 3.6171570364488075e-06, "loss": 0.1166, "step": 4440 }, { "epoch": 0.6188253326830627, "grad_norm": 0.11194673180580139, "learning_rate": 3.6148923347454413e-06, "loss": 0.1045, "step": 4441 }, { "epoch": 0.6189646763742771, "grad_norm": 0.1451418101787567, "learning_rate": 3.6126279407813765e-06, "loss": 0.1396, "step": 4442 }, { "epoch": 0.6191040200654916, "grad_norm": 0.20364616811275482, "learning_rate": 3.6103638550597074e-06, "loss": 0.1567, "step": 4443 }, { "epoch": 0.619243363756706, "grad_norm": 0.2134614884853363, "learning_rate": 3.6081000780834635e-06, "loss": 0.1221, "step": 4444 }, { "epoch": 0.6193827074479203, "grad_norm": 0.1376180499792099, "learning_rate": 3.6058366103556055e-06, "loss": 0.1352, "step": 4445 }, { "epoch": 0.6195220511391347, "grad_norm": 0.17459197342395782, "learning_rate": 3.6035734523790235e-06, "loss": 0.1558, "step": 4446 }, { "epoch": 0.6196613948303491, "grad_norm": 0.10460767149925232, "learning_rate": 3.6013106046565383e-06, "loss": 0.1127, "step": 4447 }, { "epoch": 0.6198007385215635, "grad_norm": 0.13655954599380493, "learning_rate": 3.5990480676909055e-06, "loss": 0.1218, "step": 4448 }, { "epoch": 0.6199400822127779, "grad_norm": 0.17673295736312866, "learning_rate": 3.5967858419848077e-06, "loss": 0.1175, "step": 4449 }, { "epoch": 0.6200794259039922, "grad_norm": 0.1538500338792801, "learning_rate": 3.5945239280408596e-06, "loss": 0.1264, "step": 4450 }, { "epoch": 0.6202187695952066, "grad_norm": 0.15675674378871918, "learning_rate": 3.592262326361606e-06, "loss": 0.1223, "step": 4451 }, { "epoch": 0.620358113286421, "grad_norm": 0.14454489946365356, "learning_rate": 3.5900010374495252e-06, "loss": 0.1324, "step": 4452 }, { "epoch": 0.6204974569776354, "grad_norm": 0.1355178952217102, "learning_rate": 3.587740061807024e-06, "loss": 0.1323, "step": 4453 }, { "epoch": 0.6206368006688497, "grad_norm": 0.13727647066116333, "learning_rate": 3.585479399936438e-06, "loss": 0.1192, "step": 4454 }, { "epoch": 0.6207761443600641, "grad_norm": 0.10442978143692017, "learning_rate": 3.583219052340034e-06, "loss": 0.1175, "step": 4455 }, { "epoch": 0.6209154880512785, "grad_norm": 0.16889351606369019, "learning_rate": 3.5809590195200115e-06, "loss": 0.1405, "step": 4456 }, { "epoch": 0.6210548317424929, "grad_norm": 0.1819448471069336, "learning_rate": 3.578699301978499e-06, "loss": 0.1263, "step": 4457 }, { "epoch": 0.6211941754337073, "grad_norm": 0.1330970674753189, "learning_rate": 3.576439900217552e-06, "loss": 0.1176, "step": 4458 }, { "epoch": 0.6213335191249216, "grad_norm": 0.23402096331119537, "learning_rate": 3.5741808147391587e-06, "loss": 0.1506, "step": 4459 }, { "epoch": 0.621472862816136, "grad_norm": 0.1975078582763672, "learning_rate": 3.571922046045235e-06, "loss": 0.1541, "step": 4460 }, { "epoch": 0.6216122065073504, "grad_norm": 0.2078387290239334, "learning_rate": 3.5696635946376305e-06, "loss": 0.1311, "step": 4461 }, { "epoch": 0.6217515501985648, "grad_norm": 0.17728854715824127, "learning_rate": 3.5674054610181203e-06, "loss": 0.1305, "step": 4462 }, { "epoch": 0.6218908938897791, "grad_norm": 0.12310479581356049, "learning_rate": 3.5651476456884103e-06, "loss": 0.114, "step": 4463 }, { "epoch": 0.6220302375809935, "grad_norm": 0.1429939717054367, "learning_rate": 3.562890149150134e-06, "loss": 0.1534, "step": 4464 }, { "epoch": 0.6221695812722079, "grad_norm": 0.07209622114896774, "learning_rate": 3.560632971904857e-06, "loss": 0.0989, "step": 4465 }, { "epoch": 0.6223089249634223, "grad_norm": 0.15981359779834747, "learning_rate": 3.558376114454073e-06, "loss": 0.1484, "step": 4466 }, { "epoch": 0.6224482686546366, "grad_norm": 0.12872260808944702, "learning_rate": 3.556119577299202e-06, "loss": 0.1222, "step": 4467 }, { "epoch": 0.622587612345851, "grad_norm": 0.1373349130153656, "learning_rate": 3.553863360941598e-06, "loss": 0.1297, "step": 4468 }, { "epoch": 0.6227269560370654, "grad_norm": 0.1195082888007164, "learning_rate": 3.55160746588254e-06, "loss": 0.1217, "step": 4469 }, { "epoch": 0.6228662997282798, "grad_norm": 0.16281922161579132, "learning_rate": 3.5493518926232352e-06, "loss": 0.1224, "step": 4470 }, { "epoch": 0.6230056434194942, "grad_norm": 0.15760566294193268, "learning_rate": 3.547096641664819e-06, "loss": 0.1392, "step": 4471 }, { "epoch": 0.6231449871107085, "grad_norm": 0.18419945240020752, "learning_rate": 3.5448417135083603e-06, "loss": 0.1272, "step": 4472 }, { "epoch": 0.6232843308019229, "grad_norm": 0.09631090611219406, "learning_rate": 3.5425871086548513e-06, "loss": 0.104, "step": 4473 }, { "epoch": 0.6234236744931373, "grad_norm": 0.0691721960902214, "learning_rate": 3.540332827605214e-06, "loss": 0.0857, "step": 4474 }, { "epoch": 0.6235630181843517, "grad_norm": 0.14825375378131866, "learning_rate": 3.538078870860297e-06, "loss": 0.1469, "step": 4475 }, { "epoch": 0.623702361875566, "grad_norm": 0.16667155921459198, "learning_rate": 3.5358252389208777e-06, "loss": 0.1242, "step": 4476 }, { "epoch": 0.6238417055667804, "grad_norm": 0.16569583117961884, "learning_rate": 3.533571932287663e-06, "loss": 0.1267, "step": 4477 }, { "epoch": 0.6239810492579948, "grad_norm": 0.11955393850803375, "learning_rate": 3.5313189514612867e-06, "loss": 0.104, "step": 4478 }, { "epoch": 0.6241203929492092, "grad_norm": 0.18792027235031128, "learning_rate": 3.5290662969423097e-06, "loss": 0.162, "step": 4479 }, { "epoch": 0.6242597366404236, "grad_norm": 0.15894587337970734, "learning_rate": 3.5268139692312163e-06, "loss": 0.1383, "step": 4480 }, { "epoch": 0.6243990803316379, "grad_norm": 0.17334811389446259, "learning_rate": 3.5245619688284277e-06, "loss": 0.1304, "step": 4481 }, { "epoch": 0.6245384240228523, "grad_norm": 0.1101289913058281, "learning_rate": 3.522310296234285e-06, "loss": 0.1407, "step": 4482 }, { "epoch": 0.6246777677140668, "grad_norm": 0.1776943951845169, "learning_rate": 3.520058951949056e-06, "loss": 0.1238, "step": 4483 }, { "epoch": 0.6248171114052812, "grad_norm": 0.16357702016830444, "learning_rate": 3.517807936472942e-06, "loss": 0.163, "step": 4484 }, { "epoch": 0.6249564550964956, "grad_norm": 0.16104285418987274, "learning_rate": 3.515557250306067e-06, "loss": 0.1672, "step": 4485 }, { "epoch": 0.6250957987877099, "grad_norm": 0.1746043711900711, "learning_rate": 3.5133068939484793e-06, "loss": 0.1389, "step": 4486 }, { "epoch": 0.6252351424789243, "grad_norm": 0.16161179542541504, "learning_rate": 3.511056867900157e-06, "loss": 0.1533, "step": 4487 }, { "epoch": 0.6253744861701387, "grad_norm": 0.12511014938354492, "learning_rate": 3.508807172661006e-06, "loss": 0.1381, "step": 4488 }, { "epoch": 0.6255138298613531, "grad_norm": 0.1620807647705078, "learning_rate": 3.506557808730857e-06, "loss": 0.1521, "step": 4489 }, { "epoch": 0.6256531735525674, "grad_norm": 0.18491317331790924, "learning_rate": 3.504308776609468e-06, "loss": 0.1172, "step": 4490 }, { "epoch": 0.6257925172437818, "grad_norm": 0.13572536408901215, "learning_rate": 3.502060076796521e-06, "loss": 0.1331, "step": 4491 }, { "epoch": 0.6259318609349962, "grad_norm": 0.17783284187316895, "learning_rate": 3.4998117097916247e-06, "loss": 0.1324, "step": 4492 }, { "epoch": 0.6260712046262106, "grad_norm": 0.15136772394180298, "learning_rate": 3.4975636760943177e-06, "loss": 0.1224, "step": 4493 }, { "epoch": 0.626210548317425, "grad_norm": 0.13626472651958466, "learning_rate": 3.49531597620406e-06, "loss": 0.121, "step": 4494 }, { "epoch": 0.6263498920086393, "grad_norm": 0.15985016524791718, "learning_rate": 3.4930686106202428e-06, "loss": 0.1358, "step": 4495 }, { "epoch": 0.6264892356998537, "grad_norm": 0.20919133722782135, "learning_rate": 3.4908215798421737e-06, "loss": 0.1446, "step": 4496 }, { "epoch": 0.6266285793910681, "grad_norm": 0.23934604227542877, "learning_rate": 3.488574884369095e-06, "loss": 0.1185, "step": 4497 }, { "epoch": 0.6267679230822825, "grad_norm": 0.14461708068847656, "learning_rate": 3.486328524700171e-06, "loss": 0.1175, "step": 4498 }, { "epoch": 0.6269072667734968, "grad_norm": 0.14529545605182648, "learning_rate": 3.4840825013344897e-06, "loss": 0.1478, "step": 4499 }, { "epoch": 0.6270466104647112, "grad_norm": 0.10973912477493286, "learning_rate": 3.48183681477107e-06, "loss": 0.1161, "step": 4500 }, { "epoch": 0.6271859541559256, "grad_norm": 0.15668903291225433, "learning_rate": 3.4795914655088486e-06, "loss": 0.1297, "step": 4501 }, { "epoch": 0.62732529784714, "grad_norm": 0.14282508194446564, "learning_rate": 3.4773464540466917e-06, "loss": 0.1191, "step": 4502 }, { "epoch": 0.6274646415383544, "grad_norm": 0.11722876876592636, "learning_rate": 3.47510178088339e-06, "loss": 0.1025, "step": 4503 }, { "epoch": 0.6276039852295687, "grad_norm": 0.10319387912750244, "learning_rate": 3.4728574465176585e-06, "loss": 0.1186, "step": 4504 }, { "epoch": 0.6277433289207831, "grad_norm": 0.20139196515083313, "learning_rate": 3.4706134514481372e-06, "loss": 0.1482, "step": 4505 }, { "epoch": 0.6278826726119975, "grad_norm": 0.13482670485973358, "learning_rate": 3.468369796173392e-06, "loss": 0.1331, "step": 4506 }, { "epoch": 0.6280220163032119, "grad_norm": 0.17098037898540497, "learning_rate": 3.4661264811919093e-06, "loss": 0.1368, "step": 4507 }, { "epoch": 0.6281613599944262, "grad_norm": 0.11416925489902496, "learning_rate": 3.4638835070021027e-06, "loss": 0.1007, "step": 4508 }, { "epoch": 0.6283007036856406, "grad_norm": 0.1290002465248108, "learning_rate": 3.4616408741023113e-06, "loss": 0.1001, "step": 4509 }, { "epoch": 0.628440047376855, "grad_norm": 0.0767245665192604, "learning_rate": 3.459398582990795e-06, "loss": 0.0956, "step": 4510 }, { "epoch": 0.6285793910680694, "grad_norm": 0.16831931471824646, "learning_rate": 3.4571566341657446e-06, "loss": 0.1509, "step": 4511 }, { "epoch": 0.6287187347592837, "grad_norm": 0.12005900591611862, "learning_rate": 3.4549150281252635e-06, "loss": 0.0903, "step": 4512 }, { "epoch": 0.6288580784504981, "grad_norm": 0.12555186450481415, "learning_rate": 3.452673765367389e-06, "loss": 0.1021, "step": 4513 }, { "epoch": 0.6289974221417125, "grad_norm": 0.15979798138141632, "learning_rate": 3.450432846390078e-06, "loss": 0.1465, "step": 4514 }, { "epoch": 0.6291367658329269, "grad_norm": 0.1511552929878235, "learning_rate": 3.4481922716912097e-06, "loss": 0.1224, "step": 4515 }, { "epoch": 0.6292761095241413, "grad_norm": 0.22897854447364807, "learning_rate": 3.445952041768593e-06, "loss": 0.1582, "step": 4516 }, { "epoch": 0.6294154532153556, "grad_norm": 0.09292495995759964, "learning_rate": 3.443712157119952e-06, "loss": 0.1065, "step": 4517 }, { "epoch": 0.62955479690657, "grad_norm": 0.2685968279838562, "learning_rate": 3.4414726182429388e-06, "loss": 0.1389, "step": 4518 }, { "epoch": 0.6296941405977844, "grad_norm": 0.23361027240753174, "learning_rate": 3.4392334256351265e-06, "loss": 0.1799, "step": 4519 }, { "epoch": 0.6298334842889988, "grad_norm": 0.357620507478714, "learning_rate": 3.436994579794016e-06, "loss": 0.1769, "step": 4520 }, { "epoch": 0.6299728279802131, "grad_norm": 0.12679167091846466, "learning_rate": 3.4347560812170267e-06, "loss": 0.1085, "step": 4521 }, { "epoch": 0.6301121716714275, "grad_norm": 0.12787286937236786, "learning_rate": 3.4325179304014997e-06, "loss": 0.1031, "step": 4522 }, { "epoch": 0.6302515153626419, "grad_norm": 0.22584056854248047, "learning_rate": 3.4302801278447028e-06, "loss": 0.1475, "step": 4523 }, { "epoch": 0.6303908590538564, "grad_norm": 0.26075100898742676, "learning_rate": 3.428042674043822e-06, "loss": 0.1423, "step": 4524 }, { "epoch": 0.6305302027450708, "grad_norm": 0.19654089212417603, "learning_rate": 3.425805569495973e-06, "loss": 0.1318, "step": 4525 }, { "epoch": 0.6306695464362851, "grad_norm": 0.20039574801921844, "learning_rate": 3.4235688146981854e-06, "loss": 0.1657, "step": 4526 }, { "epoch": 0.6308088901274995, "grad_norm": 0.2577967345714569, "learning_rate": 3.42133241014742e-06, "loss": 0.1515, "step": 4527 }, { "epoch": 0.6309482338187139, "grad_norm": 0.23710817098617554, "learning_rate": 3.4190963563405482e-06, "loss": 0.1471, "step": 4528 }, { "epoch": 0.6310875775099283, "grad_norm": 0.15373921394348145, "learning_rate": 3.416860653774374e-06, "loss": 0.1417, "step": 4529 }, { "epoch": 0.6312269212011427, "grad_norm": 0.16567708551883698, "learning_rate": 3.4146253029456195e-06, "loss": 0.1437, "step": 4530 }, { "epoch": 0.631366264892357, "grad_norm": 0.15805430710315704, "learning_rate": 3.4123903043509267e-06, "loss": 0.1593, "step": 4531 }, { "epoch": 0.6315056085835714, "grad_norm": 0.2242424041032791, "learning_rate": 3.4101556584868646e-06, "loss": 0.1254, "step": 4532 }, { "epoch": 0.6316449522747858, "grad_norm": 0.23762498795986176, "learning_rate": 3.407921365849917e-06, "loss": 0.1296, "step": 4533 }, { "epoch": 0.6317842959660002, "grad_norm": 0.19160792231559753, "learning_rate": 3.4056874269364946e-06, "loss": 0.1329, "step": 4534 }, { "epoch": 0.6319236396572145, "grad_norm": 0.20357520878314972, "learning_rate": 3.4034538422429263e-06, "loss": 0.135, "step": 4535 }, { "epoch": 0.6320629833484289, "grad_norm": 0.15096889436244965, "learning_rate": 3.401220612265465e-06, "loss": 0.1482, "step": 4536 }, { "epoch": 0.6322023270396433, "grad_norm": 0.18511559069156647, "learning_rate": 3.3989877375002846e-06, "loss": 0.13, "step": 4537 }, { "epoch": 0.6323416707308577, "grad_norm": 0.23178313672542572, "learning_rate": 3.3967552184434753e-06, "loss": 0.132, "step": 4538 }, { "epoch": 0.632481014422072, "grad_norm": 0.2036421000957489, "learning_rate": 3.3945230555910534e-06, "loss": 0.1032, "step": 4539 }, { "epoch": 0.6326203581132864, "grad_norm": 0.30930790305137634, "learning_rate": 3.3922912494389554e-06, "loss": 0.1769, "step": 4540 }, { "epoch": 0.6327597018045008, "grad_norm": 0.18890617787837982, "learning_rate": 3.3900598004830377e-06, "loss": 0.1289, "step": 4541 }, { "epoch": 0.6328990454957152, "grad_norm": 0.1674586534500122, "learning_rate": 3.387828709219075e-06, "loss": 0.1321, "step": 4542 }, { "epoch": 0.6330383891869296, "grad_norm": 0.286621630191803, "learning_rate": 3.3855979761427705e-06, "loss": 0.133, "step": 4543 }, { "epoch": 0.6331777328781439, "grad_norm": 0.11673478037118912, "learning_rate": 3.3833676017497353e-06, "loss": 0.1022, "step": 4544 }, { "epoch": 0.6333170765693583, "grad_norm": 0.14172612130641937, "learning_rate": 3.381137586535511e-06, "loss": 0.1162, "step": 4545 }, { "epoch": 0.6334564202605727, "grad_norm": 0.23401908576488495, "learning_rate": 3.3789079309955556e-06, "loss": 0.1458, "step": 4546 }, { "epoch": 0.6335957639517871, "grad_norm": 0.1519927829504013, "learning_rate": 3.3766786356252466e-06, "loss": 0.1302, "step": 4547 }, { "epoch": 0.6337351076430014, "grad_norm": 0.2016056329011917, "learning_rate": 3.374449700919887e-06, "loss": 0.1278, "step": 4548 }, { "epoch": 0.6338744513342158, "grad_norm": 0.18566866219043732, "learning_rate": 3.37222112737469e-06, "loss": 0.1298, "step": 4549 }, { "epoch": 0.6340137950254302, "grad_norm": 0.1666613668203354, "learning_rate": 3.3699929154847957e-06, "loss": 0.1217, "step": 4550 }, { "epoch": 0.6341531387166446, "grad_norm": 0.2257758527994156, "learning_rate": 3.367765065745261e-06, "loss": 0.1293, "step": 4551 }, { "epoch": 0.634292482407859, "grad_norm": 0.201994851231575, "learning_rate": 3.365537578651065e-06, "loss": 0.1116, "step": 4552 }, { "epoch": 0.6344318260990733, "grad_norm": 0.15440550446510315, "learning_rate": 3.3633104546971052e-06, "loss": 0.1392, "step": 4553 }, { "epoch": 0.6345711697902877, "grad_norm": 0.13815847039222717, "learning_rate": 3.3610836943781945e-06, "loss": 0.1298, "step": 4554 }, { "epoch": 0.6347105134815021, "grad_norm": 0.16646148264408112, "learning_rate": 3.358857298189069e-06, "loss": 0.1147, "step": 4555 }, { "epoch": 0.6348498571727165, "grad_norm": 0.22279593348503113, "learning_rate": 3.356631266624385e-06, "loss": 0.1473, "step": 4556 }, { "epoch": 0.6349892008639308, "grad_norm": 0.1360669881105423, "learning_rate": 3.3544056001787146e-06, "loss": 0.1318, "step": 4557 }, { "epoch": 0.6351285445551452, "grad_norm": 0.16236506402492523, "learning_rate": 3.3521802993465513e-06, "loss": 0.1305, "step": 4558 }, { "epoch": 0.6352678882463596, "grad_norm": 0.2648206055164337, "learning_rate": 3.3499553646223037e-06, "loss": 0.1239, "step": 4559 }, { "epoch": 0.635407231937574, "grad_norm": 0.1374082863330841, "learning_rate": 3.3477307965003026e-06, "loss": 0.1315, "step": 4560 }, { "epoch": 0.6355465756287884, "grad_norm": 0.18557877838611603, "learning_rate": 3.345506595474798e-06, "loss": 0.1522, "step": 4561 }, { "epoch": 0.6356859193200027, "grad_norm": 0.1789369434118271, "learning_rate": 3.3432827620399543e-06, "loss": 0.1254, "step": 4562 }, { "epoch": 0.6358252630112171, "grad_norm": 0.10824865847826004, "learning_rate": 3.3410592966898565e-06, "loss": 0.1171, "step": 4563 }, { "epoch": 0.6359646067024316, "grad_norm": 0.0947556346654892, "learning_rate": 3.3388361999185105e-06, "loss": 0.1168, "step": 4564 }, { "epoch": 0.636103950393646, "grad_norm": 0.3097313940525055, "learning_rate": 3.3366134722198352e-06, "loss": 0.1566, "step": 4565 }, { "epoch": 0.6362432940848604, "grad_norm": 0.16250839829444885, "learning_rate": 3.3343911140876704e-06, "loss": 0.1378, "step": 4566 }, { "epoch": 0.6363826377760747, "grad_norm": 0.12837815284729004, "learning_rate": 3.332169126015773e-06, "loss": 0.099, "step": 4567 }, { "epoch": 0.6365219814672891, "grad_norm": 0.17724275588989258, "learning_rate": 3.3299475084978195e-06, "loss": 0.1606, "step": 4568 }, { "epoch": 0.6366613251585035, "grad_norm": 0.2697242498397827, "learning_rate": 3.3277262620274025e-06, "loss": 0.1606, "step": 4569 }, { "epoch": 0.6368006688497179, "grad_norm": 0.20035767555236816, "learning_rate": 3.3255053870980304e-06, "loss": 0.1383, "step": 4570 }, { "epoch": 0.6369400125409322, "grad_norm": 0.12353895604610443, "learning_rate": 3.3232848842031306e-06, "loss": 0.128, "step": 4571 }, { "epoch": 0.6370793562321466, "grad_norm": 0.38148438930511475, "learning_rate": 3.3210647538360514e-06, "loss": 0.1436, "step": 4572 }, { "epoch": 0.637218699923361, "grad_norm": 0.2113981395959854, "learning_rate": 3.3188449964900527e-06, "loss": 0.1706, "step": 4573 }, { "epoch": 0.6373580436145754, "grad_norm": 0.11808397620916367, "learning_rate": 3.316625612658315e-06, "loss": 0.1041, "step": 4574 }, { "epoch": 0.6374973873057898, "grad_norm": 0.19761823117733002, "learning_rate": 3.314406602833933e-06, "loss": 0.1306, "step": 4575 }, { "epoch": 0.6376367309970041, "grad_norm": 0.2094108611345291, "learning_rate": 3.3121879675099205e-06, "loss": 0.1394, "step": 4576 }, { "epoch": 0.6377760746882185, "grad_norm": 0.18015167117118835, "learning_rate": 3.3099697071792093e-06, "loss": 0.123, "step": 4577 }, { "epoch": 0.6379154183794329, "grad_norm": 0.2720041275024414, "learning_rate": 3.3077518223346448e-06, "loss": 0.1373, "step": 4578 }, { "epoch": 0.6380547620706473, "grad_norm": 0.08912428468465805, "learning_rate": 3.30553431346899e-06, "loss": 0.1001, "step": 4579 }, { "epoch": 0.6381941057618616, "grad_norm": 0.1555250585079193, "learning_rate": 3.3033171810749274e-06, "loss": 0.1294, "step": 4580 }, { "epoch": 0.638333449453076, "grad_norm": 0.1101718321442604, "learning_rate": 3.3011004256450497e-06, "loss": 0.1181, "step": 4581 }, { "epoch": 0.6384727931442904, "grad_norm": 0.1770041435956955, "learning_rate": 3.2988840476718713e-06, "loss": 0.1191, "step": 4582 }, { "epoch": 0.6386121368355048, "grad_norm": 0.11016257852315903, "learning_rate": 3.2966680476478196e-06, "loss": 0.1169, "step": 4583 }, { "epoch": 0.6387514805267192, "grad_norm": 0.1643301546573639, "learning_rate": 3.294452426065241e-06, "loss": 0.1318, "step": 4584 }, { "epoch": 0.6388908242179335, "grad_norm": 0.12291722744703293, "learning_rate": 3.2922371834163958e-06, "loss": 0.1167, "step": 4585 }, { "epoch": 0.6390301679091479, "grad_norm": 0.2459285408258438, "learning_rate": 3.2900223201934584e-06, "loss": 0.1624, "step": 4586 }, { "epoch": 0.6391695116003623, "grad_norm": 0.1465587317943573, "learning_rate": 3.287807836888521e-06, "loss": 0.1238, "step": 4587 }, { "epoch": 0.6393088552915767, "grad_norm": 0.21826040744781494, "learning_rate": 3.2855937339935933e-06, "loss": 0.1298, "step": 4588 }, { "epoch": 0.639448198982791, "grad_norm": 0.10441294312477112, "learning_rate": 3.2833800120005977e-06, "loss": 0.111, "step": 4589 }, { "epoch": 0.6395875426740054, "grad_norm": 0.1750117987394333, "learning_rate": 3.2811666714013724e-06, "loss": 0.1407, "step": 4590 }, { "epoch": 0.6397268863652198, "grad_norm": 0.10839090496301651, "learning_rate": 3.2789537126876714e-06, "loss": 0.1114, "step": 4591 }, { "epoch": 0.6398662300564342, "grad_norm": 0.18152688443660736, "learning_rate": 3.2767411363511613e-06, "loss": 0.126, "step": 4592 }, { "epoch": 0.6400055737476485, "grad_norm": 0.23166631162166595, "learning_rate": 3.2745289428834294e-06, "loss": 0.1493, "step": 4593 }, { "epoch": 0.6401449174388629, "grad_norm": 0.09827148914337158, "learning_rate": 3.272317132775972e-06, "loss": 0.1108, "step": 4594 }, { "epoch": 0.6402842611300773, "grad_norm": 0.11888638138771057, "learning_rate": 3.270105706520207e-06, "loss": 0.1186, "step": 4595 }, { "epoch": 0.6404236048212917, "grad_norm": 0.3084118962287903, "learning_rate": 3.267894664607457e-06, "loss": 0.1363, "step": 4596 }, { "epoch": 0.6405629485125061, "grad_norm": 0.1969621181488037, "learning_rate": 3.265684007528969e-06, "loss": 0.1221, "step": 4597 }, { "epoch": 0.6407022922037204, "grad_norm": 0.1880316287279129, "learning_rate": 3.2634737357758994e-06, "loss": 0.1401, "step": 4598 }, { "epoch": 0.6408416358949348, "grad_norm": 0.17178519070148468, "learning_rate": 3.261263849839319e-06, "loss": 0.1367, "step": 4599 }, { "epoch": 0.6409809795861492, "grad_norm": 0.26059913635253906, "learning_rate": 3.2590543502102163e-06, "loss": 0.138, "step": 4600 }, { "epoch": 0.6411203232773636, "grad_norm": 0.11951188743114471, "learning_rate": 3.256845237379491e-06, "loss": 0.1098, "step": 4601 }, { "epoch": 0.641259666968578, "grad_norm": 0.10717316716909409, "learning_rate": 3.254636511837957e-06, "loss": 0.1073, "step": 4602 }, { "epoch": 0.6413990106597923, "grad_norm": 0.1505274772644043, "learning_rate": 3.252428174076341e-06, "loss": 0.1318, "step": 4603 }, { "epoch": 0.6415383543510068, "grad_norm": 0.12344568222761154, "learning_rate": 3.2502202245852887e-06, "loss": 0.1197, "step": 4604 }, { "epoch": 0.6416776980422212, "grad_norm": 0.1660483330488205, "learning_rate": 3.2480126638553533e-06, "loss": 0.1366, "step": 4605 }, { "epoch": 0.6418170417334356, "grad_norm": 0.09258406609296799, "learning_rate": 3.245805492377007e-06, "loss": 0.1225, "step": 4606 }, { "epoch": 0.64195638542465, "grad_norm": 0.20520175993442535, "learning_rate": 3.243598710640631e-06, "loss": 0.1673, "step": 4607 }, { "epoch": 0.6420957291158643, "grad_norm": 0.17290599644184113, "learning_rate": 3.2413923191365203e-06, "loss": 0.1287, "step": 4608 }, { "epoch": 0.6422350728070787, "grad_norm": 0.12938642501831055, "learning_rate": 3.2391863183548877e-06, "loss": 0.1436, "step": 4609 }, { "epoch": 0.6423744164982931, "grad_norm": 0.16289789974689484, "learning_rate": 3.236980708785854e-06, "loss": 0.1273, "step": 4610 }, { "epoch": 0.6425137601895075, "grad_norm": 0.1996363252401352, "learning_rate": 3.2347754909194595e-06, "loss": 0.1049, "step": 4611 }, { "epoch": 0.6426531038807218, "grad_norm": 0.23858751356601715, "learning_rate": 3.232570665245648e-06, "loss": 0.142, "step": 4612 }, { "epoch": 0.6427924475719362, "grad_norm": 0.2517339289188385, "learning_rate": 3.2303662322542835e-06, "loss": 0.1834, "step": 4613 }, { "epoch": 0.6429317912631506, "grad_norm": 0.20550765097141266, "learning_rate": 3.2281621924351407e-06, "loss": 0.1495, "step": 4614 }, { "epoch": 0.643071134954365, "grad_norm": 0.14900891482830048, "learning_rate": 3.2259585462779063e-06, "loss": 0.1207, "step": 4615 }, { "epoch": 0.6432104786455793, "grad_norm": 0.36990776658058167, "learning_rate": 3.2237552942721832e-06, "loss": 0.1481, "step": 4616 }, { "epoch": 0.6433498223367937, "grad_norm": 0.211228147149086, "learning_rate": 3.2215524369074802e-06, "loss": 0.1828, "step": 4617 }, { "epoch": 0.6434891660280081, "grad_norm": 0.18013422191143036, "learning_rate": 3.219349974673223e-06, "loss": 0.1277, "step": 4618 }, { "epoch": 0.6436285097192225, "grad_norm": 0.2738135755062103, "learning_rate": 3.2171479080587475e-06, "loss": 0.1354, "step": 4619 }, { "epoch": 0.6437678534104369, "grad_norm": 0.1763664036989212, "learning_rate": 3.2149462375533046e-06, "loss": 0.1419, "step": 4620 }, { "epoch": 0.6439071971016512, "grad_norm": 0.15580900013446808, "learning_rate": 3.212744963646054e-06, "loss": 0.1172, "step": 4621 }, { "epoch": 0.6440465407928656, "grad_norm": 0.13455180823802948, "learning_rate": 3.2105440868260706e-06, "loss": 0.1365, "step": 4622 }, { "epoch": 0.64418588448408, "grad_norm": 0.10328611731529236, "learning_rate": 3.2083436075823353e-06, "loss": 0.1218, "step": 4623 }, { "epoch": 0.6443252281752944, "grad_norm": 0.11223463714122772, "learning_rate": 3.2061435264037457e-06, "loss": 0.098, "step": 4624 }, { "epoch": 0.6444645718665087, "grad_norm": 0.08608075976371765, "learning_rate": 3.2039438437791105e-06, "loss": 0.1053, "step": 4625 }, { "epoch": 0.6446039155577231, "grad_norm": 0.14982475340366364, "learning_rate": 3.2017445601971474e-06, "loss": 0.1302, "step": 4626 }, { "epoch": 0.6447432592489375, "grad_norm": 0.07742787152528763, "learning_rate": 3.199545676146492e-06, "loss": 0.0892, "step": 4627 }, { "epoch": 0.6448826029401519, "grad_norm": 0.11400775611400604, "learning_rate": 3.197347192115679e-06, "loss": 0.0997, "step": 4628 }, { "epoch": 0.6450219466313662, "grad_norm": 0.16045130789279938, "learning_rate": 3.1951491085931657e-06, "loss": 0.127, "step": 4629 }, { "epoch": 0.6451612903225806, "grad_norm": 0.13996101915836334, "learning_rate": 3.1929514260673145e-06, "loss": 0.119, "step": 4630 }, { "epoch": 0.645300634013795, "grad_norm": 0.17338983714580536, "learning_rate": 3.1907541450264003e-06, "loss": 0.115, "step": 4631 }, { "epoch": 0.6454399777050094, "grad_norm": 0.2609097957611084, "learning_rate": 3.188557265958612e-06, "loss": 0.1495, "step": 4632 }, { "epoch": 0.6455793213962238, "grad_norm": 0.17306369543075562, "learning_rate": 3.186360789352041e-06, "loss": 0.1077, "step": 4633 }, { "epoch": 0.6457186650874381, "grad_norm": 0.15846765041351318, "learning_rate": 3.184164715694697e-06, "loss": 0.1371, "step": 4634 }, { "epoch": 0.6458580087786525, "grad_norm": 0.1955769956111908, "learning_rate": 3.1819690454744956e-06, "loss": 0.1111, "step": 4635 }, { "epoch": 0.6459973524698669, "grad_norm": 0.18094833195209503, "learning_rate": 3.1797737791792672e-06, "loss": 0.155, "step": 4636 }, { "epoch": 0.6461366961610813, "grad_norm": 0.15715131163597107, "learning_rate": 3.1775789172967486e-06, "loss": 0.1219, "step": 4637 }, { "epoch": 0.6462760398522956, "grad_norm": 0.20758558809757233, "learning_rate": 3.1753844603145894e-06, "loss": 0.1369, "step": 4638 }, { "epoch": 0.64641538354351, "grad_norm": 0.300819993019104, "learning_rate": 3.1731904087203442e-06, "loss": 0.1827, "step": 4639 }, { "epoch": 0.6465547272347244, "grad_norm": 0.10503435134887695, "learning_rate": 3.1709967630014844e-06, "loss": 0.0933, "step": 4640 }, { "epoch": 0.6466940709259388, "grad_norm": 0.21230536699295044, "learning_rate": 3.168803523645387e-06, "loss": 0.12, "step": 4641 }, { "epoch": 0.6468334146171532, "grad_norm": 0.19207324087619781, "learning_rate": 3.166610691139338e-06, "loss": 0.1588, "step": 4642 }, { "epoch": 0.6469727583083675, "grad_norm": 0.09701169282197952, "learning_rate": 3.1644182659705403e-06, "loss": 0.1094, "step": 4643 }, { "epoch": 0.647112101999582, "grad_norm": 0.2168685495853424, "learning_rate": 3.1622262486260936e-06, "loss": 0.1588, "step": 4644 }, { "epoch": 0.6472514456907964, "grad_norm": 0.12282249331474304, "learning_rate": 3.160034639593018e-06, "loss": 0.1197, "step": 4645 }, { "epoch": 0.6473907893820108, "grad_norm": 0.14507192373275757, "learning_rate": 3.1578434393582392e-06, "loss": 0.1366, "step": 4646 }, { "epoch": 0.6475301330732252, "grad_norm": 0.181330606341362, "learning_rate": 3.155652648408589e-06, "loss": 0.1395, "step": 4647 }, { "epoch": 0.6476694767644395, "grad_norm": 0.16909855604171753, "learning_rate": 3.1534622672308165e-06, "loss": 0.1349, "step": 4648 }, { "epoch": 0.6478088204556539, "grad_norm": 0.20311014354228973, "learning_rate": 3.1512722963115693e-06, "loss": 0.1017, "step": 4649 }, { "epoch": 0.6479481641468683, "grad_norm": 0.29443100094795227, "learning_rate": 3.1490827361374105e-06, "loss": 0.1406, "step": 4650 }, { "epoch": 0.6480875078380827, "grad_norm": 0.22911299765110016, "learning_rate": 3.1468935871948096e-06, "loss": 0.1662, "step": 4651 }, { "epoch": 0.648226851529297, "grad_norm": 0.1694926619529724, "learning_rate": 3.1447048499701478e-06, "loss": 0.1541, "step": 4652 }, { "epoch": 0.6483661952205114, "grad_norm": 0.20404255390167236, "learning_rate": 3.1425165249497118e-06, "loss": 0.1547, "step": 4653 }, { "epoch": 0.6485055389117258, "grad_norm": 0.25778406858444214, "learning_rate": 3.1403286126196963e-06, "loss": 0.135, "step": 4654 }, { "epoch": 0.6486448826029402, "grad_norm": 0.20681443810462952, "learning_rate": 3.138141113466205e-06, "loss": 0.1543, "step": 4655 }, { "epoch": 0.6487842262941546, "grad_norm": 0.09620930999517441, "learning_rate": 3.135954027975252e-06, "loss": 0.1108, "step": 4656 }, { "epoch": 0.6489235699853689, "grad_norm": 0.22164911031723022, "learning_rate": 3.1337673566327575e-06, "loss": 0.1304, "step": 4657 }, { "epoch": 0.6490629136765833, "grad_norm": 0.23750261962413788, "learning_rate": 3.1315810999245483e-06, "loss": 0.1295, "step": 4658 }, { "epoch": 0.6492022573677977, "grad_norm": 0.4710144102573395, "learning_rate": 3.1293952583363653e-06, "loss": 0.1541, "step": 4659 }, { "epoch": 0.6493416010590121, "grad_norm": 0.14325940608978271, "learning_rate": 3.127209832353846e-06, "loss": 0.1195, "step": 4660 }, { "epoch": 0.6494809447502264, "grad_norm": 0.205625981092453, "learning_rate": 3.1250248224625463e-06, "loss": 0.131, "step": 4661 }, { "epoch": 0.6496202884414408, "grad_norm": 0.46911168098449707, "learning_rate": 3.1228402291479243e-06, "loss": 0.1513, "step": 4662 }, { "epoch": 0.6497596321326552, "grad_norm": 0.15347501635551453, "learning_rate": 3.1206560528953467e-06, "loss": 0.1348, "step": 4663 }, { "epoch": 0.6498989758238696, "grad_norm": 0.19213218986988068, "learning_rate": 3.1184722941900902e-06, "loss": 0.1497, "step": 4664 }, { "epoch": 0.650038319515084, "grad_norm": 0.18448607623577118, "learning_rate": 3.1162889535173323e-06, "loss": 0.1106, "step": 4665 }, { "epoch": 0.6501776632062983, "grad_norm": 0.1258983314037323, "learning_rate": 3.1141060313621637e-06, "loss": 0.1094, "step": 4666 }, { "epoch": 0.6503170068975127, "grad_norm": 0.39198681712150574, "learning_rate": 3.111923528209577e-06, "loss": 0.162, "step": 4667 }, { "epoch": 0.6504563505887271, "grad_norm": 0.2006344050168991, "learning_rate": 3.1097414445444796e-06, "loss": 0.1346, "step": 4668 }, { "epoch": 0.6505956942799415, "grad_norm": 0.27651655673980713, "learning_rate": 3.1075597808516776e-06, "loss": 0.1319, "step": 4669 }, { "epoch": 0.6507350379711558, "grad_norm": 0.3561016023159027, "learning_rate": 3.1053785376158865e-06, "loss": 0.181, "step": 4670 }, { "epoch": 0.6508743816623702, "grad_norm": 0.168105348944664, "learning_rate": 3.1031977153217286e-06, "loss": 0.1362, "step": 4671 }, { "epoch": 0.6510137253535846, "grad_norm": 0.2558591663837433, "learning_rate": 3.1010173144537348e-06, "loss": 0.1693, "step": 4672 }, { "epoch": 0.651153069044799, "grad_norm": 0.14275290071964264, "learning_rate": 3.0988373354963387e-06, "loss": 0.151, "step": 4673 }, { "epoch": 0.6512924127360133, "grad_norm": 0.14122942090034485, "learning_rate": 3.0966577789338812e-06, "loss": 0.12, "step": 4674 }, { "epoch": 0.6514317564272277, "grad_norm": 0.20612749457359314, "learning_rate": 3.0944786452506147e-06, "loss": 0.1224, "step": 4675 }, { "epoch": 0.6515711001184421, "grad_norm": 0.10062018036842346, "learning_rate": 3.092299934930686e-06, "loss": 0.1183, "step": 4676 }, { "epoch": 0.6517104438096565, "grad_norm": 0.13764333724975586, "learning_rate": 3.0901216484581597e-06, "loss": 0.1335, "step": 4677 }, { "epoch": 0.6518497875008709, "grad_norm": 0.23065796494483948, "learning_rate": 3.087943786316999e-06, "loss": 0.1633, "step": 4678 }, { "epoch": 0.6519891311920852, "grad_norm": 0.10427039861679077, "learning_rate": 3.085766348991076e-06, "loss": 0.1076, "step": 4679 }, { "epoch": 0.6521284748832996, "grad_norm": 0.15045106410980225, "learning_rate": 3.0835893369641694e-06, "loss": 0.1306, "step": 4680 }, { "epoch": 0.652267818574514, "grad_norm": 0.15665152668952942, "learning_rate": 3.0814127507199587e-06, "loss": 0.1583, "step": 4681 }, { "epoch": 0.6524071622657284, "grad_norm": 0.12943150103092194, "learning_rate": 3.0792365907420323e-06, "loss": 0.137, "step": 4682 }, { "epoch": 0.6525465059569427, "grad_norm": 0.16130927205085754, "learning_rate": 3.0770608575138825e-06, "loss": 0.14, "step": 4683 }, { "epoch": 0.6526858496481572, "grad_norm": 0.1469935178756714, "learning_rate": 3.0748855515189104e-06, "loss": 0.1114, "step": 4684 }, { "epoch": 0.6528251933393716, "grad_norm": 0.0992724597454071, "learning_rate": 3.0727106732404183e-06, "loss": 0.0949, "step": 4685 }, { "epoch": 0.652964537030586, "grad_norm": 0.13929855823516846, "learning_rate": 3.0705362231616133e-06, "loss": 0.1207, "step": 4686 }, { "epoch": 0.6531038807218004, "grad_norm": 0.1751270443201065, "learning_rate": 3.0683622017656074e-06, "loss": 0.1254, "step": 4687 }, { "epoch": 0.6532432244130147, "grad_norm": 0.09766337275505066, "learning_rate": 3.066188609535421e-06, "loss": 0.1093, "step": 4688 }, { "epoch": 0.6533825681042291, "grad_norm": 0.10972302407026291, "learning_rate": 3.064015446953977e-06, "loss": 0.1141, "step": 4689 }, { "epoch": 0.6535219117954435, "grad_norm": 0.1574559062719345, "learning_rate": 3.0618427145041017e-06, "loss": 0.1177, "step": 4690 }, { "epoch": 0.6536612554866579, "grad_norm": 0.1921788901090622, "learning_rate": 3.059670412668525e-06, "loss": 0.1553, "step": 4691 }, { "epoch": 0.6538005991778723, "grad_norm": 0.13736020028591156, "learning_rate": 3.0574985419298843e-06, "loss": 0.095, "step": 4692 }, { "epoch": 0.6539399428690866, "grad_norm": 0.17719529569149017, "learning_rate": 3.055327102770719e-06, "loss": 0.1435, "step": 4693 }, { "epoch": 0.654079286560301, "grad_norm": 0.13914547860622406, "learning_rate": 3.053156095673474e-06, "loss": 0.1342, "step": 4694 }, { "epoch": 0.6542186302515154, "grad_norm": 0.15475766360759735, "learning_rate": 3.0509855211204976e-06, "loss": 0.1466, "step": 4695 }, { "epoch": 0.6543579739427298, "grad_norm": 0.1350952833890915, "learning_rate": 3.048815379594043e-06, "loss": 0.1028, "step": 4696 }, { "epoch": 0.6544973176339441, "grad_norm": 0.17850971221923828, "learning_rate": 3.046645671576264e-06, "loss": 0.1389, "step": 4697 }, { "epoch": 0.6546366613251585, "grad_norm": 0.11612506955862045, "learning_rate": 3.044476397549221e-06, "loss": 0.1045, "step": 4698 }, { "epoch": 0.6547760050163729, "grad_norm": 0.1815727949142456, "learning_rate": 3.0423075579948756e-06, "loss": 0.1171, "step": 4699 }, { "epoch": 0.6549153487075873, "grad_norm": 0.17303957045078278, "learning_rate": 3.0401391533950976e-06, "loss": 0.1477, "step": 4700 }, { "epoch": 0.6550546923988017, "grad_norm": 0.20835478603839874, "learning_rate": 3.037971184231655e-06, "loss": 0.1571, "step": 4701 }, { "epoch": 0.655194036090016, "grad_norm": 0.10253511369228363, "learning_rate": 3.035803650986222e-06, "loss": 0.1061, "step": 4702 }, { "epoch": 0.6553333797812304, "grad_norm": 0.16866114735603333, "learning_rate": 3.0336365541403723e-06, "loss": 0.1484, "step": 4703 }, { "epoch": 0.6554727234724448, "grad_norm": 0.22109690308570862, "learning_rate": 3.0314698941755886e-06, "loss": 0.1248, "step": 4704 }, { "epoch": 0.6556120671636592, "grad_norm": 0.0943758487701416, "learning_rate": 3.0293036715732527e-06, "loss": 0.0897, "step": 4705 }, { "epoch": 0.6557514108548735, "grad_norm": 0.13184338808059692, "learning_rate": 3.0271378868146494e-06, "loss": 0.1131, "step": 4706 }, { "epoch": 0.6558907545460879, "grad_norm": 0.23651114106178284, "learning_rate": 3.024972540380966e-06, "loss": 0.1501, "step": 4707 }, { "epoch": 0.6560300982373023, "grad_norm": 0.15573297441005707, "learning_rate": 3.0228076327532925e-06, "loss": 0.1287, "step": 4708 }, { "epoch": 0.6561694419285167, "grad_norm": 0.14373020827770233, "learning_rate": 3.0206431644126234e-06, "loss": 0.1406, "step": 4709 }, { "epoch": 0.656308785619731, "grad_norm": 0.19753316044807434, "learning_rate": 3.0184791358398537e-06, "loss": 0.1316, "step": 4710 }, { "epoch": 0.6564481293109454, "grad_norm": 0.08861610293388367, "learning_rate": 3.016315547515783e-06, "loss": 0.0952, "step": 4711 }, { "epoch": 0.6565874730021598, "grad_norm": 0.18035432696342468, "learning_rate": 3.0141523999211065e-06, "loss": 0.1487, "step": 4712 }, { "epoch": 0.6567268166933742, "grad_norm": 0.1416572630405426, "learning_rate": 3.0119896935364305e-06, "loss": 0.1269, "step": 4713 }, { "epoch": 0.6568661603845886, "grad_norm": 0.12879163026809692, "learning_rate": 3.009827428842258e-06, "loss": 0.123, "step": 4714 }, { "epoch": 0.6570055040758029, "grad_norm": 0.13322216272354126, "learning_rate": 3.0076656063189926e-06, "loss": 0.1346, "step": 4715 }, { "epoch": 0.6571448477670173, "grad_norm": 0.21997042000293732, "learning_rate": 3.0055042264469447e-06, "loss": 0.1327, "step": 4716 }, { "epoch": 0.6572841914582317, "grad_norm": 0.21246041357517242, "learning_rate": 3.003343289706324e-06, "loss": 0.1477, "step": 4717 }, { "epoch": 0.6574235351494461, "grad_norm": 0.11760104447603226, "learning_rate": 3.001182796577239e-06, "loss": 0.1285, "step": 4718 }, { "epoch": 0.6575628788406604, "grad_norm": 0.1104748323559761, "learning_rate": 2.999022747539701e-06, "loss": 0.116, "step": 4719 }, { "epoch": 0.6577022225318748, "grad_norm": 0.15957365930080414, "learning_rate": 2.9968631430736274e-06, "loss": 0.1113, "step": 4720 }, { "epoch": 0.6578415662230892, "grad_norm": 0.10346828401088715, "learning_rate": 2.99470398365883e-06, "loss": 0.0972, "step": 4721 }, { "epoch": 0.6579809099143036, "grad_norm": 0.20058926939964294, "learning_rate": 2.9925452697750275e-06, "loss": 0.1226, "step": 4722 }, { "epoch": 0.658120253605518, "grad_norm": 0.32774052023887634, "learning_rate": 2.990387001901834e-06, "loss": 0.173, "step": 4723 }, { "epoch": 0.6582595972967323, "grad_norm": 0.15758025646209717, "learning_rate": 2.988229180518767e-06, "loss": 0.1097, "step": 4724 }, { "epoch": 0.6583989409879468, "grad_norm": 0.19163575768470764, "learning_rate": 2.9860718061052478e-06, "loss": 0.1705, "step": 4725 }, { "epoch": 0.6585382846791612, "grad_norm": 0.1251475214958191, "learning_rate": 2.9839148791405937e-06, "loss": 0.1136, "step": 4726 }, { "epoch": 0.6586776283703756, "grad_norm": 0.15125808119773865, "learning_rate": 2.981758400104028e-06, "loss": 0.1094, "step": 4727 }, { "epoch": 0.65881697206159, "grad_norm": 0.1612967699766159, "learning_rate": 2.979602369474667e-06, "loss": 0.1382, "step": 4728 }, { "epoch": 0.6589563157528043, "grad_norm": 0.1942720115184784, "learning_rate": 2.977446787731532e-06, "loss": 0.1823, "step": 4729 }, { "epoch": 0.6590956594440187, "grad_norm": 0.15900269150733948, "learning_rate": 2.975291655353546e-06, "loss": 0.1064, "step": 4730 }, { "epoch": 0.6592350031352331, "grad_norm": 0.25001898407936096, "learning_rate": 2.9731369728195288e-06, "loss": 0.1871, "step": 4731 }, { "epoch": 0.6593743468264475, "grad_norm": 0.2546013593673706, "learning_rate": 2.9709827406082028e-06, "loss": 0.185, "step": 4732 }, { "epoch": 0.6595136905176618, "grad_norm": 0.18973374366760254, "learning_rate": 2.9688289591981887e-06, "loss": 0.1325, "step": 4733 }, { "epoch": 0.6596530342088762, "grad_norm": 0.21669989824295044, "learning_rate": 2.9666756290680078e-06, "loss": 0.1384, "step": 4734 }, { "epoch": 0.6597923779000906, "grad_norm": 0.178498774766922, "learning_rate": 2.964522750696079e-06, "loss": 0.171, "step": 4735 }, { "epoch": 0.659931721591305, "grad_norm": 0.09914327412843704, "learning_rate": 2.962370324560725e-06, "loss": 0.1126, "step": 4736 }, { "epoch": 0.6600710652825194, "grad_norm": 0.10975015163421631, "learning_rate": 2.9602183511401656e-06, "loss": 0.1073, "step": 4737 }, { "epoch": 0.6602104089737337, "grad_norm": 0.2025010585784912, "learning_rate": 2.9580668309125203e-06, "loss": 0.1458, "step": 4738 }, { "epoch": 0.6603497526649481, "grad_norm": 0.1907721757888794, "learning_rate": 2.9559157643558046e-06, "loss": 0.1228, "step": 4739 }, { "epoch": 0.6604890963561625, "grad_norm": 0.13365530967712402, "learning_rate": 2.9537651519479403e-06, "loss": 0.127, "step": 4740 }, { "epoch": 0.6606284400473769, "grad_norm": 0.10564860701560974, "learning_rate": 2.951614994166743e-06, "loss": 0.1112, "step": 4741 }, { "epoch": 0.6607677837385912, "grad_norm": 0.15226739645004272, "learning_rate": 2.9494652914899267e-06, "loss": 0.1187, "step": 4742 }, { "epoch": 0.6609071274298056, "grad_norm": 0.13374747335910797, "learning_rate": 2.947316044395112e-06, "loss": 0.1249, "step": 4743 }, { "epoch": 0.66104647112102, "grad_norm": 0.13159111142158508, "learning_rate": 2.945167253359806e-06, "loss": 0.1225, "step": 4744 }, { "epoch": 0.6611858148122344, "grad_norm": 0.20431648194789886, "learning_rate": 2.943018918861424e-06, "loss": 0.1387, "step": 4745 }, { "epoch": 0.6613251585034488, "grad_norm": 0.1257283240556717, "learning_rate": 2.940871041377277e-06, "loss": 0.1063, "step": 4746 }, { "epoch": 0.6614645021946631, "grad_norm": 0.25880342721939087, "learning_rate": 2.938723621384572e-06, "loss": 0.1118, "step": 4747 }, { "epoch": 0.6616038458858775, "grad_norm": 0.13511508703231812, "learning_rate": 2.936576659360421e-06, "loss": 0.1245, "step": 4748 }, { "epoch": 0.6617431895770919, "grad_norm": 0.1412617564201355, "learning_rate": 2.9344301557818267e-06, "loss": 0.1316, "step": 4749 }, { "epoch": 0.6618825332683063, "grad_norm": 0.17106883227825165, "learning_rate": 2.9322841111256937e-06, "loss": 0.1227, "step": 4750 }, { "epoch": 0.6620218769595206, "grad_norm": 0.19489869475364685, "learning_rate": 2.930138525868824e-06, "loss": 0.141, "step": 4751 }, { "epoch": 0.662161220650735, "grad_norm": 0.15795274078845978, "learning_rate": 2.927993400487919e-06, "loss": 0.1325, "step": 4752 }, { "epoch": 0.6623005643419494, "grad_norm": 0.18449372053146362, "learning_rate": 2.9258487354595754e-06, "loss": 0.1221, "step": 4753 }, { "epoch": 0.6624399080331638, "grad_norm": 0.20875594019889832, "learning_rate": 2.9237045312602908e-06, "loss": 0.1346, "step": 4754 }, { "epoch": 0.6625792517243781, "grad_norm": 0.10076648741960526, "learning_rate": 2.921560788366454e-06, "loss": 0.1189, "step": 4755 }, { "epoch": 0.6627185954155925, "grad_norm": 0.16358844935894012, "learning_rate": 2.9194175072543594e-06, "loss": 0.1388, "step": 4756 }, { "epoch": 0.6628579391068069, "grad_norm": 0.21987994015216827, "learning_rate": 2.9172746884001944e-06, "loss": 0.1569, "step": 4757 }, { "epoch": 0.6629972827980213, "grad_norm": 0.16815420985221863, "learning_rate": 2.9151323322800433e-06, "loss": 0.1507, "step": 4758 }, { "epoch": 0.6631366264892357, "grad_norm": 0.22991514205932617, "learning_rate": 2.9129904393698917e-06, "loss": 0.1294, "step": 4759 }, { "epoch": 0.66327597018045, "grad_norm": 0.14336176216602325, "learning_rate": 2.910849010145617e-06, "loss": 0.1313, "step": 4760 }, { "epoch": 0.6634153138716644, "grad_norm": 0.11976195871829987, "learning_rate": 2.908708045082994e-06, "loss": 0.1207, "step": 4761 }, { "epoch": 0.6635546575628788, "grad_norm": 0.1567140370607376, "learning_rate": 2.906567544657699e-06, "loss": 0.1603, "step": 4762 }, { "epoch": 0.6636940012540932, "grad_norm": 0.14311285316944122, "learning_rate": 2.9044275093453034e-06, "loss": 0.1322, "step": 4763 }, { "epoch": 0.6638333449453075, "grad_norm": 0.26472097635269165, "learning_rate": 2.902287939621272e-06, "loss": 0.1393, "step": 4764 }, { "epoch": 0.663972688636522, "grad_norm": 0.21404929459095, "learning_rate": 2.9001488359609676e-06, "loss": 0.128, "step": 4765 }, { "epoch": 0.6641120323277364, "grad_norm": 0.18578588962554932, "learning_rate": 2.898010198839651e-06, "loss": 0.1609, "step": 4766 }, { "epoch": 0.6642513760189508, "grad_norm": 0.6593394875526428, "learning_rate": 2.895872028732481e-06, "loss": 0.1398, "step": 4767 }, { "epoch": 0.6643907197101652, "grad_norm": 0.21269887685775757, "learning_rate": 2.893734326114506e-06, "loss": 0.1249, "step": 4768 }, { "epoch": 0.6645300634013795, "grad_norm": 0.0951591432094574, "learning_rate": 2.8915970914606793e-06, "loss": 0.1098, "step": 4769 }, { "epoch": 0.6646694070925939, "grad_norm": 0.09943486005067825, "learning_rate": 2.8894603252458407e-06, "loss": 0.0957, "step": 4770 }, { "epoch": 0.6648087507838083, "grad_norm": 0.2525797188282013, "learning_rate": 2.8873240279447355e-06, "loss": 0.1147, "step": 4771 }, { "epoch": 0.6649480944750227, "grad_norm": 0.3539848029613495, "learning_rate": 2.8851882000319966e-06, "loss": 0.142, "step": 4772 }, { "epoch": 0.665087438166237, "grad_norm": 0.19321615993976593, "learning_rate": 2.883052841982157e-06, "loss": 0.1313, "step": 4773 }, { "epoch": 0.6652267818574514, "grad_norm": 0.10017110407352448, "learning_rate": 2.8809179542696474e-06, "loss": 0.1033, "step": 4774 }, { "epoch": 0.6653661255486658, "grad_norm": 0.10871065407991409, "learning_rate": 2.878783537368789e-06, "loss": 0.0912, "step": 4775 }, { "epoch": 0.6655054692398802, "grad_norm": 0.24220988154411316, "learning_rate": 2.8766495917537985e-06, "loss": 0.1343, "step": 4776 }, { "epoch": 0.6656448129310946, "grad_norm": 0.12953856587409973, "learning_rate": 2.874516117898792e-06, "loss": 0.1071, "step": 4777 }, { "epoch": 0.6657841566223089, "grad_norm": 0.20966161787509918, "learning_rate": 2.8723831162777806e-06, "loss": 0.1237, "step": 4778 }, { "epoch": 0.6659235003135233, "grad_norm": 0.1729404628276825, "learning_rate": 2.8702505873646636e-06, "loss": 0.1348, "step": 4779 }, { "epoch": 0.6660628440047377, "grad_norm": 0.1724051982164383, "learning_rate": 2.8681185316332453e-06, "loss": 0.1482, "step": 4780 }, { "epoch": 0.6662021876959521, "grad_norm": 0.1000327542424202, "learning_rate": 2.865986949557218e-06, "loss": 0.0905, "step": 4781 }, { "epoch": 0.6663415313871665, "grad_norm": 0.1852371096611023, "learning_rate": 2.8638558416101683e-06, "loss": 0.1295, "step": 4782 }, { "epoch": 0.6664808750783808, "grad_norm": 0.24715878069400787, "learning_rate": 2.8617252082655813e-06, "loss": 0.1396, "step": 4783 }, { "epoch": 0.6666202187695952, "grad_norm": 0.2111637145280838, "learning_rate": 2.8595950499968352e-06, "loss": 0.1409, "step": 4784 }, { "epoch": 0.6667595624608096, "grad_norm": 0.19247612357139587, "learning_rate": 2.8574653672772068e-06, "loss": 0.1283, "step": 4785 }, { "epoch": 0.666898906152024, "grad_norm": 0.2111203372478485, "learning_rate": 2.8553361605798545e-06, "loss": 0.1351, "step": 4786 }, { "epoch": 0.6670382498432383, "grad_norm": 0.2664738595485687, "learning_rate": 2.8532074303778446e-06, "loss": 0.126, "step": 4787 }, { "epoch": 0.6671775935344527, "grad_norm": 0.30296552181243896, "learning_rate": 2.8510791771441327e-06, "loss": 0.1524, "step": 4788 }, { "epoch": 0.6673169372256671, "grad_norm": 0.095000721514225, "learning_rate": 2.8489514013515656e-06, "loss": 0.0854, "step": 4789 }, { "epoch": 0.6674562809168815, "grad_norm": 0.12483292073011398, "learning_rate": 2.8468241034728878e-06, "loss": 0.1113, "step": 4790 }, { "epoch": 0.6675956246080959, "grad_norm": 0.1518544703722, "learning_rate": 2.8446972839807384e-06, "loss": 0.1193, "step": 4791 }, { "epoch": 0.6677349682993102, "grad_norm": 0.18022046983242035, "learning_rate": 2.8425709433476455e-06, "loss": 0.1551, "step": 4792 }, { "epoch": 0.6678743119905246, "grad_norm": 0.22241589426994324, "learning_rate": 2.8404450820460326e-06, "loss": 0.1244, "step": 4793 }, { "epoch": 0.668013655681739, "grad_norm": 0.15184324979782104, "learning_rate": 2.8383197005482187e-06, "loss": 0.1139, "step": 4794 }, { "epoch": 0.6681529993729534, "grad_norm": 0.1533159613609314, "learning_rate": 2.8361947993264185e-06, "loss": 0.1156, "step": 4795 }, { "epoch": 0.6682923430641677, "grad_norm": 0.15171043574810028, "learning_rate": 2.834070378852732e-06, "loss": 0.1511, "step": 4796 }, { "epoch": 0.6684316867553821, "grad_norm": 0.21101367473602295, "learning_rate": 2.8319464395991567e-06, "loss": 0.1464, "step": 4797 }, { "epoch": 0.6685710304465965, "grad_norm": 0.13338401913642883, "learning_rate": 2.829822982037585e-06, "loss": 0.1218, "step": 4798 }, { "epoch": 0.6687103741378109, "grad_norm": 0.22492067515850067, "learning_rate": 2.8277000066398032e-06, "loss": 0.1581, "step": 4799 }, { "epoch": 0.6688497178290252, "grad_norm": 0.237693652510643, "learning_rate": 2.8255775138774827e-06, "loss": 0.1512, "step": 4800 }, { "epoch": 0.6689890615202396, "grad_norm": 0.2308712601661682, "learning_rate": 2.823455504222198e-06, "loss": 0.1663, "step": 4801 }, { "epoch": 0.669128405211454, "grad_norm": 0.15486402809619904, "learning_rate": 2.821333978145407e-06, "loss": 0.125, "step": 4802 }, { "epoch": 0.6692677489026684, "grad_norm": 0.19580553472042084, "learning_rate": 2.8192129361184685e-06, "loss": 0.1024, "step": 4803 }, { "epoch": 0.6694070925938828, "grad_norm": 0.1262023001909256, "learning_rate": 2.817092378612625e-06, "loss": 0.1235, "step": 4804 }, { "epoch": 0.6695464362850972, "grad_norm": 0.11744451522827148, "learning_rate": 2.814972306099018e-06, "loss": 0.1159, "step": 4805 }, { "epoch": 0.6696857799763116, "grad_norm": 0.17678837478160858, "learning_rate": 2.8128527190486823e-06, "loss": 0.1567, "step": 4806 }, { "epoch": 0.669825123667526, "grad_norm": 0.16145995259284973, "learning_rate": 2.8107336179325383e-06, "loss": 0.12, "step": 4807 }, { "epoch": 0.6699644673587404, "grad_norm": 0.15238995850086212, "learning_rate": 2.808615003221401e-06, "loss": 0.1303, "step": 4808 }, { "epoch": 0.6701038110499548, "grad_norm": 0.1495039016008377, "learning_rate": 2.80649687538598e-06, "loss": 0.1309, "step": 4809 }, { "epoch": 0.6702431547411691, "grad_norm": 0.1759186089038849, "learning_rate": 2.8043792348968767e-06, "loss": 0.1211, "step": 4810 }, { "epoch": 0.6703824984323835, "grad_norm": 0.11836095154285431, "learning_rate": 2.8022620822245782e-06, "loss": 0.1082, "step": 4811 }, { "epoch": 0.6705218421235979, "grad_norm": 0.1876043826341629, "learning_rate": 2.8001454178394715e-06, "loss": 0.1493, "step": 4812 }, { "epoch": 0.6706611858148123, "grad_norm": 0.11395192891359329, "learning_rate": 2.7980292422118282e-06, "loss": 0.1221, "step": 4813 }, { "epoch": 0.6708005295060266, "grad_norm": 0.16432170569896698, "learning_rate": 2.795913555811817e-06, "loss": 0.18, "step": 4814 }, { "epoch": 0.670939873197241, "grad_norm": 0.1565055102109909, "learning_rate": 2.793798359109492e-06, "loss": 0.1687, "step": 4815 }, { "epoch": 0.6710792168884554, "grad_norm": 0.13722068071365356, "learning_rate": 2.7916836525748024e-06, "loss": 0.1122, "step": 4816 }, { "epoch": 0.6712185605796698, "grad_norm": 0.15370674431324005, "learning_rate": 2.7895694366775934e-06, "loss": 0.1526, "step": 4817 }, { "epoch": 0.6713579042708842, "grad_norm": 0.14470450580120087, "learning_rate": 2.7874557118875863e-06, "loss": 0.1494, "step": 4818 }, { "epoch": 0.6714972479620985, "grad_norm": 0.17821602523326874, "learning_rate": 2.7853424786744068e-06, "loss": 0.1526, "step": 4819 }, { "epoch": 0.6716365916533129, "grad_norm": 0.11859575659036636, "learning_rate": 2.7832297375075685e-06, "loss": 0.1155, "step": 4820 }, { "epoch": 0.6717759353445273, "grad_norm": 0.13368813693523407, "learning_rate": 2.7811174888564713e-06, "loss": 0.1103, "step": 4821 }, { "epoch": 0.6719152790357417, "grad_norm": 0.2309083342552185, "learning_rate": 2.779005733190412e-06, "loss": 0.1509, "step": 4822 }, { "epoch": 0.672054622726956, "grad_norm": 0.15815554559230804, "learning_rate": 2.7768944709785705e-06, "loss": 0.117, "step": 4823 }, { "epoch": 0.6721939664181704, "grad_norm": 0.19109012186527252, "learning_rate": 2.774783702690025e-06, "loss": 0.1365, "step": 4824 }, { "epoch": 0.6723333101093848, "grad_norm": 0.17443661391735077, "learning_rate": 2.7726734287937367e-06, "loss": 0.1237, "step": 4825 }, { "epoch": 0.6724726538005992, "grad_norm": 0.20691487193107605, "learning_rate": 2.770563649758562e-06, "loss": 0.141, "step": 4826 }, { "epoch": 0.6726119974918136, "grad_norm": 0.16855685412883759, "learning_rate": 2.768454366053247e-06, "loss": 0.1214, "step": 4827 }, { "epoch": 0.6727513411830279, "grad_norm": 0.1615157425403595, "learning_rate": 2.7663455781464245e-06, "loss": 0.1224, "step": 4828 }, { "epoch": 0.6728906848742423, "grad_norm": 0.11741677671670914, "learning_rate": 2.764237286506618e-06, "loss": 0.1232, "step": 4829 }, { "epoch": 0.6730300285654567, "grad_norm": 0.16110098361968994, "learning_rate": 2.7621294916022423e-06, "loss": 0.1127, "step": 4830 }, { "epoch": 0.6731693722566711, "grad_norm": 0.15342223644256592, "learning_rate": 2.760022193901605e-06, "loss": 0.1286, "step": 4831 }, { "epoch": 0.6733087159478854, "grad_norm": 0.170639306306839, "learning_rate": 2.7579153938728943e-06, "loss": 0.1471, "step": 4832 }, { "epoch": 0.6734480596390998, "grad_norm": 0.2841227948665619, "learning_rate": 2.7558090919841972e-06, "loss": 0.1598, "step": 4833 }, { "epoch": 0.6735874033303142, "grad_norm": 0.11730663478374481, "learning_rate": 2.753703288703482e-06, "loss": 0.1296, "step": 4834 }, { "epoch": 0.6737267470215286, "grad_norm": 0.25086209177970886, "learning_rate": 2.7515979844986148e-06, "loss": 0.1596, "step": 4835 }, { "epoch": 0.673866090712743, "grad_norm": 0.2172294557094574, "learning_rate": 2.749493179837341e-06, "loss": 0.1452, "step": 4836 }, { "epoch": 0.6740054344039573, "grad_norm": 0.16775429248809814, "learning_rate": 2.747388875187303e-06, "loss": 0.1328, "step": 4837 }, { "epoch": 0.6741447780951717, "grad_norm": 0.20480675995349884, "learning_rate": 2.7452850710160305e-06, "loss": 0.1236, "step": 4838 }, { "epoch": 0.6742841217863861, "grad_norm": 0.1417657881975174, "learning_rate": 2.74318176779094e-06, "loss": 0.1119, "step": 4839 }, { "epoch": 0.6744234654776005, "grad_norm": 0.14325164258480072, "learning_rate": 2.741078965979334e-06, "loss": 0.1103, "step": 4840 }, { "epoch": 0.6745628091688148, "grad_norm": 0.25815239548683167, "learning_rate": 2.7389766660484103e-06, "loss": 0.1512, "step": 4841 }, { "epoch": 0.6747021528600292, "grad_norm": 0.1863546073436737, "learning_rate": 2.736874868465253e-06, "loss": 0.1499, "step": 4842 }, { "epoch": 0.6748414965512436, "grad_norm": 0.12583696842193604, "learning_rate": 2.7347735736968318e-06, "loss": 0.1276, "step": 4843 }, { "epoch": 0.674980840242458, "grad_norm": 0.13170966506004333, "learning_rate": 2.7326727822100047e-06, "loss": 0.1154, "step": 4844 }, { "epoch": 0.6751201839336725, "grad_norm": 0.11354526877403259, "learning_rate": 2.7305724944715218e-06, "loss": 0.1269, "step": 4845 }, { "epoch": 0.6752595276248868, "grad_norm": 0.15854141116142273, "learning_rate": 2.72847271094802e-06, "loss": 0.1202, "step": 4846 }, { "epoch": 0.6753988713161012, "grad_norm": 0.25936397910118103, "learning_rate": 2.7263734321060198e-06, "loss": 0.1335, "step": 4847 }, { "epoch": 0.6755382150073156, "grad_norm": 0.11817106604576111, "learning_rate": 2.7242746584119364e-06, "loss": 0.1301, "step": 4848 }, { "epoch": 0.67567755869853, "grad_norm": 0.18400593101978302, "learning_rate": 2.722176390332071e-06, "loss": 0.145, "step": 4849 }, { "epoch": 0.6758169023897443, "grad_norm": 0.20760875940322876, "learning_rate": 2.720078628332605e-06, "loss": 0.1482, "step": 4850 }, { "epoch": 0.6759562460809587, "grad_norm": 0.10878507047891617, "learning_rate": 2.7179813728796156e-06, "loss": 0.1131, "step": 4851 }, { "epoch": 0.6760955897721731, "grad_norm": 0.12694722414016724, "learning_rate": 2.7158846244390657e-06, "loss": 0.1271, "step": 4852 }, { "epoch": 0.6762349334633875, "grad_norm": 0.10445591807365417, "learning_rate": 2.7137883834768076e-06, "loss": 0.1109, "step": 4853 }, { "epoch": 0.6763742771546019, "grad_norm": 0.1354309320449829, "learning_rate": 2.7116926504585756e-06, "loss": 0.1166, "step": 4854 }, { "epoch": 0.6765136208458162, "grad_norm": 0.11840195953845978, "learning_rate": 2.7095974258499914e-06, "loss": 0.1293, "step": 4855 }, { "epoch": 0.6766529645370306, "grad_norm": 0.16954414546489716, "learning_rate": 2.7075027101165706e-06, "loss": 0.1373, "step": 4856 }, { "epoch": 0.676792308228245, "grad_norm": 0.08941410481929779, "learning_rate": 2.7054085037237066e-06, "loss": 0.0923, "step": 4857 }, { "epoch": 0.6769316519194594, "grad_norm": 0.16019149124622345, "learning_rate": 2.7033148071366866e-06, "loss": 0.1517, "step": 4858 }, { "epoch": 0.6770709956106737, "grad_norm": 0.23136159777641296, "learning_rate": 2.701221620820685e-06, "loss": 0.1373, "step": 4859 }, { "epoch": 0.6772103393018881, "grad_norm": 0.17586340010166168, "learning_rate": 2.6991289452407564e-06, "loss": 0.1354, "step": 4860 }, { "epoch": 0.6773496829931025, "grad_norm": 0.1482803374528885, "learning_rate": 2.697036780861845e-06, "loss": 0.1484, "step": 4861 }, { "epoch": 0.6774890266843169, "grad_norm": 0.12779656052589417, "learning_rate": 2.694945128148784e-06, "loss": 0.1188, "step": 4862 }, { "epoch": 0.6776283703755313, "grad_norm": 0.1363644301891327, "learning_rate": 2.692853987566291e-06, "loss": 0.1282, "step": 4863 }, { "epoch": 0.6777677140667456, "grad_norm": 0.16008929908275604, "learning_rate": 2.690763359578969e-06, "loss": 0.1442, "step": 4864 }, { "epoch": 0.67790705775796, "grad_norm": 0.26915374398231506, "learning_rate": 2.6886732446513066e-06, "loss": 0.1938, "step": 4865 }, { "epoch": 0.6780464014491744, "grad_norm": 0.11034207046031952, "learning_rate": 2.68658364324768e-06, "loss": 0.1081, "step": 4866 }, { "epoch": 0.6781857451403888, "grad_norm": 0.15075337886810303, "learning_rate": 2.684494555832353e-06, "loss": 0.1089, "step": 4867 }, { "epoch": 0.6783250888316031, "grad_norm": 0.1753329485654831, "learning_rate": 2.6824059828694715e-06, "loss": 0.1407, "step": 4868 }, { "epoch": 0.6784644325228175, "grad_norm": 0.13427120447158813, "learning_rate": 2.680317924823068e-06, "loss": 0.114, "step": 4869 }, { "epoch": 0.6786037762140319, "grad_norm": 0.25571689009666443, "learning_rate": 2.6782303821570644e-06, "loss": 0.1395, "step": 4870 }, { "epoch": 0.6787431199052463, "grad_norm": 0.15667028725147247, "learning_rate": 2.676143355335263e-06, "loss": 0.1388, "step": 4871 }, { "epoch": 0.6788824635964607, "grad_norm": 0.15338771045207977, "learning_rate": 2.6740568448213523e-06, "loss": 0.1317, "step": 4872 }, { "epoch": 0.679021807287675, "grad_norm": 0.22235992550849915, "learning_rate": 2.6719708510789077e-06, "loss": 0.1349, "step": 4873 }, { "epoch": 0.6791611509788894, "grad_norm": 0.21016153693199158, "learning_rate": 2.669885374571392e-06, "loss": 0.1515, "step": 4874 }, { "epoch": 0.6793004946701038, "grad_norm": 0.1745138317346573, "learning_rate": 2.667800415762149e-06, "loss": 0.14, "step": 4875 }, { "epoch": 0.6794398383613182, "grad_norm": 0.20915594696998596, "learning_rate": 2.665715975114407e-06, "loss": 0.1235, "step": 4876 }, { "epoch": 0.6795791820525325, "grad_norm": 0.15984924137592316, "learning_rate": 2.6636320530912817e-06, "loss": 0.12, "step": 4877 }, { "epoch": 0.6797185257437469, "grad_norm": 0.12865720689296722, "learning_rate": 2.6615486501557765e-06, "loss": 0.137, "step": 4878 }, { "epoch": 0.6798578694349613, "grad_norm": 0.2071686089038849, "learning_rate": 2.659465766770772e-06, "loss": 0.1716, "step": 4879 }, { "epoch": 0.6799972131261757, "grad_norm": 0.10106498748064041, "learning_rate": 2.6573834033990404e-06, "loss": 0.1184, "step": 4880 }, { "epoch": 0.68013655681739, "grad_norm": 0.1544240117073059, "learning_rate": 2.655301560503234e-06, "loss": 0.1285, "step": 4881 }, { "epoch": 0.6802759005086044, "grad_norm": 0.21488454937934875, "learning_rate": 2.6532202385458875e-06, "loss": 0.1142, "step": 4882 }, { "epoch": 0.6804152441998188, "grad_norm": 0.22346986830234528, "learning_rate": 2.6511394379894274e-06, "loss": 0.1343, "step": 4883 }, { "epoch": 0.6805545878910332, "grad_norm": 0.2315378338098526, "learning_rate": 2.649059159296158e-06, "loss": 0.1486, "step": 4884 }, { "epoch": 0.6806939315822477, "grad_norm": 0.21907316148281097, "learning_rate": 2.6469794029282726e-06, "loss": 0.116, "step": 4885 }, { "epoch": 0.680833275273462, "grad_norm": 0.13674046099185944, "learning_rate": 2.6449001693478438e-06, "loss": 0.116, "step": 4886 }, { "epoch": 0.6809726189646764, "grad_norm": 0.388116717338562, "learning_rate": 2.642821459016827e-06, "loss": 0.1717, "step": 4887 }, { "epoch": 0.6811119626558908, "grad_norm": 0.13002236187458038, "learning_rate": 2.6407432723970694e-06, "loss": 0.1228, "step": 4888 }, { "epoch": 0.6812513063471052, "grad_norm": 0.14375129342079163, "learning_rate": 2.6386656099502917e-06, "loss": 0.1391, "step": 4889 }, { "epoch": 0.6813906500383196, "grad_norm": 0.0963456854224205, "learning_rate": 2.6365884721381045e-06, "loss": 0.0966, "step": 4890 }, { "epoch": 0.6815299937295339, "grad_norm": 0.26764413714408875, "learning_rate": 2.6345118594220044e-06, "loss": 0.1252, "step": 4891 }, { "epoch": 0.6816693374207483, "grad_norm": 0.1753927767276764, "learning_rate": 2.632435772263363e-06, "loss": 0.112, "step": 4892 }, { "epoch": 0.6818086811119627, "grad_norm": 0.1502913385629654, "learning_rate": 2.6303602111234394e-06, "loss": 0.1257, "step": 4893 }, { "epoch": 0.6819480248031771, "grad_norm": 0.20752540230751038, "learning_rate": 2.6282851764633765e-06, "loss": 0.142, "step": 4894 }, { "epoch": 0.6820873684943914, "grad_norm": 0.2378000020980835, "learning_rate": 2.626210668744203e-06, "loss": 0.1445, "step": 4895 }, { "epoch": 0.6822267121856058, "grad_norm": 0.14342272281646729, "learning_rate": 2.624136688426824e-06, "loss": 0.1213, "step": 4896 }, { "epoch": 0.6823660558768202, "grad_norm": 0.2696172893047333, "learning_rate": 2.6220632359720287e-06, "loss": 0.135, "step": 4897 }, { "epoch": 0.6825053995680346, "grad_norm": 0.11730610579252243, "learning_rate": 2.6199903118404934e-06, "loss": 0.1245, "step": 4898 }, { "epoch": 0.682644743259249, "grad_norm": 0.12304936349391937, "learning_rate": 2.617917916492776e-06, "loss": 0.1134, "step": 4899 }, { "epoch": 0.6827840869504633, "grad_norm": 0.2135438621044159, "learning_rate": 2.615846050389312e-06, "loss": 0.1267, "step": 4900 }, { "epoch": 0.6829234306416777, "grad_norm": 0.15296193957328796, "learning_rate": 2.6137747139904262e-06, "loss": 0.1102, "step": 4901 }, { "epoch": 0.6830627743328921, "grad_norm": 0.16523395478725433, "learning_rate": 2.611703907756319e-06, "loss": 0.1321, "step": 4902 }, { "epoch": 0.6832021180241065, "grad_norm": 0.20336700975894928, "learning_rate": 2.6096336321470796e-06, "loss": 0.1236, "step": 4903 }, { "epoch": 0.6833414617153208, "grad_norm": 0.1483083963394165, "learning_rate": 2.6075638876226715e-06, "loss": 0.1077, "step": 4904 }, { "epoch": 0.6834808054065352, "grad_norm": 0.10856851190328598, "learning_rate": 2.605494674642948e-06, "loss": 0.1002, "step": 4905 }, { "epoch": 0.6836201490977496, "grad_norm": 0.20058360695838928, "learning_rate": 2.603425993667642e-06, "loss": 0.1415, "step": 4906 }, { "epoch": 0.683759492788964, "grad_norm": 0.3221275508403778, "learning_rate": 2.6013578451563653e-06, "loss": 0.1175, "step": 4907 }, { "epoch": 0.6838988364801784, "grad_norm": 0.11366351693868637, "learning_rate": 2.599290229568612e-06, "loss": 0.1077, "step": 4908 }, { "epoch": 0.6840381801713927, "grad_norm": 0.10206498205661774, "learning_rate": 2.59722314736376e-06, "loss": 0.1109, "step": 4909 }, { "epoch": 0.6841775238626071, "grad_norm": 0.2480747401714325, "learning_rate": 2.5951565990010706e-06, "loss": 0.155, "step": 4910 }, { "epoch": 0.6843168675538215, "grad_norm": 0.11301609873771667, "learning_rate": 2.5930905849396792e-06, "loss": 0.0986, "step": 4911 }, { "epoch": 0.6844562112450359, "grad_norm": 0.18310189247131348, "learning_rate": 2.5910251056386113e-06, "loss": 0.1232, "step": 4912 }, { "epoch": 0.6845955549362502, "grad_norm": 0.2474045306444168, "learning_rate": 2.5889601615567657e-06, "loss": 0.1852, "step": 4913 }, { "epoch": 0.6847348986274646, "grad_norm": 0.09855977445840836, "learning_rate": 2.5868957531529283e-06, "loss": 0.1026, "step": 4914 }, { "epoch": 0.684874242318679, "grad_norm": 0.2179051786661148, "learning_rate": 2.584831880885761e-06, "loss": 0.1715, "step": 4915 }, { "epoch": 0.6850135860098934, "grad_norm": 0.2030625343322754, "learning_rate": 2.582768545213811e-06, "loss": 0.1517, "step": 4916 }, { "epoch": 0.6851529297011077, "grad_norm": 0.1434762179851532, "learning_rate": 2.5807057465955065e-06, "loss": 0.1271, "step": 4917 }, { "epoch": 0.6852922733923221, "grad_norm": 0.13858671486377716, "learning_rate": 2.5786434854891482e-06, "loss": 0.133, "step": 4918 }, { "epoch": 0.6854316170835365, "grad_norm": 0.10699322074651718, "learning_rate": 2.576581762352928e-06, "loss": 0.1089, "step": 4919 }, { "epoch": 0.6855709607747509, "grad_norm": 0.16485914587974548, "learning_rate": 2.574520577644913e-06, "loss": 0.1146, "step": 4920 }, { "epoch": 0.6857103044659653, "grad_norm": 0.1384771466255188, "learning_rate": 2.5724599318230504e-06, "loss": 0.13, "step": 4921 }, { "epoch": 0.6858496481571796, "grad_norm": 0.1077318862080574, "learning_rate": 2.570399825345169e-06, "loss": 0.1096, "step": 4922 }, { "epoch": 0.685988991848394, "grad_norm": 0.11609398573637009, "learning_rate": 2.5683402586689788e-06, "loss": 0.1163, "step": 4923 }, { "epoch": 0.6861283355396084, "grad_norm": 0.22650259733200073, "learning_rate": 2.566281232252068e-06, "loss": 0.1274, "step": 4924 }, { "epoch": 0.6862676792308228, "grad_norm": 0.15202485024929047, "learning_rate": 2.564222746551903e-06, "loss": 0.1244, "step": 4925 }, { "epoch": 0.6864070229220373, "grad_norm": 0.17710189521312714, "learning_rate": 2.562164802025834e-06, "loss": 0.1475, "step": 4926 }, { "epoch": 0.6865463666132516, "grad_norm": 0.13763867318630219, "learning_rate": 2.5601073991310903e-06, "loss": 0.1427, "step": 4927 }, { "epoch": 0.686685710304466, "grad_norm": 0.1741667240858078, "learning_rate": 2.5580505383247796e-06, "loss": 0.1274, "step": 4928 }, { "epoch": 0.6868250539956804, "grad_norm": 0.20156998932361603, "learning_rate": 2.5559942200638866e-06, "loss": 0.1427, "step": 4929 }, { "epoch": 0.6869643976868948, "grad_norm": 0.13348637521266937, "learning_rate": 2.5539384448052797e-06, "loss": 0.1349, "step": 4930 }, { "epoch": 0.6871037413781091, "grad_norm": 0.18214093148708344, "learning_rate": 2.5518832130057082e-06, "loss": 0.1363, "step": 4931 }, { "epoch": 0.6872430850693235, "grad_norm": 0.14460431039333344, "learning_rate": 2.5498285251217938e-06, "loss": 0.126, "step": 4932 }, { "epoch": 0.6873824287605379, "grad_norm": 0.20418886840343475, "learning_rate": 2.5477743816100443e-06, "loss": 0.1748, "step": 4933 }, { "epoch": 0.6875217724517523, "grad_norm": 0.1616395264863968, "learning_rate": 2.5457207829268394e-06, "loss": 0.1308, "step": 4934 }, { "epoch": 0.6876611161429667, "grad_norm": 0.16586662828922272, "learning_rate": 2.5436677295284474e-06, "loss": 0.1451, "step": 4935 }, { "epoch": 0.687800459834181, "grad_norm": 0.13957712054252625, "learning_rate": 2.5416152218710044e-06, "loss": 0.135, "step": 4936 }, { "epoch": 0.6879398035253954, "grad_norm": 0.2254328727722168, "learning_rate": 2.539563260410533e-06, "loss": 0.1344, "step": 4937 }, { "epoch": 0.6880791472166098, "grad_norm": 0.2396067976951599, "learning_rate": 2.5375118456029345e-06, "loss": 0.1268, "step": 4938 }, { "epoch": 0.6882184909078242, "grad_norm": 0.1447124481201172, "learning_rate": 2.5354609779039844e-06, "loss": 0.1426, "step": 4939 }, { "epoch": 0.6883578345990385, "grad_norm": 0.21360009908676147, "learning_rate": 2.533410657769337e-06, "loss": 0.1508, "step": 4940 }, { "epoch": 0.6884971782902529, "grad_norm": 0.17707158625125885, "learning_rate": 2.531360885654528e-06, "loss": 0.1356, "step": 4941 }, { "epoch": 0.6886365219814673, "grad_norm": 0.15219347178936005, "learning_rate": 2.529311662014972e-06, "loss": 0.1204, "step": 4942 }, { "epoch": 0.6887758656726817, "grad_norm": 0.14901480078697205, "learning_rate": 2.5272629873059564e-06, "loss": 0.1305, "step": 4943 }, { "epoch": 0.688915209363896, "grad_norm": 0.1053147092461586, "learning_rate": 2.5252148619826535e-06, "loss": 0.1014, "step": 4944 }, { "epoch": 0.6890545530551104, "grad_norm": 0.2444324493408203, "learning_rate": 2.5231672865001056e-06, "loss": 0.1416, "step": 4945 }, { "epoch": 0.6891938967463248, "grad_norm": 0.10365939140319824, "learning_rate": 2.5211202613132413e-06, "loss": 0.1027, "step": 4946 }, { "epoch": 0.6893332404375392, "grad_norm": 0.28391727805137634, "learning_rate": 2.5190737868768592e-06, "loss": 0.1602, "step": 4947 }, { "epoch": 0.6894725841287536, "grad_norm": 0.11859111487865448, "learning_rate": 2.5170278636456413e-06, "loss": 0.128, "step": 4948 }, { "epoch": 0.6896119278199679, "grad_norm": 0.1415063887834549, "learning_rate": 2.5149824920741493e-06, "loss": 0.1158, "step": 4949 }, { "epoch": 0.6897512715111823, "grad_norm": 0.17692235112190247, "learning_rate": 2.51293767261681e-06, "loss": 0.1196, "step": 4950 }, { "epoch": 0.6898906152023967, "grad_norm": 0.10171855241060257, "learning_rate": 2.5108934057279376e-06, "loss": 0.1299, "step": 4951 }, { "epoch": 0.6900299588936111, "grad_norm": 0.09981095045804977, "learning_rate": 2.5088496918617243e-06, "loss": 0.1019, "step": 4952 }, { "epoch": 0.6901693025848255, "grad_norm": 0.12461677938699722, "learning_rate": 2.5068065314722378e-06, "loss": 0.1124, "step": 4953 }, { "epoch": 0.6903086462760398, "grad_norm": 0.12051946669816971, "learning_rate": 2.504763925013419e-06, "loss": 0.1088, "step": 4954 }, { "epoch": 0.6904479899672542, "grad_norm": 0.16984303295612335, "learning_rate": 2.5027218729390867e-06, "loss": 0.1453, "step": 4955 }, { "epoch": 0.6905873336584686, "grad_norm": 0.12630252540111542, "learning_rate": 2.500680375702943e-06, "loss": 0.1273, "step": 4956 }, { "epoch": 0.690726677349683, "grad_norm": 0.14061599969863892, "learning_rate": 2.498639433758557e-06, "loss": 0.1265, "step": 4957 }, { "epoch": 0.6908660210408973, "grad_norm": 0.11915503442287445, "learning_rate": 2.4965990475593814e-06, "loss": 0.0998, "step": 4958 }, { "epoch": 0.6910053647321117, "grad_norm": 0.14075008034706116, "learning_rate": 2.494559217558746e-06, "loss": 0.1429, "step": 4959 }, { "epoch": 0.6911447084233261, "grad_norm": 0.2005535215139389, "learning_rate": 2.492519944209853e-06, "loss": 0.1066, "step": 4960 }, { "epoch": 0.6912840521145405, "grad_norm": 0.1923806220293045, "learning_rate": 2.4904812279657792e-06, "loss": 0.1184, "step": 4961 }, { "epoch": 0.6914233958057548, "grad_norm": 0.20639950037002563, "learning_rate": 2.488443069279483e-06, "loss": 0.1523, "step": 4962 }, { "epoch": 0.6915627394969692, "grad_norm": 0.13545550405979156, "learning_rate": 2.4864054686037993e-06, "loss": 0.1112, "step": 4963 }, { "epoch": 0.6917020831881836, "grad_norm": 0.0960499569773674, "learning_rate": 2.484368426391432e-06, "loss": 0.1149, "step": 4964 }, { "epoch": 0.691841426879398, "grad_norm": 0.1936836987733841, "learning_rate": 2.482331943094969e-06, "loss": 0.1432, "step": 4965 }, { "epoch": 0.6919807705706125, "grad_norm": 0.21019834280014038, "learning_rate": 2.480296019166868e-06, "loss": 0.1461, "step": 4966 }, { "epoch": 0.6921201142618268, "grad_norm": 0.18198008835315704, "learning_rate": 2.478260655059467e-06, "loss": 0.0988, "step": 4967 }, { "epoch": 0.6922594579530412, "grad_norm": 0.16699102520942688, "learning_rate": 2.4762258512249745e-06, "loss": 0.1633, "step": 4968 }, { "epoch": 0.6923988016442556, "grad_norm": 0.07944336533546448, "learning_rate": 2.4741916081154786e-06, "loss": 0.0917, "step": 4969 }, { "epoch": 0.69253814533547, "grad_norm": 0.17734238505363464, "learning_rate": 2.472157926182945e-06, "loss": 0.1281, "step": 4970 }, { "epoch": 0.6926774890266844, "grad_norm": 0.11540418863296509, "learning_rate": 2.470124805879208e-06, "loss": 0.0994, "step": 4971 }, { "epoch": 0.6928168327178987, "grad_norm": 0.16250549256801605, "learning_rate": 2.468092247655979e-06, "loss": 0.1415, "step": 4972 }, { "epoch": 0.6929561764091131, "grad_norm": 0.1250845342874527, "learning_rate": 2.466060251964848e-06, "loss": 0.1399, "step": 4973 }, { "epoch": 0.6930955201003275, "grad_norm": 0.1419483870267868, "learning_rate": 2.464028819257281e-06, "loss": 0.1401, "step": 4974 }, { "epoch": 0.6932348637915419, "grad_norm": 0.14666271209716797, "learning_rate": 2.4619979499846127e-06, "loss": 0.1229, "step": 4975 }, { "epoch": 0.6933742074827562, "grad_norm": 0.16327370703220367, "learning_rate": 2.459967644598054e-06, "loss": 0.1191, "step": 4976 }, { "epoch": 0.6935135511739706, "grad_norm": 0.2021070420742035, "learning_rate": 2.457937903548695e-06, "loss": 0.1299, "step": 4977 }, { "epoch": 0.693652894865185, "grad_norm": 0.12294040620326996, "learning_rate": 2.4559087272875e-06, "loss": 0.1216, "step": 4978 }, { "epoch": 0.6937922385563994, "grad_norm": 0.09144072979688644, "learning_rate": 2.4538801162653002e-06, "loss": 0.0972, "step": 4979 }, { "epoch": 0.6939315822476138, "grad_norm": 0.2147776484489441, "learning_rate": 2.451852070932811e-06, "loss": 0.1416, "step": 4980 }, { "epoch": 0.6940709259388281, "grad_norm": 0.24856378138065338, "learning_rate": 2.4498245917406195e-06, "loss": 0.1073, "step": 4981 }, { "epoch": 0.6942102696300425, "grad_norm": 0.1435723900794983, "learning_rate": 2.4477976791391784e-06, "loss": 0.1193, "step": 4982 }, { "epoch": 0.6943496133212569, "grad_norm": 0.17609155178070068, "learning_rate": 2.445771333578825e-06, "loss": 0.1588, "step": 4983 }, { "epoch": 0.6944889570124713, "grad_norm": 0.25270891189575195, "learning_rate": 2.443745555509768e-06, "loss": 0.1354, "step": 4984 }, { "epoch": 0.6946283007036856, "grad_norm": 0.1564420461654663, "learning_rate": 2.4417203453820892e-06, "loss": 0.1072, "step": 4985 }, { "epoch": 0.6947676443949, "grad_norm": 0.17542217671871185, "learning_rate": 2.4396957036457443e-06, "loss": 0.1109, "step": 4986 }, { "epoch": 0.6949069880861144, "grad_norm": 0.16147711873054504, "learning_rate": 2.437671630750558e-06, "loss": 0.1208, "step": 4987 }, { "epoch": 0.6950463317773288, "grad_norm": 0.12148775160312653, "learning_rate": 2.4356481271462396e-06, "loss": 0.1203, "step": 4988 }, { "epoch": 0.6951856754685432, "grad_norm": 0.12348615378141403, "learning_rate": 2.4336251932823594e-06, "loss": 0.1156, "step": 4989 }, { "epoch": 0.6953250191597575, "grad_norm": 0.15214011073112488, "learning_rate": 2.4316028296083705e-06, "loss": 0.1357, "step": 4990 }, { "epoch": 0.6954643628509719, "grad_norm": 0.10681517422199249, "learning_rate": 2.4295810365735974e-06, "loss": 0.1184, "step": 4991 }, { "epoch": 0.6956037065421863, "grad_norm": 0.25233402848243713, "learning_rate": 2.427559814627234e-06, "loss": 0.1245, "step": 4992 }, { "epoch": 0.6957430502334007, "grad_norm": 0.25248706340789795, "learning_rate": 2.425539164218348e-06, "loss": 0.1319, "step": 4993 }, { "epoch": 0.695882393924615, "grad_norm": 0.17471224069595337, "learning_rate": 2.4235190857958834e-06, "loss": 0.1158, "step": 4994 }, { "epoch": 0.6960217376158294, "grad_norm": 0.19847635924816132, "learning_rate": 2.4214995798086584e-06, "loss": 0.1165, "step": 4995 }, { "epoch": 0.6961610813070438, "grad_norm": 0.203476220369339, "learning_rate": 2.4194806467053584e-06, "loss": 0.1878, "step": 4996 }, { "epoch": 0.6963004249982582, "grad_norm": 0.18247050046920776, "learning_rate": 2.417462286934543e-06, "loss": 0.1286, "step": 4997 }, { "epoch": 0.6964397686894725, "grad_norm": 0.13135674595832825, "learning_rate": 2.4154445009446457e-06, "loss": 0.1082, "step": 4998 }, { "epoch": 0.6965791123806869, "grad_norm": 0.21275480091571808, "learning_rate": 2.413427289183977e-06, "loss": 0.1504, "step": 4999 }, { "epoch": 0.6967184560719013, "grad_norm": 0.15498626232147217, "learning_rate": 2.41141065210071e-06, "loss": 0.152, "step": 5000 }, { "epoch": 0.6968577997631157, "grad_norm": 0.2922079861164093, "learning_rate": 2.4093945901428977e-06, "loss": 0.1634, "step": 5001 }, { "epoch": 0.6969971434543301, "grad_norm": 0.255430668592453, "learning_rate": 2.4073791037584648e-06, "loss": 0.1123, "step": 5002 }, { "epoch": 0.6971364871455444, "grad_norm": 0.12267529964447021, "learning_rate": 2.4053641933952043e-06, "loss": 0.1029, "step": 5003 }, { "epoch": 0.6972758308367588, "grad_norm": 0.15961164236068726, "learning_rate": 2.403349859500782e-06, "loss": 0.1499, "step": 5004 }, { "epoch": 0.6974151745279732, "grad_norm": 0.31914031505584717, "learning_rate": 2.4013361025227384e-06, "loss": 0.1157, "step": 5005 }, { "epoch": 0.6975545182191877, "grad_norm": 0.19343112409114838, "learning_rate": 2.3993229229084856e-06, "loss": 0.1419, "step": 5006 }, { "epoch": 0.6976938619104021, "grad_norm": 0.1783185452222824, "learning_rate": 2.3973103211053052e-06, "loss": 0.119, "step": 5007 }, { "epoch": 0.6978332056016164, "grad_norm": 0.17543365061283112, "learning_rate": 2.3952982975603494e-06, "loss": 0.1299, "step": 5008 }, { "epoch": 0.6979725492928308, "grad_norm": 0.21240520477294922, "learning_rate": 2.393286852720645e-06, "loss": 0.1352, "step": 5009 }, { "epoch": 0.6981118929840452, "grad_norm": 0.2613601088523865, "learning_rate": 2.391275987033092e-06, "loss": 0.1237, "step": 5010 }, { "epoch": 0.6982512366752596, "grad_norm": 0.17975552380084991, "learning_rate": 2.3892657009444543e-06, "loss": 0.1171, "step": 5011 }, { "epoch": 0.698390580366474, "grad_norm": 0.12082284688949585, "learning_rate": 2.387255994901376e-06, "loss": 0.1188, "step": 5012 }, { "epoch": 0.6985299240576883, "grad_norm": 0.16580429673194885, "learning_rate": 2.3852468693503635e-06, "loss": 0.1257, "step": 5013 }, { "epoch": 0.6986692677489027, "grad_norm": 0.19063438475131989, "learning_rate": 2.3832383247378025e-06, "loss": 0.1247, "step": 5014 }, { "epoch": 0.6988086114401171, "grad_norm": 0.15718133747577667, "learning_rate": 2.3812303615099423e-06, "loss": 0.109, "step": 5015 }, { "epoch": 0.6989479551313315, "grad_norm": 0.20142066478729248, "learning_rate": 2.3792229801129086e-06, "loss": 0.1455, "step": 5016 }, { "epoch": 0.6990872988225458, "grad_norm": 0.1411486119031906, "learning_rate": 2.3772161809926973e-06, "loss": 0.1326, "step": 5017 }, { "epoch": 0.6992266425137602, "grad_norm": 0.2213156670331955, "learning_rate": 2.375209964595171e-06, "loss": 0.1148, "step": 5018 }, { "epoch": 0.6993659862049746, "grad_norm": 0.13371357321739197, "learning_rate": 2.373204331366064e-06, "loss": 0.1312, "step": 5019 }, { "epoch": 0.699505329896189, "grad_norm": 0.2855522930622101, "learning_rate": 2.3711992817509854e-06, "loss": 0.1338, "step": 5020 }, { "epoch": 0.6996446735874033, "grad_norm": 0.12220947444438934, "learning_rate": 2.3691948161954083e-06, "loss": 0.1187, "step": 5021 }, { "epoch": 0.6997840172786177, "grad_norm": 0.10060225427150726, "learning_rate": 2.3671909351446802e-06, "loss": 0.1067, "step": 5022 }, { "epoch": 0.6999233609698321, "grad_norm": 0.12891152501106262, "learning_rate": 2.365187639044021e-06, "loss": 0.1142, "step": 5023 }, { "epoch": 0.7000627046610465, "grad_norm": 0.21477144956588745, "learning_rate": 2.363184928338514e-06, "loss": 0.1236, "step": 5024 }, { "epoch": 0.7002020483522609, "grad_norm": 0.12837223708629608, "learning_rate": 2.3611828034731144e-06, "loss": 0.1465, "step": 5025 }, { "epoch": 0.7003413920434752, "grad_norm": 0.18357597291469574, "learning_rate": 2.359181264892651e-06, "loss": 0.1254, "step": 5026 }, { "epoch": 0.7004807357346896, "grad_norm": 0.17628328502178192, "learning_rate": 2.3571803130418215e-06, "loss": 0.1456, "step": 5027 }, { "epoch": 0.700620079425904, "grad_norm": 0.17687232792377472, "learning_rate": 2.3551799483651894e-06, "loss": 0.165, "step": 5028 }, { "epoch": 0.7007594231171184, "grad_norm": 0.18871518969535828, "learning_rate": 2.3531801713071887e-06, "loss": 0.1488, "step": 5029 }, { "epoch": 0.7008987668083327, "grad_norm": 0.14490818977355957, "learning_rate": 2.351180982312127e-06, "loss": 0.1239, "step": 5030 }, { "epoch": 0.7010381104995471, "grad_norm": 0.19001056253910065, "learning_rate": 2.349182381824178e-06, "loss": 0.134, "step": 5031 }, { "epoch": 0.7011774541907615, "grad_norm": 0.11044707149267197, "learning_rate": 2.3471843702873835e-06, "loss": 0.1048, "step": 5032 }, { "epoch": 0.7013167978819759, "grad_norm": 0.12093711644411087, "learning_rate": 2.345186948145659e-06, "loss": 0.1198, "step": 5033 }, { "epoch": 0.7014561415731903, "grad_norm": 0.13276296854019165, "learning_rate": 2.343190115842782e-06, "loss": 0.1362, "step": 5034 }, { "epoch": 0.7015954852644046, "grad_norm": 0.28986823558807373, "learning_rate": 2.341193873822407e-06, "loss": 0.1388, "step": 5035 }, { "epoch": 0.701734828955619, "grad_norm": 0.09978175163269043, "learning_rate": 2.33919822252805e-06, "loss": 0.1155, "step": 5036 }, { "epoch": 0.7018741726468334, "grad_norm": 0.22561344504356384, "learning_rate": 2.337203162403101e-06, "loss": 0.1522, "step": 5037 }, { "epoch": 0.7020135163380478, "grad_norm": 0.19078589975833893, "learning_rate": 2.335208693890819e-06, "loss": 0.1416, "step": 5038 }, { "epoch": 0.7021528600292621, "grad_norm": 0.188469797372818, "learning_rate": 2.3332148174343257e-06, "loss": 0.1314, "step": 5039 }, { "epoch": 0.7022922037204765, "grad_norm": 0.1038111075758934, "learning_rate": 2.331221533476615e-06, "loss": 0.1102, "step": 5040 }, { "epoch": 0.7024315474116909, "grad_norm": 0.14786268770694733, "learning_rate": 2.3292288424605503e-06, "loss": 0.1224, "step": 5041 }, { "epoch": 0.7025708911029053, "grad_norm": 0.147341787815094, "learning_rate": 2.327236744828864e-06, "loss": 0.1225, "step": 5042 }, { "epoch": 0.7027102347941196, "grad_norm": 0.11858817934989929, "learning_rate": 2.325245241024151e-06, "loss": 0.1318, "step": 5043 }, { "epoch": 0.702849578485334, "grad_norm": 0.11286652088165283, "learning_rate": 2.323254331488881e-06, "loss": 0.1126, "step": 5044 }, { "epoch": 0.7029889221765484, "grad_norm": 0.14689071476459503, "learning_rate": 2.3212640166653868e-06, "loss": 0.1464, "step": 5045 }, { "epoch": 0.7031282658677629, "grad_norm": 0.11447480320930481, "learning_rate": 2.319274296995872e-06, "loss": 0.1094, "step": 5046 }, { "epoch": 0.7032676095589773, "grad_norm": 0.14389532804489136, "learning_rate": 2.3172851729224056e-06, "loss": 0.1277, "step": 5047 }, { "epoch": 0.7034069532501916, "grad_norm": 0.15796072781085968, "learning_rate": 2.315296644886926e-06, "loss": 0.1274, "step": 5048 }, { "epoch": 0.703546296941406, "grad_norm": 0.1592448651790619, "learning_rate": 2.313308713331242e-06, "loss": 0.1532, "step": 5049 }, { "epoch": 0.7036856406326204, "grad_norm": 0.2579653859138489, "learning_rate": 2.3113213786970205e-06, "loss": 0.1391, "step": 5050 }, { "epoch": 0.7038249843238348, "grad_norm": 0.2586260735988617, "learning_rate": 2.3093346414258054e-06, "loss": 0.1448, "step": 5051 }, { "epoch": 0.7039643280150492, "grad_norm": 0.14558500051498413, "learning_rate": 2.3073485019590043e-06, "loss": 0.1407, "step": 5052 }, { "epoch": 0.7041036717062635, "grad_norm": 0.13148021697998047, "learning_rate": 2.305362960737893e-06, "loss": 0.1359, "step": 5053 }, { "epoch": 0.7042430153974779, "grad_norm": 0.1668182760477066, "learning_rate": 2.3033780182036127e-06, "loss": 0.1463, "step": 5054 }, { "epoch": 0.7043823590886923, "grad_norm": 0.15190379321575165, "learning_rate": 2.301393674797169e-06, "loss": 0.1276, "step": 5055 }, { "epoch": 0.7045217027799067, "grad_norm": 0.16120189428329468, "learning_rate": 2.2994099309594437e-06, "loss": 0.1171, "step": 5056 }, { "epoch": 0.704661046471121, "grad_norm": 0.10075979679822922, "learning_rate": 2.297426787131174e-06, "loss": 0.0982, "step": 5057 }, { "epoch": 0.7048003901623354, "grad_norm": 0.1970769762992859, "learning_rate": 2.2954442437529705e-06, "loss": 0.1338, "step": 5058 }, { "epoch": 0.7049397338535498, "grad_norm": 0.12877807021141052, "learning_rate": 2.293462301265313e-06, "loss": 0.1043, "step": 5059 }, { "epoch": 0.7050790775447642, "grad_norm": 0.11681177467107773, "learning_rate": 2.2914809601085405e-06, "loss": 0.0918, "step": 5060 }, { "epoch": 0.7052184212359786, "grad_norm": 0.21691890060901642, "learning_rate": 2.28950022072286e-06, "loss": 0.1593, "step": 5061 }, { "epoch": 0.7053577649271929, "grad_norm": 0.15212665498256683, "learning_rate": 2.2875200835483486e-06, "loss": 0.1379, "step": 5062 }, { "epoch": 0.7054971086184073, "grad_norm": 0.14952352643013, "learning_rate": 2.2855405490249498e-06, "loss": 0.1065, "step": 5063 }, { "epoch": 0.7056364523096217, "grad_norm": 0.20930641889572144, "learning_rate": 2.283561617592467e-06, "loss": 0.1472, "step": 5064 }, { "epoch": 0.7057757960008361, "grad_norm": 0.12551824748516083, "learning_rate": 2.2815832896905772e-06, "loss": 0.1201, "step": 5065 }, { "epoch": 0.7059151396920504, "grad_norm": 0.09789729118347168, "learning_rate": 2.279605565758816e-06, "loss": 0.1149, "step": 5066 }, { "epoch": 0.7060544833832648, "grad_norm": 0.15473978221416473, "learning_rate": 2.277628446236592e-06, "loss": 0.1057, "step": 5067 }, { "epoch": 0.7061938270744792, "grad_norm": 0.11908694356679916, "learning_rate": 2.275651931563173e-06, "loss": 0.1171, "step": 5068 }, { "epoch": 0.7063331707656936, "grad_norm": 0.2508281469345093, "learning_rate": 2.273676022177697e-06, "loss": 0.1308, "step": 5069 }, { "epoch": 0.706472514456908, "grad_norm": 0.13868644833564758, "learning_rate": 2.2717007185191673e-06, "loss": 0.114, "step": 5070 }, { "epoch": 0.7066118581481223, "grad_norm": 0.19916445016860962, "learning_rate": 2.2697260210264506e-06, "loss": 0.1399, "step": 5071 }, { "epoch": 0.7067512018393367, "grad_norm": 0.20841211080551147, "learning_rate": 2.267751930138276e-06, "loss": 0.1451, "step": 5072 }, { "epoch": 0.7068905455305511, "grad_norm": 0.12819382548332214, "learning_rate": 2.265778446293245e-06, "loss": 0.1223, "step": 5073 }, { "epoch": 0.7070298892217655, "grad_norm": 0.1344398409128189, "learning_rate": 2.263805569929821e-06, "loss": 0.1085, "step": 5074 }, { "epoch": 0.7071692329129798, "grad_norm": 0.19092713296413422, "learning_rate": 2.2618333014863296e-06, "loss": 0.1523, "step": 5075 }, { "epoch": 0.7073085766041942, "grad_norm": 0.2087249904870987, "learning_rate": 2.259861641400967e-06, "loss": 0.1406, "step": 5076 }, { "epoch": 0.7074479202954086, "grad_norm": 0.1220986470580101, "learning_rate": 2.2578905901117876e-06, "loss": 0.1304, "step": 5077 }, { "epoch": 0.707587263986623, "grad_norm": 0.09135758876800537, "learning_rate": 2.255920148056717e-06, "loss": 0.0984, "step": 5078 }, { "epoch": 0.7077266076778373, "grad_norm": 0.16629447042942047, "learning_rate": 2.2539503156735392e-06, "loss": 0.121, "step": 5079 }, { "epoch": 0.7078659513690517, "grad_norm": 0.1733112782239914, "learning_rate": 2.2519810933999085e-06, "loss": 0.1356, "step": 5080 }, { "epoch": 0.7080052950602661, "grad_norm": 0.1193411648273468, "learning_rate": 2.2500124816733437e-06, "loss": 0.1143, "step": 5081 }, { "epoch": 0.7081446387514805, "grad_norm": 0.16389796137809753, "learning_rate": 2.248044480931219e-06, "loss": 0.152, "step": 5082 }, { "epoch": 0.7082839824426949, "grad_norm": 0.15280675888061523, "learning_rate": 2.2460770916107823e-06, "loss": 0.131, "step": 5083 }, { "epoch": 0.7084233261339092, "grad_norm": 0.14475087821483612, "learning_rate": 2.2441103141491424e-06, "loss": 0.1223, "step": 5084 }, { "epoch": 0.7085626698251236, "grad_norm": 0.19299034774303436, "learning_rate": 2.2421441489832745e-06, "loss": 0.1246, "step": 5085 }, { "epoch": 0.7087020135163381, "grad_norm": 0.15500614047050476, "learning_rate": 2.240178596550014e-06, "loss": 0.109, "step": 5086 }, { "epoch": 0.7088413572075525, "grad_norm": 0.17026382684707642, "learning_rate": 2.23821365728606e-06, "loss": 0.147, "step": 5087 }, { "epoch": 0.7089807008987669, "grad_norm": 0.14889344573020935, "learning_rate": 2.23624933162798e-06, "loss": 0.1245, "step": 5088 }, { "epoch": 0.7091200445899812, "grad_norm": 0.13668875396251678, "learning_rate": 2.2342856200121993e-06, "loss": 0.1219, "step": 5089 }, { "epoch": 0.7092593882811956, "grad_norm": 0.10454686731100082, "learning_rate": 2.2323225228750113e-06, "loss": 0.1208, "step": 5090 }, { "epoch": 0.70939873197241, "grad_norm": 0.12952940165996552, "learning_rate": 2.230360040652574e-06, "loss": 0.1086, "step": 5091 }, { "epoch": 0.7095380756636244, "grad_norm": 0.22589339315891266, "learning_rate": 2.228398173780903e-06, "loss": 0.1291, "step": 5092 }, { "epoch": 0.7096774193548387, "grad_norm": 0.12429624050855637, "learning_rate": 2.2264369226958794e-06, "loss": 0.1161, "step": 5093 }, { "epoch": 0.7098167630460531, "grad_norm": 0.09983576089143753, "learning_rate": 2.2244762878332506e-06, "loss": 0.1039, "step": 5094 }, { "epoch": 0.7099561067372675, "grad_norm": 0.13725653290748596, "learning_rate": 2.222516269628626e-06, "loss": 0.1253, "step": 5095 }, { "epoch": 0.7100954504284819, "grad_norm": 0.11947385966777802, "learning_rate": 2.220556868517473e-06, "loss": 0.1176, "step": 5096 }, { "epoch": 0.7102347941196963, "grad_norm": 0.12173144519329071, "learning_rate": 2.2185980849351295e-06, "loss": 0.1332, "step": 5097 }, { "epoch": 0.7103741378109106, "grad_norm": 0.23739784955978394, "learning_rate": 2.2166399193167905e-06, "loss": 0.1542, "step": 5098 }, { "epoch": 0.710513481502125, "grad_norm": 0.13571415841579437, "learning_rate": 2.214682372097517e-06, "loss": 0.1062, "step": 5099 }, { "epoch": 0.7106528251933394, "grad_norm": 0.16563531756401062, "learning_rate": 2.212725443712229e-06, "loss": 0.1367, "step": 5100 }, { "epoch": 0.7107921688845538, "grad_norm": 0.14025168120861053, "learning_rate": 2.2107691345957133e-06, "loss": 0.1132, "step": 5101 }, { "epoch": 0.7109315125757681, "grad_norm": 0.18067699670791626, "learning_rate": 2.208813445182618e-06, "loss": 0.1243, "step": 5102 }, { "epoch": 0.7110708562669825, "grad_norm": 0.15066401660442352, "learning_rate": 2.2068583759074513e-06, "loss": 0.1432, "step": 5103 }, { "epoch": 0.7112101999581969, "grad_norm": 0.16622944176197052, "learning_rate": 2.2049039272045837e-06, "loss": 0.136, "step": 5104 }, { "epoch": 0.7113495436494113, "grad_norm": 0.17569535970687866, "learning_rate": 2.2029500995082497e-06, "loss": 0.1282, "step": 5105 }, { "epoch": 0.7114888873406257, "grad_norm": 0.12340820580720901, "learning_rate": 2.2009968932525478e-06, "loss": 0.1133, "step": 5106 }, { "epoch": 0.71162823103184, "grad_norm": 0.19312725961208344, "learning_rate": 2.199044308871434e-06, "loss": 0.1269, "step": 5107 }, { "epoch": 0.7117675747230544, "grad_norm": 0.17491111159324646, "learning_rate": 2.197092346798726e-06, "loss": 0.1302, "step": 5108 }, { "epoch": 0.7119069184142688, "grad_norm": 0.17964674532413483, "learning_rate": 2.1951410074681074e-06, "loss": 0.1302, "step": 5109 }, { "epoch": 0.7120462621054832, "grad_norm": 0.0974874347448349, "learning_rate": 2.193190291313122e-06, "loss": 0.1003, "step": 5110 }, { "epoch": 0.7121856057966975, "grad_norm": 0.11501161754131317, "learning_rate": 2.1912401987671724e-06, "loss": 0.1147, "step": 5111 }, { "epoch": 0.7123249494879119, "grad_norm": 0.16334709525108337, "learning_rate": 2.1892907302635246e-06, "loss": 0.1582, "step": 5112 }, { "epoch": 0.7124642931791263, "grad_norm": 0.22808928787708282, "learning_rate": 2.1873418862353095e-06, "loss": 0.1343, "step": 5113 }, { "epoch": 0.7126036368703407, "grad_norm": 0.09173895418643951, "learning_rate": 2.185393667115513e-06, "loss": 0.0994, "step": 5114 }, { "epoch": 0.712742980561555, "grad_norm": 0.2657230496406555, "learning_rate": 2.1834460733369835e-06, "loss": 0.1615, "step": 5115 }, { "epoch": 0.7128823242527694, "grad_norm": 0.14257390797138214, "learning_rate": 2.181499105332433e-06, "loss": 0.1265, "step": 5116 }, { "epoch": 0.7130216679439838, "grad_norm": 0.10519010573625565, "learning_rate": 2.179552763534436e-06, "loss": 0.1121, "step": 5117 }, { "epoch": 0.7131610116351982, "grad_norm": 0.20358841121196747, "learning_rate": 2.177607048375423e-06, "loss": 0.142, "step": 5118 }, { "epoch": 0.7133003553264126, "grad_norm": 0.13170665502548218, "learning_rate": 2.1756619602876857e-06, "loss": 0.1274, "step": 5119 }, { "epoch": 0.7134396990176269, "grad_norm": 0.18631644546985626, "learning_rate": 2.1737174997033818e-06, "loss": 0.1311, "step": 5120 }, { "epoch": 0.7135790427088413, "grad_norm": 0.15031611919403076, "learning_rate": 2.1717736670545226e-06, "loss": 0.1151, "step": 5121 }, { "epoch": 0.7137183864000557, "grad_norm": 0.12501047551631927, "learning_rate": 2.169830462772985e-06, "loss": 0.1264, "step": 5122 }, { "epoch": 0.7138577300912701, "grad_norm": 0.2394302636384964, "learning_rate": 2.1678878872905063e-06, "loss": 0.1507, "step": 5123 }, { "epoch": 0.7139970737824844, "grad_norm": 0.13308711349964142, "learning_rate": 2.1659459410386814e-06, "loss": 0.118, "step": 5124 }, { "epoch": 0.7141364174736988, "grad_norm": 0.22021904587745667, "learning_rate": 2.1640046244489637e-06, "loss": 0.1416, "step": 5125 }, { "epoch": 0.7142757611649132, "grad_norm": 0.13476140797138214, "learning_rate": 2.1620639379526715e-06, "loss": 0.1197, "step": 5126 }, { "epoch": 0.7144151048561277, "grad_norm": 0.15035848319530487, "learning_rate": 2.1601238819809827e-06, "loss": 0.0993, "step": 5127 }, { "epoch": 0.7145544485473421, "grad_norm": 0.17732226848602295, "learning_rate": 2.158184456964932e-06, "loss": 0.133, "step": 5128 }, { "epoch": 0.7146937922385564, "grad_norm": 0.2593410313129425, "learning_rate": 2.156245663335414e-06, "loss": 0.1358, "step": 5129 }, { "epoch": 0.7148331359297708, "grad_norm": 0.13537146151065826, "learning_rate": 2.154307501523185e-06, "loss": 0.1176, "step": 5130 }, { "epoch": 0.7149724796209852, "grad_norm": 0.15763233602046967, "learning_rate": 2.1523699719588633e-06, "loss": 0.1768, "step": 5131 }, { "epoch": 0.7151118233121996, "grad_norm": 0.12992972135543823, "learning_rate": 2.1504330750729185e-06, "loss": 0.1216, "step": 5132 }, { "epoch": 0.715251167003414, "grad_norm": 0.17290528118610382, "learning_rate": 2.1484968112956884e-06, "loss": 0.1484, "step": 5133 }, { "epoch": 0.7153905106946283, "grad_norm": 0.22103078663349152, "learning_rate": 2.146561181057368e-06, "loss": 0.1363, "step": 5134 }, { "epoch": 0.7155298543858427, "grad_norm": 0.33317404985427856, "learning_rate": 2.1446261847880073e-06, "loss": 0.1305, "step": 5135 }, { "epoch": 0.7156691980770571, "grad_norm": 0.1388518065214157, "learning_rate": 2.1426918229175175e-06, "loss": 0.0958, "step": 5136 }, { "epoch": 0.7158085417682715, "grad_norm": 0.2049645632505417, "learning_rate": 2.140758095875671e-06, "loss": 0.1635, "step": 5137 }, { "epoch": 0.7159478854594858, "grad_norm": 0.3736967146396637, "learning_rate": 2.1388250040921007e-06, "loss": 0.1743, "step": 5138 }, { "epoch": 0.7160872291507002, "grad_norm": 0.16414514183998108, "learning_rate": 2.136892547996292e-06, "loss": 0.1321, "step": 5139 }, { "epoch": 0.7162265728419146, "grad_norm": 0.1336103081703186, "learning_rate": 2.1349607280175918e-06, "loss": 0.1245, "step": 5140 }, { "epoch": 0.716365916533129, "grad_norm": 0.21940772235393524, "learning_rate": 2.133029544585207e-06, "loss": 0.1189, "step": 5141 }, { "epoch": 0.7165052602243434, "grad_norm": 0.18444082140922546, "learning_rate": 2.1310989981282067e-06, "loss": 0.1272, "step": 5142 }, { "epoch": 0.7166446039155577, "grad_norm": 0.18002575635910034, "learning_rate": 2.1291690890755078e-06, "loss": 0.126, "step": 5143 }, { "epoch": 0.7167839476067721, "grad_norm": 0.20152699947357178, "learning_rate": 2.127239817855897e-06, "loss": 0.1418, "step": 5144 }, { "epoch": 0.7169232912979865, "grad_norm": 0.23682145774364471, "learning_rate": 2.1253111848980113e-06, "loss": 0.1517, "step": 5145 }, { "epoch": 0.7170626349892009, "grad_norm": 0.3860624134540558, "learning_rate": 2.1233831906303514e-06, "loss": 0.1409, "step": 5146 }, { "epoch": 0.7172019786804152, "grad_norm": 0.14817385375499725, "learning_rate": 2.121455835481271e-06, "loss": 0.1165, "step": 5147 }, { "epoch": 0.7173413223716296, "grad_norm": 0.1837790608406067, "learning_rate": 2.119529119878985e-06, "loss": 0.1141, "step": 5148 }, { "epoch": 0.717480666062844, "grad_norm": 0.18802882730960846, "learning_rate": 2.1176030442515704e-06, "loss": 0.1487, "step": 5149 }, { "epoch": 0.7176200097540584, "grad_norm": 0.15985511243343353, "learning_rate": 2.115677609026949e-06, "loss": 0.1325, "step": 5150 }, { "epoch": 0.7177593534452728, "grad_norm": 0.1888623684644699, "learning_rate": 2.1137528146329133e-06, "loss": 0.1311, "step": 5151 }, { "epoch": 0.7178986971364871, "grad_norm": 0.3034290075302124, "learning_rate": 2.1118286614971075e-06, "loss": 0.1241, "step": 5152 }, { "epoch": 0.7180380408277015, "grad_norm": 0.6219804286956787, "learning_rate": 2.1099051500470368e-06, "loss": 0.222, "step": 5153 }, { "epoch": 0.7181773845189159, "grad_norm": 0.33686092495918274, "learning_rate": 2.1079822807100585e-06, "loss": 0.1635, "step": 5154 }, { "epoch": 0.7183167282101303, "grad_norm": 0.109054796397686, "learning_rate": 2.1060600539133928e-06, "loss": 0.1107, "step": 5155 }, { "epoch": 0.7184560719013446, "grad_norm": 0.18596604466438293, "learning_rate": 2.104138470084114e-06, "loss": 0.1232, "step": 5156 }, { "epoch": 0.718595415592559, "grad_norm": 0.13715167343616486, "learning_rate": 2.1022175296491516e-06, "loss": 0.146, "step": 5157 }, { "epoch": 0.7187347592837734, "grad_norm": 0.25089776515960693, "learning_rate": 2.100297233035296e-06, "loss": 0.1616, "step": 5158 }, { "epoch": 0.7188741029749878, "grad_norm": 0.16906492412090302, "learning_rate": 2.098377580669196e-06, "loss": 0.1097, "step": 5159 }, { "epoch": 0.7190134466662021, "grad_norm": 0.20198924839496613, "learning_rate": 2.096458572977352e-06, "loss": 0.1409, "step": 5160 }, { "epoch": 0.7191527903574165, "grad_norm": 0.11186092346906662, "learning_rate": 2.0945402103861233e-06, "loss": 0.1222, "step": 5161 }, { "epoch": 0.7192921340486309, "grad_norm": 0.14470696449279785, "learning_rate": 2.0926224933217267e-06, "loss": 0.1305, "step": 5162 }, { "epoch": 0.7194314777398453, "grad_norm": 0.1716194748878479, "learning_rate": 2.0907054222102367e-06, "loss": 0.1551, "step": 5163 }, { "epoch": 0.7195708214310597, "grad_norm": 0.3098990023136139, "learning_rate": 2.0887889974775805e-06, "loss": 0.1574, "step": 5164 }, { "epoch": 0.719710165122274, "grad_norm": 0.1316365897655487, "learning_rate": 2.0868732195495463e-06, "loss": 0.1138, "step": 5165 }, { "epoch": 0.7198495088134884, "grad_norm": 0.11947789788246155, "learning_rate": 2.0849580888517733e-06, "loss": 0.1174, "step": 5166 }, { "epoch": 0.7199888525047029, "grad_norm": 0.1361876279115677, "learning_rate": 2.083043605809763e-06, "loss": 0.1192, "step": 5167 }, { "epoch": 0.7201281961959173, "grad_norm": 0.1554059088230133, "learning_rate": 2.081129770848867e-06, "loss": 0.1207, "step": 5168 }, { "epoch": 0.7202675398871317, "grad_norm": 0.18643341958522797, "learning_rate": 2.0792165843942963e-06, "loss": 0.1187, "step": 5169 }, { "epoch": 0.720406883578346, "grad_norm": 0.18049298226833344, "learning_rate": 2.0773040468711205e-06, "loss": 0.1632, "step": 5170 }, { "epoch": 0.7205462272695604, "grad_norm": 0.20963436365127563, "learning_rate": 2.0753921587042586e-06, "loss": 0.1363, "step": 5171 }, { "epoch": 0.7206855709607748, "grad_norm": 0.08771895617246628, "learning_rate": 2.0734809203184873e-06, "loss": 0.106, "step": 5172 }, { "epoch": 0.7208249146519892, "grad_norm": 0.18081550300121307, "learning_rate": 2.071570332138442e-06, "loss": 0.1584, "step": 5173 }, { "epoch": 0.7209642583432035, "grad_norm": 0.1438969075679779, "learning_rate": 2.0696603945886133e-06, "loss": 0.1115, "step": 5174 }, { "epoch": 0.7211036020344179, "grad_norm": 0.24065080285072327, "learning_rate": 2.067751108093343e-06, "loss": 0.1375, "step": 5175 }, { "epoch": 0.7212429457256323, "grad_norm": 0.2170238494873047, "learning_rate": 2.0658424730768335e-06, "loss": 0.1236, "step": 5176 }, { "epoch": 0.7213822894168467, "grad_norm": 0.20731709897518158, "learning_rate": 2.063934489963137e-06, "loss": 0.1416, "step": 5177 }, { "epoch": 0.7215216331080611, "grad_norm": 0.15726622939109802, "learning_rate": 2.0620271591761666e-06, "loss": 0.1492, "step": 5178 }, { "epoch": 0.7216609767992754, "grad_norm": 0.2058267444372177, "learning_rate": 2.0601204811396847e-06, "loss": 0.159, "step": 5179 }, { "epoch": 0.7218003204904898, "grad_norm": 0.13429780304431915, "learning_rate": 2.058214456277314e-06, "loss": 0.1353, "step": 5180 }, { "epoch": 0.7219396641817042, "grad_norm": 0.15004952251911163, "learning_rate": 2.0563090850125318e-06, "loss": 0.1157, "step": 5181 }, { "epoch": 0.7220790078729186, "grad_norm": 0.18730731308460236, "learning_rate": 2.054404367768662e-06, "loss": 0.1405, "step": 5182 }, { "epoch": 0.7222183515641329, "grad_norm": 0.14426058530807495, "learning_rate": 2.0525003049688923e-06, "loss": 0.1285, "step": 5183 }, { "epoch": 0.7223576952553473, "grad_norm": 0.12344422191381454, "learning_rate": 2.0505968970362627e-06, "loss": 0.1128, "step": 5184 }, { "epoch": 0.7224970389465617, "grad_norm": 0.19718274474143982, "learning_rate": 2.048694144393668e-06, "loss": 0.1196, "step": 5185 }, { "epoch": 0.7226363826377761, "grad_norm": 0.273470401763916, "learning_rate": 2.0467920474638552e-06, "loss": 0.1499, "step": 5186 }, { "epoch": 0.7227757263289905, "grad_norm": 0.16104142367839813, "learning_rate": 2.0448906066694247e-06, "loss": 0.1191, "step": 5187 }, { "epoch": 0.7229150700202048, "grad_norm": 0.12653814256191254, "learning_rate": 2.042989822432837e-06, "loss": 0.1054, "step": 5188 }, { "epoch": 0.7230544137114192, "grad_norm": 0.19577695429325104, "learning_rate": 2.041089695176399e-06, "loss": 0.1342, "step": 5189 }, { "epoch": 0.7231937574026336, "grad_norm": 0.21549363434314728, "learning_rate": 2.0391902253222777e-06, "loss": 0.1283, "step": 5190 }, { "epoch": 0.723333101093848, "grad_norm": 0.20174938440322876, "learning_rate": 2.037291413292494e-06, "loss": 0.1159, "step": 5191 }, { "epoch": 0.7234724447850623, "grad_norm": 0.29844990372657776, "learning_rate": 2.035393259508919e-06, "loss": 0.1406, "step": 5192 }, { "epoch": 0.7236117884762767, "grad_norm": 0.22916479408740997, "learning_rate": 2.0334957643932757e-06, "loss": 0.1802, "step": 5193 }, { "epoch": 0.7237511321674911, "grad_norm": 0.18169908225536346, "learning_rate": 2.0315989283671474e-06, "loss": 0.1398, "step": 5194 }, { "epoch": 0.7238904758587055, "grad_norm": 0.19215574860572815, "learning_rate": 2.0297027518519696e-06, "loss": 0.109, "step": 5195 }, { "epoch": 0.7240298195499199, "grad_norm": 0.17812536656856537, "learning_rate": 2.0278072352690253e-06, "loss": 0.1272, "step": 5196 }, { "epoch": 0.7241691632411342, "grad_norm": 0.21466392278671265, "learning_rate": 2.0259123790394587e-06, "loss": 0.133, "step": 5197 }, { "epoch": 0.7243085069323486, "grad_norm": 0.20686864852905273, "learning_rate": 2.0240181835842605e-06, "loss": 0.1167, "step": 5198 }, { "epoch": 0.724447850623563, "grad_norm": 0.2586985230445862, "learning_rate": 2.0221246493242802e-06, "loss": 0.1583, "step": 5199 }, { "epoch": 0.7245871943147774, "grad_norm": 0.09207740426063538, "learning_rate": 2.0202317766802155e-06, "loss": 0.1015, "step": 5200 }, { "epoch": 0.7247265380059917, "grad_norm": 0.14974351227283478, "learning_rate": 2.0183395660726208e-06, "loss": 0.1187, "step": 5201 }, { "epoch": 0.7248658816972061, "grad_norm": 0.14093519747257233, "learning_rate": 2.0164480179219038e-06, "loss": 0.1265, "step": 5202 }, { "epoch": 0.7250052253884205, "grad_norm": 0.14925608038902283, "learning_rate": 2.014557132648321e-06, "loss": 0.1268, "step": 5203 }, { "epoch": 0.7251445690796349, "grad_norm": 0.2568490207195282, "learning_rate": 2.0126669106719833e-06, "loss": 0.1657, "step": 5204 }, { "epoch": 0.7252839127708492, "grad_norm": 0.14274151623249054, "learning_rate": 2.010777352412856e-06, "loss": 0.1269, "step": 5205 }, { "epoch": 0.7254232564620636, "grad_norm": 0.19700230658054352, "learning_rate": 2.0088884582907574e-06, "loss": 0.1188, "step": 5206 }, { "epoch": 0.7255626001532781, "grad_norm": 0.19330473244190216, "learning_rate": 2.0070002287253554e-06, "loss": 0.1316, "step": 5207 }, { "epoch": 0.7257019438444925, "grad_norm": 0.19282415509223938, "learning_rate": 2.0051126641361697e-06, "loss": 0.1446, "step": 5208 }, { "epoch": 0.7258412875357069, "grad_norm": 0.15094803273677826, "learning_rate": 2.0032257649425753e-06, "loss": 0.1295, "step": 5209 }, { "epoch": 0.7259806312269212, "grad_norm": 0.1371152251958847, "learning_rate": 2.0013395315637997e-06, "loss": 0.1178, "step": 5210 }, { "epoch": 0.7261199749181356, "grad_norm": 0.13072548806667328, "learning_rate": 1.9994539644189183e-06, "loss": 0.1011, "step": 5211 }, { "epoch": 0.72625931860935, "grad_norm": 0.20630092918872833, "learning_rate": 1.9975690639268623e-06, "loss": 0.1624, "step": 5212 }, { "epoch": 0.7263986623005644, "grad_norm": 0.14417366683483124, "learning_rate": 1.9956848305064156e-06, "loss": 0.1119, "step": 5213 }, { "epoch": 0.7265380059917788, "grad_norm": 0.10989121347665787, "learning_rate": 1.99380126457621e-06, "loss": 0.1026, "step": 5214 }, { "epoch": 0.7266773496829931, "grad_norm": 0.1935224086046219, "learning_rate": 1.9919183665547285e-06, "loss": 0.1469, "step": 5215 }, { "epoch": 0.7268166933742075, "grad_norm": 0.11878102272748947, "learning_rate": 1.9900361368603104e-06, "loss": 0.115, "step": 5216 }, { "epoch": 0.7269560370654219, "grad_norm": 0.15261033177375793, "learning_rate": 1.988154575911146e-06, "loss": 0.1294, "step": 5217 }, { "epoch": 0.7270953807566363, "grad_norm": 0.12031487375497818, "learning_rate": 1.9862736841252734e-06, "loss": 0.1085, "step": 5218 }, { "epoch": 0.7272347244478506, "grad_norm": 0.13159418106079102, "learning_rate": 1.984393461920581e-06, "loss": 0.112, "step": 5219 }, { "epoch": 0.727374068139065, "grad_norm": 0.19760629534721375, "learning_rate": 1.9825139097148166e-06, "loss": 0.1525, "step": 5220 }, { "epoch": 0.7275134118302794, "grad_norm": 0.2217143476009369, "learning_rate": 1.980635027925569e-06, "loss": 0.1436, "step": 5221 }, { "epoch": 0.7276527555214938, "grad_norm": 0.23811939358711243, "learning_rate": 1.9787568169702848e-06, "loss": 0.1418, "step": 5222 }, { "epoch": 0.7277920992127082, "grad_norm": 0.1389862447977066, "learning_rate": 1.9768792772662616e-06, "loss": 0.1186, "step": 5223 }, { "epoch": 0.7279314429039225, "grad_norm": 0.1737814098596573, "learning_rate": 1.975002409230644e-06, "loss": 0.1103, "step": 5224 }, { "epoch": 0.7280707865951369, "grad_norm": 0.1718387007713318, "learning_rate": 1.9731262132804275e-06, "loss": 0.145, "step": 5225 }, { "epoch": 0.7282101302863513, "grad_norm": 0.1595897674560547, "learning_rate": 1.9712506898324613e-06, "loss": 0.1163, "step": 5226 }, { "epoch": 0.7283494739775657, "grad_norm": 0.10834503173828125, "learning_rate": 1.969375839303447e-06, "loss": 0.114, "step": 5227 }, { "epoch": 0.72848881766878, "grad_norm": 0.18484655022621155, "learning_rate": 1.967501662109928e-06, "loss": 0.1494, "step": 5228 }, { "epoch": 0.7286281613599944, "grad_norm": 0.14567215740680695, "learning_rate": 1.965628158668309e-06, "loss": 0.1185, "step": 5229 }, { "epoch": 0.7287675050512088, "grad_norm": 0.09589780867099762, "learning_rate": 1.9637553293948353e-06, "loss": 0.1056, "step": 5230 }, { "epoch": 0.7289068487424232, "grad_norm": 0.22028076648712158, "learning_rate": 1.9618831747056106e-06, "loss": 0.1296, "step": 5231 }, { "epoch": 0.7290461924336376, "grad_norm": 0.15107698738574982, "learning_rate": 1.960011695016581e-06, "loss": 0.1081, "step": 5232 }, { "epoch": 0.7291855361248519, "grad_norm": 0.13483089208602905, "learning_rate": 1.958140890743549e-06, "loss": 0.1349, "step": 5233 }, { "epoch": 0.7293248798160663, "grad_norm": 0.20601750910282135, "learning_rate": 1.956270762302166e-06, "loss": 0.1151, "step": 5234 }, { "epoch": 0.7294642235072807, "grad_norm": 0.14657068252563477, "learning_rate": 1.9544013101079295e-06, "loss": 0.1249, "step": 5235 }, { "epoch": 0.7296035671984951, "grad_norm": 0.19512112438678741, "learning_rate": 1.9525325345761887e-06, "loss": 0.1316, "step": 5236 }, { "epoch": 0.7297429108897094, "grad_norm": 0.19472476840019226, "learning_rate": 1.950664436122144e-06, "loss": 0.1304, "step": 5237 }, { "epoch": 0.7298822545809238, "grad_norm": 0.22440707683563232, "learning_rate": 1.948797015160845e-06, "loss": 0.153, "step": 5238 }, { "epoch": 0.7300215982721382, "grad_norm": 0.08728259801864624, "learning_rate": 1.94693027210719e-06, "loss": 0.1027, "step": 5239 }, { "epoch": 0.7301609419633526, "grad_norm": 0.16200686991214752, "learning_rate": 1.945064207375923e-06, "loss": 0.1209, "step": 5240 }, { "epoch": 0.730300285654567, "grad_norm": 0.13191092014312744, "learning_rate": 1.9431988213816444e-06, "loss": 0.1114, "step": 5241 }, { "epoch": 0.7304396293457813, "grad_norm": 0.3068526089191437, "learning_rate": 1.9413341145388013e-06, "loss": 0.1684, "step": 5242 }, { "epoch": 0.7305789730369957, "grad_norm": 0.1097462847828865, "learning_rate": 1.9394700872616856e-06, "loss": 0.1232, "step": 5243 }, { "epoch": 0.7307183167282101, "grad_norm": 0.23690304160118103, "learning_rate": 1.9376067399644456e-06, "loss": 0.1667, "step": 5244 }, { "epoch": 0.7308576604194245, "grad_norm": 0.17103341221809387, "learning_rate": 1.93574407306107e-06, "loss": 0.1285, "step": 5245 }, { "epoch": 0.7309970041106388, "grad_norm": 0.31322458386421204, "learning_rate": 1.9338820869654056e-06, "loss": 0.1518, "step": 5246 }, { "epoch": 0.7311363478018533, "grad_norm": 0.26326414942741394, "learning_rate": 1.9320207820911387e-06, "loss": 0.1298, "step": 5247 }, { "epoch": 0.7312756914930677, "grad_norm": 0.18538185954093933, "learning_rate": 1.930160158851811e-06, "loss": 0.1564, "step": 5248 }, { "epoch": 0.7314150351842821, "grad_norm": 0.1462155431509018, "learning_rate": 1.9283002176608116e-06, "loss": 0.1009, "step": 5249 }, { "epoch": 0.7315543788754965, "grad_norm": 0.20228922367095947, "learning_rate": 1.9264409589313767e-06, "loss": 0.1762, "step": 5250 }, { "epoch": 0.7316937225667108, "grad_norm": 0.17265112698078156, "learning_rate": 1.9245823830765874e-06, "loss": 0.1536, "step": 5251 }, { "epoch": 0.7318330662579252, "grad_norm": 0.2090367078781128, "learning_rate": 1.92272449050938e-06, "loss": 0.1673, "step": 5252 }, { "epoch": 0.7319724099491396, "grad_norm": 0.20764824748039246, "learning_rate": 1.920867281642538e-06, "loss": 0.1402, "step": 5253 }, { "epoch": 0.732111753640354, "grad_norm": 0.13617505133152008, "learning_rate": 1.919010756888685e-06, "loss": 0.1426, "step": 5254 }, { "epoch": 0.7322510973315683, "grad_norm": 0.17997756600379944, "learning_rate": 1.917154916660304e-06, "loss": 0.1493, "step": 5255 }, { "epoch": 0.7323904410227827, "grad_norm": 0.11378661543130875, "learning_rate": 1.9152997613697184e-06, "loss": 0.1066, "step": 5256 }, { "epoch": 0.7325297847139971, "grad_norm": 0.1098223328590393, "learning_rate": 1.913445291429099e-06, "loss": 0.1089, "step": 5257 }, { "epoch": 0.7326691284052115, "grad_norm": 0.13241134583950043, "learning_rate": 1.9115915072504683e-06, "loss": 0.1176, "step": 5258 }, { "epoch": 0.7328084720964259, "grad_norm": 0.2155422866344452, "learning_rate": 1.909738409245697e-06, "loss": 0.1434, "step": 5259 }, { "epoch": 0.7329478157876402, "grad_norm": 0.1555972695350647, "learning_rate": 1.9078859978264995e-06, "loss": 0.1448, "step": 5260 }, { "epoch": 0.7330871594788546, "grad_norm": 0.16227751970291138, "learning_rate": 1.9060342734044374e-06, "loss": 0.1067, "step": 5261 }, { "epoch": 0.733226503170069, "grad_norm": 0.1621219962835312, "learning_rate": 1.904183236390923e-06, "loss": 0.1159, "step": 5262 }, { "epoch": 0.7333658468612834, "grad_norm": 0.13619545102119446, "learning_rate": 1.9023328871972163e-06, "loss": 0.1236, "step": 5263 }, { "epoch": 0.7335051905524977, "grad_norm": 0.1272849291563034, "learning_rate": 1.9004832262344197e-06, "loss": 0.1373, "step": 5264 }, { "epoch": 0.7336445342437121, "grad_norm": 0.16765639185905457, "learning_rate": 1.8986342539134873e-06, "loss": 0.1296, "step": 5265 }, { "epoch": 0.7337838779349265, "grad_norm": 0.15447211265563965, "learning_rate": 1.8967859706452196e-06, "loss": 0.1194, "step": 5266 }, { "epoch": 0.7339232216261409, "grad_norm": 0.13260915875434875, "learning_rate": 1.894938376840262e-06, "loss": 0.1033, "step": 5267 }, { "epoch": 0.7340625653173553, "grad_norm": 0.1669398844242096, "learning_rate": 1.8930914729091055e-06, "loss": 0.1198, "step": 5268 }, { "epoch": 0.7342019090085696, "grad_norm": 0.1811884194612503, "learning_rate": 1.8912452592620916e-06, "loss": 0.1306, "step": 5269 }, { "epoch": 0.734341252699784, "grad_norm": 0.17928683757781982, "learning_rate": 1.8893997363094086e-06, "loss": 0.1503, "step": 5270 }, { "epoch": 0.7344805963909984, "grad_norm": 0.11129637062549591, "learning_rate": 1.8875549044610886e-06, "loss": 0.1064, "step": 5271 }, { "epoch": 0.7346199400822128, "grad_norm": 0.1937808394432068, "learning_rate": 1.8857107641270084e-06, "loss": 0.1892, "step": 5272 }, { "epoch": 0.7347592837734271, "grad_norm": 0.10604139417409897, "learning_rate": 1.8838673157168956e-06, "loss": 0.1058, "step": 5273 }, { "epoch": 0.7348986274646415, "grad_norm": 0.13713985681533813, "learning_rate": 1.8820245596403253e-06, "loss": 0.1323, "step": 5274 }, { "epoch": 0.7350379711558559, "grad_norm": 0.12732957303524017, "learning_rate": 1.8801824963067105e-06, "loss": 0.109, "step": 5275 }, { "epoch": 0.7351773148470703, "grad_norm": 0.16756780445575714, "learning_rate": 1.8783411261253208e-06, "loss": 0.1379, "step": 5276 }, { "epoch": 0.7353166585382847, "grad_norm": 0.18273523449897766, "learning_rate": 1.8765004495052623e-06, "loss": 0.1293, "step": 5277 }, { "epoch": 0.735456002229499, "grad_norm": 0.15727843344211578, "learning_rate": 1.8746604668554952e-06, "loss": 0.1194, "step": 5278 }, { "epoch": 0.7355953459207134, "grad_norm": 0.13655120134353638, "learning_rate": 1.8728211785848176e-06, "loss": 0.1227, "step": 5279 }, { "epoch": 0.7357346896119278, "grad_norm": 0.16396968066692352, "learning_rate": 1.8709825851018798e-06, "loss": 0.1252, "step": 5280 }, { "epoch": 0.7358740333031422, "grad_norm": 0.1756107211112976, "learning_rate": 1.869144686815178e-06, "loss": 0.1306, "step": 5281 }, { "epoch": 0.7360133769943565, "grad_norm": 0.1554456353187561, "learning_rate": 1.8673074841330447e-06, "loss": 0.1202, "step": 5282 }, { "epoch": 0.7361527206855709, "grad_norm": 0.19218239188194275, "learning_rate": 1.8654709774636676e-06, "loss": 0.1558, "step": 5283 }, { "epoch": 0.7362920643767853, "grad_norm": 0.12271831929683685, "learning_rate": 1.8636351672150771e-06, "loss": 0.1141, "step": 5284 }, { "epoch": 0.7364314080679997, "grad_norm": 0.10000938922166824, "learning_rate": 1.8618000537951496e-06, "loss": 0.1056, "step": 5285 }, { "epoch": 0.736570751759214, "grad_norm": 0.14966952800750732, "learning_rate": 1.8599656376116026e-06, "loss": 0.1179, "step": 5286 }, { "epoch": 0.7367100954504285, "grad_norm": 0.2835557460784912, "learning_rate": 1.8581319190720038e-06, "loss": 0.1865, "step": 5287 }, { "epoch": 0.7368494391416429, "grad_norm": 0.1566094160079956, "learning_rate": 1.8562988985837632e-06, "loss": 0.137, "step": 5288 }, { "epoch": 0.7369887828328573, "grad_norm": 0.12482410669326782, "learning_rate": 1.854466576554133e-06, "loss": 0.1111, "step": 5289 }, { "epoch": 0.7371281265240717, "grad_norm": 0.10706465691328049, "learning_rate": 1.8526349533902161e-06, "loss": 0.1086, "step": 5290 }, { "epoch": 0.737267470215286, "grad_norm": 0.1325620859861374, "learning_rate": 1.8508040294989588e-06, "loss": 0.1302, "step": 5291 }, { "epoch": 0.7374068139065004, "grad_norm": 0.33052149415016174, "learning_rate": 1.8489738052871486e-06, "loss": 0.1254, "step": 5292 }, { "epoch": 0.7375461575977148, "grad_norm": 0.20755864679813385, "learning_rate": 1.8471442811614177e-06, "loss": 0.1645, "step": 5293 }, { "epoch": 0.7376855012889292, "grad_norm": 0.10367026925086975, "learning_rate": 1.8453154575282472e-06, "loss": 0.1038, "step": 5294 }, { "epoch": 0.7378248449801436, "grad_norm": 0.23873309791088104, "learning_rate": 1.8434873347939608e-06, "loss": 0.1252, "step": 5295 }, { "epoch": 0.7379641886713579, "grad_norm": 0.27762922644615173, "learning_rate": 1.8416599133647223e-06, "loss": 0.1415, "step": 5296 }, { "epoch": 0.7381035323625723, "grad_norm": 0.29298630356788635, "learning_rate": 1.839833193646547e-06, "loss": 0.1395, "step": 5297 }, { "epoch": 0.7382428760537867, "grad_norm": 0.17569565773010254, "learning_rate": 1.8380071760452862e-06, "loss": 0.1156, "step": 5298 }, { "epoch": 0.7383822197450011, "grad_norm": 0.15860380232334137, "learning_rate": 1.8361818609666433e-06, "loss": 0.1258, "step": 5299 }, { "epoch": 0.7385215634362154, "grad_norm": 0.15236671268939972, "learning_rate": 1.8343572488161576e-06, "loss": 0.1228, "step": 5300 }, { "epoch": 0.7386609071274298, "grad_norm": 0.31366991996765137, "learning_rate": 1.832533339999219e-06, "loss": 0.1578, "step": 5301 }, { "epoch": 0.7388002508186442, "grad_norm": 0.1300639510154724, "learning_rate": 1.8307101349210588e-06, "loss": 0.1366, "step": 5302 }, { "epoch": 0.7389395945098586, "grad_norm": 0.30405351519584656, "learning_rate": 1.8288876339867511e-06, "loss": 0.1303, "step": 5303 }, { "epoch": 0.739078938201073, "grad_norm": 0.21340234577655792, "learning_rate": 1.8270658376012112e-06, "loss": 0.1576, "step": 5304 }, { "epoch": 0.7392182818922873, "grad_norm": 0.15430408716201782, "learning_rate": 1.8252447461692029e-06, "loss": 0.1266, "step": 5305 }, { "epoch": 0.7393576255835017, "grad_norm": 0.2532825469970703, "learning_rate": 1.8234243600953334e-06, "loss": 0.1514, "step": 5306 }, { "epoch": 0.7394969692747161, "grad_norm": 0.15893538296222687, "learning_rate": 1.8216046797840465e-06, "loss": 0.1195, "step": 5307 }, { "epoch": 0.7396363129659305, "grad_norm": 0.13770243525505066, "learning_rate": 1.8197857056396372e-06, "loss": 0.1141, "step": 5308 }, { "epoch": 0.7397756566571448, "grad_norm": 0.13188527524471283, "learning_rate": 1.8179674380662372e-06, "loss": 0.1549, "step": 5309 }, { "epoch": 0.7399150003483592, "grad_norm": 0.15184210240840912, "learning_rate": 1.8161498774678271e-06, "loss": 0.1691, "step": 5310 }, { "epoch": 0.7400543440395736, "grad_norm": 0.18442916870117188, "learning_rate": 1.8143330242482244e-06, "loss": 0.1388, "step": 5311 }, { "epoch": 0.740193687730788, "grad_norm": 0.15550510585308075, "learning_rate": 1.8125168788110932e-06, "loss": 0.1233, "step": 5312 }, { "epoch": 0.7403330314220024, "grad_norm": 0.21966581046581268, "learning_rate": 1.8107014415599416e-06, "loss": 0.1122, "step": 5313 }, { "epoch": 0.7404723751132167, "grad_norm": 0.1288537085056305, "learning_rate": 1.808886712898117e-06, "loss": 0.1209, "step": 5314 }, { "epoch": 0.7406117188044311, "grad_norm": 0.21899063885211945, "learning_rate": 1.8070726932288086e-06, "loss": 0.1843, "step": 5315 }, { "epoch": 0.7407510624956455, "grad_norm": 0.08713055402040482, "learning_rate": 1.8052593829550525e-06, "loss": 0.0876, "step": 5316 }, { "epoch": 0.7408904061868599, "grad_norm": 0.1531154066324234, "learning_rate": 1.8034467824797252e-06, "loss": 0.1356, "step": 5317 }, { "epoch": 0.7410297498780742, "grad_norm": 0.10886842757463455, "learning_rate": 1.8016348922055448e-06, "loss": 0.1275, "step": 5318 }, { "epoch": 0.7411690935692886, "grad_norm": 0.14444610476493835, "learning_rate": 1.7998237125350698e-06, "loss": 0.1214, "step": 5319 }, { "epoch": 0.741308437260503, "grad_norm": 0.11871476471424103, "learning_rate": 1.7980132438707059e-06, "loss": 0.1167, "step": 5320 }, { "epoch": 0.7414477809517174, "grad_norm": 0.2420634776353836, "learning_rate": 1.7962034866146954e-06, "loss": 0.1649, "step": 5321 }, { "epoch": 0.7415871246429317, "grad_norm": 0.16496023535728455, "learning_rate": 1.794394441169126e-06, "loss": 0.1337, "step": 5322 }, { "epoch": 0.7417264683341461, "grad_norm": 0.1797296702861786, "learning_rate": 1.7925861079359268e-06, "loss": 0.1475, "step": 5323 }, { "epoch": 0.7418658120253605, "grad_norm": 0.20844197273254395, "learning_rate": 1.790778487316871e-06, "loss": 0.1395, "step": 5324 }, { "epoch": 0.7420051557165749, "grad_norm": 0.19362054765224457, "learning_rate": 1.7889715797135643e-06, "loss": 0.1469, "step": 5325 }, { "epoch": 0.7421444994077893, "grad_norm": 0.22397658228874207, "learning_rate": 1.7871653855274634e-06, "loss": 0.1216, "step": 5326 }, { "epoch": 0.7422838430990036, "grad_norm": 0.14709945023059845, "learning_rate": 1.7853599051598658e-06, "loss": 0.1141, "step": 5327 }, { "epoch": 0.7424231867902181, "grad_norm": 0.13379022479057312, "learning_rate": 1.7835551390119033e-06, "loss": 0.1418, "step": 5328 }, { "epoch": 0.7425625304814325, "grad_norm": 0.1384703814983368, "learning_rate": 1.7817510874845585e-06, "loss": 0.1263, "step": 5329 }, { "epoch": 0.7427018741726469, "grad_norm": 0.13741295039653778, "learning_rate": 1.779947750978646e-06, "loss": 0.1156, "step": 5330 }, { "epoch": 0.7428412178638613, "grad_norm": 0.18642179667949677, "learning_rate": 1.7781451298948305e-06, "loss": 0.1401, "step": 5331 }, { "epoch": 0.7429805615550756, "grad_norm": 0.160249263048172, "learning_rate": 1.7763432246336087e-06, "loss": 0.1502, "step": 5332 }, { "epoch": 0.74311990524629, "grad_norm": 0.22856663167476654, "learning_rate": 1.7745420355953253e-06, "loss": 0.1412, "step": 5333 }, { "epoch": 0.7432592489375044, "grad_norm": 0.11024852842092514, "learning_rate": 1.7727415631801648e-06, "loss": 0.1055, "step": 5334 }, { "epoch": 0.7433985926287188, "grad_norm": 0.19701334834098816, "learning_rate": 1.7709418077881495e-06, "loss": 0.1388, "step": 5335 }, { "epoch": 0.7435379363199331, "grad_norm": 0.15369382500648499, "learning_rate": 1.7691427698191422e-06, "loss": 0.1389, "step": 5336 }, { "epoch": 0.7436772800111475, "grad_norm": 0.2301529049873352, "learning_rate": 1.7673444496728493e-06, "loss": 0.1466, "step": 5337 }, { "epoch": 0.7438166237023619, "grad_norm": 0.2148313820362091, "learning_rate": 1.7655468477488191e-06, "loss": 0.1378, "step": 5338 }, { "epoch": 0.7439559673935763, "grad_norm": 0.23332564532756805, "learning_rate": 1.763749964446435e-06, "loss": 0.1353, "step": 5339 }, { "epoch": 0.7440953110847907, "grad_norm": 0.13276778161525726, "learning_rate": 1.7619538001649228e-06, "loss": 0.1235, "step": 5340 }, { "epoch": 0.744234654776005, "grad_norm": 0.21005448698997498, "learning_rate": 1.7601583553033502e-06, "loss": 0.1418, "step": 5341 }, { "epoch": 0.7443739984672194, "grad_norm": 0.16239894926548004, "learning_rate": 1.7583636302606254e-06, "loss": 0.1254, "step": 5342 }, { "epoch": 0.7445133421584338, "grad_norm": 0.1956779509782791, "learning_rate": 1.756569625435493e-06, "loss": 0.1537, "step": 5343 }, { "epoch": 0.7446526858496482, "grad_norm": 0.28411251306533813, "learning_rate": 1.7547763412265412e-06, "loss": 0.1565, "step": 5344 }, { "epoch": 0.7447920295408625, "grad_norm": 0.2728615403175354, "learning_rate": 1.7529837780321979e-06, "loss": 0.1734, "step": 5345 }, { "epoch": 0.7449313732320769, "grad_norm": 0.13649436831474304, "learning_rate": 1.751191936250729e-06, "loss": 0.1057, "step": 5346 }, { "epoch": 0.7450707169232913, "grad_norm": 0.2295604646205902, "learning_rate": 1.7494008162802378e-06, "loss": 0.1439, "step": 5347 }, { "epoch": 0.7452100606145057, "grad_norm": 0.19355928897857666, "learning_rate": 1.7476104185186737e-06, "loss": 0.1561, "step": 5348 }, { "epoch": 0.74534940430572, "grad_norm": 0.15198631584644318, "learning_rate": 1.7458207433638225e-06, "loss": 0.1264, "step": 5349 }, { "epoch": 0.7454887479969344, "grad_norm": 0.1636957824230194, "learning_rate": 1.7440317912133076e-06, "loss": 0.1429, "step": 5350 }, { "epoch": 0.7456280916881488, "grad_norm": 0.20962801575660706, "learning_rate": 1.7422435624645928e-06, "loss": 0.1396, "step": 5351 }, { "epoch": 0.7457674353793632, "grad_norm": 0.12652403116226196, "learning_rate": 1.7404560575149821e-06, "loss": 0.131, "step": 5352 }, { "epoch": 0.7459067790705776, "grad_norm": 0.10913132131099701, "learning_rate": 1.7386692767616204e-06, "loss": 0.1109, "step": 5353 }, { "epoch": 0.7460461227617919, "grad_norm": 0.15830671787261963, "learning_rate": 1.7368832206014863e-06, "loss": 0.1381, "step": 5354 }, { "epoch": 0.7461854664530063, "grad_norm": 0.16368786990642548, "learning_rate": 1.735097889431404e-06, "loss": 0.1178, "step": 5355 }, { "epoch": 0.7463248101442207, "grad_norm": 0.18063689768314362, "learning_rate": 1.733313283648032e-06, "loss": 0.1416, "step": 5356 }, { "epoch": 0.7464641538354351, "grad_norm": 0.13716696202754974, "learning_rate": 1.7315294036478664e-06, "loss": 0.1325, "step": 5357 }, { "epoch": 0.7466034975266495, "grad_norm": 0.1745259165763855, "learning_rate": 1.7297462498272476e-06, "loss": 0.1457, "step": 5358 }, { "epoch": 0.7467428412178638, "grad_norm": 0.17311376333236694, "learning_rate": 1.727963822582352e-06, "loss": 0.1425, "step": 5359 }, { "epoch": 0.7468821849090782, "grad_norm": 0.2516069710254669, "learning_rate": 1.7261821223091918e-06, "loss": 0.1384, "step": 5360 }, { "epoch": 0.7470215286002926, "grad_norm": 0.26773500442504883, "learning_rate": 1.7244011494036228e-06, "loss": 0.1649, "step": 5361 }, { "epoch": 0.747160872291507, "grad_norm": 0.19362419843673706, "learning_rate": 1.722620904261334e-06, "loss": 0.1398, "step": 5362 }, { "epoch": 0.7473002159827213, "grad_norm": 0.2481040060520172, "learning_rate": 1.720841387277858e-06, "loss": 0.1653, "step": 5363 }, { "epoch": 0.7474395596739357, "grad_norm": 0.09996923059225082, "learning_rate": 1.7190625988485593e-06, "loss": 0.0973, "step": 5364 }, { "epoch": 0.7475789033651501, "grad_norm": 0.19012443721294403, "learning_rate": 1.7172845393686465e-06, "loss": 0.1573, "step": 5365 }, { "epoch": 0.7477182470563645, "grad_norm": 0.13368390500545502, "learning_rate": 1.7155072092331648e-06, "loss": 0.1048, "step": 5366 }, { "epoch": 0.7478575907475788, "grad_norm": 0.2840670347213745, "learning_rate": 1.7137306088369948e-06, "loss": 0.1413, "step": 5367 }, { "epoch": 0.7479969344387933, "grad_norm": 0.21259689331054688, "learning_rate": 1.7119547385748552e-06, "loss": 0.1896, "step": 5368 }, { "epoch": 0.7481362781300077, "grad_norm": 0.20934845507144928, "learning_rate": 1.7101795988413056e-06, "loss": 0.156, "step": 5369 }, { "epoch": 0.7482756218212221, "grad_norm": 0.10642843693494797, "learning_rate": 1.708405190030743e-06, "loss": 0.1186, "step": 5370 }, { "epoch": 0.7484149655124365, "grad_norm": 0.20507106184959412, "learning_rate": 1.7066315125373984e-06, "loss": 0.1414, "step": 5371 }, { "epoch": 0.7485543092036508, "grad_norm": 0.14658664166927338, "learning_rate": 1.7048585667553414e-06, "loss": 0.1207, "step": 5372 }, { "epoch": 0.7486936528948652, "grad_norm": 0.11651813983917236, "learning_rate": 1.7030863530784814e-06, "loss": 0.1351, "step": 5373 }, { "epoch": 0.7488329965860796, "grad_norm": 0.11077657341957092, "learning_rate": 1.7013148719005652e-06, "loss": 0.1172, "step": 5374 }, { "epoch": 0.748972340277294, "grad_norm": 0.12053507566452026, "learning_rate": 1.6995441236151732e-06, "loss": 0.1181, "step": 5375 }, { "epoch": 0.7491116839685084, "grad_norm": 0.1591937392950058, "learning_rate": 1.6977741086157273e-06, "loss": 0.1345, "step": 5376 }, { "epoch": 0.7492510276597227, "grad_norm": 0.11071108281612396, "learning_rate": 1.6960048272954821e-06, "loss": 0.1067, "step": 5377 }, { "epoch": 0.7493903713509371, "grad_norm": 0.1375793069601059, "learning_rate": 1.6942362800475343e-06, "loss": 0.1362, "step": 5378 }, { "epoch": 0.7495297150421515, "grad_norm": 0.12148315459489822, "learning_rate": 1.6924684672648117e-06, "loss": 0.1123, "step": 5379 }, { "epoch": 0.7496690587333659, "grad_norm": 0.2364087700843811, "learning_rate": 1.6907013893400838e-06, "loss": 0.1353, "step": 5380 }, { "epoch": 0.7498084024245802, "grad_norm": 0.10200172662734985, "learning_rate": 1.6889350466659554e-06, "loss": 0.1078, "step": 5381 }, { "epoch": 0.7499477461157946, "grad_norm": 0.12417134642601013, "learning_rate": 1.687169439634867e-06, "loss": 0.113, "step": 5382 }, { "epoch": 0.750087089807009, "grad_norm": 0.21336841583251953, "learning_rate": 1.6854045686390947e-06, "loss": 0.1683, "step": 5383 }, { "epoch": 0.7502264334982234, "grad_norm": 0.14847470819950104, "learning_rate": 1.6836404340707535e-06, "loss": 0.1281, "step": 5384 }, { "epoch": 0.7503657771894378, "grad_norm": 0.09873147308826447, "learning_rate": 1.6818770363217957e-06, "loss": 0.1094, "step": 5385 }, { "epoch": 0.7505051208806521, "grad_norm": 0.12324370443820953, "learning_rate": 1.6801143757840043e-06, "loss": 0.1159, "step": 5386 }, { "epoch": 0.7506444645718665, "grad_norm": 0.14411117136478424, "learning_rate": 1.678352452849007e-06, "loss": 0.1057, "step": 5387 }, { "epoch": 0.7507838082630809, "grad_norm": 0.122827909886837, "learning_rate": 1.6765912679082592e-06, "loss": 0.1132, "step": 5388 }, { "epoch": 0.7509231519542953, "grad_norm": 0.18029220402240753, "learning_rate": 1.6748308213530555e-06, "loss": 0.1316, "step": 5389 }, { "epoch": 0.7510624956455096, "grad_norm": 0.1522265523672104, "learning_rate": 1.6730711135745287e-06, "loss": 0.1098, "step": 5390 }, { "epoch": 0.751201839336724, "grad_norm": 0.12663137912750244, "learning_rate": 1.6713121449636471e-06, "loss": 0.1144, "step": 5391 }, { "epoch": 0.7513411830279384, "grad_norm": 0.23892565071582794, "learning_rate": 1.6695539159112112e-06, "loss": 0.1686, "step": 5392 }, { "epoch": 0.7514805267191528, "grad_norm": 0.16768047213554382, "learning_rate": 1.6677964268078584e-06, "loss": 0.1195, "step": 5393 }, { "epoch": 0.7516198704103672, "grad_norm": 0.14002005755901337, "learning_rate": 1.666039678044064e-06, "loss": 0.1146, "step": 5394 }, { "epoch": 0.7517592141015815, "grad_norm": 0.1543574184179306, "learning_rate": 1.6642836700101396e-06, "loss": 0.1299, "step": 5395 }, { "epoch": 0.7518985577927959, "grad_norm": 0.14278925955295563, "learning_rate": 1.6625284030962257e-06, "loss": 0.1188, "step": 5396 }, { "epoch": 0.7520379014840103, "grad_norm": 0.17874260246753693, "learning_rate": 1.6607738776923072e-06, "loss": 0.1443, "step": 5397 }, { "epoch": 0.7521772451752247, "grad_norm": 0.18484650552272797, "learning_rate": 1.659020094188195e-06, "loss": 0.1445, "step": 5398 }, { "epoch": 0.752316588866439, "grad_norm": 0.1353510469198227, "learning_rate": 1.657267052973544e-06, "loss": 0.1068, "step": 5399 }, { "epoch": 0.7524559325576534, "grad_norm": 0.18063195049762726, "learning_rate": 1.6555147544378364e-06, "loss": 0.1281, "step": 5400 }, { "epoch": 0.7525952762488678, "grad_norm": 0.137942373752594, "learning_rate": 1.653763198970394e-06, "loss": 0.1129, "step": 5401 }, { "epoch": 0.7527346199400822, "grad_norm": 0.2208743542432785, "learning_rate": 1.652012386960375e-06, "loss": 0.1532, "step": 5402 }, { "epoch": 0.7528739636312965, "grad_norm": 0.1476643830537796, "learning_rate": 1.6502623187967675e-06, "loss": 0.1137, "step": 5403 }, { "epoch": 0.7530133073225109, "grad_norm": 0.12626419961452484, "learning_rate": 1.6485129948683954e-06, "loss": 0.1177, "step": 5404 }, { "epoch": 0.7531526510137253, "grad_norm": 0.2087121158838272, "learning_rate": 1.64676441556392e-06, "loss": 0.1198, "step": 5405 }, { "epoch": 0.7532919947049397, "grad_norm": 0.24661225080490112, "learning_rate": 1.6450165812718377e-06, "loss": 0.1209, "step": 5406 }, { "epoch": 0.7534313383961541, "grad_norm": 0.1306104063987732, "learning_rate": 1.643269492380473e-06, "loss": 0.1267, "step": 5407 }, { "epoch": 0.7535706820873685, "grad_norm": 0.08393911272287369, "learning_rate": 1.6415231492779942e-06, "loss": 0.0895, "step": 5408 }, { "epoch": 0.7537100257785829, "grad_norm": 0.12291527539491653, "learning_rate": 1.6397775523523946e-06, "loss": 0.1258, "step": 5409 }, { "epoch": 0.7538493694697973, "grad_norm": 0.15679694712162018, "learning_rate": 1.6380327019915088e-06, "loss": 0.137, "step": 5410 }, { "epoch": 0.7539887131610117, "grad_norm": 0.19251340627670288, "learning_rate": 1.6362885985830001e-06, "loss": 0.1329, "step": 5411 }, { "epoch": 0.7541280568522261, "grad_norm": 0.20560957491397858, "learning_rate": 1.6345452425143705e-06, "loss": 0.1008, "step": 5412 }, { "epoch": 0.7542674005434404, "grad_norm": 0.2949577569961548, "learning_rate": 1.6328026341729547e-06, "loss": 0.1488, "step": 5413 }, { "epoch": 0.7544067442346548, "grad_norm": 0.23044714331626892, "learning_rate": 1.6310607739459188e-06, "loss": 0.1425, "step": 5414 }, { "epoch": 0.7545460879258692, "grad_norm": 0.20975281298160553, "learning_rate": 1.6293196622202635e-06, "loss": 0.1592, "step": 5415 }, { "epoch": 0.7546854316170836, "grad_norm": 0.21039840579032898, "learning_rate": 1.6275792993828249e-06, "loss": 0.1398, "step": 5416 }, { "epoch": 0.754824775308298, "grad_norm": 0.17161212861537933, "learning_rate": 1.6258396858202746e-06, "loss": 0.1307, "step": 5417 }, { "epoch": 0.7549641189995123, "grad_norm": 0.09576839208602905, "learning_rate": 1.6241008219191107e-06, "loss": 0.1064, "step": 5418 }, { "epoch": 0.7551034626907267, "grad_norm": 0.18063071370124817, "learning_rate": 1.622362708065673e-06, "loss": 0.1232, "step": 5419 }, { "epoch": 0.7552428063819411, "grad_norm": 0.2302987426519394, "learning_rate": 1.6206253446461278e-06, "loss": 0.1532, "step": 5420 }, { "epoch": 0.7553821500731555, "grad_norm": 0.13596884906291962, "learning_rate": 1.618888732046478e-06, "loss": 0.1159, "step": 5421 }, { "epoch": 0.7555214937643698, "grad_norm": 0.17260749638080597, "learning_rate": 1.6171528706525596e-06, "loss": 0.1576, "step": 5422 }, { "epoch": 0.7556608374555842, "grad_norm": 0.15898005664348602, "learning_rate": 1.6154177608500415e-06, "loss": 0.1152, "step": 5423 }, { "epoch": 0.7558001811467986, "grad_norm": 0.1772860288619995, "learning_rate": 1.6136834030244292e-06, "loss": 0.1665, "step": 5424 }, { "epoch": 0.755939524838013, "grad_norm": 0.2004399597644806, "learning_rate": 1.61194979756105e-06, "loss": 0.1414, "step": 5425 }, { "epoch": 0.7560788685292273, "grad_norm": 0.20416054129600525, "learning_rate": 1.6102169448450756e-06, "loss": 0.1358, "step": 5426 }, { "epoch": 0.7562182122204417, "grad_norm": 0.12932412326335907, "learning_rate": 1.6084848452615076e-06, "loss": 0.1, "step": 5427 }, { "epoch": 0.7563575559116561, "grad_norm": 0.14493805170059204, "learning_rate": 1.6067534991951754e-06, "loss": 0.1266, "step": 5428 }, { "epoch": 0.7564968996028705, "grad_norm": 0.16158096492290497, "learning_rate": 1.6050229070307488e-06, "loss": 0.1442, "step": 5429 }, { "epoch": 0.7566362432940849, "grad_norm": 0.19792601466178894, "learning_rate": 1.6032930691527214e-06, "loss": 0.136, "step": 5430 }, { "epoch": 0.7567755869852992, "grad_norm": 0.19897396862506866, "learning_rate": 1.6015639859454278e-06, "loss": 0.135, "step": 5431 }, { "epoch": 0.7569149306765136, "grad_norm": 0.18298350274562836, "learning_rate": 1.5998356577930274e-06, "loss": 0.1431, "step": 5432 }, { "epoch": 0.757054274367728, "grad_norm": 0.22291156649589539, "learning_rate": 1.5981080850795171e-06, "loss": 0.1705, "step": 5433 }, { "epoch": 0.7571936180589424, "grad_norm": 0.1189654991030693, "learning_rate": 1.5963812681887248e-06, "loss": 0.1131, "step": 5434 }, { "epoch": 0.7573329617501567, "grad_norm": 0.17133626341819763, "learning_rate": 1.5946552075043092e-06, "loss": 0.1276, "step": 5435 }, { "epoch": 0.7574723054413711, "grad_norm": 0.20335203409194946, "learning_rate": 1.592929903409759e-06, "loss": 0.1439, "step": 5436 }, { "epoch": 0.7576116491325855, "grad_norm": 0.20519967377185822, "learning_rate": 1.5912053562884e-06, "loss": 0.1683, "step": 5437 }, { "epoch": 0.7577509928237999, "grad_norm": 0.12369721382856369, "learning_rate": 1.589481566523388e-06, "loss": 0.1232, "step": 5438 }, { "epoch": 0.7578903365150143, "grad_norm": 0.12032272666692734, "learning_rate": 1.587758534497707e-06, "loss": 0.1131, "step": 5439 }, { "epoch": 0.7580296802062286, "grad_norm": 0.09787976741790771, "learning_rate": 1.5860362605941788e-06, "loss": 0.0935, "step": 5440 }, { "epoch": 0.758169023897443, "grad_norm": 0.1394568383693695, "learning_rate": 1.5843147451954493e-06, "loss": 0.1369, "step": 5441 }, { "epoch": 0.7583083675886574, "grad_norm": 0.21653585135936737, "learning_rate": 1.5825939886840036e-06, "loss": 0.1849, "step": 5442 }, { "epoch": 0.7584477112798718, "grad_norm": 0.1782175898551941, "learning_rate": 1.5808739914421512e-06, "loss": 0.1499, "step": 5443 }, { "epoch": 0.7585870549710861, "grad_norm": 0.09984739124774933, "learning_rate": 1.5791547538520386e-06, "loss": 0.1073, "step": 5444 }, { "epoch": 0.7587263986623005, "grad_norm": 0.11012967675924301, "learning_rate": 1.5774362762956414e-06, "loss": 0.1145, "step": 5445 }, { "epoch": 0.7588657423535149, "grad_norm": 0.14340274035930634, "learning_rate": 1.5757185591547653e-06, "loss": 0.1401, "step": 5446 }, { "epoch": 0.7590050860447293, "grad_norm": 0.1011243537068367, "learning_rate": 1.574001602811046e-06, "loss": 0.1115, "step": 5447 }, { "epoch": 0.7591444297359438, "grad_norm": 0.12006405740976334, "learning_rate": 1.5722854076459538e-06, "loss": 0.0958, "step": 5448 }, { "epoch": 0.7592837734271581, "grad_norm": 0.2986306846141815, "learning_rate": 1.57056997404079e-06, "loss": 0.1506, "step": 5449 }, { "epoch": 0.7594231171183725, "grad_norm": 0.13938340544700623, "learning_rate": 1.5688553023766823e-06, "loss": 0.175, "step": 5450 }, { "epoch": 0.7595624608095869, "grad_norm": 0.21014921367168427, "learning_rate": 1.5671413930345902e-06, "loss": 0.1258, "step": 5451 }, { "epoch": 0.7597018045008013, "grad_norm": 0.15400050580501556, "learning_rate": 1.5654282463953074e-06, "loss": 0.1529, "step": 5452 }, { "epoch": 0.7598411481920156, "grad_norm": 0.2027483880519867, "learning_rate": 1.5637158628394572e-06, "loss": 0.1382, "step": 5453 }, { "epoch": 0.75998049188323, "grad_norm": 0.20377182960510254, "learning_rate": 1.5620042427474892e-06, "loss": 0.1482, "step": 5454 }, { "epoch": 0.7601198355744444, "grad_norm": 0.2600347101688385, "learning_rate": 1.5602933864996872e-06, "loss": 0.1586, "step": 5455 }, { "epoch": 0.7602591792656588, "grad_norm": 0.134657084941864, "learning_rate": 1.5585832944761686e-06, "loss": 0.1092, "step": 5456 }, { "epoch": 0.7603985229568732, "grad_norm": 0.16736121475696564, "learning_rate": 1.5568739670568693e-06, "loss": 0.1497, "step": 5457 }, { "epoch": 0.7605378666480875, "grad_norm": 0.10312274098396301, "learning_rate": 1.555165404621567e-06, "loss": 0.1001, "step": 5458 }, { "epoch": 0.7606772103393019, "grad_norm": 0.12816597521305084, "learning_rate": 1.5534576075498664e-06, "loss": 0.1265, "step": 5459 }, { "epoch": 0.7608165540305163, "grad_norm": 0.1391669064760208, "learning_rate": 1.5517505762211982e-06, "loss": 0.1189, "step": 5460 }, { "epoch": 0.7609558977217307, "grad_norm": 0.1965605914592743, "learning_rate": 1.5500443110148283e-06, "loss": 0.1093, "step": 5461 }, { "epoch": 0.761095241412945, "grad_norm": 0.15408430993556976, "learning_rate": 1.5483388123098474e-06, "loss": 0.1507, "step": 5462 }, { "epoch": 0.7612345851041594, "grad_norm": 0.13329452276229858, "learning_rate": 1.546634080485181e-06, "loss": 0.1198, "step": 5463 }, { "epoch": 0.7613739287953738, "grad_norm": 0.1807703673839569, "learning_rate": 1.5449301159195785e-06, "loss": 0.1485, "step": 5464 }, { "epoch": 0.7615132724865882, "grad_norm": 0.12830176949501038, "learning_rate": 1.5432269189916237e-06, "loss": 0.1108, "step": 5465 }, { "epoch": 0.7616526161778026, "grad_norm": 0.1059846356511116, "learning_rate": 1.54152449007973e-06, "loss": 0.1075, "step": 5466 }, { "epoch": 0.7617919598690169, "grad_norm": 0.11179016530513763, "learning_rate": 1.539822829562136e-06, "loss": 0.1128, "step": 5467 }, { "epoch": 0.7619313035602313, "grad_norm": 0.11966919153928757, "learning_rate": 1.5381219378169103e-06, "loss": 0.0983, "step": 5468 }, { "epoch": 0.7620706472514457, "grad_norm": 0.25145405530929565, "learning_rate": 1.5364218152219545e-06, "loss": 0.1749, "step": 5469 }, { "epoch": 0.7622099909426601, "grad_norm": 0.13796952366828918, "learning_rate": 1.5347224621549978e-06, "loss": 0.109, "step": 5470 }, { "epoch": 0.7623493346338744, "grad_norm": 0.3012319803237915, "learning_rate": 1.5330238789935963e-06, "loss": 0.1441, "step": 5471 }, { "epoch": 0.7624886783250888, "grad_norm": 0.09908333420753479, "learning_rate": 1.5313260661151352e-06, "loss": 0.1077, "step": 5472 }, { "epoch": 0.7626280220163032, "grad_norm": 0.11267726868391037, "learning_rate": 1.5296290238968303e-06, "loss": 0.1144, "step": 5473 }, { "epoch": 0.7627673657075176, "grad_norm": 0.1195085421204567, "learning_rate": 1.5279327527157289e-06, "loss": 0.1241, "step": 5474 }, { "epoch": 0.762906709398732, "grad_norm": 0.2031543105840683, "learning_rate": 1.526237252948699e-06, "loss": 0.1318, "step": 5475 }, { "epoch": 0.7630460530899463, "grad_norm": 0.20025812089443207, "learning_rate": 1.5245425249724443e-06, "loss": 0.1597, "step": 5476 }, { "epoch": 0.7631853967811607, "grad_norm": 0.12450528889894485, "learning_rate": 1.5228485691634964e-06, "loss": 0.117, "step": 5477 }, { "epoch": 0.7633247404723751, "grad_norm": 0.18196210265159607, "learning_rate": 1.5211553858982115e-06, "loss": 0.1347, "step": 5478 }, { "epoch": 0.7634640841635895, "grad_norm": 0.13678400218486786, "learning_rate": 1.5194629755527746e-06, "loss": 0.1258, "step": 5479 }, { "epoch": 0.7636034278548038, "grad_norm": 0.15383972227573395, "learning_rate": 1.517771338503203e-06, "loss": 0.1124, "step": 5480 }, { "epoch": 0.7637427715460182, "grad_norm": 0.13049980998039246, "learning_rate": 1.5160804751253405e-06, "loss": 0.1192, "step": 5481 }, { "epoch": 0.7638821152372326, "grad_norm": 0.11508543789386749, "learning_rate": 1.5143903857948572e-06, "loss": 0.1143, "step": 5482 }, { "epoch": 0.764021458928447, "grad_norm": 0.13736990094184875, "learning_rate": 1.5127010708872513e-06, "loss": 0.1445, "step": 5483 }, { "epoch": 0.7641608026196614, "grad_norm": 0.13015003502368927, "learning_rate": 1.5110125307778506e-06, "loss": 0.124, "step": 5484 }, { "epoch": 0.7643001463108757, "grad_norm": 0.14048944413661957, "learning_rate": 1.5093247658418125e-06, "loss": 0.1313, "step": 5485 }, { "epoch": 0.7644394900020901, "grad_norm": 0.15234337747097015, "learning_rate": 1.5076377764541162e-06, "loss": 0.1306, "step": 5486 }, { "epoch": 0.7645788336933045, "grad_norm": 0.17981155216693878, "learning_rate": 1.5059515629895754e-06, "loss": 0.1388, "step": 5487 }, { "epoch": 0.764718177384519, "grad_norm": 0.16989511251449585, "learning_rate": 1.5042661258228268e-06, "loss": 0.124, "step": 5488 }, { "epoch": 0.7648575210757333, "grad_norm": 0.19379597902297974, "learning_rate": 1.502581465328335e-06, "loss": 0.1305, "step": 5489 }, { "epoch": 0.7649968647669477, "grad_norm": 0.15455037355422974, "learning_rate": 1.5008975818803939e-06, "loss": 0.1306, "step": 5490 }, { "epoch": 0.7651362084581621, "grad_norm": 0.13218075037002563, "learning_rate": 1.4992144758531257e-06, "loss": 0.1048, "step": 5491 }, { "epoch": 0.7652755521493765, "grad_norm": 0.12735114991664886, "learning_rate": 1.4975321476204767e-06, "loss": 0.128, "step": 5492 }, { "epoch": 0.7654148958405909, "grad_norm": 0.11024664342403412, "learning_rate": 1.4958505975562205e-06, "loss": 0.1221, "step": 5493 }, { "epoch": 0.7655542395318052, "grad_norm": 0.20354078710079193, "learning_rate": 1.49416982603396e-06, "loss": 0.1091, "step": 5494 }, { "epoch": 0.7656935832230196, "grad_norm": 0.1708490550518036, "learning_rate": 1.4924898334271265e-06, "loss": 0.1073, "step": 5495 }, { "epoch": 0.765832926914234, "grad_norm": 0.19012285768985748, "learning_rate": 1.4908106201089722e-06, "loss": 0.1277, "step": 5496 }, { "epoch": 0.7659722706054484, "grad_norm": 0.1361212134361267, "learning_rate": 1.4891321864525826e-06, "loss": 0.1361, "step": 5497 }, { "epoch": 0.7661116142966627, "grad_norm": 0.12045344710350037, "learning_rate": 1.4874545328308681e-06, "loss": 0.1133, "step": 5498 }, { "epoch": 0.7662509579878771, "grad_norm": 0.14520248770713806, "learning_rate": 1.4857776596165635e-06, "loss": 0.1164, "step": 5499 }, { "epoch": 0.7663903016790915, "grad_norm": 0.13125862181186676, "learning_rate": 1.4841015671822306e-06, "loss": 0.1028, "step": 5500 }, { "epoch": 0.7665296453703059, "grad_norm": 0.1361752599477768, "learning_rate": 1.4824262559002595e-06, "loss": 0.1263, "step": 5501 }, { "epoch": 0.7666689890615203, "grad_norm": 0.17563769221305847, "learning_rate": 1.480751726142869e-06, "loss": 0.1583, "step": 5502 }, { "epoch": 0.7668083327527346, "grad_norm": 0.09842494130134583, "learning_rate": 1.4790779782820991e-06, "loss": 0.0952, "step": 5503 }, { "epoch": 0.766947676443949, "grad_norm": 0.24219940602779388, "learning_rate": 1.4774050126898164e-06, "loss": 0.1625, "step": 5504 }, { "epoch": 0.7670870201351634, "grad_norm": 0.1197778508067131, "learning_rate": 1.4757328297377177e-06, "loss": 0.1223, "step": 5505 }, { "epoch": 0.7672263638263778, "grad_norm": 0.19937920570373535, "learning_rate": 1.474061429797326e-06, "loss": 0.1298, "step": 5506 }, { "epoch": 0.7673657075175921, "grad_norm": 0.18231534957885742, "learning_rate": 1.4723908132399838e-06, "loss": 0.1182, "step": 5507 }, { "epoch": 0.7675050512088065, "grad_norm": 0.13187836110591888, "learning_rate": 1.4707209804368683e-06, "loss": 0.1051, "step": 5508 }, { "epoch": 0.7676443949000209, "grad_norm": 0.12094926834106445, "learning_rate": 1.4690519317589742e-06, "loss": 0.0943, "step": 5509 }, { "epoch": 0.7677837385912353, "grad_norm": 0.16550615429878235, "learning_rate": 1.4673836675771298e-06, "loss": 0.1233, "step": 5510 }, { "epoch": 0.7679230822824497, "grad_norm": 0.15834493935108185, "learning_rate": 1.4657161882619814e-06, "loss": 0.1168, "step": 5511 }, { "epoch": 0.768062425973664, "grad_norm": 0.19279785454273224, "learning_rate": 1.4640494941840072e-06, "loss": 0.1293, "step": 5512 }, { "epoch": 0.7682017696648784, "grad_norm": 0.21285486221313477, "learning_rate": 1.4623835857135099e-06, "loss": 0.1281, "step": 5513 }, { "epoch": 0.7683411133560928, "grad_norm": 0.19242729246616364, "learning_rate": 1.460718463220615e-06, "loss": 0.1465, "step": 5514 }, { "epoch": 0.7684804570473072, "grad_norm": 0.15450675785541534, "learning_rate": 1.4590541270752723e-06, "loss": 0.1364, "step": 5515 }, { "epoch": 0.7686198007385215, "grad_norm": 0.10571660101413727, "learning_rate": 1.457390577647262e-06, "loss": 0.1125, "step": 5516 }, { "epoch": 0.7687591444297359, "grad_norm": 0.10828018933534622, "learning_rate": 1.455727815306187e-06, "loss": 0.1095, "step": 5517 }, { "epoch": 0.7688984881209503, "grad_norm": 0.19483791291713715, "learning_rate": 1.454065840421473e-06, "loss": 0.1328, "step": 5518 }, { "epoch": 0.7690378318121647, "grad_norm": 0.21447522938251495, "learning_rate": 1.4524046533623758e-06, "loss": 0.1362, "step": 5519 }, { "epoch": 0.769177175503379, "grad_norm": 0.16516000032424927, "learning_rate": 1.450744254497972e-06, "loss": 0.1458, "step": 5520 }, { "epoch": 0.7693165191945934, "grad_norm": 0.16238604485988617, "learning_rate": 1.4490846441971624e-06, "loss": 0.1317, "step": 5521 }, { "epoch": 0.7694558628858078, "grad_norm": 0.16080398857593536, "learning_rate": 1.4474258228286758e-06, "loss": 0.1401, "step": 5522 }, { "epoch": 0.7695952065770222, "grad_norm": 0.19341963529586792, "learning_rate": 1.4457677907610646e-06, "loss": 0.1181, "step": 5523 }, { "epoch": 0.7697345502682366, "grad_norm": 0.1156914234161377, "learning_rate": 1.4441105483627088e-06, "loss": 0.1077, "step": 5524 }, { "epoch": 0.7698738939594509, "grad_norm": 0.09499242901802063, "learning_rate": 1.442454096001804e-06, "loss": 0.1021, "step": 5525 }, { "epoch": 0.7700132376506653, "grad_norm": 0.29459047317504883, "learning_rate": 1.4407984340463794e-06, "loss": 0.1817, "step": 5526 }, { "epoch": 0.7701525813418797, "grad_norm": 0.39786091446876526, "learning_rate": 1.4391435628642853e-06, "loss": 0.1451, "step": 5527 }, { "epoch": 0.7702919250330941, "grad_norm": 0.1215173676609993, "learning_rate": 1.437489482823195e-06, "loss": 0.1302, "step": 5528 }, { "epoch": 0.7704312687243086, "grad_norm": 0.18219132721424103, "learning_rate": 1.4358361942906097e-06, "loss": 0.1338, "step": 5529 }, { "epoch": 0.7705706124155229, "grad_norm": 0.15708354115486145, "learning_rate": 1.4341836976338485e-06, "loss": 0.1384, "step": 5530 }, { "epoch": 0.7707099561067373, "grad_norm": 0.13250213861465454, "learning_rate": 1.4325319932200631e-06, "loss": 0.1101, "step": 5531 }, { "epoch": 0.7708492997979517, "grad_norm": 0.17920367419719696, "learning_rate": 1.43088108141622e-06, "loss": 0.1313, "step": 5532 }, { "epoch": 0.7709886434891661, "grad_norm": 0.18322166800498962, "learning_rate": 1.4292309625891166e-06, "loss": 0.1275, "step": 5533 }, { "epoch": 0.7711279871803804, "grad_norm": 0.1827051341533661, "learning_rate": 1.4275816371053725e-06, "loss": 0.1323, "step": 5534 }, { "epoch": 0.7712673308715948, "grad_norm": 0.09320085495710373, "learning_rate": 1.425933105331429e-06, "loss": 0.1042, "step": 5535 }, { "epoch": 0.7714066745628092, "grad_norm": 0.46394050121307373, "learning_rate": 1.424285367633551e-06, "loss": 0.1701, "step": 5536 }, { "epoch": 0.7715460182540236, "grad_norm": 0.2510068714618683, "learning_rate": 1.422638424377829e-06, "loss": 0.1456, "step": 5537 }, { "epoch": 0.771685361945238, "grad_norm": 0.22767160832881927, "learning_rate": 1.420992275930178e-06, "loss": 0.1262, "step": 5538 }, { "epoch": 0.7718247056364523, "grad_norm": 0.21854357421398163, "learning_rate": 1.4193469226563322e-06, "loss": 0.1403, "step": 5539 }, { "epoch": 0.7719640493276667, "grad_norm": 0.0979316458106041, "learning_rate": 1.4177023649218536e-06, "loss": 0.1114, "step": 5540 }, { "epoch": 0.7721033930188811, "grad_norm": 0.09660854190587997, "learning_rate": 1.4160586030921224e-06, "loss": 0.1101, "step": 5541 }, { "epoch": 0.7722427367100955, "grad_norm": 0.14188966155052185, "learning_rate": 1.4144156375323486e-06, "loss": 0.1223, "step": 5542 }, { "epoch": 0.7723820804013098, "grad_norm": 0.22692057490348816, "learning_rate": 1.4127734686075589e-06, "loss": 0.1346, "step": 5543 }, { "epoch": 0.7725214240925242, "grad_norm": 0.19970420002937317, "learning_rate": 1.411132096682606e-06, "loss": 0.1076, "step": 5544 }, { "epoch": 0.7726607677837386, "grad_norm": 0.13617999851703644, "learning_rate": 1.4094915221221677e-06, "loss": 0.1109, "step": 5545 }, { "epoch": 0.772800111474953, "grad_norm": 0.20383019745349884, "learning_rate": 1.4078517452907403e-06, "loss": 0.1628, "step": 5546 }, { "epoch": 0.7729394551661674, "grad_norm": 0.10763765871524811, "learning_rate": 1.4062127665526438e-06, "loss": 0.1064, "step": 5547 }, { "epoch": 0.7730787988573817, "grad_norm": 0.1198306530714035, "learning_rate": 1.4045745862720227e-06, "loss": 0.108, "step": 5548 }, { "epoch": 0.7732181425485961, "grad_norm": 0.18301866948604584, "learning_rate": 1.4029372048128454e-06, "loss": 0.1733, "step": 5549 }, { "epoch": 0.7733574862398105, "grad_norm": 0.18088814616203308, "learning_rate": 1.401300622538897e-06, "loss": 0.1219, "step": 5550 }, { "epoch": 0.7734968299310249, "grad_norm": 0.18785761296749115, "learning_rate": 1.3996648398137924e-06, "loss": 0.1403, "step": 5551 }, { "epoch": 0.7736361736222392, "grad_norm": 0.10357046127319336, "learning_rate": 1.398029857000962e-06, "loss": 0.1116, "step": 5552 }, { "epoch": 0.7737755173134536, "grad_norm": 0.2133842259645462, "learning_rate": 1.3963956744636642e-06, "loss": 0.1455, "step": 5553 }, { "epoch": 0.773914861004668, "grad_norm": 0.12092513591051102, "learning_rate": 1.394762292564974e-06, "loss": 0.1097, "step": 5554 }, { "epoch": 0.7740542046958824, "grad_norm": 0.12706150114536285, "learning_rate": 1.393129711667794e-06, "loss": 0.1278, "step": 5555 }, { "epoch": 0.7741935483870968, "grad_norm": 0.12823604047298431, "learning_rate": 1.3914979321348488e-06, "loss": 0.1109, "step": 5556 }, { "epoch": 0.7743328920783111, "grad_norm": 0.15638428926467896, "learning_rate": 1.3898669543286763e-06, "loss": 0.1161, "step": 5557 }, { "epoch": 0.7744722357695255, "grad_norm": 0.13257089257240295, "learning_rate": 1.3882367786116458e-06, "loss": 0.1139, "step": 5558 }, { "epoch": 0.7746115794607399, "grad_norm": 0.1682112067937851, "learning_rate": 1.3866074053459465e-06, "loss": 0.1338, "step": 5559 }, { "epoch": 0.7747509231519543, "grad_norm": 0.20851416885852814, "learning_rate": 1.3849788348935856e-06, "loss": 0.1297, "step": 5560 }, { "epoch": 0.7748902668431686, "grad_norm": 0.15446555614471436, "learning_rate": 1.3833510676163963e-06, "loss": 0.1226, "step": 5561 }, { "epoch": 0.775029610534383, "grad_norm": 0.14413218200206757, "learning_rate": 1.3817241038760287e-06, "loss": 0.1254, "step": 5562 }, { "epoch": 0.7751689542255974, "grad_norm": 0.15340355038642883, "learning_rate": 1.3800979440339602e-06, "loss": 0.1226, "step": 5563 }, { "epoch": 0.7753082979168118, "grad_norm": 0.17315635085105896, "learning_rate": 1.3784725884514833e-06, "loss": 0.1285, "step": 5564 }, { "epoch": 0.7754476416080262, "grad_norm": 0.22017709910869598, "learning_rate": 1.3768480374897163e-06, "loss": 0.1615, "step": 5565 }, { "epoch": 0.7755869852992405, "grad_norm": 0.18381071090698242, "learning_rate": 1.3752242915095993e-06, "loss": 0.156, "step": 5566 }, { "epoch": 0.7757263289904549, "grad_norm": 0.20997272431850433, "learning_rate": 1.3736013508718892e-06, "loss": 0.1262, "step": 5567 }, { "epoch": 0.7758656726816693, "grad_norm": 0.16388002038002014, "learning_rate": 1.371979215937166e-06, "loss": 0.1148, "step": 5568 }, { "epoch": 0.7760050163728838, "grad_norm": 0.18218199908733368, "learning_rate": 1.3703578870658312e-06, "loss": 0.1378, "step": 5569 }, { "epoch": 0.7761443600640981, "grad_norm": 0.1549680083990097, "learning_rate": 1.3687373646181095e-06, "loss": 0.1238, "step": 5570 }, { "epoch": 0.7762837037553125, "grad_norm": 0.13435740768909454, "learning_rate": 1.3671176489540406e-06, "loss": 0.1301, "step": 5571 }, { "epoch": 0.7764230474465269, "grad_norm": 0.2190312147140503, "learning_rate": 1.3654987404334917e-06, "loss": 0.1378, "step": 5572 }, { "epoch": 0.7765623911377413, "grad_norm": 0.14739413559436798, "learning_rate": 1.363880639416144e-06, "loss": 0.0942, "step": 5573 }, { "epoch": 0.7767017348289557, "grad_norm": 0.20762529969215393, "learning_rate": 1.3622633462615058e-06, "loss": 0.1201, "step": 5574 }, { "epoch": 0.77684107852017, "grad_norm": 0.1807398647069931, "learning_rate": 1.3606468613288997e-06, "loss": 0.1326, "step": 5575 }, { "epoch": 0.7769804222113844, "grad_norm": 0.18702442944049835, "learning_rate": 1.359031184977473e-06, "loss": 0.1233, "step": 5576 }, { "epoch": 0.7771197659025988, "grad_norm": 0.16574543714523315, "learning_rate": 1.3574163175661936e-06, "loss": 0.1352, "step": 5577 }, { "epoch": 0.7772591095938132, "grad_norm": 0.17540130019187927, "learning_rate": 1.3558022594538473e-06, "loss": 0.1402, "step": 5578 }, { "epoch": 0.7773984532850275, "grad_norm": 0.21322064101696014, "learning_rate": 1.3541890109990386e-06, "loss": 0.1686, "step": 5579 }, { "epoch": 0.7775377969762419, "grad_norm": 0.15594938397407532, "learning_rate": 1.3525765725601964e-06, "loss": 0.1271, "step": 5580 }, { "epoch": 0.7776771406674563, "grad_norm": 0.17771829664707184, "learning_rate": 1.3509649444955697e-06, "loss": 0.156, "step": 5581 }, { "epoch": 0.7778164843586707, "grad_norm": 0.206898033618927, "learning_rate": 1.3493541271632227e-06, "loss": 0.1422, "step": 5582 }, { "epoch": 0.7779558280498851, "grad_norm": 0.12131474167108536, "learning_rate": 1.3477441209210418e-06, "loss": 0.1216, "step": 5583 }, { "epoch": 0.7780951717410994, "grad_norm": 0.22768259048461914, "learning_rate": 1.3461349261267347e-06, "loss": 0.1328, "step": 5584 }, { "epoch": 0.7782345154323138, "grad_norm": 0.1290149837732315, "learning_rate": 1.3445265431378297e-06, "loss": 0.115, "step": 5585 }, { "epoch": 0.7783738591235282, "grad_norm": 0.12608762085437775, "learning_rate": 1.3429189723116693e-06, "loss": 0.1217, "step": 5586 }, { "epoch": 0.7785132028147426, "grad_norm": 0.13550019264221191, "learning_rate": 1.3413122140054219e-06, "loss": 0.1018, "step": 5587 }, { "epoch": 0.7786525465059569, "grad_norm": 0.13158762454986572, "learning_rate": 1.3397062685760715e-06, "loss": 0.136, "step": 5588 }, { "epoch": 0.7787918901971713, "grad_norm": 0.1763547956943512, "learning_rate": 1.3381011363804208e-06, "loss": 0.1298, "step": 5589 }, { "epoch": 0.7789312338883857, "grad_norm": 0.16992118954658508, "learning_rate": 1.3364968177750953e-06, "loss": 0.1318, "step": 5590 }, { "epoch": 0.7790705775796001, "grad_norm": 0.1338822990655899, "learning_rate": 1.3348933131165387e-06, "loss": 0.1136, "step": 5591 }, { "epoch": 0.7792099212708145, "grad_norm": 0.14654512703418732, "learning_rate": 1.333290622761011e-06, "loss": 0.116, "step": 5592 }, { "epoch": 0.7793492649620288, "grad_norm": 0.17756816744804382, "learning_rate": 1.3316887470645956e-06, "loss": 0.1333, "step": 5593 }, { "epoch": 0.7794886086532432, "grad_norm": 0.12477800250053406, "learning_rate": 1.3300876863831903e-06, "loss": 0.1022, "step": 5594 }, { "epoch": 0.7796279523444576, "grad_norm": 0.21405133605003357, "learning_rate": 1.3284874410725174e-06, "loss": 0.1461, "step": 5595 }, { "epoch": 0.779767296035672, "grad_norm": 0.16797684133052826, "learning_rate": 1.3268880114881112e-06, "loss": 0.1349, "step": 5596 }, { "epoch": 0.7799066397268863, "grad_norm": 0.1255592703819275, "learning_rate": 1.3252893979853304e-06, "loss": 0.1037, "step": 5597 }, { "epoch": 0.7800459834181007, "grad_norm": 0.11553764343261719, "learning_rate": 1.3236916009193517e-06, "loss": 0.1235, "step": 5598 }, { "epoch": 0.7801853271093151, "grad_norm": 0.16440527141094208, "learning_rate": 1.3220946206451678e-06, "loss": 0.15, "step": 5599 }, { "epoch": 0.7803246708005295, "grad_norm": 0.1694401651620865, "learning_rate": 1.3204984575175893e-06, "loss": 0.1225, "step": 5600 }, { "epoch": 0.7804640144917439, "grad_norm": 0.1436637043952942, "learning_rate": 1.31890311189125e-06, "loss": 0.1216, "step": 5601 }, { "epoch": 0.7806033581829582, "grad_norm": 0.16382800042629242, "learning_rate": 1.317308584120599e-06, "loss": 0.1315, "step": 5602 }, { "epoch": 0.7807427018741726, "grad_norm": 0.1144673302769661, "learning_rate": 1.3157148745599035e-06, "loss": 0.1256, "step": 5603 }, { "epoch": 0.780882045565387, "grad_norm": 0.1298045516014099, "learning_rate": 1.314121983563248e-06, "loss": 0.1419, "step": 5604 }, { "epoch": 0.7810213892566014, "grad_norm": 0.1139615997672081, "learning_rate": 1.3125299114845375e-06, "loss": 0.1337, "step": 5605 }, { "epoch": 0.7811607329478157, "grad_norm": 0.18986958265304565, "learning_rate": 1.3109386586774958e-06, "loss": 0.1129, "step": 5606 }, { "epoch": 0.7813000766390301, "grad_norm": 0.14740276336669922, "learning_rate": 1.3093482254956602e-06, "loss": 0.1266, "step": 5607 }, { "epoch": 0.7814394203302445, "grad_norm": 0.11138053238391876, "learning_rate": 1.3077586122923896e-06, "loss": 0.1021, "step": 5608 }, { "epoch": 0.781578764021459, "grad_norm": 0.20284728705883026, "learning_rate": 1.3061698194208616e-06, "loss": 0.1595, "step": 5609 }, { "epoch": 0.7817181077126734, "grad_norm": 0.20234109461307526, "learning_rate": 1.3045818472340683e-06, "loss": 0.1498, "step": 5610 }, { "epoch": 0.7818574514038877, "grad_norm": 0.10761036723852158, "learning_rate": 1.3029946960848188e-06, "loss": 0.0908, "step": 5611 }, { "epoch": 0.7819967950951021, "grad_norm": 0.14691634476184845, "learning_rate": 1.3014083663257443e-06, "loss": 0.1456, "step": 5612 }, { "epoch": 0.7821361387863165, "grad_norm": 0.127471461892128, "learning_rate": 1.299822858309292e-06, "loss": 0.1185, "step": 5613 }, { "epoch": 0.7822754824775309, "grad_norm": 0.1859208643436432, "learning_rate": 1.2982381723877235e-06, "loss": 0.1213, "step": 5614 }, { "epoch": 0.7824148261687452, "grad_norm": 0.1498563438653946, "learning_rate": 1.2966543089131196e-06, "loss": 0.0974, "step": 5615 }, { "epoch": 0.7825541698599596, "grad_norm": 0.12176045030355453, "learning_rate": 1.295071268237379e-06, "loss": 0.1314, "step": 5616 }, { "epoch": 0.782693513551174, "grad_norm": 0.11530175805091858, "learning_rate": 1.2934890507122195e-06, "loss": 0.0885, "step": 5617 }, { "epoch": 0.7828328572423884, "grad_norm": 0.21855761110782623, "learning_rate": 1.2919076566891703e-06, "loss": 0.1546, "step": 5618 }, { "epoch": 0.7829722009336028, "grad_norm": 0.18864066898822784, "learning_rate": 1.2903270865195837e-06, "loss": 0.1497, "step": 5619 }, { "epoch": 0.7831115446248171, "grad_norm": 0.19857719540596008, "learning_rate": 1.2887473405546254e-06, "loss": 0.1186, "step": 5620 }, { "epoch": 0.7832508883160315, "grad_norm": 0.3130056858062744, "learning_rate": 1.2871684191452772e-06, "loss": 0.1263, "step": 5621 }, { "epoch": 0.7833902320072459, "grad_norm": 0.2705417573451996, "learning_rate": 1.2855903226423412e-06, "loss": 0.1415, "step": 5622 }, { "epoch": 0.7835295756984603, "grad_norm": 0.18911917507648468, "learning_rate": 1.2840130513964338e-06, "loss": 0.1482, "step": 5623 }, { "epoch": 0.7836689193896746, "grad_norm": 0.20649565756320953, "learning_rate": 1.2824366057579917e-06, "loss": 0.1157, "step": 5624 }, { "epoch": 0.783808263080889, "grad_norm": 0.22304493188858032, "learning_rate": 1.2808609860772598e-06, "loss": 0.127, "step": 5625 }, { "epoch": 0.7839476067721034, "grad_norm": 0.4263792932033539, "learning_rate": 1.2792861927043071e-06, "loss": 0.1723, "step": 5626 }, { "epoch": 0.7840869504633178, "grad_norm": 0.19184094667434692, "learning_rate": 1.277712225989019e-06, "loss": 0.1148, "step": 5627 }, { "epoch": 0.7842262941545322, "grad_norm": 0.14593856036663055, "learning_rate": 1.2761390862810907e-06, "loss": 0.1459, "step": 5628 }, { "epoch": 0.7843656378457465, "grad_norm": 0.2062235325574875, "learning_rate": 1.274566773930041e-06, "loss": 0.1137, "step": 5629 }, { "epoch": 0.7845049815369609, "grad_norm": 0.14423403143882751, "learning_rate": 1.272995289285202e-06, "loss": 0.1264, "step": 5630 }, { "epoch": 0.7846443252281753, "grad_norm": 0.07686854153871536, "learning_rate": 1.2714246326957213e-06, "loss": 0.0932, "step": 5631 }, { "epoch": 0.7847836689193897, "grad_norm": 0.14804016053676605, "learning_rate": 1.2698548045105608e-06, "loss": 0.108, "step": 5632 }, { "epoch": 0.784923012610604, "grad_norm": 0.1627015322446823, "learning_rate": 1.2682858050785018e-06, "loss": 0.1141, "step": 5633 }, { "epoch": 0.7850623563018184, "grad_norm": 0.11307931691408157, "learning_rate": 1.266717634748142e-06, "loss": 0.0981, "step": 5634 }, { "epoch": 0.7852016999930328, "grad_norm": 0.16736391186714172, "learning_rate": 1.2651502938678917e-06, "loss": 0.1343, "step": 5635 }, { "epoch": 0.7853410436842472, "grad_norm": 0.1958085149526596, "learning_rate": 1.2635837827859766e-06, "loss": 0.1606, "step": 5636 }, { "epoch": 0.7854803873754616, "grad_norm": 0.10944319516420364, "learning_rate": 1.2620181018504406e-06, "loss": 0.1114, "step": 5637 }, { "epoch": 0.7856197310666759, "grad_norm": 0.19058053195476532, "learning_rate": 1.2604532514091444e-06, "loss": 0.1316, "step": 5638 }, { "epoch": 0.7857590747578903, "grad_norm": 0.13885188102722168, "learning_rate": 1.258889231809759e-06, "loss": 0.1173, "step": 5639 }, { "epoch": 0.7858984184491047, "grad_norm": 0.1582634001970291, "learning_rate": 1.2573260433997768e-06, "loss": 0.1211, "step": 5640 }, { "epoch": 0.7860377621403191, "grad_norm": 0.17869576811790466, "learning_rate": 1.2557636865265e-06, "loss": 0.1544, "step": 5641 }, { "epoch": 0.7861771058315334, "grad_norm": 0.27693960070610046, "learning_rate": 1.254202161537051e-06, "loss": 0.1343, "step": 5642 }, { "epoch": 0.7863164495227478, "grad_norm": 0.15238451957702637, "learning_rate": 1.2526414687783616e-06, "loss": 0.1186, "step": 5643 }, { "epoch": 0.7864557932139622, "grad_norm": 0.2724434733390808, "learning_rate": 1.2510816085971849e-06, "loss": 0.1605, "step": 5644 }, { "epoch": 0.7865951369051766, "grad_norm": 0.12078524380922318, "learning_rate": 1.2495225813400864e-06, "loss": 0.1182, "step": 5645 }, { "epoch": 0.786734480596391, "grad_norm": 0.1251162439584732, "learning_rate": 1.247964387353446e-06, "loss": 0.1062, "step": 5646 }, { "epoch": 0.7868738242876053, "grad_norm": 0.1255187839269638, "learning_rate": 1.2464070269834566e-06, "loss": 0.1314, "step": 5647 }, { "epoch": 0.7870131679788197, "grad_norm": 0.14416423439979553, "learning_rate": 1.2448505005761297e-06, "loss": 0.1263, "step": 5648 }, { "epoch": 0.7871525116700342, "grad_norm": 0.23517203330993652, "learning_rate": 1.2432948084772917e-06, "loss": 0.1356, "step": 5649 }, { "epoch": 0.7872918553612486, "grad_norm": 0.14678287506103516, "learning_rate": 1.2417399510325785e-06, "loss": 0.1178, "step": 5650 }, { "epoch": 0.787431199052463, "grad_norm": 0.12984062731266022, "learning_rate": 1.2401859285874474e-06, "loss": 0.1248, "step": 5651 }, { "epoch": 0.7875705427436773, "grad_norm": 0.1511414349079132, "learning_rate": 1.2386327414871635e-06, "loss": 0.1339, "step": 5652 }, { "epoch": 0.7877098864348917, "grad_norm": 0.15807536244392395, "learning_rate": 1.237080390076812e-06, "loss": 0.1198, "step": 5653 }, { "epoch": 0.7878492301261061, "grad_norm": 0.14159724116325378, "learning_rate": 1.2355288747012878e-06, "loss": 0.1073, "step": 5654 }, { "epoch": 0.7879885738173205, "grad_norm": 0.13302813470363617, "learning_rate": 1.2339781957053031e-06, "loss": 0.1007, "step": 5655 }, { "epoch": 0.7881279175085348, "grad_norm": 0.09385275095701218, "learning_rate": 1.232428353433387e-06, "loss": 0.1022, "step": 5656 }, { "epoch": 0.7882672611997492, "grad_norm": 0.10686753690242767, "learning_rate": 1.2308793482298724e-06, "loss": 0.108, "step": 5657 }, { "epoch": 0.7884066048909636, "grad_norm": 0.17326253652572632, "learning_rate": 1.2293311804389162e-06, "loss": 0.1401, "step": 5658 }, { "epoch": 0.788545948582178, "grad_norm": 0.16965001821517944, "learning_rate": 1.227783850404487e-06, "loss": 0.1406, "step": 5659 }, { "epoch": 0.7886852922733923, "grad_norm": 0.16272974014282227, "learning_rate": 1.2262373584703642e-06, "loss": 0.1196, "step": 5660 }, { "epoch": 0.7888246359646067, "grad_norm": 0.15041720867156982, "learning_rate": 1.2246917049801449e-06, "loss": 0.1215, "step": 5661 }, { "epoch": 0.7889639796558211, "grad_norm": 0.08251173794269562, "learning_rate": 1.2231468902772354e-06, "loss": 0.0907, "step": 5662 }, { "epoch": 0.7891033233470355, "grad_norm": 0.1516561210155487, "learning_rate": 1.221602914704862e-06, "loss": 0.1358, "step": 5663 }, { "epoch": 0.7892426670382499, "grad_norm": 0.1396895796060562, "learning_rate": 1.2200597786060565e-06, "loss": 0.1395, "step": 5664 }, { "epoch": 0.7893820107294642, "grad_norm": 0.15900665521621704, "learning_rate": 1.2185174823236711e-06, "loss": 0.1291, "step": 5665 }, { "epoch": 0.7895213544206786, "grad_norm": 0.13204728066921234, "learning_rate": 1.2169760262003693e-06, "loss": 0.1358, "step": 5666 }, { "epoch": 0.789660698111893, "grad_norm": 0.14237435162067413, "learning_rate": 1.2154354105786276e-06, "loss": 0.0948, "step": 5667 }, { "epoch": 0.7898000418031074, "grad_norm": 0.14894933998584747, "learning_rate": 1.2138956358007325e-06, "loss": 0.1283, "step": 5668 }, { "epoch": 0.7899393854943217, "grad_norm": 0.12104395776987076, "learning_rate": 1.212356702208789e-06, "loss": 0.1083, "step": 5669 }, { "epoch": 0.7900787291855361, "grad_norm": 0.1539430320262909, "learning_rate": 1.210818610144714e-06, "loss": 0.1374, "step": 5670 }, { "epoch": 0.7902180728767505, "grad_norm": 0.15372973680496216, "learning_rate": 1.209281359950234e-06, "loss": 0.1208, "step": 5671 }, { "epoch": 0.7903574165679649, "grad_norm": 0.24755507707595825, "learning_rate": 1.2077449519668943e-06, "loss": 0.1222, "step": 5672 }, { "epoch": 0.7904967602591793, "grad_norm": 0.16541355848312378, "learning_rate": 1.2062093865360458e-06, "loss": 0.1269, "step": 5673 }, { "epoch": 0.7906361039503936, "grad_norm": 0.14687469601631165, "learning_rate": 1.2046746639988593e-06, "loss": 0.1378, "step": 5674 }, { "epoch": 0.790775447641608, "grad_norm": 0.20638297498226166, "learning_rate": 1.2031407846963122e-06, "loss": 0.1624, "step": 5675 }, { "epoch": 0.7909147913328224, "grad_norm": 0.1622241735458374, "learning_rate": 1.201607748969199e-06, "loss": 0.1088, "step": 5676 }, { "epoch": 0.7910541350240368, "grad_norm": 0.08332277089357376, "learning_rate": 1.2000755571581263e-06, "loss": 0.1009, "step": 5677 }, { "epoch": 0.7911934787152511, "grad_norm": 0.1468038111925125, "learning_rate": 1.1985442096035116e-06, "loss": 0.1142, "step": 5678 }, { "epoch": 0.7913328224064655, "grad_norm": 0.18158075213432312, "learning_rate": 1.1970137066455834e-06, "loss": 0.1203, "step": 5679 }, { "epoch": 0.7914721660976799, "grad_norm": 0.21444325149059296, "learning_rate": 1.1954840486243857e-06, "loss": 0.1463, "step": 5680 }, { "epoch": 0.7916115097888943, "grad_norm": 0.15815892815589905, "learning_rate": 1.193955235879775e-06, "loss": 0.144, "step": 5681 }, { "epoch": 0.7917508534801087, "grad_norm": 0.09443097561597824, "learning_rate": 1.1924272687514182e-06, "loss": 0.1073, "step": 5682 }, { "epoch": 0.791890197171323, "grad_norm": 0.15635161101818085, "learning_rate": 1.1909001475787917e-06, "loss": 0.1197, "step": 5683 }, { "epoch": 0.7920295408625374, "grad_norm": 0.17403830587863922, "learning_rate": 1.1893738727011894e-06, "loss": 0.1202, "step": 5684 }, { "epoch": 0.7921688845537518, "grad_norm": 0.17046058177947998, "learning_rate": 1.187848444457716e-06, "loss": 0.133, "step": 5685 }, { "epoch": 0.7923082282449662, "grad_norm": 0.2307152897119522, "learning_rate": 1.1863238631872843e-06, "loss": 0.1276, "step": 5686 }, { "epoch": 0.7924475719361805, "grad_norm": 0.10074465721845627, "learning_rate": 1.184800129228622e-06, "loss": 0.1128, "step": 5687 }, { "epoch": 0.7925869156273949, "grad_norm": 0.19016721844673157, "learning_rate": 1.1832772429202716e-06, "loss": 0.1395, "step": 5688 }, { "epoch": 0.7927262593186094, "grad_norm": 0.16174577176570892, "learning_rate": 1.1817552046005777e-06, "loss": 0.1255, "step": 5689 }, { "epoch": 0.7928656030098238, "grad_norm": 0.13010075688362122, "learning_rate": 1.1802340146077045e-06, "loss": 0.1114, "step": 5690 }, { "epoch": 0.7930049467010382, "grad_norm": 0.22369323670864105, "learning_rate": 1.1787136732796289e-06, "loss": 0.1366, "step": 5691 }, { "epoch": 0.7931442903922525, "grad_norm": 0.14272014796733856, "learning_rate": 1.177194180954132e-06, "loss": 0.1139, "step": 5692 }, { "epoch": 0.7932836340834669, "grad_norm": 0.17983490228652954, "learning_rate": 1.1756755379688133e-06, "loss": 0.1289, "step": 5693 }, { "epoch": 0.7934229777746813, "grad_norm": 0.15720929205417633, "learning_rate": 1.174157744661078e-06, "loss": 0.1342, "step": 5694 }, { "epoch": 0.7935623214658957, "grad_norm": 0.16744160652160645, "learning_rate": 1.1726408013681473e-06, "loss": 0.1402, "step": 5695 }, { "epoch": 0.79370166515711, "grad_norm": 0.1580916941165924, "learning_rate": 1.1711247084270494e-06, "loss": 0.1609, "step": 5696 }, { "epoch": 0.7938410088483244, "grad_norm": 0.2336304783821106, "learning_rate": 1.1696094661746267e-06, "loss": 0.1157, "step": 5697 }, { "epoch": 0.7939803525395388, "grad_norm": 0.21757292747497559, "learning_rate": 1.1680950749475328e-06, "loss": 0.1167, "step": 5698 }, { "epoch": 0.7941196962307532, "grad_norm": 0.13132163882255554, "learning_rate": 1.1665815350822291e-06, "loss": 0.1105, "step": 5699 }, { "epoch": 0.7942590399219676, "grad_norm": 0.1732279509305954, "learning_rate": 1.1650688469149884e-06, "loss": 0.119, "step": 5700 }, { "epoch": 0.7943983836131819, "grad_norm": 0.13311998546123505, "learning_rate": 1.1635570107818973e-06, "loss": 0.1142, "step": 5701 }, { "epoch": 0.7945377273043963, "grad_norm": 0.12459834665060043, "learning_rate": 1.1620460270188516e-06, "loss": 0.1287, "step": 5702 }, { "epoch": 0.7946770709956107, "grad_norm": 0.17269812524318695, "learning_rate": 1.1605358959615559e-06, "loss": 0.1275, "step": 5703 }, { "epoch": 0.7948164146868251, "grad_norm": 0.15237075090408325, "learning_rate": 1.159026617945529e-06, "loss": 0.125, "step": 5704 }, { "epoch": 0.7949557583780394, "grad_norm": 0.14371618628501892, "learning_rate": 1.1575181933060952e-06, "loss": 0.1255, "step": 5705 }, { "epoch": 0.7950951020692538, "grad_norm": 0.1434628665447235, "learning_rate": 1.156010622378395e-06, "loss": 0.1182, "step": 5706 }, { "epoch": 0.7952344457604682, "grad_norm": 0.07661621272563934, "learning_rate": 1.1545039054973733e-06, "loss": 0.0966, "step": 5707 }, { "epoch": 0.7953737894516826, "grad_norm": 0.15543340146541595, "learning_rate": 1.1529980429977899e-06, "loss": 0.126, "step": 5708 }, { "epoch": 0.795513133142897, "grad_norm": 0.15251106023788452, "learning_rate": 1.151493035214214e-06, "loss": 0.1228, "step": 5709 }, { "epoch": 0.7956524768341113, "grad_norm": 0.1478687822818756, "learning_rate": 1.1499888824810223e-06, "loss": 0.1262, "step": 5710 }, { "epoch": 0.7957918205253257, "grad_norm": 0.10947690159082413, "learning_rate": 1.148485585132403e-06, "loss": 0.1214, "step": 5711 }, { "epoch": 0.7959311642165401, "grad_norm": 0.17049640417099, "learning_rate": 1.1469831435023542e-06, "loss": 0.1564, "step": 5712 }, { "epoch": 0.7960705079077545, "grad_norm": 0.1763889044523239, "learning_rate": 1.1454815579246874e-06, "loss": 0.1485, "step": 5713 }, { "epoch": 0.7962098515989688, "grad_norm": 0.19628073275089264, "learning_rate": 1.143980828733018e-06, "loss": 0.1509, "step": 5714 }, { "epoch": 0.7963491952901832, "grad_norm": 0.12110070139169693, "learning_rate": 1.1424809562607725e-06, "loss": 0.1093, "step": 5715 }, { "epoch": 0.7964885389813976, "grad_norm": 0.12272839993238449, "learning_rate": 1.1409819408411898e-06, "loss": 0.1155, "step": 5716 }, { "epoch": 0.796627882672612, "grad_norm": 0.2645038962364197, "learning_rate": 1.1394837828073184e-06, "loss": 0.1486, "step": 5717 }, { "epoch": 0.7967672263638264, "grad_norm": 0.12892647087574005, "learning_rate": 1.1379864824920116e-06, "loss": 0.1383, "step": 5718 }, { "epoch": 0.7969065700550407, "grad_norm": 0.13438817858695984, "learning_rate": 1.1364900402279394e-06, "loss": 0.131, "step": 5719 }, { "epoch": 0.7970459137462551, "grad_norm": 0.13992281258106232, "learning_rate": 1.134994456347574e-06, "loss": 0.1234, "step": 5720 }, { "epoch": 0.7971852574374695, "grad_norm": 0.21942465007305145, "learning_rate": 1.1334997311832003e-06, "loss": 0.1279, "step": 5721 }, { "epoch": 0.7973246011286839, "grad_norm": 0.11289247870445251, "learning_rate": 1.132005865066912e-06, "loss": 0.1054, "step": 5722 }, { "epoch": 0.7974639448198982, "grad_norm": 0.19903701543807983, "learning_rate": 1.1305128583306125e-06, "loss": 0.1868, "step": 5723 }, { "epoch": 0.7976032885111126, "grad_norm": 0.18664488196372986, "learning_rate": 1.1290207113060158e-06, "loss": 0.1378, "step": 5724 }, { "epoch": 0.797742632202327, "grad_norm": 0.1390666514635086, "learning_rate": 1.127529424324641e-06, "loss": 0.1296, "step": 5725 }, { "epoch": 0.7978819758935414, "grad_norm": 0.11866910010576248, "learning_rate": 1.1260389977178166e-06, "loss": 0.1127, "step": 5726 }, { "epoch": 0.7980213195847558, "grad_norm": 0.08280736207962036, "learning_rate": 1.1245494318166844e-06, "loss": 0.1025, "step": 5727 }, { "epoch": 0.7981606632759701, "grad_norm": 0.18987390398979187, "learning_rate": 1.1230607269521886e-06, "loss": 0.1319, "step": 5728 }, { "epoch": 0.7983000069671845, "grad_norm": 0.11632852256298065, "learning_rate": 1.1215728834550877e-06, "loss": 0.1161, "step": 5729 }, { "epoch": 0.798439350658399, "grad_norm": 0.10664193332195282, "learning_rate": 1.1200859016559473e-06, "loss": 0.1129, "step": 5730 }, { "epoch": 0.7985786943496134, "grad_norm": 0.12123153358697891, "learning_rate": 1.1185997818851402e-06, "loss": 0.1078, "step": 5731 }, { "epoch": 0.7987180380408277, "grad_norm": 0.19345757365226746, "learning_rate": 1.1171145244728454e-06, "loss": 0.1182, "step": 5732 }, { "epoch": 0.7988573817320421, "grad_norm": 0.24445036053657532, "learning_rate": 1.1156301297490563e-06, "loss": 0.1533, "step": 5733 }, { "epoch": 0.7989967254232565, "grad_norm": 0.17755919694900513, "learning_rate": 1.1141465980435713e-06, "loss": 0.1125, "step": 5734 }, { "epoch": 0.7991360691144709, "grad_norm": 0.19384834170341492, "learning_rate": 1.112663929685997e-06, "loss": 0.1343, "step": 5735 }, { "epoch": 0.7992754128056853, "grad_norm": 0.14752225577831268, "learning_rate": 1.111182125005747e-06, "loss": 0.1232, "step": 5736 }, { "epoch": 0.7994147564968996, "grad_norm": 0.275897741317749, "learning_rate": 1.1097011843320454e-06, "loss": 0.1388, "step": 5737 }, { "epoch": 0.799554100188114, "grad_norm": 0.12012802064418793, "learning_rate": 1.1082211079939248e-06, "loss": 0.1182, "step": 5738 }, { "epoch": 0.7996934438793284, "grad_norm": 0.13509753346443176, "learning_rate": 1.106741896320222e-06, "loss": 0.1113, "step": 5739 }, { "epoch": 0.7998327875705428, "grad_norm": 0.17540445923805237, "learning_rate": 1.1052635496395864e-06, "loss": 0.121, "step": 5740 }, { "epoch": 0.7999721312617571, "grad_norm": 0.20425595343112946, "learning_rate": 1.1037860682804708e-06, "loss": 0.158, "step": 5741 }, { "epoch": 0.8001114749529715, "grad_norm": 0.2220039814710617, "learning_rate": 1.1023094525711397e-06, "loss": 0.1528, "step": 5742 }, { "epoch": 0.8002508186441859, "grad_norm": 0.20234300196170807, "learning_rate": 1.1008337028396616e-06, "loss": 0.1298, "step": 5743 }, { "epoch": 0.8003901623354003, "grad_norm": 0.10222177952528, "learning_rate": 1.099358819413915e-06, "loss": 0.1036, "step": 5744 }, { "epoch": 0.8005295060266147, "grad_norm": 0.14279384911060333, "learning_rate": 1.0978848026215865e-06, "loss": 0.1126, "step": 5745 }, { "epoch": 0.800668849717829, "grad_norm": 0.1814657747745514, "learning_rate": 1.0964116527901686e-06, "loss": 0.1435, "step": 5746 }, { "epoch": 0.8008081934090434, "grad_norm": 0.21652135252952576, "learning_rate": 1.094939370246959e-06, "loss": 0.1316, "step": 5747 }, { "epoch": 0.8009475371002578, "grad_norm": 0.17462043464183807, "learning_rate": 1.093467955319068e-06, "loss": 0.1127, "step": 5748 }, { "epoch": 0.8010868807914722, "grad_norm": 0.16855394840240479, "learning_rate": 1.0919974083334106e-06, "loss": 0.1117, "step": 5749 }, { "epoch": 0.8012262244826865, "grad_norm": 0.22766099870204926, "learning_rate": 1.0905277296167066e-06, "loss": 0.1726, "step": 5750 }, { "epoch": 0.8013655681739009, "grad_norm": 0.13495640456676483, "learning_rate": 1.089058919495488e-06, "loss": 0.1203, "step": 5751 }, { "epoch": 0.8015049118651153, "grad_norm": 0.19410021603107452, "learning_rate": 1.0875909782960887e-06, "loss": 0.1584, "step": 5752 }, { "epoch": 0.8016442555563297, "grad_norm": 0.15075549483299255, "learning_rate": 1.0861239063446511e-06, "loss": 0.139, "step": 5753 }, { "epoch": 0.801783599247544, "grad_norm": 0.12413483113050461, "learning_rate": 1.0846577039671263e-06, "loss": 0.1257, "step": 5754 }, { "epoch": 0.8019229429387584, "grad_norm": 0.13834324479103088, "learning_rate": 1.0831923714892706e-06, "loss": 0.1172, "step": 5755 }, { "epoch": 0.8020622866299728, "grad_norm": 0.14363056421279907, "learning_rate": 1.0817279092366507e-06, "loss": 0.1165, "step": 5756 }, { "epoch": 0.8022016303211872, "grad_norm": 0.1601037234067917, "learning_rate": 1.0802643175346312e-06, "loss": 0.134, "step": 5757 }, { "epoch": 0.8023409740124016, "grad_norm": 0.12475183606147766, "learning_rate": 1.0788015967083904e-06, "loss": 0.1006, "step": 5758 }, { "epoch": 0.8024803177036159, "grad_norm": 0.2492923140525818, "learning_rate": 1.0773397470829145e-06, "loss": 0.1591, "step": 5759 }, { "epoch": 0.8026196613948303, "grad_norm": 0.1801798790693283, "learning_rate": 1.0758787689829891e-06, "loss": 0.1194, "step": 5760 }, { "epoch": 0.8027590050860447, "grad_norm": 0.11930657923221588, "learning_rate": 1.074418662733212e-06, "loss": 0.1252, "step": 5761 }, { "epoch": 0.8028983487772591, "grad_norm": 0.12954644858837128, "learning_rate": 1.0729594286579876e-06, "loss": 0.1046, "step": 5762 }, { "epoch": 0.8030376924684735, "grad_norm": 0.12261094152927399, "learning_rate": 1.0715010670815212e-06, "loss": 0.1004, "step": 5763 }, { "epoch": 0.8031770361596878, "grad_norm": 0.2006673514842987, "learning_rate": 1.0700435783278278e-06, "loss": 0.1266, "step": 5764 }, { "epoch": 0.8033163798509022, "grad_norm": 0.16181829571723938, "learning_rate": 1.068586962720729e-06, "loss": 0.1177, "step": 5765 }, { "epoch": 0.8034557235421166, "grad_norm": 0.10040118545293808, "learning_rate": 1.0671312205838525e-06, "loss": 0.1142, "step": 5766 }, { "epoch": 0.803595067233331, "grad_norm": 0.16406558454036713, "learning_rate": 1.06567635224063e-06, "loss": 0.1296, "step": 5767 }, { "epoch": 0.8037344109245453, "grad_norm": 0.17314350605010986, "learning_rate": 1.0642223580142985e-06, "loss": 0.1364, "step": 5768 }, { "epoch": 0.8038737546157597, "grad_norm": 0.1572837233543396, "learning_rate": 1.0627692382279038e-06, "loss": 0.143, "step": 5769 }, { "epoch": 0.8040130983069742, "grad_norm": 0.18990497291088104, "learning_rate": 1.0613169932042972e-06, "loss": 0.1307, "step": 5770 }, { "epoch": 0.8041524419981886, "grad_norm": 0.1514940708875656, "learning_rate": 1.0598656232661313e-06, "loss": 0.1142, "step": 5771 }, { "epoch": 0.804291785689403, "grad_norm": 0.15387842059135437, "learning_rate": 1.0584151287358708e-06, "loss": 0.1275, "step": 5772 }, { "epoch": 0.8044311293806173, "grad_norm": 0.19795668125152588, "learning_rate": 1.0569655099357795e-06, "loss": 0.1366, "step": 5773 }, { "epoch": 0.8045704730718317, "grad_norm": 0.24857933819293976, "learning_rate": 1.0555167671879319e-06, "loss": 0.1164, "step": 5774 }, { "epoch": 0.8047098167630461, "grad_norm": 0.23131224513053894, "learning_rate": 1.0540689008142035e-06, "loss": 0.1124, "step": 5775 }, { "epoch": 0.8048491604542605, "grad_norm": 0.1259567141532898, "learning_rate": 1.052621911136278e-06, "loss": 0.1361, "step": 5776 }, { "epoch": 0.8049885041454748, "grad_norm": 0.19379296898841858, "learning_rate": 1.0511757984756455e-06, "loss": 0.1138, "step": 5777 }, { "epoch": 0.8051278478366892, "grad_norm": 0.18995939195156097, "learning_rate": 1.049730563153597e-06, "loss": 0.1315, "step": 5778 }, { "epoch": 0.8052671915279036, "grad_norm": 0.1663563847541809, "learning_rate": 1.0482862054912296e-06, "loss": 0.1336, "step": 5779 }, { "epoch": 0.805406535219118, "grad_norm": 0.10967162251472473, "learning_rate": 1.0468427258094481e-06, "loss": 0.1007, "step": 5780 }, { "epoch": 0.8055458789103324, "grad_norm": 0.20628930628299713, "learning_rate": 1.045400124428963e-06, "loss": 0.1305, "step": 5781 }, { "epoch": 0.8056852226015467, "grad_norm": 0.149249866604805, "learning_rate": 1.043958401670283e-06, "loss": 0.1067, "step": 5782 }, { "epoch": 0.8058245662927611, "grad_norm": 0.21774162352085114, "learning_rate": 1.04251755785373e-06, "loss": 0.1214, "step": 5783 }, { "epoch": 0.8059639099839755, "grad_norm": 0.1262117624282837, "learning_rate": 1.0410775932994232e-06, "loss": 0.098, "step": 5784 }, { "epoch": 0.8061032536751899, "grad_norm": 0.20335155725479126, "learning_rate": 1.039638508327293e-06, "loss": 0.1422, "step": 5785 }, { "epoch": 0.8062425973664042, "grad_norm": 0.1459759622812271, "learning_rate": 1.0382003032570682e-06, "loss": 0.1272, "step": 5786 }, { "epoch": 0.8063819410576186, "grad_norm": 0.10958434641361237, "learning_rate": 1.0367629784082867e-06, "loss": 0.1008, "step": 5787 }, { "epoch": 0.806521284748833, "grad_norm": 0.2003493309020996, "learning_rate": 1.0353265341002916e-06, "loss": 0.1485, "step": 5788 }, { "epoch": 0.8066606284400474, "grad_norm": 0.10135939717292786, "learning_rate": 1.0338909706522232e-06, "loss": 0.1071, "step": 5789 }, { "epoch": 0.8067999721312618, "grad_norm": 0.13378199934959412, "learning_rate": 1.032456288383033e-06, "loss": 0.1293, "step": 5790 }, { "epoch": 0.8069393158224761, "grad_norm": 0.12177130579948425, "learning_rate": 1.0310224876114766e-06, "loss": 0.1139, "step": 5791 }, { "epoch": 0.8070786595136905, "grad_norm": 0.14772601425647736, "learning_rate": 1.0295895686561087e-06, "loss": 0.1359, "step": 5792 }, { "epoch": 0.8072180032049049, "grad_norm": 0.1290132999420166, "learning_rate": 1.0281575318352937e-06, "loss": 0.1011, "step": 5793 }, { "epoch": 0.8073573468961193, "grad_norm": 0.17394056916236877, "learning_rate": 1.0267263774671953e-06, "loss": 0.1275, "step": 5794 }, { "epoch": 0.8074966905873336, "grad_norm": 0.12774549424648285, "learning_rate": 1.0252961058697858e-06, "loss": 0.1352, "step": 5795 }, { "epoch": 0.807636034278548, "grad_norm": 0.19238203763961792, "learning_rate": 1.0238667173608364e-06, "loss": 0.1405, "step": 5796 }, { "epoch": 0.8077753779697624, "grad_norm": 0.1283227950334549, "learning_rate": 1.0224382122579256e-06, "loss": 0.1167, "step": 5797 }, { "epoch": 0.8079147216609768, "grad_norm": 0.07492484152317047, "learning_rate": 1.0210105908784362e-06, "loss": 0.1042, "step": 5798 }, { "epoch": 0.8080540653521912, "grad_norm": 0.18644046783447266, "learning_rate": 1.0195838535395514e-06, "loss": 0.1376, "step": 5799 }, { "epoch": 0.8081934090434055, "grad_norm": 0.3036171793937683, "learning_rate": 1.0181580005582586e-06, "loss": 0.1242, "step": 5800 }, { "epoch": 0.8083327527346199, "grad_norm": 0.15755680203437805, "learning_rate": 1.0167330322513508e-06, "loss": 0.1363, "step": 5801 }, { "epoch": 0.8084720964258343, "grad_norm": 0.08938523381948471, "learning_rate": 1.0153089489354256e-06, "loss": 0.088, "step": 5802 }, { "epoch": 0.8086114401170487, "grad_norm": 0.23023810982704163, "learning_rate": 1.0138857509268784e-06, "loss": 0.1567, "step": 5803 }, { "epoch": 0.808750783808263, "grad_norm": 0.08799798786640167, "learning_rate": 1.012463438541914e-06, "loss": 0.1018, "step": 5804 }, { "epoch": 0.8088901274994774, "grad_norm": 0.16838358342647552, "learning_rate": 1.0110420120965354e-06, "loss": 0.1341, "step": 5805 }, { "epoch": 0.8090294711906918, "grad_norm": 0.0953098013997078, "learning_rate": 1.0096214719065534e-06, "loss": 0.1057, "step": 5806 }, { "epoch": 0.8091688148819062, "grad_norm": 0.246611550450325, "learning_rate": 1.008201818287577e-06, "loss": 0.1627, "step": 5807 }, { "epoch": 0.8093081585731206, "grad_norm": 0.21007996797561646, "learning_rate": 1.0067830515550224e-06, "loss": 0.1391, "step": 5808 }, { "epoch": 0.8094475022643349, "grad_norm": 0.2168823927640915, "learning_rate": 1.0053651720241087e-06, "loss": 0.1945, "step": 5809 }, { "epoch": 0.8095868459555494, "grad_norm": 0.15792407095432281, "learning_rate": 1.0039481800098545e-06, "loss": 0.1406, "step": 5810 }, { "epoch": 0.8097261896467638, "grad_norm": 0.22625142335891724, "learning_rate": 1.0025320758270819e-06, "loss": 0.1377, "step": 5811 }, { "epoch": 0.8098655333379782, "grad_norm": 0.134125217795372, "learning_rate": 1.001116859790418e-06, "loss": 0.1142, "step": 5812 }, { "epoch": 0.8100048770291925, "grad_norm": 0.13480621576309204, "learning_rate": 9.997025322142934e-07, "loss": 0.1307, "step": 5813 }, { "epoch": 0.8101442207204069, "grad_norm": 0.19373047351837158, "learning_rate": 9.98289093412938e-07, "loss": 0.1076, "step": 5814 }, { "epoch": 0.8102835644116213, "grad_norm": 0.1735759675502777, "learning_rate": 9.96876543700384e-07, "loss": 0.1093, "step": 5815 }, { "epoch": 0.8104229081028357, "grad_norm": 0.228118896484375, "learning_rate": 9.95464883390469e-07, "loss": 0.1327, "step": 5816 }, { "epoch": 0.8105622517940501, "grad_norm": 0.0865393653512001, "learning_rate": 9.940541127968335e-07, "loss": 0.098, "step": 5817 }, { "epoch": 0.8107015954852644, "grad_norm": 0.14022907614707947, "learning_rate": 9.92644232232915e-07, "loss": 0.1091, "step": 5818 }, { "epoch": 0.8108409391764788, "grad_norm": 0.17097656428813934, "learning_rate": 9.912352420119587e-07, "loss": 0.1065, "step": 5819 }, { "epoch": 0.8109802828676932, "grad_norm": 0.14826329052448273, "learning_rate": 9.89827142447013e-07, "loss": 0.1185, "step": 5820 }, { "epoch": 0.8111196265589076, "grad_norm": 0.09093166142702103, "learning_rate": 9.884199338509193e-07, "loss": 0.0939, "step": 5821 }, { "epoch": 0.811258970250122, "grad_norm": 0.08490591496229172, "learning_rate": 9.87013616536331e-07, "loss": 0.0851, "step": 5822 }, { "epoch": 0.8113983139413363, "grad_norm": 0.13877108693122864, "learning_rate": 9.856081908156984e-07, "loss": 0.1377, "step": 5823 }, { "epoch": 0.8115376576325507, "grad_norm": 0.1824723482131958, "learning_rate": 9.842036570012776e-07, "loss": 0.1398, "step": 5824 }, { "epoch": 0.8116770013237651, "grad_norm": 0.21601861715316772, "learning_rate": 9.828000154051216e-07, "loss": 0.1329, "step": 5825 }, { "epoch": 0.8118163450149795, "grad_norm": 0.17077206075191498, "learning_rate": 9.813972663390864e-07, "loss": 0.1404, "step": 5826 }, { "epoch": 0.8119556887061938, "grad_norm": 0.13152170181274414, "learning_rate": 9.79995410114834e-07, "loss": 0.1345, "step": 5827 }, { "epoch": 0.8120950323974082, "grad_norm": 0.14022596180438995, "learning_rate": 9.785944470438218e-07, "loss": 0.1036, "step": 5828 }, { "epoch": 0.8122343760886226, "grad_norm": 0.1970801204442978, "learning_rate": 9.771943774373138e-07, "loss": 0.1582, "step": 5829 }, { "epoch": 0.812373719779837, "grad_norm": 0.31316423416137695, "learning_rate": 9.757952016063738e-07, "loss": 0.1145, "step": 5830 }, { "epoch": 0.8125130634710513, "grad_norm": 0.30170977115631104, "learning_rate": 9.743969198618659e-07, "loss": 0.127, "step": 5831 }, { "epoch": 0.8126524071622657, "grad_norm": 0.11793883889913559, "learning_rate": 9.729995325144548e-07, "loss": 0.1135, "step": 5832 }, { "epoch": 0.8127917508534801, "grad_norm": 0.19013117253780365, "learning_rate": 9.716030398746096e-07, "loss": 0.1466, "step": 5833 }, { "epoch": 0.8129310945446945, "grad_norm": 0.12863785028457642, "learning_rate": 9.702074422526004e-07, "loss": 0.1207, "step": 5834 }, { "epoch": 0.8130704382359089, "grad_norm": 0.14707514643669128, "learning_rate": 9.688127399584956e-07, "loss": 0.1558, "step": 5835 }, { "epoch": 0.8132097819271232, "grad_norm": 0.08626825362443924, "learning_rate": 9.674189333021655e-07, "loss": 0.1002, "step": 5836 }, { "epoch": 0.8133491256183376, "grad_norm": 0.12845562398433685, "learning_rate": 9.660260225932834e-07, "loss": 0.1272, "step": 5837 }, { "epoch": 0.813488469309552, "grad_norm": 0.13898301124572754, "learning_rate": 9.646340081413225e-07, "loss": 0.1219, "step": 5838 }, { "epoch": 0.8136278130007664, "grad_norm": 0.13895218074321747, "learning_rate": 9.632428902555546e-07, "loss": 0.1234, "step": 5839 }, { "epoch": 0.8137671566919807, "grad_norm": 0.13420867919921875, "learning_rate": 9.618526692450564e-07, "loss": 0.1294, "step": 5840 }, { "epoch": 0.8139065003831951, "grad_norm": 0.10924328863620758, "learning_rate": 9.604633454187035e-07, "loss": 0.1235, "step": 5841 }, { "epoch": 0.8140458440744095, "grad_norm": 0.1351640671491623, "learning_rate": 9.59074919085171e-07, "loss": 0.1245, "step": 5842 }, { "epoch": 0.8141851877656239, "grad_norm": 0.1717318594455719, "learning_rate": 9.57687390552935e-07, "loss": 0.1294, "step": 5843 }, { "epoch": 0.8143245314568383, "grad_norm": 0.23286378383636475, "learning_rate": 9.563007601302727e-07, "loss": 0.1473, "step": 5844 }, { "epoch": 0.8144638751480526, "grad_norm": 0.15068480372428894, "learning_rate": 9.549150281252633e-07, "loss": 0.1476, "step": 5845 }, { "epoch": 0.814603218839267, "grad_norm": 0.17224682867527008, "learning_rate": 9.535301948457842e-07, "loss": 0.1502, "step": 5846 }, { "epoch": 0.8147425625304814, "grad_norm": 0.16168391704559326, "learning_rate": 9.521462605995119e-07, "loss": 0.1324, "step": 5847 }, { "epoch": 0.8148819062216958, "grad_norm": 0.1491689383983612, "learning_rate": 9.507632256939264e-07, "loss": 0.1484, "step": 5848 }, { "epoch": 0.8150212499129101, "grad_norm": 0.16047286987304688, "learning_rate": 9.493810904363077e-07, "loss": 0.1503, "step": 5849 }, { "epoch": 0.8151605936041246, "grad_norm": 0.25523999333381653, "learning_rate": 9.479998551337322e-07, "loss": 0.1601, "step": 5850 }, { "epoch": 0.815299937295339, "grad_norm": 0.2145562320947647, "learning_rate": 9.466195200930817e-07, "loss": 0.1408, "step": 5851 }, { "epoch": 0.8154392809865534, "grad_norm": 0.17370155453681946, "learning_rate": 9.452400856210337e-07, "loss": 0.1306, "step": 5852 }, { "epoch": 0.8155786246777678, "grad_norm": 0.11617710441350937, "learning_rate": 9.438615520240651e-07, "loss": 0.1118, "step": 5853 }, { "epoch": 0.8157179683689821, "grad_norm": 0.11462882161140442, "learning_rate": 9.424839196084568e-07, "loss": 0.0985, "step": 5854 }, { "epoch": 0.8158573120601965, "grad_norm": 0.1264735609292984, "learning_rate": 9.411071886802869e-07, "loss": 0.1476, "step": 5855 }, { "epoch": 0.8159966557514109, "grad_norm": 0.156111940741539, "learning_rate": 9.397313595454349e-07, "loss": 0.1175, "step": 5856 }, { "epoch": 0.8161359994426253, "grad_norm": 0.14387881755828857, "learning_rate": 9.383564325095767e-07, "loss": 0.1342, "step": 5857 }, { "epoch": 0.8162753431338396, "grad_norm": 0.12090092897415161, "learning_rate": 9.369824078781897e-07, "loss": 0.1159, "step": 5858 }, { "epoch": 0.816414686825054, "grad_norm": 0.13792437314987183, "learning_rate": 9.356092859565524e-07, "loss": 0.1351, "step": 5859 }, { "epoch": 0.8165540305162684, "grad_norm": 0.17557260394096375, "learning_rate": 9.342370670497391e-07, "loss": 0.1209, "step": 5860 }, { "epoch": 0.8166933742074828, "grad_norm": 0.11591629683971405, "learning_rate": 9.328657514626266e-07, "loss": 0.1188, "step": 5861 }, { "epoch": 0.8168327178986972, "grad_norm": 0.10622220486402512, "learning_rate": 9.314953394998905e-07, "loss": 0.1329, "step": 5862 }, { "epoch": 0.8169720615899115, "grad_norm": 0.13631580770015717, "learning_rate": 9.30125831466005e-07, "loss": 0.1238, "step": 5863 }, { "epoch": 0.8171114052811259, "grad_norm": 0.20011281967163086, "learning_rate": 9.287572276652417e-07, "loss": 0.1407, "step": 5864 }, { "epoch": 0.8172507489723403, "grad_norm": 0.11279082298278809, "learning_rate": 9.273895284016743e-07, "loss": 0.1066, "step": 5865 }, { "epoch": 0.8173900926635547, "grad_norm": 0.1480325311422348, "learning_rate": 9.260227339791755e-07, "loss": 0.1326, "step": 5866 }, { "epoch": 0.817529436354769, "grad_norm": 0.14951372146606445, "learning_rate": 9.246568447014148e-07, "loss": 0.1348, "step": 5867 }, { "epoch": 0.8176687800459834, "grad_norm": 0.13629865646362305, "learning_rate": 9.232918608718599e-07, "loss": 0.1156, "step": 5868 }, { "epoch": 0.8178081237371978, "grad_norm": 0.17908614873886108, "learning_rate": 9.219277827937811e-07, "loss": 0.1281, "step": 5869 }, { "epoch": 0.8179474674284122, "grad_norm": 0.16207703948020935, "learning_rate": 9.205646107702465e-07, "loss": 0.129, "step": 5870 }, { "epoch": 0.8180868111196266, "grad_norm": 0.1910155713558197, "learning_rate": 9.192023451041187e-07, "loss": 0.1372, "step": 5871 }, { "epoch": 0.8182261548108409, "grad_norm": 0.15468627214431763, "learning_rate": 9.178409860980648e-07, "loss": 0.1331, "step": 5872 }, { "epoch": 0.8183654985020553, "grad_norm": 0.10650444030761719, "learning_rate": 9.164805340545457e-07, "loss": 0.1156, "step": 5873 }, { "epoch": 0.8185048421932697, "grad_norm": 0.17564985156059265, "learning_rate": 9.151209892758245e-07, "loss": 0.1362, "step": 5874 }, { "epoch": 0.8186441858844841, "grad_norm": 0.15608619153499603, "learning_rate": 9.137623520639588e-07, "loss": 0.1413, "step": 5875 }, { "epoch": 0.8187835295756984, "grad_norm": 0.16152167320251465, "learning_rate": 9.124046227208083e-07, "loss": 0.1332, "step": 5876 }, { "epoch": 0.8189228732669128, "grad_norm": 0.1489202082157135, "learning_rate": 9.110478015480301e-07, "loss": 0.1453, "step": 5877 }, { "epoch": 0.8190622169581272, "grad_norm": 0.10476600378751755, "learning_rate": 9.096918888470785e-07, "loss": 0.1026, "step": 5878 }, { "epoch": 0.8192015606493416, "grad_norm": 0.21530044078826904, "learning_rate": 9.083368849192042e-07, "loss": 0.1319, "step": 5879 }, { "epoch": 0.819340904340556, "grad_norm": 0.12084964662790298, "learning_rate": 9.069827900654604e-07, "loss": 0.1301, "step": 5880 }, { "epoch": 0.8194802480317703, "grad_norm": 0.17238342761993408, "learning_rate": 9.056296045866964e-07, "loss": 0.1286, "step": 5881 }, { "epoch": 0.8196195917229847, "grad_norm": 0.20268212258815765, "learning_rate": 9.042773287835566e-07, "loss": 0.1252, "step": 5882 }, { "epoch": 0.8197589354141991, "grad_norm": 0.1035042554140091, "learning_rate": 9.02925962956489e-07, "loss": 0.1182, "step": 5883 }, { "epoch": 0.8198982791054135, "grad_norm": 0.30681949853897095, "learning_rate": 9.015755074057336e-07, "loss": 0.1533, "step": 5884 }, { "epoch": 0.8200376227966278, "grad_norm": 0.14026519656181335, "learning_rate": 9.002259624313325e-07, "loss": 0.1143, "step": 5885 }, { "epoch": 0.8201769664878422, "grad_norm": 0.11430202424526215, "learning_rate": 8.98877328333122e-07, "loss": 0.1031, "step": 5886 }, { "epoch": 0.8203163101790566, "grad_norm": 0.179025799036026, "learning_rate": 8.975296054107396e-07, "loss": 0.1317, "step": 5887 }, { "epoch": 0.820455653870271, "grad_norm": 0.12148021906614304, "learning_rate": 8.961827939636198e-07, "loss": 0.1146, "step": 5888 }, { "epoch": 0.8205949975614854, "grad_norm": 0.10774272680282593, "learning_rate": 8.948368942909891e-07, "loss": 0.1025, "step": 5889 }, { "epoch": 0.8207343412526998, "grad_norm": 0.14303763210773468, "learning_rate": 8.934919066918779e-07, "loss": 0.1358, "step": 5890 }, { "epoch": 0.8208736849439142, "grad_norm": 0.1545470654964447, "learning_rate": 8.921478314651133e-07, "loss": 0.1272, "step": 5891 }, { "epoch": 0.8210130286351286, "grad_norm": 0.18316222727298737, "learning_rate": 8.908046689093153e-07, "loss": 0.1504, "step": 5892 }, { "epoch": 0.821152372326343, "grad_norm": 0.2330177277326584, "learning_rate": 8.894624193229051e-07, "loss": 0.1715, "step": 5893 }, { "epoch": 0.8212917160175573, "grad_norm": 0.12950827181339264, "learning_rate": 8.88121083004102e-07, "loss": 0.1117, "step": 5894 }, { "epoch": 0.8214310597087717, "grad_norm": 0.15080484747886658, "learning_rate": 8.867806602509177e-07, "loss": 0.1423, "step": 5895 }, { "epoch": 0.8215704033999861, "grad_norm": 0.2546241879463196, "learning_rate": 8.854411513611638e-07, "loss": 0.1457, "step": 5896 }, { "epoch": 0.8217097470912005, "grad_norm": 0.2712787687778473, "learning_rate": 8.841025566324485e-07, "loss": 0.1414, "step": 5897 }, { "epoch": 0.8218490907824149, "grad_norm": 0.13018451631069183, "learning_rate": 8.827648763621793e-07, "loss": 0.1, "step": 5898 }, { "epoch": 0.8219884344736292, "grad_norm": 0.18545931577682495, "learning_rate": 8.814281108475565e-07, "loss": 0.1225, "step": 5899 }, { "epoch": 0.8221277781648436, "grad_norm": 0.14201028645038605, "learning_rate": 8.800922603855772e-07, "loss": 0.1187, "step": 5900 }, { "epoch": 0.822267121856058, "grad_norm": 0.10908050090074539, "learning_rate": 8.787573252730386e-07, "loss": 0.1071, "step": 5901 }, { "epoch": 0.8224064655472724, "grad_norm": 0.2098396122455597, "learning_rate": 8.774233058065346e-07, "loss": 0.1341, "step": 5902 }, { "epoch": 0.8225458092384867, "grad_norm": 0.23961833119392395, "learning_rate": 8.760902022824502e-07, "loss": 0.1056, "step": 5903 }, { "epoch": 0.8226851529297011, "grad_norm": 0.22207286953926086, "learning_rate": 8.747580149969737e-07, "loss": 0.1362, "step": 5904 }, { "epoch": 0.8228244966209155, "grad_norm": 0.09953915327787399, "learning_rate": 8.734267442460842e-07, "loss": 0.1059, "step": 5905 }, { "epoch": 0.8229638403121299, "grad_norm": 0.18262818455696106, "learning_rate": 8.720963903255619e-07, "loss": 0.1654, "step": 5906 }, { "epoch": 0.8231031840033443, "grad_norm": 0.18264247477054596, "learning_rate": 8.707669535309793e-07, "loss": 0.1334, "step": 5907 }, { "epoch": 0.8232425276945586, "grad_norm": 0.14630348980426788, "learning_rate": 8.694384341577072e-07, "loss": 0.1294, "step": 5908 }, { "epoch": 0.823381871385773, "grad_norm": 0.2485896497964859, "learning_rate": 8.681108325009141e-07, "loss": 0.1141, "step": 5909 }, { "epoch": 0.8235212150769874, "grad_norm": 0.12474798411130905, "learning_rate": 8.667841488555617e-07, "loss": 0.1199, "step": 5910 }, { "epoch": 0.8236605587682018, "grad_norm": 0.2252672165632248, "learning_rate": 8.654583835164066e-07, "loss": 0.1589, "step": 5911 }, { "epoch": 0.8237999024594161, "grad_norm": 0.12421455979347229, "learning_rate": 8.641335367780057e-07, "loss": 0.0925, "step": 5912 }, { "epoch": 0.8239392461506305, "grad_norm": 0.16872061789035797, "learning_rate": 8.62809608934711e-07, "loss": 0.1225, "step": 5913 }, { "epoch": 0.8240785898418449, "grad_norm": 0.1730576604604721, "learning_rate": 8.614866002806665e-07, "loss": 0.1165, "step": 5914 }, { "epoch": 0.8242179335330593, "grad_norm": 0.22902226448059082, "learning_rate": 8.601645111098162e-07, "loss": 0.1505, "step": 5915 }, { "epoch": 0.8243572772242737, "grad_norm": 0.2684895098209381, "learning_rate": 8.588433417158965e-07, "loss": 0.1372, "step": 5916 }, { "epoch": 0.824496620915488, "grad_norm": 0.17732371389865875, "learning_rate": 8.575230923924432e-07, "loss": 0.1251, "step": 5917 }, { "epoch": 0.8246359646067024, "grad_norm": 0.25784990191459656, "learning_rate": 8.562037634327836e-07, "loss": 0.1962, "step": 5918 }, { "epoch": 0.8247753082979168, "grad_norm": 0.32159000635147095, "learning_rate": 8.548853551300429e-07, "loss": 0.1695, "step": 5919 }, { "epoch": 0.8249146519891312, "grad_norm": 0.16678482294082642, "learning_rate": 8.535678677771441e-07, "loss": 0.1411, "step": 5920 }, { "epoch": 0.8250539956803455, "grad_norm": 0.12538672983646393, "learning_rate": 8.522513016667982e-07, "loss": 0.1074, "step": 5921 }, { "epoch": 0.8251933393715599, "grad_norm": 0.16974245011806488, "learning_rate": 8.509356570915184e-07, "loss": 0.1382, "step": 5922 }, { "epoch": 0.8253326830627743, "grad_norm": 0.15950973331928253, "learning_rate": 8.496209343436101e-07, "loss": 0.1316, "step": 5923 }, { "epoch": 0.8254720267539887, "grad_norm": 0.14319464564323425, "learning_rate": 8.483071337151777e-07, "loss": 0.124, "step": 5924 }, { "epoch": 0.825611370445203, "grad_norm": 0.14360998570919037, "learning_rate": 8.469942554981148e-07, "loss": 0.1289, "step": 5925 }, { "epoch": 0.8257507141364174, "grad_norm": 0.1530519276857376, "learning_rate": 8.456822999841125e-07, "loss": 0.1098, "step": 5926 }, { "epoch": 0.8258900578276318, "grad_norm": 0.1642674058675766, "learning_rate": 8.443712674646598e-07, "loss": 0.1525, "step": 5927 }, { "epoch": 0.8260294015188462, "grad_norm": 0.11885785311460495, "learning_rate": 8.430611582310355e-07, "loss": 0.1057, "step": 5928 }, { "epoch": 0.8261687452100606, "grad_norm": 0.167777419090271, "learning_rate": 8.417519725743173e-07, "loss": 0.1592, "step": 5929 }, { "epoch": 0.8263080889012749, "grad_norm": 0.11051640659570694, "learning_rate": 8.40443710785378e-07, "loss": 0.1294, "step": 5930 }, { "epoch": 0.8264474325924894, "grad_norm": 0.25244060158729553, "learning_rate": 8.391363731548813e-07, "loss": 0.1391, "step": 5931 }, { "epoch": 0.8265867762837038, "grad_norm": 0.12153191864490509, "learning_rate": 8.378299599732875e-07, "loss": 0.1203, "step": 5932 }, { "epoch": 0.8267261199749182, "grad_norm": 0.2258511334657669, "learning_rate": 8.365244715308524e-07, "loss": 0.147, "step": 5933 }, { "epoch": 0.8268654636661326, "grad_norm": 0.10817539691925049, "learning_rate": 8.352199081176271e-07, "loss": 0.1025, "step": 5934 }, { "epoch": 0.8270048073573469, "grad_norm": 0.18137004971504211, "learning_rate": 8.339162700234537e-07, "loss": 0.1426, "step": 5935 }, { "epoch": 0.8271441510485613, "grad_norm": 0.12500056624412537, "learning_rate": 8.326135575379729e-07, "loss": 0.1139, "step": 5936 }, { "epoch": 0.8272834947397757, "grad_norm": 0.1078362762928009, "learning_rate": 8.313117709506158e-07, "loss": 0.111, "step": 5937 }, { "epoch": 0.8274228384309901, "grad_norm": 0.14711229503154755, "learning_rate": 8.30010910550611e-07, "loss": 0.1045, "step": 5938 }, { "epoch": 0.8275621821222044, "grad_norm": 0.11129968613386154, "learning_rate": 8.287109766269786e-07, "loss": 0.113, "step": 5939 }, { "epoch": 0.8277015258134188, "grad_norm": 0.11020352691411972, "learning_rate": 8.274119694685345e-07, "loss": 0.1115, "step": 5940 }, { "epoch": 0.8278408695046332, "grad_norm": 0.1275556981563568, "learning_rate": 8.26113889363891e-07, "loss": 0.13, "step": 5941 }, { "epoch": 0.8279802131958476, "grad_norm": 0.14078135788440704, "learning_rate": 8.248167366014493e-07, "loss": 0.1125, "step": 5942 }, { "epoch": 0.828119556887062, "grad_norm": 0.13245916366577148, "learning_rate": 8.235205114694067e-07, "loss": 0.1126, "step": 5943 }, { "epoch": 0.8282589005782763, "grad_norm": 0.16443388164043427, "learning_rate": 8.222252142557557e-07, "loss": 0.1158, "step": 5944 }, { "epoch": 0.8283982442694907, "grad_norm": 0.10383280366659164, "learning_rate": 8.209308452482829e-07, "loss": 0.1108, "step": 5945 }, { "epoch": 0.8285375879607051, "grad_norm": 0.1453784853219986, "learning_rate": 8.196374047345668e-07, "loss": 0.123, "step": 5946 }, { "epoch": 0.8286769316519195, "grad_norm": 0.1633397936820984, "learning_rate": 8.183448930019783e-07, "loss": 0.1429, "step": 5947 }, { "epoch": 0.8288162753431338, "grad_norm": 0.2043680101633072, "learning_rate": 8.170533103376865e-07, "loss": 0.1375, "step": 5948 }, { "epoch": 0.8289556190343482, "grad_norm": 0.12213439494371414, "learning_rate": 8.157626570286515e-07, "loss": 0.1117, "step": 5949 }, { "epoch": 0.8290949627255626, "grad_norm": 0.2954792082309723, "learning_rate": 8.144729333616259e-07, "loss": 0.1285, "step": 5950 }, { "epoch": 0.829234306416777, "grad_norm": 0.20696911215782166, "learning_rate": 8.131841396231566e-07, "loss": 0.1397, "step": 5951 }, { "epoch": 0.8293736501079914, "grad_norm": 0.14160947501659393, "learning_rate": 8.118962760995874e-07, "loss": 0.1309, "step": 5952 }, { "epoch": 0.8295129937992057, "grad_norm": 0.21921856701374054, "learning_rate": 8.106093430770473e-07, "loss": 0.1716, "step": 5953 }, { "epoch": 0.8296523374904201, "grad_norm": 0.12651853263378143, "learning_rate": 8.093233408414658e-07, "loss": 0.1117, "step": 5954 }, { "epoch": 0.8297916811816345, "grad_norm": 0.21308112144470215, "learning_rate": 8.080382696785627e-07, "loss": 0.1398, "step": 5955 }, { "epoch": 0.8299310248728489, "grad_norm": 0.1013365313410759, "learning_rate": 8.067541298738535e-07, "loss": 0.0998, "step": 5956 }, { "epoch": 0.8300703685640632, "grad_norm": 0.15154382586479187, "learning_rate": 8.054709217126433e-07, "loss": 0.1422, "step": 5957 }, { "epoch": 0.8302097122552776, "grad_norm": 0.14964379370212555, "learning_rate": 8.041886454800307e-07, "loss": 0.1144, "step": 5958 }, { "epoch": 0.830349055946492, "grad_norm": 0.18793149292469025, "learning_rate": 8.029073014609096e-07, "loss": 0.1268, "step": 5959 }, { "epoch": 0.8304883996377064, "grad_norm": 0.1820656955242157, "learning_rate": 8.016268899399643e-07, "loss": 0.1593, "step": 5960 }, { "epoch": 0.8306277433289208, "grad_norm": 0.10774987190961838, "learning_rate": 8.00347411201673e-07, "loss": 0.0962, "step": 5961 }, { "epoch": 0.8307670870201351, "grad_norm": 0.11513586342334747, "learning_rate": 7.990688655303086e-07, "loss": 0.0918, "step": 5962 }, { "epoch": 0.8309064307113495, "grad_norm": 0.14930233359336853, "learning_rate": 7.977912532099336e-07, "loss": 0.1356, "step": 5963 }, { "epoch": 0.8310457744025639, "grad_norm": 0.13299867510795593, "learning_rate": 7.965145745244029e-07, "loss": 0.1351, "step": 5964 }, { "epoch": 0.8311851180937783, "grad_norm": 0.12571054697036743, "learning_rate": 7.95238829757366e-07, "loss": 0.1312, "step": 5965 }, { "epoch": 0.8313244617849926, "grad_norm": 0.15089814364910126, "learning_rate": 7.939640191922665e-07, "loss": 0.1143, "step": 5966 }, { "epoch": 0.831463805476207, "grad_norm": 0.24643474817276, "learning_rate": 7.926901431123362e-07, "loss": 0.1717, "step": 5967 }, { "epoch": 0.8316031491674214, "grad_norm": 0.11435090005397797, "learning_rate": 7.914172018006006e-07, "loss": 0.1315, "step": 5968 }, { "epoch": 0.8317424928586358, "grad_norm": 0.2105342149734497, "learning_rate": 7.901451955398792e-07, "loss": 0.1475, "step": 5969 }, { "epoch": 0.8318818365498502, "grad_norm": 0.12433813512325287, "learning_rate": 7.88874124612784e-07, "loss": 0.0975, "step": 5970 }, { "epoch": 0.8320211802410646, "grad_norm": 0.252409964799881, "learning_rate": 7.876039893017151e-07, "loss": 0.1442, "step": 5971 }, { "epoch": 0.832160523932279, "grad_norm": 0.3455415964126587, "learning_rate": 7.863347898888696e-07, "loss": 0.202, "step": 5972 }, { "epoch": 0.8322998676234934, "grad_norm": 0.14079779386520386, "learning_rate": 7.850665266562352e-07, "loss": 0.1241, "step": 5973 }, { "epoch": 0.8324392113147078, "grad_norm": 0.13787584006786346, "learning_rate": 7.837991998855899e-07, "loss": 0.106, "step": 5974 }, { "epoch": 0.8325785550059221, "grad_norm": 0.3271428346633911, "learning_rate": 7.825328098585039e-07, "loss": 0.1958, "step": 5975 }, { "epoch": 0.8327178986971365, "grad_norm": 0.17474934458732605, "learning_rate": 7.812673568563406e-07, "loss": 0.1453, "step": 5976 }, { "epoch": 0.8328572423883509, "grad_norm": 0.21981962025165558, "learning_rate": 7.800028411602572e-07, "loss": 0.1529, "step": 5977 }, { "epoch": 0.8329965860795653, "grad_norm": 0.1351296305656433, "learning_rate": 7.78739263051198e-07, "loss": 0.1244, "step": 5978 }, { "epoch": 0.8331359297707797, "grad_norm": 0.16542483866214752, "learning_rate": 7.774766228099001e-07, "loss": 0.1223, "step": 5979 }, { "epoch": 0.833275273461994, "grad_norm": 0.1471891552209854, "learning_rate": 7.762149207168951e-07, "loss": 0.1096, "step": 5980 }, { "epoch": 0.8334146171532084, "grad_norm": 0.10209835320711136, "learning_rate": 7.749541570525054e-07, "loss": 0.1153, "step": 5981 }, { "epoch": 0.8335539608444228, "grad_norm": 0.12030816823244095, "learning_rate": 7.736943320968409e-07, "loss": 0.1085, "step": 5982 }, { "epoch": 0.8336933045356372, "grad_norm": 0.10736312717199326, "learning_rate": 7.724354461298089e-07, "loss": 0.114, "step": 5983 }, { "epoch": 0.8338326482268515, "grad_norm": 0.11244098097085953, "learning_rate": 7.711774994311027e-07, "loss": 0.0886, "step": 5984 }, { "epoch": 0.8339719919180659, "grad_norm": 0.16248129308223724, "learning_rate": 7.699204922802123e-07, "loss": 0.126, "step": 5985 }, { "epoch": 0.8341113356092803, "grad_norm": 0.17113284766674042, "learning_rate": 7.686644249564124e-07, "loss": 0.1452, "step": 5986 }, { "epoch": 0.8342506793004947, "grad_norm": 0.1512826383113861, "learning_rate": 7.674092977387737e-07, "loss": 0.1293, "step": 5987 }, { "epoch": 0.8343900229917091, "grad_norm": 0.21038223803043365, "learning_rate": 7.661551109061593e-07, "loss": 0.1609, "step": 5988 }, { "epoch": 0.8345293666829234, "grad_norm": 0.09960538893938065, "learning_rate": 7.649018647372186e-07, "loss": 0.0998, "step": 5989 }, { "epoch": 0.8346687103741378, "grad_norm": 0.15655803680419922, "learning_rate": 7.636495595103938e-07, "loss": 0.123, "step": 5990 }, { "epoch": 0.8348080540653522, "grad_norm": 0.20886319875717163, "learning_rate": 7.6239819550392e-07, "loss": 0.1215, "step": 5991 }, { "epoch": 0.8349473977565666, "grad_norm": 0.13085535168647766, "learning_rate": 7.611477729958205e-07, "loss": 0.0981, "step": 5992 }, { "epoch": 0.835086741447781, "grad_norm": 0.16534237563610077, "learning_rate": 7.598982922639109e-07, "loss": 0.1192, "step": 5993 }, { "epoch": 0.8352260851389953, "grad_norm": 0.18018454313278198, "learning_rate": 7.586497535857984e-07, "loss": 0.1272, "step": 5994 }, { "epoch": 0.8353654288302097, "grad_norm": 0.1462486982345581, "learning_rate": 7.574021572388795e-07, "loss": 0.1306, "step": 5995 }, { "epoch": 0.8355047725214241, "grad_norm": 0.12867462635040283, "learning_rate": 7.561555035003398e-07, "loss": 0.1082, "step": 5996 }, { "epoch": 0.8356441162126385, "grad_norm": 0.14827319979667664, "learning_rate": 7.549097926471583e-07, "loss": 0.1269, "step": 5997 }, { "epoch": 0.8357834599038528, "grad_norm": 0.12822172045707703, "learning_rate": 7.536650249561056e-07, "loss": 0.1247, "step": 5998 }, { "epoch": 0.8359228035950672, "grad_norm": 0.16062234342098236, "learning_rate": 7.524212007037385e-07, "loss": 0.1502, "step": 5999 }, { "epoch": 0.8360621472862816, "grad_norm": 0.17730556428432465, "learning_rate": 7.511783201664053e-07, "loss": 0.1651, "step": 6000 }, { "epoch": 0.836201490977496, "grad_norm": 0.13952046632766724, "learning_rate": 7.499363836202472e-07, "loss": 0.1137, "step": 6001 }, { "epoch": 0.8363408346687103, "grad_norm": 0.14848729968070984, "learning_rate": 7.486953913411954e-07, "loss": 0.1153, "step": 6002 }, { "epoch": 0.8364801783599247, "grad_norm": 0.12033906579017639, "learning_rate": 7.474553436049675e-07, "loss": 0.1225, "step": 6003 }, { "epoch": 0.8366195220511391, "grad_norm": 0.13707345724105835, "learning_rate": 7.462162406870766e-07, "loss": 0.1212, "step": 6004 }, { "epoch": 0.8367588657423535, "grad_norm": 0.18474648892879486, "learning_rate": 7.4497808286282e-07, "loss": 0.1558, "step": 6005 }, { "epoch": 0.8368982094335679, "grad_norm": 0.15990744531154633, "learning_rate": 7.437408704072907e-07, "loss": 0.1346, "step": 6006 }, { "epoch": 0.8370375531247822, "grad_norm": 0.11344756186008453, "learning_rate": 7.425046035953665e-07, "loss": 0.111, "step": 6007 }, { "epoch": 0.8371768968159966, "grad_norm": 0.11627466976642609, "learning_rate": 7.412692827017193e-07, "loss": 0.1374, "step": 6008 }, { "epoch": 0.837316240507211, "grad_norm": 0.14479629695415497, "learning_rate": 7.400349080008107e-07, "loss": 0.1313, "step": 6009 }, { "epoch": 0.8374555841984254, "grad_norm": 0.10593710839748383, "learning_rate": 7.38801479766888e-07, "loss": 0.1195, "step": 6010 }, { "epoch": 0.8375949278896399, "grad_norm": 0.1524559110403061, "learning_rate": 7.375689982739915e-07, "loss": 0.1165, "step": 6011 }, { "epoch": 0.8377342715808542, "grad_norm": 0.11515770852565765, "learning_rate": 7.363374637959498e-07, "loss": 0.1122, "step": 6012 }, { "epoch": 0.8378736152720686, "grad_norm": 0.151605486869812, "learning_rate": 7.35106876606384e-07, "loss": 0.1484, "step": 6013 }, { "epoch": 0.838012958963283, "grad_norm": 0.09922513365745544, "learning_rate": 7.338772369787001e-07, "loss": 0.1122, "step": 6014 }, { "epoch": 0.8381523026544974, "grad_norm": 0.12902212142944336, "learning_rate": 7.326485451860976e-07, "loss": 0.1245, "step": 6015 }, { "epoch": 0.8382916463457117, "grad_norm": 0.17751164734363556, "learning_rate": 7.314208015015623e-07, "loss": 0.1161, "step": 6016 }, { "epoch": 0.8384309900369261, "grad_norm": 0.27058812975883484, "learning_rate": 7.301940061978724e-07, "loss": 0.158, "step": 6017 }, { "epoch": 0.8385703337281405, "grad_norm": 0.1065264344215393, "learning_rate": 7.289681595475922e-07, "loss": 0.1096, "step": 6018 }, { "epoch": 0.8387096774193549, "grad_norm": 0.21268923580646515, "learning_rate": 7.277432618230773e-07, "loss": 0.1489, "step": 6019 }, { "epoch": 0.8388490211105692, "grad_norm": 0.10815638303756714, "learning_rate": 7.265193132964749e-07, "loss": 0.1176, "step": 6020 }, { "epoch": 0.8389883648017836, "grad_norm": 0.12043194472789764, "learning_rate": 7.252963142397134e-07, "loss": 0.1082, "step": 6021 }, { "epoch": 0.839127708492998, "grad_norm": 0.1613629311323166, "learning_rate": 7.24074264924518e-07, "loss": 0.1229, "step": 6022 }, { "epoch": 0.8392670521842124, "grad_norm": 0.22465597093105316, "learning_rate": 7.228531656223997e-07, "loss": 0.1619, "step": 6023 }, { "epoch": 0.8394063958754268, "grad_norm": 0.14324069023132324, "learning_rate": 7.216330166046603e-07, "loss": 0.1384, "step": 6024 }, { "epoch": 0.8395457395666411, "grad_norm": 0.10478170216083527, "learning_rate": 7.204138181423881e-07, "loss": 0.0996, "step": 6025 }, { "epoch": 0.8396850832578555, "grad_norm": 0.15089471638202667, "learning_rate": 7.191955705064591e-07, "loss": 0.1487, "step": 6026 }, { "epoch": 0.8398244269490699, "grad_norm": 0.10656114667654037, "learning_rate": 7.179782739675434e-07, "loss": 0.1122, "step": 6027 }, { "epoch": 0.8399637706402843, "grad_norm": 0.13826030492782593, "learning_rate": 7.167619287960942e-07, "loss": 0.1388, "step": 6028 }, { "epoch": 0.8401031143314986, "grad_norm": 0.13235999643802643, "learning_rate": 7.155465352623559e-07, "loss": 0.1144, "step": 6029 }, { "epoch": 0.840242458022713, "grad_norm": 0.19558745622634888, "learning_rate": 7.143320936363629e-07, "loss": 0.1433, "step": 6030 }, { "epoch": 0.8403818017139274, "grad_norm": 0.13697612285614014, "learning_rate": 7.131186041879357e-07, "loss": 0.131, "step": 6031 }, { "epoch": 0.8405211454051418, "grad_norm": 0.1859910488128662, "learning_rate": 7.119060671866817e-07, "loss": 0.1445, "step": 6032 }, { "epoch": 0.8406604890963562, "grad_norm": 0.14340536296367645, "learning_rate": 7.106944829020013e-07, "loss": 0.128, "step": 6033 }, { "epoch": 0.8407998327875705, "grad_norm": 0.16870491206645966, "learning_rate": 7.094838516030811e-07, "loss": 0.1395, "step": 6034 }, { "epoch": 0.8409391764787849, "grad_norm": 0.20542046427726746, "learning_rate": 7.082741735588938e-07, "loss": 0.16, "step": 6035 }, { "epoch": 0.8410785201699993, "grad_norm": 0.10801863670349121, "learning_rate": 7.070654490382045e-07, "loss": 0.1029, "step": 6036 }, { "epoch": 0.8412178638612137, "grad_norm": 0.2021772563457489, "learning_rate": 7.058576783095622e-07, "loss": 0.1379, "step": 6037 }, { "epoch": 0.841357207552428, "grad_norm": 0.1669193059206009, "learning_rate": 7.046508616413078e-07, "loss": 0.1118, "step": 6038 }, { "epoch": 0.8414965512436424, "grad_norm": 0.14471299946308136, "learning_rate": 7.034449993015663e-07, "loss": 0.1292, "step": 6039 }, { "epoch": 0.8416358949348568, "grad_norm": 0.16022001206874847, "learning_rate": 7.022400915582539e-07, "loss": 0.1242, "step": 6040 }, { "epoch": 0.8417752386260712, "grad_norm": 0.16192469000816345, "learning_rate": 7.010361386790748e-07, "loss": 0.1528, "step": 6041 }, { "epoch": 0.8419145823172856, "grad_norm": 0.13829603791236877, "learning_rate": 6.998331409315184e-07, "loss": 0.1289, "step": 6042 }, { "epoch": 0.8420539260084999, "grad_norm": 0.14482057094573975, "learning_rate": 6.986310985828626e-07, "loss": 0.1433, "step": 6043 }, { "epoch": 0.8421932696997143, "grad_norm": 0.12023743987083435, "learning_rate": 6.974300119001754e-07, "loss": 0.1182, "step": 6044 }, { "epoch": 0.8423326133909287, "grad_norm": 0.18511410057544708, "learning_rate": 6.962298811503104e-07, "loss": 0.1426, "step": 6045 }, { "epoch": 0.8424719570821431, "grad_norm": 0.09144277125597, "learning_rate": 6.950307065999085e-07, "loss": 0.0982, "step": 6046 }, { "epoch": 0.8426113007733574, "grad_norm": 0.12723025679588318, "learning_rate": 6.938324885154007e-07, "loss": 0.1023, "step": 6047 }, { "epoch": 0.8427506444645718, "grad_norm": 0.11802301555871964, "learning_rate": 6.92635227163001e-07, "loss": 0.1164, "step": 6048 }, { "epoch": 0.8428899881557862, "grad_norm": 0.19732819497585297, "learning_rate": 6.914389228087165e-07, "loss": 0.1561, "step": 6049 }, { "epoch": 0.8430293318470006, "grad_norm": 0.1860279142856598, "learning_rate": 6.902435757183357e-07, "loss": 0.1102, "step": 6050 }, { "epoch": 0.8431686755382151, "grad_norm": 0.1484682857990265, "learning_rate": 6.890491861574389e-07, "loss": 0.116, "step": 6051 }, { "epoch": 0.8433080192294294, "grad_norm": 0.1805318146944046, "learning_rate": 6.87855754391395e-07, "loss": 0.1498, "step": 6052 }, { "epoch": 0.8434473629206438, "grad_norm": 0.1813075840473175, "learning_rate": 6.866632806853518e-07, "loss": 0.1476, "step": 6053 }, { "epoch": 0.8435867066118582, "grad_norm": 0.1019660159945488, "learning_rate": 6.854717653042531e-07, "loss": 0.0962, "step": 6054 }, { "epoch": 0.8437260503030726, "grad_norm": 0.20194654166698456, "learning_rate": 6.842812085128253e-07, "loss": 0.1467, "step": 6055 }, { "epoch": 0.843865393994287, "grad_norm": 0.16123968362808228, "learning_rate": 6.830916105755847e-07, "loss": 0.1313, "step": 6056 }, { "epoch": 0.8440047376855013, "grad_norm": 0.1020071730017662, "learning_rate": 6.819029717568315e-07, "loss": 0.1131, "step": 6057 }, { "epoch": 0.8441440813767157, "grad_norm": 0.17073233425617218, "learning_rate": 6.807152923206528e-07, "loss": 0.1273, "step": 6058 }, { "epoch": 0.8442834250679301, "grad_norm": 0.2168717086315155, "learning_rate": 6.795285725309269e-07, "loss": 0.1563, "step": 6059 }, { "epoch": 0.8444227687591445, "grad_norm": 0.2111554592847824, "learning_rate": 6.783428126513125e-07, "loss": 0.1583, "step": 6060 }, { "epoch": 0.8445621124503588, "grad_norm": 0.13772372901439667, "learning_rate": 6.771580129452604e-07, "loss": 0.1233, "step": 6061 }, { "epoch": 0.8447014561415732, "grad_norm": 0.15901492536067963, "learning_rate": 6.759741736760062e-07, "loss": 0.1827, "step": 6062 }, { "epoch": 0.8448407998327876, "grad_norm": 0.16055138409137726, "learning_rate": 6.747912951065722e-07, "loss": 0.1275, "step": 6063 }, { "epoch": 0.844980143524002, "grad_norm": 0.11528843641281128, "learning_rate": 6.736093774997643e-07, "loss": 0.1189, "step": 6064 }, { "epoch": 0.8451194872152163, "grad_norm": 0.15088000893592834, "learning_rate": 6.724284211181803e-07, "loss": 0.138, "step": 6065 }, { "epoch": 0.8452588309064307, "grad_norm": 0.10352124273777008, "learning_rate": 6.712484262242014e-07, "loss": 0.1141, "step": 6066 }, { "epoch": 0.8453981745976451, "grad_norm": 0.12399209290742874, "learning_rate": 6.700693930799945e-07, "loss": 0.1229, "step": 6067 }, { "epoch": 0.8455375182888595, "grad_norm": 0.08784166723489761, "learning_rate": 6.688913219475158e-07, "loss": 0.1018, "step": 6068 }, { "epoch": 0.8456768619800739, "grad_norm": 0.14729273319244385, "learning_rate": 6.677142130885028e-07, "loss": 0.13, "step": 6069 }, { "epoch": 0.8458162056712882, "grad_norm": 0.1427668333053589, "learning_rate": 6.665380667644849e-07, "loss": 0.1148, "step": 6070 }, { "epoch": 0.8459555493625026, "grad_norm": 0.11961793154478073, "learning_rate": 6.653628832367731e-07, "loss": 0.1083, "step": 6071 }, { "epoch": 0.846094893053717, "grad_norm": 0.13420946896076202, "learning_rate": 6.641886627664673e-07, "loss": 0.09, "step": 6072 }, { "epoch": 0.8462342367449314, "grad_norm": 0.1132630854845047, "learning_rate": 6.630154056144533e-07, "loss": 0.1118, "step": 6073 }, { "epoch": 0.8463735804361457, "grad_norm": 0.18152688443660736, "learning_rate": 6.618431120414015e-07, "loss": 0.1423, "step": 6074 }, { "epoch": 0.8465129241273601, "grad_norm": 0.15222598612308502, "learning_rate": 6.606717823077669e-07, "loss": 0.1215, "step": 6075 }, { "epoch": 0.8466522678185745, "grad_norm": 0.14404770731925964, "learning_rate": 6.59501416673794e-07, "loss": 0.1315, "step": 6076 }, { "epoch": 0.8467916115097889, "grad_norm": 0.18004706501960754, "learning_rate": 6.583320153995121e-07, "loss": 0.131, "step": 6077 }, { "epoch": 0.8469309552010033, "grad_norm": 0.2063717246055603, "learning_rate": 6.571635787447339e-07, "loss": 0.1457, "step": 6078 }, { "epoch": 0.8470702988922176, "grad_norm": 0.17503774166107178, "learning_rate": 6.559961069690596e-07, "loss": 0.159, "step": 6079 }, { "epoch": 0.847209642583432, "grad_norm": 0.23775525391101837, "learning_rate": 6.548296003318744e-07, "loss": 0.1468, "step": 6080 }, { "epoch": 0.8473489862746464, "grad_norm": 0.18794755637645721, "learning_rate": 6.536640590923515e-07, "loss": 0.123, "step": 6081 }, { "epoch": 0.8474883299658608, "grad_norm": 0.21597130596637726, "learning_rate": 6.52499483509445e-07, "loss": 0.1493, "step": 6082 }, { "epoch": 0.8476276736570751, "grad_norm": 0.1438799351453781, "learning_rate": 6.51335873841899e-07, "loss": 0.1299, "step": 6083 }, { "epoch": 0.8477670173482895, "grad_norm": 0.1423553228378296, "learning_rate": 6.501732303482394e-07, "loss": 0.1332, "step": 6084 }, { "epoch": 0.8479063610395039, "grad_norm": 0.10862409323453903, "learning_rate": 6.490115532867808e-07, "loss": 0.1174, "step": 6085 }, { "epoch": 0.8480457047307183, "grad_norm": 0.1652476191520691, "learning_rate": 6.478508429156189e-07, "loss": 0.1204, "step": 6086 }, { "epoch": 0.8481850484219327, "grad_norm": 0.1069808304309845, "learning_rate": 6.466910994926384e-07, "loss": 0.0956, "step": 6087 }, { "epoch": 0.848324392113147, "grad_norm": 0.10517771542072296, "learning_rate": 6.455323232755095e-07, "loss": 0.1094, "step": 6088 }, { "epoch": 0.8484637358043614, "grad_norm": 0.16165852546691895, "learning_rate": 6.44374514521684e-07, "loss": 0.1135, "step": 6089 }, { "epoch": 0.8486030794955758, "grad_norm": 0.11427518725395203, "learning_rate": 6.432176734883994e-07, "loss": 0.095, "step": 6090 }, { "epoch": 0.8487424231867903, "grad_norm": 0.1196155846118927, "learning_rate": 6.420618004326818e-07, "loss": 0.1266, "step": 6091 }, { "epoch": 0.8488817668780047, "grad_norm": 0.13866838812828064, "learning_rate": 6.409068956113379e-07, "loss": 0.1155, "step": 6092 }, { "epoch": 0.849021110569219, "grad_norm": 0.19049839675426483, "learning_rate": 6.397529592809615e-07, "loss": 0.149, "step": 6093 }, { "epoch": 0.8491604542604334, "grad_norm": 0.1770039051771164, "learning_rate": 6.38599991697933e-07, "loss": 0.1557, "step": 6094 }, { "epoch": 0.8492997979516478, "grad_norm": 0.115740105509758, "learning_rate": 6.374479931184141e-07, "loss": 0.1103, "step": 6095 }, { "epoch": 0.8494391416428622, "grad_norm": 0.17591600120067596, "learning_rate": 6.362969637983507e-07, "loss": 0.1417, "step": 6096 }, { "epoch": 0.8495784853340765, "grad_norm": 0.11741365492343903, "learning_rate": 6.351469039934771e-07, "loss": 0.1135, "step": 6097 }, { "epoch": 0.8497178290252909, "grad_norm": 0.17033463716506958, "learning_rate": 6.339978139593117e-07, "loss": 0.1617, "step": 6098 }, { "epoch": 0.8498571727165053, "grad_norm": 0.15538732707500458, "learning_rate": 6.328496939511541e-07, "loss": 0.1433, "step": 6099 }, { "epoch": 0.8499965164077197, "grad_norm": 0.1556319147348404, "learning_rate": 6.317025442240893e-07, "loss": 0.1378, "step": 6100 }, { "epoch": 0.850135860098934, "grad_norm": 0.1538032740354538, "learning_rate": 6.305563650329899e-07, "loss": 0.1375, "step": 6101 }, { "epoch": 0.8502752037901484, "grad_norm": 0.18707934021949768, "learning_rate": 6.294111566325106e-07, "loss": 0.1243, "step": 6102 }, { "epoch": 0.8504145474813628, "grad_norm": 0.14100530743598938, "learning_rate": 6.282669192770896e-07, "loss": 0.1311, "step": 6103 }, { "epoch": 0.8505538911725772, "grad_norm": 0.11959777772426605, "learning_rate": 6.271236532209502e-07, "loss": 0.1169, "step": 6104 }, { "epoch": 0.8506932348637916, "grad_norm": 0.15532797574996948, "learning_rate": 6.259813587181024e-07, "loss": 0.1731, "step": 6105 }, { "epoch": 0.8508325785550059, "grad_norm": 0.13084329664707184, "learning_rate": 6.248400360223355e-07, "loss": 0.1062, "step": 6106 }, { "epoch": 0.8509719222462203, "grad_norm": 0.15722505748271942, "learning_rate": 6.236996853872251e-07, "loss": 0.1221, "step": 6107 }, { "epoch": 0.8511112659374347, "grad_norm": 0.1539371758699417, "learning_rate": 6.225603070661318e-07, "loss": 0.1312, "step": 6108 }, { "epoch": 0.8512506096286491, "grad_norm": 0.16916462779045105, "learning_rate": 6.214219013122008e-07, "loss": 0.1264, "step": 6109 }, { "epoch": 0.8513899533198634, "grad_norm": 0.11995135247707367, "learning_rate": 6.202844683783587e-07, "loss": 0.1138, "step": 6110 }, { "epoch": 0.8515292970110778, "grad_norm": 0.16250400245189667, "learning_rate": 6.191480085173163e-07, "loss": 0.1448, "step": 6111 }, { "epoch": 0.8516686407022922, "grad_norm": 0.12548132240772247, "learning_rate": 6.180125219815697e-07, "loss": 0.0859, "step": 6112 }, { "epoch": 0.8518079843935066, "grad_norm": 0.1669294685125351, "learning_rate": 6.168780090233994e-07, "loss": 0.1162, "step": 6113 }, { "epoch": 0.851947328084721, "grad_norm": 0.1295587569475174, "learning_rate": 6.157444698948656e-07, "loss": 0.1256, "step": 6114 }, { "epoch": 0.8520866717759353, "grad_norm": 0.16287444531917572, "learning_rate": 6.146119048478177e-07, "loss": 0.147, "step": 6115 }, { "epoch": 0.8522260154671497, "grad_norm": 0.15716011822223663, "learning_rate": 6.134803141338835e-07, "loss": 0.1544, "step": 6116 }, { "epoch": 0.8523653591583641, "grad_norm": 0.18010824918746948, "learning_rate": 6.123496980044785e-07, "loss": 0.137, "step": 6117 }, { "epoch": 0.8525047028495785, "grad_norm": 0.1747845858335495, "learning_rate": 6.112200567107978e-07, "loss": 0.1523, "step": 6118 }, { "epoch": 0.8526440465407928, "grad_norm": 0.07857047021389008, "learning_rate": 6.10091390503823e-07, "loss": 0.0948, "step": 6119 }, { "epoch": 0.8527833902320072, "grad_norm": 0.1421058177947998, "learning_rate": 6.089636996343202e-07, "loss": 0.1339, "step": 6120 }, { "epoch": 0.8529227339232216, "grad_norm": 0.24599392712116241, "learning_rate": 6.07836984352832e-07, "loss": 0.1401, "step": 6121 }, { "epoch": 0.853062077614436, "grad_norm": 0.15914317965507507, "learning_rate": 6.067112449096907e-07, "loss": 0.1234, "step": 6122 }, { "epoch": 0.8532014213056504, "grad_norm": 0.15385863184928894, "learning_rate": 6.055864815550106e-07, "loss": 0.1276, "step": 6123 }, { "epoch": 0.8533407649968647, "grad_norm": 0.16813480854034424, "learning_rate": 6.044626945386894e-07, "loss": 0.1107, "step": 6124 }, { "epoch": 0.8534801086880791, "grad_norm": 0.08729193359613419, "learning_rate": 6.033398841104043e-07, "loss": 0.0955, "step": 6125 }, { "epoch": 0.8536194523792935, "grad_norm": 0.12186482548713684, "learning_rate": 6.022180505196207e-07, "loss": 0.1073, "step": 6126 }, { "epoch": 0.8537587960705079, "grad_norm": 0.18590706586837769, "learning_rate": 6.01097194015583e-07, "loss": 0.1497, "step": 6127 }, { "epoch": 0.8538981397617222, "grad_norm": 0.3499833941459656, "learning_rate": 5.999773148473193e-07, "loss": 0.1323, "step": 6128 }, { "epoch": 0.8540374834529366, "grad_norm": 0.17456252872943878, "learning_rate": 5.988584132636421e-07, "loss": 0.1244, "step": 6129 }, { "epoch": 0.854176827144151, "grad_norm": 0.26852908730506897, "learning_rate": 5.977404895131467e-07, "loss": 0.1993, "step": 6130 }, { "epoch": 0.8543161708353654, "grad_norm": 0.1346278041601181, "learning_rate": 5.966235438442086e-07, "loss": 0.1135, "step": 6131 }, { "epoch": 0.8544555145265799, "grad_norm": 0.3608441948890686, "learning_rate": 5.955075765049878e-07, "loss": 0.145, "step": 6132 }, { "epoch": 0.8545948582177942, "grad_norm": 0.17939963936805725, "learning_rate": 5.943925877434276e-07, "loss": 0.1335, "step": 6133 }, { "epoch": 0.8547342019090086, "grad_norm": 0.17755906283855438, "learning_rate": 5.932785778072531e-07, "loss": 0.1093, "step": 6134 }, { "epoch": 0.854873545600223, "grad_norm": 0.18590502440929413, "learning_rate": 5.921655469439708e-07, "loss": 0.1567, "step": 6135 }, { "epoch": 0.8550128892914374, "grad_norm": 0.1602751910686493, "learning_rate": 5.910534954008718e-07, "loss": 0.1214, "step": 6136 }, { "epoch": 0.8551522329826517, "grad_norm": 0.15639430284500122, "learning_rate": 5.899424234250278e-07, "loss": 0.1384, "step": 6137 }, { "epoch": 0.8552915766738661, "grad_norm": 0.15791773796081543, "learning_rate": 5.888323312632948e-07, "loss": 0.1191, "step": 6138 }, { "epoch": 0.8554309203650805, "grad_norm": 0.2186843305826187, "learning_rate": 5.877232191623078e-07, "loss": 0.1266, "step": 6139 }, { "epoch": 0.8555702640562949, "grad_norm": 0.14464975893497467, "learning_rate": 5.866150873684878e-07, "loss": 0.131, "step": 6140 }, { "epoch": 0.8557096077475093, "grad_norm": 0.1743377298116684, "learning_rate": 5.855079361280374e-07, "loss": 0.1621, "step": 6141 }, { "epoch": 0.8558489514387236, "grad_norm": 0.16748115420341492, "learning_rate": 5.844017656869389e-07, "loss": 0.1386, "step": 6142 }, { "epoch": 0.855988295129938, "grad_norm": 0.06337621808052063, "learning_rate": 5.83296576290957e-07, "loss": 0.082, "step": 6143 }, { "epoch": 0.8561276388211524, "grad_norm": 0.142243430018425, "learning_rate": 5.821923681856406e-07, "loss": 0.1343, "step": 6144 }, { "epoch": 0.8562669825123668, "grad_norm": 0.1337958425283432, "learning_rate": 5.810891416163211e-07, "loss": 0.1175, "step": 6145 }, { "epoch": 0.8564063262035811, "grad_norm": 0.30919787287712097, "learning_rate": 5.799868968281075e-07, "loss": 0.1698, "step": 6146 }, { "epoch": 0.8565456698947955, "grad_norm": 0.14890222251415253, "learning_rate": 5.788856340658966e-07, "loss": 0.1188, "step": 6147 }, { "epoch": 0.8566850135860099, "grad_norm": 0.12089080363512039, "learning_rate": 5.777853535743605e-07, "loss": 0.0997, "step": 6148 }, { "epoch": 0.8568243572772243, "grad_norm": 0.1765058934688568, "learning_rate": 5.766860555979586e-07, "loss": 0.1081, "step": 6149 }, { "epoch": 0.8569637009684387, "grad_norm": 0.31923115253448486, "learning_rate": 5.755877403809284e-07, "loss": 0.1618, "step": 6150 }, { "epoch": 0.857103044659653, "grad_norm": 0.08971717953681946, "learning_rate": 5.744904081672914e-07, "loss": 0.1064, "step": 6151 }, { "epoch": 0.8572423883508674, "grad_norm": 0.13890528678894043, "learning_rate": 5.733940592008519e-07, "loss": 0.0951, "step": 6152 }, { "epoch": 0.8573817320420818, "grad_norm": 0.13404907286167145, "learning_rate": 5.72298693725189e-07, "loss": 0.1226, "step": 6153 }, { "epoch": 0.8575210757332962, "grad_norm": 0.1355811357498169, "learning_rate": 5.712043119836702e-07, "loss": 0.1169, "step": 6154 }, { "epoch": 0.8576604194245105, "grad_norm": 0.2019481360912323, "learning_rate": 5.701109142194422e-07, "loss": 0.173, "step": 6155 }, { "epoch": 0.8577997631157249, "grad_norm": 0.11707372963428497, "learning_rate": 5.69018500675434e-07, "loss": 0.1191, "step": 6156 }, { "epoch": 0.8579391068069393, "grad_norm": 0.12316776067018509, "learning_rate": 5.679270715943535e-07, "loss": 0.1125, "step": 6157 }, { "epoch": 0.8580784504981537, "grad_norm": 0.1406499147415161, "learning_rate": 5.668366272186915e-07, "loss": 0.1203, "step": 6158 }, { "epoch": 0.858217794189368, "grad_norm": 0.09022276848554611, "learning_rate": 5.657471677907205e-07, "loss": 0.0987, "step": 6159 }, { "epoch": 0.8583571378805824, "grad_norm": 0.09769351780414581, "learning_rate": 5.646586935524922e-07, "loss": 0.1082, "step": 6160 }, { "epoch": 0.8584964815717968, "grad_norm": 0.28572550415992737, "learning_rate": 5.635712047458419e-07, "loss": 0.1496, "step": 6161 }, { "epoch": 0.8586358252630112, "grad_norm": 0.11121387779712677, "learning_rate": 5.624847016123847e-07, "loss": 0.0995, "step": 6162 }, { "epoch": 0.8587751689542256, "grad_norm": 0.20675507187843323, "learning_rate": 5.613991843935179e-07, "loss": 0.1288, "step": 6163 }, { "epoch": 0.8589145126454399, "grad_norm": 0.15064115822315216, "learning_rate": 5.60314653330416e-07, "loss": 0.109, "step": 6164 }, { "epoch": 0.8590538563366543, "grad_norm": 0.18701054155826569, "learning_rate": 5.592311086640379e-07, "loss": 0.1424, "step": 6165 }, { "epoch": 0.8591932000278687, "grad_norm": 0.16144327819347382, "learning_rate": 5.581485506351242e-07, "loss": 0.1239, "step": 6166 }, { "epoch": 0.8593325437190831, "grad_norm": 0.21952177584171295, "learning_rate": 5.570669794841921e-07, "loss": 0.1692, "step": 6167 }, { "epoch": 0.8594718874102975, "grad_norm": 0.1307622641324997, "learning_rate": 5.559863954515448e-07, "loss": 0.1063, "step": 6168 }, { "epoch": 0.8596112311015118, "grad_norm": 0.23866499960422516, "learning_rate": 5.549067987772605e-07, "loss": 0.137, "step": 6169 }, { "epoch": 0.8597505747927262, "grad_norm": 0.14614957571029663, "learning_rate": 5.538281897012032e-07, "loss": 0.1295, "step": 6170 }, { "epoch": 0.8598899184839406, "grad_norm": 0.2271052449941635, "learning_rate": 5.527505684630136e-07, "loss": 0.1462, "step": 6171 }, { "epoch": 0.8600292621751551, "grad_norm": 0.14409367740154266, "learning_rate": 5.51673935302115e-07, "loss": 0.1131, "step": 6172 }, { "epoch": 0.8601686058663695, "grad_norm": 0.1448473483324051, "learning_rate": 5.505982904577123e-07, "loss": 0.1286, "step": 6173 }, { "epoch": 0.8603079495575838, "grad_norm": 0.13310180604457855, "learning_rate": 5.495236341687876e-07, "loss": 0.1229, "step": 6174 }, { "epoch": 0.8604472932487982, "grad_norm": 0.25820475816726685, "learning_rate": 5.484499666741044e-07, "loss": 0.1741, "step": 6175 }, { "epoch": 0.8605866369400126, "grad_norm": 0.22353607416152954, "learning_rate": 5.47377288212208e-07, "loss": 0.1529, "step": 6176 }, { "epoch": 0.860725980631227, "grad_norm": 0.12543293833732605, "learning_rate": 5.463055990214245e-07, "loss": 0.1126, "step": 6177 }, { "epoch": 0.8608653243224413, "grad_norm": 0.10192246735095978, "learning_rate": 5.452348993398566e-07, "loss": 0.108, "step": 6178 }, { "epoch": 0.8610046680136557, "grad_norm": 0.1180301234126091, "learning_rate": 5.441651894053895e-07, "loss": 0.0959, "step": 6179 }, { "epoch": 0.8611440117048701, "grad_norm": 0.11838914453983307, "learning_rate": 5.430964694556884e-07, "loss": 0.0981, "step": 6180 }, { "epoch": 0.8612833553960845, "grad_norm": 0.13591451942920685, "learning_rate": 5.420287397282004e-07, "loss": 0.1251, "step": 6181 }, { "epoch": 0.8614226990872988, "grad_norm": 0.16282904148101807, "learning_rate": 5.409620004601479e-07, "loss": 0.1318, "step": 6182 }, { "epoch": 0.8615620427785132, "grad_norm": 0.11856616288423538, "learning_rate": 5.398962518885375e-07, "loss": 0.1077, "step": 6183 }, { "epoch": 0.8617013864697276, "grad_norm": 0.10789430141448975, "learning_rate": 5.388314942501549e-07, "loss": 0.1145, "step": 6184 }, { "epoch": 0.861840730160942, "grad_norm": 0.13725082576274872, "learning_rate": 5.377677277815646e-07, "loss": 0.1308, "step": 6185 }, { "epoch": 0.8619800738521564, "grad_norm": 0.18588390946388245, "learning_rate": 5.367049527191093e-07, "loss": 0.1567, "step": 6186 }, { "epoch": 0.8621194175433707, "grad_norm": 0.16617116332054138, "learning_rate": 5.356431692989144e-07, "loss": 0.1042, "step": 6187 }, { "epoch": 0.8622587612345851, "grad_norm": 0.28551197052001953, "learning_rate": 5.345823777568859e-07, "loss": 0.1735, "step": 6188 }, { "epoch": 0.8623981049257995, "grad_norm": 0.24820205569267273, "learning_rate": 5.335225783287051e-07, "loss": 0.1989, "step": 6189 }, { "epoch": 0.8625374486170139, "grad_norm": 0.12308410555124283, "learning_rate": 5.324637712498359e-07, "loss": 0.1213, "step": 6190 }, { "epoch": 0.8626767923082282, "grad_norm": 0.18870852887630463, "learning_rate": 5.314059567555213e-07, "loss": 0.1586, "step": 6191 }, { "epoch": 0.8628161359994426, "grad_norm": 0.20931284129619598, "learning_rate": 5.303491350807832e-07, "loss": 0.1429, "step": 6192 }, { "epoch": 0.862955479690657, "grad_norm": 0.17991237342357635, "learning_rate": 5.292933064604228e-07, "loss": 0.1582, "step": 6193 }, { "epoch": 0.8630948233818714, "grad_norm": 0.11797675490379333, "learning_rate": 5.282384711290228e-07, "loss": 0.0965, "step": 6194 }, { "epoch": 0.8632341670730858, "grad_norm": 0.08944769948720932, "learning_rate": 5.271846293209426e-07, "loss": 0.0984, "step": 6195 }, { "epoch": 0.8633735107643001, "grad_norm": 0.18248042464256287, "learning_rate": 5.261317812703204e-07, "loss": 0.1489, "step": 6196 }, { "epoch": 0.8635128544555145, "grad_norm": 0.1921248435974121, "learning_rate": 5.250799272110768e-07, "loss": 0.1534, "step": 6197 }, { "epoch": 0.8636521981467289, "grad_norm": 0.16649727523326874, "learning_rate": 5.240290673769099e-07, "loss": 0.1431, "step": 6198 }, { "epoch": 0.8637915418379433, "grad_norm": 0.12054022401571274, "learning_rate": 5.229792020012947e-07, "loss": 0.1374, "step": 6199 }, { "epoch": 0.8639308855291576, "grad_norm": 0.14105062186717987, "learning_rate": 5.2193033131749e-07, "loss": 0.1322, "step": 6200 }, { "epoch": 0.864070229220372, "grad_norm": 0.18944697082042694, "learning_rate": 5.20882455558529e-07, "loss": 0.1587, "step": 6201 }, { "epoch": 0.8642095729115864, "grad_norm": 0.21506135165691376, "learning_rate": 5.19835574957227e-07, "loss": 0.1115, "step": 6202 }, { "epoch": 0.8643489166028008, "grad_norm": 0.20095443725585938, "learning_rate": 5.187896897461752e-07, "loss": 0.1189, "step": 6203 }, { "epoch": 0.8644882602940152, "grad_norm": 0.14668388664722443, "learning_rate": 5.177448001577468e-07, "loss": 0.1082, "step": 6204 }, { "epoch": 0.8646276039852295, "grad_norm": 0.1895114630460739, "learning_rate": 5.167009064240936e-07, "loss": 0.1421, "step": 6205 }, { "epoch": 0.8647669476764439, "grad_norm": 0.11095087230205536, "learning_rate": 5.156580087771429e-07, "loss": 0.108, "step": 6206 }, { "epoch": 0.8649062913676583, "grad_norm": 0.11226736009120941, "learning_rate": 5.146161074486022e-07, "loss": 0.1112, "step": 6207 }, { "epoch": 0.8650456350588727, "grad_norm": 0.13632464408874512, "learning_rate": 5.135752026699597e-07, "loss": 0.136, "step": 6208 }, { "epoch": 0.865184978750087, "grad_norm": 0.10503604263067245, "learning_rate": 5.125352946724816e-07, "loss": 0.1137, "step": 6209 }, { "epoch": 0.8653243224413014, "grad_norm": 0.1429840326309204, "learning_rate": 5.114963836872105e-07, "loss": 0.1164, "step": 6210 }, { "epoch": 0.8654636661325158, "grad_norm": 0.14013072848320007, "learning_rate": 5.104584699449671e-07, "loss": 0.1194, "step": 6211 }, { "epoch": 0.8656030098237303, "grad_norm": 0.1263946294784546, "learning_rate": 5.094215536763541e-07, "loss": 0.1327, "step": 6212 }, { "epoch": 0.8657423535149447, "grad_norm": 0.14730653166770935, "learning_rate": 5.083856351117511e-07, "loss": 0.118, "step": 6213 }, { "epoch": 0.865881697206159, "grad_norm": 0.1328311711549759, "learning_rate": 5.073507144813139e-07, "loss": 0.1158, "step": 6214 }, { "epoch": 0.8660210408973734, "grad_norm": 0.12083129584789276, "learning_rate": 5.063167920149797e-07, "loss": 0.1111, "step": 6215 }, { "epoch": 0.8661603845885878, "grad_norm": 0.11039550602436066, "learning_rate": 5.052838679424609e-07, "loss": 0.1114, "step": 6216 }, { "epoch": 0.8662997282798022, "grad_norm": 0.16576972603797913, "learning_rate": 5.042519424932512e-07, "loss": 0.118, "step": 6217 }, { "epoch": 0.8664390719710166, "grad_norm": 0.15140299499034882, "learning_rate": 5.0322101589662e-07, "loss": 0.1223, "step": 6218 }, { "epoch": 0.8665784156622309, "grad_norm": 0.22525757551193237, "learning_rate": 5.02191088381615e-07, "loss": 0.1517, "step": 6219 }, { "epoch": 0.8667177593534453, "grad_norm": 0.09942809492349625, "learning_rate": 5.01162160177065e-07, "loss": 0.0944, "step": 6220 }, { "epoch": 0.8668571030446597, "grad_norm": 0.24449460208415985, "learning_rate": 5.001342315115726e-07, "loss": 0.1366, "step": 6221 }, { "epoch": 0.8669964467358741, "grad_norm": 0.2855793833732605, "learning_rate": 4.991073026135196e-07, "loss": 0.1745, "step": 6222 }, { "epoch": 0.8671357904270884, "grad_norm": 0.18306101858615875, "learning_rate": 4.980813737110662e-07, "loss": 0.1437, "step": 6223 }, { "epoch": 0.8672751341183028, "grad_norm": 0.10732901096343994, "learning_rate": 4.970564450321525e-07, "loss": 0.1048, "step": 6224 }, { "epoch": 0.8674144778095172, "grad_norm": 0.1629018783569336, "learning_rate": 4.960325168044916e-07, "loss": 0.1419, "step": 6225 }, { "epoch": 0.8675538215007316, "grad_norm": 0.2030247151851654, "learning_rate": 4.950095892555789e-07, "loss": 0.1846, "step": 6226 }, { "epoch": 0.867693165191946, "grad_norm": 0.14949363470077515, "learning_rate": 4.93987662612685e-07, "loss": 0.102, "step": 6227 }, { "epoch": 0.8678325088831603, "grad_norm": 0.1134168803691864, "learning_rate": 4.929667371028579e-07, "loss": 0.1071, "step": 6228 }, { "epoch": 0.8679718525743747, "grad_norm": 0.10203687101602554, "learning_rate": 4.919468129529237e-07, "loss": 0.0889, "step": 6229 }, { "epoch": 0.8681111962655891, "grad_norm": 0.1591932773590088, "learning_rate": 4.909278903894887e-07, "loss": 0.1162, "step": 6230 }, { "epoch": 0.8682505399568035, "grad_norm": 0.15908773243427277, "learning_rate": 4.89909969638932e-07, "loss": 0.1372, "step": 6231 }, { "epoch": 0.8683898836480178, "grad_norm": 0.2006182074546814, "learning_rate": 4.888930509274125e-07, "loss": 0.1259, "step": 6232 }, { "epoch": 0.8685292273392322, "grad_norm": 0.15383093059062958, "learning_rate": 4.878771344808664e-07, "loss": 0.1165, "step": 6233 }, { "epoch": 0.8686685710304466, "grad_norm": 0.12102482467889786, "learning_rate": 4.868622205250089e-07, "loss": 0.1188, "step": 6234 }, { "epoch": 0.868807914721661, "grad_norm": 0.18955087661743164, "learning_rate": 4.858483092853278e-07, "loss": 0.1326, "step": 6235 }, { "epoch": 0.8689472584128753, "grad_norm": 0.14325760304927826, "learning_rate": 4.848354009870931e-07, "loss": 0.0951, "step": 6236 }, { "epoch": 0.8690866021040897, "grad_norm": 0.2283148318529129, "learning_rate": 4.838234958553501e-07, "loss": 0.1441, "step": 6237 }, { "epoch": 0.8692259457953041, "grad_norm": 0.2258382886648178, "learning_rate": 4.828125941149197e-07, "loss": 0.161, "step": 6238 }, { "epoch": 0.8693652894865185, "grad_norm": 0.17256148159503937, "learning_rate": 4.818026959904016e-07, "loss": 0.1243, "step": 6239 }, { "epoch": 0.8695046331777329, "grad_norm": 0.12198753654956818, "learning_rate": 4.80793801706172e-07, "loss": 0.1231, "step": 6240 }, { "epoch": 0.8696439768689472, "grad_norm": 0.13437069952487946, "learning_rate": 4.797859114863851e-07, "loss": 0.1251, "step": 6241 }, { "epoch": 0.8697833205601616, "grad_norm": 0.16873258352279663, "learning_rate": 4.787790255549707e-07, "loss": 0.1434, "step": 6242 }, { "epoch": 0.869922664251376, "grad_norm": 0.1717132180929184, "learning_rate": 4.777731441356342e-07, "loss": 0.1187, "step": 6243 }, { "epoch": 0.8700620079425904, "grad_norm": 0.15676964819431305, "learning_rate": 4.7676826745186144e-07, "loss": 0.1267, "step": 6244 }, { "epoch": 0.8702013516338047, "grad_norm": 0.16772502660751343, "learning_rate": 4.757643957269131e-07, "loss": 0.134, "step": 6245 }, { "epoch": 0.8703406953250191, "grad_norm": 0.12443762272596359, "learning_rate": 4.7476152918382535e-07, "loss": 0.0983, "step": 6246 }, { "epoch": 0.8704800390162335, "grad_norm": 0.1528947651386261, "learning_rate": 4.737596680454137e-07, "loss": 0.1249, "step": 6247 }, { "epoch": 0.8706193827074479, "grad_norm": 0.1577903777360916, "learning_rate": 4.727588125342669e-07, "loss": 0.1062, "step": 6248 }, { "epoch": 0.8707587263986623, "grad_norm": 0.09248366206884384, "learning_rate": 4.7175896287275424e-07, "loss": 0.1115, "step": 6249 }, { "epoch": 0.8708980700898766, "grad_norm": 0.1443013995885849, "learning_rate": 4.7076011928301803e-07, "loss": 0.1066, "step": 6250 }, { "epoch": 0.871037413781091, "grad_norm": 0.21784716844558716, "learning_rate": 4.6976228198697847e-07, "loss": 0.146, "step": 6251 }, { "epoch": 0.8711767574723055, "grad_norm": 0.126605823636055, "learning_rate": 4.687654512063344e-07, "loss": 0.118, "step": 6252 }, { "epoch": 0.8713161011635199, "grad_norm": 0.1269293874502182, "learning_rate": 4.6776962716255593e-07, "loss": 0.1112, "step": 6253 }, { "epoch": 0.8714554448547343, "grad_norm": 0.1971571147441864, "learning_rate": 4.667748100768937e-07, "loss": 0.1362, "step": 6254 }, { "epoch": 0.8715947885459486, "grad_norm": 0.09951157867908478, "learning_rate": 4.657810001703733e-07, "loss": 0.1046, "step": 6255 }, { "epoch": 0.871734132237163, "grad_norm": 0.20332755148410797, "learning_rate": 4.647881976637975e-07, "loss": 0.1628, "step": 6256 }, { "epoch": 0.8718734759283774, "grad_norm": 0.15205389261245728, "learning_rate": 4.637964027777425e-07, "loss": 0.1414, "step": 6257 }, { "epoch": 0.8720128196195918, "grad_norm": 0.21397854387760162, "learning_rate": 4.62805615732565e-07, "loss": 0.1466, "step": 6258 }, { "epoch": 0.8721521633108061, "grad_norm": 0.14846858382225037, "learning_rate": 4.6181583674839323e-07, "loss": 0.1394, "step": 6259 }, { "epoch": 0.8722915070020205, "grad_norm": 0.18179328739643097, "learning_rate": 4.6082706604513307e-07, "loss": 0.1427, "step": 6260 }, { "epoch": 0.8724308506932349, "grad_norm": 0.21868222951889038, "learning_rate": 4.598393038424681e-07, "loss": 0.1463, "step": 6261 }, { "epoch": 0.8725701943844493, "grad_norm": 0.11636234074831009, "learning_rate": 4.5885255035985675e-07, "loss": 0.1097, "step": 6262 }, { "epoch": 0.8727095380756636, "grad_norm": 0.23992645740509033, "learning_rate": 4.578668058165325e-07, "loss": 0.1436, "step": 6263 }, { "epoch": 0.872848881766878, "grad_norm": 0.13471907377243042, "learning_rate": 4.5688207043150467e-07, "loss": 0.1277, "step": 6264 }, { "epoch": 0.8729882254580924, "grad_norm": 0.2622286379337311, "learning_rate": 4.5589834442355986e-07, "loss": 0.1562, "step": 6265 }, { "epoch": 0.8731275691493068, "grad_norm": 0.1596965193748474, "learning_rate": 4.549156280112599e-07, "loss": 0.1297, "step": 6266 }, { "epoch": 0.8732669128405212, "grad_norm": 0.15829813480377197, "learning_rate": 4.5393392141294066e-07, "loss": 0.0973, "step": 6267 }, { "epoch": 0.8734062565317355, "grad_norm": 0.10035950690507889, "learning_rate": 4.5295322484671667e-07, "loss": 0.1047, "step": 6268 }, { "epoch": 0.8735456002229499, "grad_norm": 0.15680880844593048, "learning_rate": 4.519735385304741e-07, "loss": 0.1084, "step": 6269 }, { "epoch": 0.8736849439141643, "grad_norm": 0.19382473826408386, "learning_rate": 4.509948626818789e-07, "loss": 0.1524, "step": 6270 }, { "epoch": 0.8738242876053787, "grad_norm": 0.13819321990013123, "learning_rate": 4.500171975183687e-07, "loss": 0.1293, "step": 6271 }, { "epoch": 0.873963631296593, "grad_norm": 0.13429546356201172, "learning_rate": 4.4904054325715927e-07, "loss": 0.116, "step": 6272 }, { "epoch": 0.8741029749878074, "grad_norm": 0.20315870642662048, "learning_rate": 4.4806490011524205e-07, "loss": 0.1255, "step": 6273 }, { "epoch": 0.8742423186790218, "grad_norm": 0.15290330350399017, "learning_rate": 4.4709026830938194e-07, "loss": 0.1389, "step": 6274 }, { "epoch": 0.8743816623702362, "grad_norm": 0.13071942329406738, "learning_rate": 4.46116648056118e-07, "loss": 0.125, "step": 6275 }, { "epoch": 0.8745210060614506, "grad_norm": 0.1654311567544937, "learning_rate": 4.451440395717682e-07, "loss": 0.1213, "step": 6276 }, { "epoch": 0.8746603497526649, "grad_norm": 0.1154005154967308, "learning_rate": 4.441724430724248e-07, "loss": 0.1065, "step": 6277 }, { "epoch": 0.8747996934438793, "grad_norm": 0.2430742383003235, "learning_rate": 4.432018587739517e-07, "loss": 0.1242, "step": 6278 }, { "epoch": 0.8749390371350937, "grad_norm": 0.15650756657123566, "learning_rate": 4.422322868919937e-07, "loss": 0.13, "step": 6279 }, { "epoch": 0.8750783808263081, "grad_norm": 0.1051006093621254, "learning_rate": 4.4126372764196457e-07, "loss": 0.1177, "step": 6280 }, { "epoch": 0.8752177245175224, "grad_norm": 0.19929571449756622, "learning_rate": 4.402961812390588e-07, "loss": 0.1484, "step": 6281 }, { "epoch": 0.8753570682087368, "grad_norm": 0.21084921061992645, "learning_rate": 4.3932964789824064e-07, "loss": 0.1372, "step": 6282 }, { "epoch": 0.8754964118999512, "grad_norm": 0.16439926624298096, "learning_rate": 4.3836412783425265e-07, "loss": 0.1244, "step": 6283 }, { "epoch": 0.8756357555911656, "grad_norm": 0.1107163280248642, "learning_rate": 4.3739962126161273e-07, "loss": 0.1024, "step": 6284 }, { "epoch": 0.87577509928238, "grad_norm": 0.1650972217321396, "learning_rate": 4.3643612839461057e-07, "loss": 0.13, "step": 6285 }, { "epoch": 0.8759144429735943, "grad_norm": 0.17239251732826233, "learning_rate": 4.354736494473122e-07, "loss": 0.1458, "step": 6286 }, { "epoch": 0.8760537866648087, "grad_norm": 0.1497879922389984, "learning_rate": 4.345121846335593e-07, "loss": 0.1128, "step": 6287 }, { "epoch": 0.8761931303560231, "grad_norm": 0.14944830536842346, "learning_rate": 4.335517341669676e-07, "loss": 0.1486, "step": 6288 }, { "epoch": 0.8763324740472375, "grad_norm": 0.14848551154136658, "learning_rate": 4.3259229826092655e-07, "loss": 0.1325, "step": 6289 }, { "epoch": 0.8764718177384518, "grad_norm": 0.09384836256504059, "learning_rate": 4.316338771286005e-07, "loss": 0.1021, "step": 6290 }, { "epoch": 0.8766111614296662, "grad_norm": 0.2129547894001007, "learning_rate": 4.3067647098293033e-07, "loss": 0.1608, "step": 6291 }, { "epoch": 0.8767505051208806, "grad_norm": 0.15104275941848755, "learning_rate": 4.29720080036628e-07, "loss": 0.104, "step": 6292 }, { "epoch": 0.8768898488120951, "grad_norm": 0.2397918999195099, "learning_rate": 4.2876470450218254e-07, "loss": 0.1248, "step": 6293 }, { "epoch": 0.8770291925033095, "grad_norm": 0.14425931870937347, "learning_rate": 4.278103445918569e-07, "loss": 0.1117, "step": 6294 }, { "epoch": 0.8771685361945238, "grad_norm": 0.10970491915941238, "learning_rate": 4.268570005176892e-07, "loss": 0.0925, "step": 6295 }, { "epoch": 0.8773078798857382, "grad_norm": 0.1695171445608139, "learning_rate": 4.259046724914878e-07, "loss": 0.1187, "step": 6296 }, { "epoch": 0.8774472235769526, "grad_norm": 0.1848413050174713, "learning_rate": 4.2495336072484015e-07, "loss": 0.1324, "step": 6297 }, { "epoch": 0.877586567268167, "grad_norm": 0.12281926721334457, "learning_rate": 4.240030654291061e-07, "loss": 0.1144, "step": 6298 }, { "epoch": 0.8777259109593814, "grad_norm": 0.16038072109222412, "learning_rate": 4.2305378681541833e-07, "loss": 0.1352, "step": 6299 }, { "epoch": 0.8778652546505957, "grad_norm": 0.11463984847068787, "learning_rate": 4.221055250946865e-07, "loss": 0.1078, "step": 6300 }, { "epoch": 0.8780045983418101, "grad_norm": 0.2174539864063263, "learning_rate": 4.21158280477591e-07, "loss": 0.1411, "step": 6301 }, { "epoch": 0.8781439420330245, "grad_norm": 0.1262793391942978, "learning_rate": 4.202120531745896e-07, "loss": 0.1075, "step": 6302 }, { "epoch": 0.8782832857242389, "grad_norm": 0.1543636918067932, "learning_rate": 4.192668433959113e-07, "loss": 0.1182, "step": 6303 }, { "epoch": 0.8784226294154532, "grad_norm": 0.18765611946582794, "learning_rate": 4.183226513515598e-07, "loss": 0.1371, "step": 6304 }, { "epoch": 0.8785619731066676, "grad_norm": 0.17091062664985657, "learning_rate": 4.173794772513151e-07, "loss": 0.1204, "step": 6305 }, { "epoch": 0.878701316797882, "grad_norm": 0.12411033362150192, "learning_rate": 4.1643732130472737e-07, "loss": 0.0853, "step": 6306 }, { "epoch": 0.8788406604890964, "grad_norm": 0.17425303161144257, "learning_rate": 4.1549618372112135e-07, "loss": 0.1672, "step": 6307 }, { "epoch": 0.8789800041803107, "grad_norm": 0.2098904699087143, "learning_rate": 4.1455606470959755e-07, "loss": 0.1604, "step": 6308 }, { "epoch": 0.8791193478715251, "grad_norm": 0.18661588430404663, "learning_rate": 4.1361696447902944e-07, "loss": 0.1321, "step": 6309 }, { "epoch": 0.8792586915627395, "grad_norm": 0.354676216840744, "learning_rate": 4.1267888323806294e-07, "loss": 0.175, "step": 6310 }, { "epoch": 0.8793980352539539, "grad_norm": 0.21267884969711304, "learning_rate": 4.117418211951174e-07, "loss": 0.1599, "step": 6311 }, { "epoch": 0.8795373789451683, "grad_norm": 0.08863097429275513, "learning_rate": 4.1080577855838746e-07, "loss": 0.095, "step": 6312 }, { "epoch": 0.8796767226363826, "grad_norm": 0.18812227249145508, "learning_rate": 4.098707555358411e-07, "loss": 0.1133, "step": 6313 }, { "epoch": 0.879816066327597, "grad_norm": 0.24470356106758118, "learning_rate": 4.0893675233521777e-07, "loss": 0.183, "step": 6314 }, { "epoch": 0.8799554100188114, "grad_norm": 0.30320578813552856, "learning_rate": 4.080037691640321e-07, "loss": 0.1562, "step": 6315 }, { "epoch": 0.8800947537100258, "grad_norm": 0.1826191544532776, "learning_rate": 4.070718062295731e-07, "loss": 0.1225, "step": 6316 }, { "epoch": 0.8802340974012401, "grad_norm": 0.12255183607339859, "learning_rate": 4.0614086373890026e-07, "loss": 0.1128, "step": 6317 }, { "epoch": 0.8803734410924545, "grad_norm": 0.15749208629131317, "learning_rate": 4.05210941898847e-07, "loss": 0.1065, "step": 6318 }, { "epoch": 0.8805127847836689, "grad_norm": 0.16339528560638428, "learning_rate": 4.042820409160214e-07, "loss": 0.1424, "step": 6319 }, { "epoch": 0.8806521284748833, "grad_norm": 0.11199153959751129, "learning_rate": 4.033541609968056e-07, "loss": 0.1067, "step": 6320 }, { "epoch": 0.8807914721660977, "grad_norm": 0.2521628737449646, "learning_rate": 4.0242730234735184e-07, "loss": 0.1481, "step": 6321 }, { "epoch": 0.880930815857312, "grad_norm": 0.10753188282251358, "learning_rate": 4.01501465173586e-07, "loss": 0.1049, "step": 6322 }, { "epoch": 0.8810701595485264, "grad_norm": 0.11337845772504807, "learning_rate": 4.005766496812097e-07, "loss": 0.1006, "step": 6323 }, { "epoch": 0.8812095032397408, "grad_norm": 0.134867325425148, "learning_rate": 3.9965285607569573e-07, "loss": 0.1023, "step": 6324 }, { "epoch": 0.8813488469309552, "grad_norm": 0.13165195286273956, "learning_rate": 3.987300845622882e-07, "loss": 0.1195, "step": 6325 }, { "epoch": 0.8814881906221695, "grad_norm": 0.18531303107738495, "learning_rate": 3.978083353460083e-07, "loss": 0.1208, "step": 6326 }, { "epoch": 0.8816275343133839, "grad_norm": 0.19937871396541595, "learning_rate": 3.96887608631647e-07, "loss": 0.1231, "step": 6327 }, { "epoch": 0.8817668780045983, "grad_norm": 0.1109953299164772, "learning_rate": 3.959679046237663e-07, "loss": 0.1056, "step": 6328 }, { "epoch": 0.8819062216958127, "grad_norm": 0.21021048724651337, "learning_rate": 3.950492235267062e-07, "loss": 0.1095, "step": 6329 }, { "epoch": 0.882045565387027, "grad_norm": 0.17171111702919006, "learning_rate": 3.9413156554457655e-07, "loss": 0.1289, "step": 6330 }, { "epoch": 0.8821849090782414, "grad_norm": 0.19004277884960175, "learning_rate": 3.9321493088125774e-07, "loss": 0.1369, "step": 6331 }, { "epoch": 0.8823242527694558, "grad_norm": 0.09012280404567719, "learning_rate": 3.9229931974040844e-07, "loss": 0.0954, "step": 6332 }, { "epoch": 0.8824635964606703, "grad_norm": 0.1127670407295227, "learning_rate": 3.9138473232545326e-07, "loss": 0.107, "step": 6333 }, { "epoch": 0.8826029401518847, "grad_norm": 0.10519208759069443, "learning_rate": 3.9047116883959513e-07, "loss": 0.1083, "step": 6334 }, { "epoch": 0.882742283843099, "grad_norm": 0.13108986616134644, "learning_rate": 3.895586294858045e-07, "loss": 0.1293, "step": 6335 }, { "epoch": 0.8828816275343134, "grad_norm": 0.1299431473016739, "learning_rate": 3.886471144668291e-07, "loss": 0.1201, "step": 6336 }, { "epoch": 0.8830209712255278, "grad_norm": 0.15391501784324646, "learning_rate": 3.8773662398518596e-07, "loss": 0.1415, "step": 6337 }, { "epoch": 0.8831603149167422, "grad_norm": 0.1764272004365921, "learning_rate": 3.8682715824316594e-07, "loss": 0.1184, "step": 6338 }, { "epoch": 0.8832996586079566, "grad_norm": 0.18494050204753876, "learning_rate": 3.8591871744282973e-07, "loss": 0.1303, "step": 6339 }, { "epoch": 0.8834390022991709, "grad_norm": 0.20388811826705933, "learning_rate": 3.85011301786013e-07, "loss": 0.1237, "step": 6340 }, { "epoch": 0.8835783459903853, "grad_norm": 0.10856466740369797, "learning_rate": 3.841049114743239e-07, "loss": 0.1162, "step": 6341 }, { "epoch": 0.8837176896815997, "grad_norm": 0.16351301968097687, "learning_rate": 3.8319954670914094e-07, "loss": 0.1543, "step": 6342 }, { "epoch": 0.8838570333728141, "grad_norm": 0.18720406293869019, "learning_rate": 3.8229520769161474e-07, "loss": 0.12, "step": 6343 }, { "epoch": 0.8839963770640284, "grad_norm": 0.10649596899747849, "learning_rate": 3.813918946226691e-07, "loss": 0.1081, "step": 6344 }, { "epoch": 0.8841357207552428, "grad_norm": 0.16302251815795898, "learning_rate": 3.804896077030007e-07, "loss": 0.1348, "step": 6345 }, { "epoch": 0.8842750644464572, "grad_norm": 0.12458310276269913, "learning_rate": 3.7958834713307524e-07, "loss": 0.1081, "step": 6346 }, { "epoch": 0.8844144081376716, "grad_norm": 0.09881558269262314, "learning_rate": 3.786881131131348e-07, "loss": 0.1053, "step": 6347 }, { "epoch": 0.884553751828886, "grad_norm": 0.11032913625240326, "learning_rate": 3.7778890584318773e-07, "loss": 0.1196, "step": 6348 }, { "epoch": 0.8846930955201003, "grad_norm": 0.13983216881752014, "learning_rate": 3.7689072552301973e-07, "loss": 0.1261, "step": 6349 }, { "epoch": 0.8848324392113147, "grad_norm": 0.13959145545959473, "learning_rate": 3.759935723521846e-07, "loss": 0.1378, "step": 6350 }, { "epoch": 0.8849717829025291, "grad_norm": 0.10391250997781754, "learning_rate": 3.7509744653001e-07, "loss": 0.1019, "step": 6351 }, { "epoch": 0.8851111265937435, "grad_norm": 0.1664179265499115, "learning_rate": 3.742023482555951e-07, "loss": 0.1435, "step": 6352 }, { "epoch": 0.8852504702849578, "grad_norm": 0.14329111576080322, "learning_rate": 3.7330827772780967e-07, "loss": 0.1163, "step": 6353 }, { "epoch": 0.8853898139761722, "grad_norm": 0.22338023781776428, "learning_rate": 3.7241523514529476e-07, "loss": 0.1064, "step": 6354 }, { "epoch": 0.8855291576673866, "grad_norm": 0.2000918984413147, "learning_rate": 3.715232207064651e-07, "loss": 0.1328, "step": 6355 }, { "epoch": 0.885668501358601, "grad_norm": 0.13572058081626892, "learning_rate": 3.7063223460950705e-07, "loss": 0.1214, "step": 6356 }, { "epoch": 0.8858078450498154, "grad_norm": 0.10531718283891678, "learning_rate": 3.697422770523751e-07, "loss": 0.1195, "step": 6357 }, { "epoch": 0.8859471887410297, "grad_norm": 0.1620953381061554, "learning_rate": 3.688533482327994e-07, "loss": 0.1567, "step": 6358 }, { "epoch": 0.8860865324322441, "grad_norm": 0.10651160031557083, "learning_rate": 3.6796544834827865e-07, "loss": 0.1186, "step": 6359 }, { "epoch": 0.8862258761234585, "grad_norm": 0.13633395731449127, "learning_rate": 3.670785775960839e-07, "loss": 0.1036, "step": 6360 }, { "epoch": 0.8863652198146729, "grad_norm": 0.11265724897384644, "learning_rate": 3.66192736173257e-07, "loss": 0.1166, "step": 6361 }, { "epoch": 0.8865045635058872, "grad_norm": 0.09956086426973343, "learning_rate": 3.653079242766139e-07, "loss": 0.0932, "step": 6362 }, { "epoch": 0.8866439071971016, "grad_norm": 0.23647260665893555, "learning_rate": 3.6442414210273834e-07, "loss": 0.1726, "step": 6363 }, { "epoch": 0.886783250888316, "grad_norm": 0.22885805368423462, "learning_rate": 3.6354138984798506e-07, "loss": 0.1357, "step": 6364 }, { "epoch": 0.8869225945795304, "grad_norm": 0.15943075716495514, "learning_rate": 3.6265966770848314e-07, "loss": 0.1218, "step": 6365 }, { "epoch": 0.8870619382707448, "grad_norm": 0.16359446942806244, "learning_rate": 3.6177897588013154e-07, "loss": 0.1305, "step": 6366 }, { "epoch": 0.8872012819619591, "grad_norm": 0.20430311560630798, "learning_rate": 3.608993145585987e-07, "loss": 0.142, "step": 6367 }, { "epoch": 0.8873406256531735, "grad_norm": 0.14489702880382538, "learning_rate": 3.600206839393261e-07, "loss": 0.119, "step": 6368 }, { "epoch": 0.8874799693443879, "grad_norm": 0.1674753874540329, "learning_rate": 3.591430842175242e-07, "loss": 0.1359, "step": 6369 }, { "epoch": 0.8876193130356023, "grad_norm": 0.09397326409816742, "learning_rate": 3.5826651558817703e-07, "loss": 0.096, "step": 6370 }, { "epoch": 0.8877586567268166, "grad_norm": 0.12266956269741058, "learning_rate": 3.5739097824603665e-07, "loss": 0.1113, "step": 6371 }, { "epoch": 0.887898000418031, "grad_norm": 0.1002575010061264, "learning_rate": 3.5651647238562904e-07, "loss": 0.1037, "step": 6372 }, { "epoch": 0.8880373441092455, "grad_norm": 0.1300828903913498, "learning_rate": 3.5564299820124883e-07, "loss": 0.1217, "step": 6373 }, { "epoch": 0.8881766878004599, "grad_norm": 0.203186497092247, "learning_rate": 3.547705558869624e-07, "loss": 0.1213, "step": 6374 }, { "epoch": 0.8883160314916743, "grad_norm": 0.18150901794433594, "learning_rate": 3.5389914563660475e-07, "loss": 0.1252, "step": 6375 }, { "epoch": 0.8884553751828886, "grad_norm": 0.10526622086763382, "learning_rate": 3.530287676437849e-07, "loss": 0.1024, "step": 6376 }, { "epoch": 0.888594718874103, "grad_norm": 0.0868832916021347, "learning_rate": 3.5215942210188204e-07, "loss": 0.0876, "step": 6377 }, { "epoch": 0.8887340625653174, "grad_norm": 0.18171869218349457, "learning_rate": 3.512911092040422e-07, "loss": 0.1316, "step": 6378 }, { "epoch": 0.8888734062565318, "grad_norm": 0.1134047582745552, "learning_rate": 3.5042382914318716e-07, "loss": 0.1058, "step": 6379 }, { "epoch": 0.8890127499477462, "grad_norm": 0.12164068222045898, "learning_rate": 3.495575821120045e-07, "loss": 0.1183, "step": 6380 }, { "epoch": 0.8891520936389605, "grad_norm": 0.2043275237083435, "learning_rate": 3.4869236830295695e-07, "loss": 0.1495, "step": 6381 }, { "epoch": 0.8892914373301749, "grad_norm": 0.12569959461688995, "learning_rate": 3.478281879082729e-07, "loss": 0.1057, "step": 6382 }, { "epoch": 0.8894307810213893, "grad_norm": 0.15665332973003387, "learning_rate": 3.469650411199543e-07, "loss": 0.1426, "step": 6383 }, { "epoch": 0.8895701247126037, "grad_norm": 0.13914795219898224, "learning_rate": 3.4610292812977454e-07, "loss": 0.1214, "step": 6384 }, { "epoch": 0.889709468403818, "grad_norm": 0.11747695505619049, "learning_rate": 3.452418491292731e-07, "loss": 0.0991, "step": 6385 }, { "epoch": 0.8898488120950324, "grad_norm": 0.1950370967388153, "learning_rate": 3.4438180430976243e-07, "loss": 0.1629, "step": 6386 }, { "epoch": 0.8899881557862468, "grad_norm": 0.15627843141555786, "learning_rate": 3.4352279386232535e-07, "loss": 0.1297, "step": 6387 }, { "epoch": 0.8901274994774612, "grad_norm": 0.16186536848545074, "learning_rate": 3.426648179778147e-07, "loss": 0.1261, "step": 6388 }, { "epoch": 0.8902668431686755, "grad_norm": 0.1500861495733261, "learning_rate": 3.4180787684685246e-07, "loss": 0.1238, "step": 6389 }, { "epoch": 0.8904061868598899, "grad_norm": 0.16082319617271423, "learning_rate": 3.409519706598324e-07, "loss": 0.1316, "step": 6390 }, { "epoch": 0.8905455305511043, "grad_norm": 0.1494956910610199, "learning_rate": 3.400970996069164e-07, "loss": 0.1201, "step": 6391 }, { "epoch": 0.8906848742423187, "grad_norm": 0.0932208001613617, "learning_rate": 3.392432638780363e-07, "loss": 0.0928, "step": 6392 }, { "epoch": 0.8908242179335331, "grad_norm": 0.09652936458587646, "learning_rate": 3.383904636628965e-07, "loss": 0.1015, "step": 6393 }, { "epoch": 0.8909635616247474, "grad_norm": 0.14552894234657288, "learning_rate": 3.3753869915096936e-07, "loss": 0.1229, "step": 6394 }, { "epoch": 0.8911029053159618, "grad_norm": 0.14960481226444244, "learning_rate": 3.3668797053149907e-07, "loss": 0.1288, "step": 6395 }, { "epoch": 0.8912422490071762, "grad_norm": 0.18964648246765137, "learning_rate": 3.3583827799349486e-07, "loss": 0.1296, "step": 6396 }, { "epoch": 0.8913815926983906, "grad_norm": 0.19334961473941803, "learning_rate": 3.3498962172574033e-07, "loss": 0.1748, "step": 6397 }, { "epoch": 0.891520936389605, "grad_norm": 0.15177661180496216, "learning_rate": 3.3414200191678903e-07, "loss": 0.1222, "step": 6398 }, { "epoch": 0.8916602800808193, "grad_norm": 0.20202577114105225, "learning_rate": 3.332954187549603e-07, "loss": 0.1259, "step": 6399 }, { "epoch": 0.8917996237720337, "grad_norm": 0.12089474499225616, "learning_rate": 3.3244987242834816e-07, "loss": 0.1093, "step": 6400 }, { "epoch": 0.8919389674632481, "grad_norm": 0.13334599137306213, "learning_rate": 3.3160536312481174e-07, "loss": 0.111, "step": 6401 }, { "epoch": 0.8920783111544625, "grad_norm": 0.19045378267765045, "learning_rate": 3.3076189103198265e-07, "loss": 0.1145, "step": 6402 }, { "epoch": 0.8922176548456768, "grad_norm": 0.14663489162921906, "learning_rate": 3.299194563372604e-07, "loss": 0.1324, "step": 6403 }, { "epoch": 0.8923569985368912, "grad_norm": 0.22273515164852142, "learning_rate": 3.290780592278148e-07, "loss": 0.1439, "step": 6404 }, { "epoch": 0.8924963422281056, "grad_norm": 0.15865492820739746, "learning_rate": 3.2823769989058674e-07, "loss": 0.1193, "step": 6405 }, { "epoch": 0.89263568591932, "grad_norm": 0.12287692725658417, "learning_rate": 3.2739837851228306e-07, "loss": 0.1108, "step": 6406 }, { "epoch": 0.8927750296105343, "grad_norm": 0.14540086686611176, "learning_rate": 3.265600952793818e-07, "loss": 0.101, "step": 6407 }, { "epoch": 0.8929143733017487, "grad_norm": 0.10137172043323517, "learning_rate": 3.2572285037813123e-07, "loss": 0.104, "step": 6408 }, { "epoch": 0.8930537169929631, "grad_norm": 0.18914981186389923, "learning_rate": 3.248866439945486e-07, "loss": 0.131, "step": 6409 }, { "epoch": 0.8931930606841775, "grad_norm": 0.1450216919183731, "learning_rate": 3.2405147631441757e-07, "loss": 0.112, "step": 6410 }, { "epoch": 0.8933324043753919, "grad_norm": 0.19993630051612854, "learning_rate": 3.232173475232964e-07, "loss": 0.1461, "step": 6411 }, { "epoch": 0.8934717480666062, "grad_norm": 0.12293466925621033, "learning_rate": 3.2238425780650617e-07, "loss": 0.1425, "step": 6412 }, { "epoch": 0.8936110917578207, "grad_norm": 0.1457219123840332, "learning_rate": 3.215522073491434e-07, "loss": 0.123, "step": 6413 }, { "epoch": 0.8937504354490351, "grad_norm": 0.19232071936130524, "learning_rate": 3.2072119633606845e-07, "loss": 0.1218, "step": 6414 }, { "epoch": 0.8938897791402495, "grad_norm": 0.15244092047214508, "learning_rate": 3.198912249519143e-07, "loss": 0.133, "step": 6415 }, { "epoch": 0.8940291228314639, "grad_norm": 0.11650285869836807, "learning_rate": 3.190622933810816e-07, "loss": 0.0967, "step": 6416 }, { "epoch": 0.8941684665226782, "grad_norm": 0.13937480747699738, "learning_rate": 3.182344018077399e-07, "loss": 0.1145, "step": 6417 }, { "epoch": 0.8943078102138926, "grad_norm": 0.1520204246044159, "learning_rate": 3.1740755041582694e-07, "loss": 0.1006, "step": 6418 }, { "epoch": 0.894447153905107, "grad_norm": 0.13539054989814758, "learning_rate": 3.1658173938905023e-07, "loss": 0.1174, "step": 6419 }, { "epoch": 0.8945864975963214, "grad_norm": 0.24511274695396423, "learning_rate": 3.1575696891088804e-07, "loss": 0.1391, "step": 6420 }, { "epoch": 0.8947258412875357, "grad_norm": 0.1426040679216385, "learning_rate": 3.149332391645843e-07, "loss": 0.1139, "step": 6421 }, { "epoch": 0.8948651849787501, "grad_norm": 0.1643322855234146, "learning_rate": 3.1411055033315207e-07, "loss": 0.1593, "step": 6422 }, { "epoch": 0.8950045286699645, "grad_norm": 0.221308171749115, "learning_rate": 3.132889025993746e-07, "loss": 0.1403, "step": 6423 }, { "epoch": 0.8951438723611789, "grad_norm": 0.2364504635334015, "learning_rate": 3.1246829614580476e-07, "loss": 0.1323, "step": 6424 }, { "epoch": 0.8952832160523932, "grad_norm": 0.12750017642974854, "learning_rate": 3.1164873115476056e-07, "loss": 0.1115, "step": 6425 }, { "epoch": 0.8954225597436076, "grad_norm": 0.135846346616745, "learning_rate": 3.1083020780833137e-07, "loss": 0.107, "step": 6426 }, { "epoch": 0.895561903434822, "grad_norm": 0.16718745231628418, "learning_rate": 3.1001272628837565e-07, "loss": 0.1415, "step": 6427 }, { "epoch": 0.8957012471260364, "grad_norm": 0.11135054379701614, "learning_rate": 3.0919628677651636e-07, "loss": 0.1144, "step": 6428 }, { "epoch": 0.8958405908172508, "grad_norm": 0.11513547599315643, "learning_rate": 3.083808894541496e-07, "loss": 0.1104, "step": 6429 }, { "epoch": 0.8959799345084651, "grad_norm": 0.1846805065870285, "learning_rate": 3.075665345024387e-07, "loss": 0.1378, "step": 6430 }, { "epoch": 0.8961192781996795, "grad_norm": 0.11495672166347504, "learning_rate": 3.0675322210231227e-07, "loss": 0.1219, "step": 6431 }, { "epoch": 0.8962586218908939, "grad_norm": 0.1721593588590622, "learning_rate": 3.0594095243447254e-07, "loss": 0.1296, "step": 6432 }, { "epoch": 0.8963979655821083, "grad_norm": 0.10728906095027924, "learning_rate": 3.0512972567938505e-07, "loss": 0.1244, "step": 6433 }, { "epoch": 0.8965373092733226, "grad_norm": 0.16179192066192627, "learning_rate": 3.043195420172879e-07, "loss": 0.1322, "step": 6434 }, { "epoch": 0.896676652964537, "grad_norm": 0.1777563989162445, "learning_rate": 3.035104016281831e-07, "loss": 0.1412, "step": 6435 }, { "epoch": 0.8968159966557514, "grad_norm": 0.22308407723903656, "learning_rate": 3.027023046918448e-07, "loss": 0.132, "step": 6436 }, { "epoch": 0.8969553403469658, "grad_norm": 0.19324174523353577, "learning_rate": 3.018952513878137e-07, "loss": 0.1497, "step": 6437 }, { "epoch": 0.8970946840381802, "grad_norm": 0.09924764186143875, "learning_rate": 3.010892418953981e-07, "loss": 0.1114, "step": 6438 }, { "epoch": 0.8972340277293945, "grad_norm": 0.14575481414794922, "learning_rate": 3.0028427639367475e-07, "loss": 0.13, "step": 6439 }, { "epoch": 0.8973733714206089, "grad_norm": 0.14949385821819305, "learning_rate": 2.994803550614883e-07, "loss": 0.1494, "step": 6440 }, { "epoch": 0.8975127151118233, "grad_norm": 0.17258451879024506, "learning_rate": 2.9867747807745315e-07, "loss": 0.1424, "step": 6441 }, { "epoch": 0.8976520588030377, "grad_norm": 0.14716507494449615, "learning_rate": 2.978756456199494e-07, "loss": 0.1357, "step": 6442 }, { "epoch": 0.897791402494252, "grad_norm": 0.1566556692123413, "learning_rate": 2.970748578671251e-07, "loss": 0.123, "step": 6443 }, { "epoch": 0.8979307461854664, "grad_norm": 0.09702932089567184, "learning_rate": 2.9627511499689787e-07, "loss": 0.1006, "step": 6444 }, { "epoch": 0.8980700898766808, "grad_norm": 0.2130795568227768, "learning_rate": 2.9547641718695285e-07, "loss": 0.1295, "step": 6445 }, { "epoch": 0.8982094335678952, "grad_norm": 0.15924499928951263, "learning_rate": 2.946787646147414e-07, "loss": 0.1447, "step": 6446 }, { "epoch": 0.8983487772591096, "grad_norm": 0.08921556174755096, "learning_rate": 2.9388215745748347e-07, "loss": 0.1169, "step": 6447 }, { "epoch": 0.8984881209503239, "grad_norm": 0.13432997465133667, "learning_rate": 2.9308659589216913e-07, "loss": 0.115, "step": 6448 }, { "epoch": 0.8986274646415383, "grad_norm": 0.19216889142990112, "learning_rate": 2.92292080095552e-07, "loss": 0.1332, "step": 6449 }, { "epoch": 0.8987668083327527, "grad_norm": 0.1555619090795517, "learning_rate": 2.9149861024415526e-07, "loss": 0.1403, "step": 6450 }, { "epoch": 0.8989061520239671, "grad_norm": 0.1830148547887802, "learning_rate": 2.9070618651427073e-07, "loss": 0.1053, "step": 6451 }, { "epoch": 0.8990454957151814, "grad_norm": 0.16098549962043762, "learning_rate": 2.89914809081957e-07, "loss": 0.1182, "step": 6452 }, { "epoch": 0.8991848394063959, "grad_norm": 0.12144772708415985, "learning_rate": 2.8912447812303956e-07, "loss": 0.1137, "step": 6453 }, { "epoch": 0.8993241830976103, "grad_norm": 0.20429162681102753, "learning_rate": 2.8833519381311127e-07, "loss": 0.1649, "step": 6454 }, { "epoch": 0.8994635267888247, "grad_norm": 0.3253745436668396, "learning_rate": 2.8754695632753406e-07, "loss": 0.1723, "step": 6455 }, { "epoch": 0.8996028704800391, "grad_norm": 0.18386037647724152, "learning_rate": 2.867597658414367e-07, "loss": 0.1278, "step": 6456 }, { "epoch": 0.8997422141712534, "grad_norm": 0.1760895997285843, "learning_rate": 2.859736225297133e-07, "loss": 0.1382, "step": 6457 }, { "epoch": 0.8998815578624678, "grad_norm": 0.1341172307729721, "learning_rate": 2.8518852656702845e-07, "loss": 0.1139, "step": 6458 }, { "epoch": 0.9000209015536822, "grad_norm": 0.22254931926727295, "learning_rate": 2.844044781278127e-07, "loss": 0.1736, "step": 6459 }, { "epoch": 0.9001602452448966, "grad_norm": 0.21482668817043304, "learning_rate": 2.836214773862617e-07, "loss": 0.1448, "step": 6460 }, { "epoch": 0.900299588936111, "grad_norm": 0.20429037511348724, "learning_rate": 2.828395245163418e-07, "loss": 0.1304, "step": 6461 }, { "epoch": 0.9004389326273253, "grad_norm": 0.29741623997688293, "learning_rate": 2.820586196917857e-07, "loss": 0.1655, "step": 6462 }, { "epoch": 0.9005782763185397, "grad_norm": 0.1306406408548355, "learning_rate": 2.812787630860919e-07, "loss": 0.122, "step": 6463 }, { "epoch": 0.9007176200097541, "grad_norm": 0.13064874708652496, "learning_rate": 2.8049995487252625e-07, "loss": 0.1256, "step": 6464 }, { "epoch": 0.9008569637009685, "grad_norm": 0.0774635300040245, "learning_rate": 2.7972219522412194e-07, "loss": 0.0892, "step": 6465 }, { "epoch": 0.9009963073921828, "grad_norm": 0.16634663939476013, "learning_rate": 2.789454843136813e-07, "loss": 0.1403, "step": 6466 }, { "epoch": 0.9011356510833972, "grad_norm": 0.17734822630882263, "learning_rate": 2.7816982231376964e-07, "loss": 0.1065, "step": 6467 }, { "epoch": 0.9012749947746116, "grad_norm": 0.20276546478271484, "learning_rate": 2.773952093967225e-07, "loss": 0.1539, "step": 6468 }, { "epoch": 0.901414338465826, "grad_norm": 0.20588400959968567, "learning_rate": 2.7662164573464156e-07, "loss": 0.1224, "step": 6469 }, { "epoch": 0.9015536821570403, "grad_norm": 0.17809000611305237, "learning_rate": 2.758491314993944e-07, "loss": 0.113, "step": 6470 }, { "epoch": 0.9016930258482547, "grad_norm": 0.19397792220115662, "learning_rate": 2.750776668626148e-07, "loss": 0.1301, "step": 6471 }, { "epoch": 0.9018323695394691, "grad_norm": 0.1404980719089508, "learning_rate": 2.743072519957063e-07, "loss": 0.1214, "step": 6472 }, { "epoch": 0.9019717132306835, "grad_norm": 0.09184740483760834, "learning_rate": 2.73537887069838e-07, "loss": 0.1151, "step": 6473 }, { "epoch": 0.9021110569218979, "grad_norm": 0.11265546828508377, "learning_rate": 2.7276957225594367e-07, "loss": 0.1021, "step": 6474 }, { "epoch": 0.9022504006131122, "grad_norm": 0.1759083867073059, "learning_rate": 2.7200230772472526e-07, "loss": 0.1332, "step": 6475 }, { "epoch": 0.9023897443043266, "grad_norm": 0.18220354616641998, "learning_rate": 2.712360936466524e-07, "loss": 0.1385, "step": 6476 }, { "epoch": 0.902529087995541, "grad_norm": 0.146218404173851, "learning_rate": 2.704709301919606e-07, "loss": 0.1338, "step": 6477 }, { "epoch": 0.9026684316867554, "grad_norm": 0.15866178274154663, "learning_rate": 2.6970681753065e-07, "loss": 0.1415, "step": 6478 }, { "epoch": 0.9028077753779697, "grad_norm": 0.1237933337688446, "learning_rate": 2.6894375583249144e-07, "loss": 0.13, "step": 6479 }, { "epoch": 0.9029471190691841, "grad_norm": 0.11390385776758194, "learning_rate": 2.681817452670171e-07, "loss": 0.1064, "step": 6480 }, { "epoch": 0.9030864627603985, "grad_norm": 0.15336808562278748, "learning_rate": 2.6742078600353106e-07, "loss": 0.104, "step": 6481 }, { "epoch": 0.9032258064516129, "grad_norm": 0.14645572006702423, "learning_rate": 2.6666087821109855e-07, "loss": 0.1505, "step": 6482 }, { "epoch": 0.9033651501428273, "grad_norm": 0.14527565240859985, "learning_rate": 2.6590202205855506e-07, "loss": 0.1223, "step": 6483 }, { "epoch": 0.9035044938340416, "grad_norm": 0.22183996438980103, "learning_rate": 2.6514421771450194e-07, "loss": 0.1442, "step": 6484 }, { "epoch": 0.903643837525256, "grad_norm": 0.1717749387025833, "learning_rate": 2.6438746534730497e-07, "loss": 0.1284, "step": 6485 }, { "epoch": 0.9037831812164704, "grad_norm": 0.18117928504943848, "learning_rate": 2.6363176512509637e-07, "loss": 0.1426, "step": 6486 }, { "epoch": 0.9039225249076848, "grad_norm": 0.2626611292362213, "learning_rate": 2.628771172157768e-07, "loss": 0.1593, "step": 6487 }, { "epoch": 0.9040618685988991, "grad_norm": 0.16808171570301056, "learning_rate": 2.621235217870116e-07, "loss": 0.1258, "step": 6488 }, { "epoch": 0.9042012122901135, "grad_norm": 0.1463809609413147, "learning_rate": 2.6137097900623185e-07, "loss": 0.1137, "step": 6489 }, { "epoch": 0.9043405559813279, "grad_norm": 0.16688276827335358, "learning_rate": 2.6061948904063663e-07, "loss": 0.1563, "step": 6490 }, { "epoch": 0.9044798996725423, "grad_norm": 0.15419715642929077, "learning_rate": 2.598690520571889e-07, "loss": 0.1396, "step": 6491 }, { "epoch": 0.9046192433637567, "grad_norm": 0.1154271587729454, "learning_rate": 2.591196682226182e-07, "loss": 0.119, "step": 6492 }, { "epoch": 0.904758587054971, "grad_norm": 0.1274249106645584, "learning_rate": 2.5837133770342135e-07, "loss": 0.1015, "step": 6493 }, { "epoch": 0.9048979307461855, "grad_norm": 0.1852678656578064, "learning_rate": 2.5762406066585976e-07, "loss": 0.1067, "step": 6494 }, { "epoch": 0.9050372744373999, "grad_norm": 0.11920974403619766, "learning_rate": 2.568778372759628e-07, "loss": 0.1041, "step": 6495 }, { "epoch": 0.9051766181286143, "grad_norm": 0.13661910593509674, "learning_rate": 2.5613266769952183e-07, "loss": 0.1211, "step": 6496 }, { "epoch": 0.9053159618198287, "grad_norm": 0.13061587512493134, "learning_rate": 2.5538855210209823e-07, "loss": 0.1047, "step": 6497 }, { "epoch": 0.905455305511043, "grad_norm": 0.21673212945461273, "learning_rate": 2.54645490649017e-07, "loss": 0.1408, "step": 6498 }, { "epoch": 0.9055946492022574, "grad_norm": 0.14057235419750214, "learning_rate": 2.5390348350536887e-07, "loss": 0.1177, "step": 6499 }, { "epoch": 0.9057339928934718, "grad_norm": 0.17298059165477753, "learning_rate": 2.531625308360125e-07, "loss": 0.1355, "step": 6500 }, { "epoch": 0.9058733365846862, "grad_norm": 0.21377789974212646, "learning_rate": 2.52422632805569e-07, "loss": 0.2081, "step": 6501 }, { "epoch": 0.9060126802759005, "grad_norm": 0.08996345847845078, "learning_rate": 2.5168378957842797e-07, "loss": 0.1091, "step": 6502 }, { "epoch": 0.9061520239671149, "grad_norm": 0.13553102314472198, "learning_rate": 2.5094600131874205e-07, "loss": 0.1119, "step": 6503 }, { "epoch": 0.9062913676583293, "grad_norm": 0.11486086249351501, "learning_rate": 2.5020926819043223e-07, "loss": 0.1016, "step": 6504 }, { "epoch": 0.9064307113495437, "grad_norm": 0.12796364724636078, "learning_rate": 2.4947359035718434e-07, "loss": 0.1208, "step": 6505 }, { "epoch": 0.906570055040758, "grad_norm": 0.11545431613922119, "learning_rate": 2.487389679824481e-07, "loss": 0.1249, "step": 6506 }, { "epoch": 0.9067093987319724, "grad_norm": 0.15994484722614288, "learning_rate": 2.4800540122943915e-07, "loss": 0.1285, "step": 6507 }, { "epoch": 0.9068487424231868, "grad_norm": 0.1750960499048233, "learning_rate": 2.4727289026114043e-07, "loss": 0.1099, "step": 6508 }, { "epoch": 0.9069880861144012, "grad_norm": 0.2060340940952301, "learning_rate": 2.4654143524029896e-07, "loss": 0.1219, "step": 6509 }, { "epoch": 0.9071274298056156, "grad_norm": 0.23510949313640594, "learning_rate": 2.4581103632942747e-07, "loss": 0.1453, "step": 6510 }, { "epoch": 0.9072667734968299, "grad_norm": 0.07529270648956299, "learning_rate": 2.4508169369080404e-07, "loss": 0.098, "step": 6511 }, { "epoch": 0.9074061171880443, "grad_norm": 0.10774309188127518, "learning_rate": 2.443534074864706e-07, "loss": 0.0947, "step": 6512 }, { "epoch": 0.9075454608792587, "grad_norm": 0.14045290648937225, "learning_rate": 2.436261778782378e-07, "loss": 0.1175, "step": 6513 }, { "epoch": 0.9076848045704731, "grad_norm": 0.13072046637535095, "learning_rate": 2.4290000502767755e-07, "loss": 0.1014, "step": 6514 }, { "epoch": 0.9078241482616874, "grad_norm": 0.19288353621959686, "learning_rate": 2.421748890961301e-07, "loss": 0.1491, "step": 6515 }, { "epoch": 0.9079634919529018, "grad_norm": 0.2741023898124695, "learning_rate": 2.4145083024469996e-07, "loss": 0.1458, "step": 6516 }, { "epoch": 0.9081028356441162, "grad_norm": 0.16356155276298523, "learning_rate": 2.407278286342557e-07, "loss": 0.1145, "step": 6517 }, { "epoch": 0.9082421793353306, "grad_norm": 0.20747242867946625, "learning_rate": 2.40005884425431e-07, "loss": 0.1766, "step": 6518 }, { "epoch": 0.908381523026545, "grad_norm": 0.11177629977464676, "learning_rate": 2.39284997778626e-07, "loss": 0.0976, "step": 6519 }, { "epoch": 0.9085208667177593, "grad_norm": 0.09519784897565842, "learning_rate": 2.3856516885400693e-07, "loss": 0.1076, "step": 6520 }, { "epoch": 0.9086602104089737, "grad_norm": 0.173406720161438, "learning_rate": 2.3784639781150143e-07, "loss": 0.1224, "step": 6521 }, { "epoch": 0.9087995541001881, "grad_norm": 0.18553884327411652, "learning_rate": 2.3712868481080397e-07, "loss": 0.1179, "step": 6522 }, { "epoch": 0.9089388977914025, "grad_norm": 0.13568899035453796, "learning_rate": 2.364120300113748e-07, "loss": 0.1295, "step": 6523 }, { "epoch": 0.9090782414826168, "grad_norm": 0.22515636682510376, "learning_rate": 2.356964335724382e-07, "loss": 0.142, "step": 6524 }, { "epoch": 0.9092175851738312, "grad_norm": 0.12010911852121353, "learning_rate": 2.3498189565298312e-07, "loss": 0.1131, "step": 6525 }, { "epoch": 0.9093569288650456, "grad_norm": 0.12220019847154617, "learning_rate": 2.3426841641176311e-07, "loss": 0.1181, "step": 6526 }, { "epoch": 0.90949627255626, "grad_norm": 0.09577040374279022, "learning_rate": 2.3355599600729916e-07, "loss": 0.1022, "step": 6527 }, { "epoch": 0.9096356162474744, "grad_norm": 0.11179980635643005, "learning_rate": 2.328446345978713e-07, "loss": 0.1211, "step": 6528 }, { "epoch": 0.9097749599386887, "grad_norm": 0.15255849063396454, "learning_rate": 2.3213433234152982e-07, "loss": 0.1217, "step": 6529 }, { "epoch": 0.9099143036299031, "grad_norm": 0.15645921230316162, "learning_rate": 2.3142508939608844e-07, "loss": 0.1318, "step": 6530 }, { "epoch": 0.9100536473211175, "grad_norm": 0.23280130326747894, "learning_rate": 2.3071690591912277e-07, "loss": 0.1492, "step": 6531 }, { "epoch": 0.9101929910123319, "grad_norm": 0.13709083199501038, "learning_rate": 2.3000978206797697e-07, "loss": 0.1178, "step": 6532 }, { "epoch": 0.9103323347035462, "grad_norm": 0.1328558772802353, "learning_rate": 2.2930371799975593e-07, "loss": 0.1221, "step": 6533 }, { "epoch": 0.9104716783947607, "grad_norm": 0.13033849000930786, "learning_rate": 2.2859871387133248e-07, "loss": 0.1341, "step": 6534 }, { "epoch": 0.9106110220859751, "grad_norm": 0.18215975165367126, "learning_rate": 2.2789476983934133e-07, "loss": 0.1475, "step": 6535 }, { "epoch": 0.9107503657771895, "grad_norm": 0.18126922845840454, "learning_rate": 2.271918860601835e-07, "loss": 0.1408, "step": 6536 }, { "epoch": 0.9108897094684039, "grad_norm": 0.20808565616607666, "learning_rate": 2.2649006269002406e-07, "loss": 0.1388, "step": 6537 }, { "epoch": 0.9110290531596182, "grad_norm": 0.1457832157611847, "learning_rate": 2.257892998847916e-07, "loss": 0.1133, "step": 6538 }, { "epoch": 0.9111683968508326, "grad_norm": 0.1989179104566574, "learning_rate": 2.250895978001788e-07, "loss": 0.134, "step": 6539 }, { "epoch": 0.911307740542047, "grad_norm": 0.17952729761600494, "learning_rate": 2.2439095659164467e-07, "loss": 0.1229, "step": 6540 }, { "epoch": 0.9114470842332614, "grad_norm": 0.24403147399425507, "learning_rate": 2.236933764144117e-07, "loss": 0.1279, "step": 6541 }, { "epoch": 0.9115864279244758, "grad_norm": 0.1576920449733734, "learning_rate": 2.2299685742346423e-07, "loss": 0.1244, "step": 6542 }, { "epoch": 0.9117257716156901, "grad_norm": 0.11314062774181366, "learning_rate": 2.223013997735557e-07, "loss": 0.1101, "step": 6543 }, { "epoch": 0.9118651153069045, "grad_norm": 0.1371632069349289, "learning_rate": 2.2160700361919807e-07, "loss": 0.1049, "step": 6544 }, { "epoch": 0.9120044589981189, "grad_norm": 0.12687690556049347, "learning_rate": 2.2091366911467238e-07, "loss": 0.1203, "step": 6545 }, { "epoch": 0.9121438026893333, "grad_norm": 0.14131130278110504, "learning_rate": 2.2022139641402095e-07, "loss": 0.1317, "step": 6546 }, { "epoch": 0.9122831463805476, "grad_norm": 0.15228639543056488, "learning_rate": 2.1953018567105078e-07, "loss": 0.1088, "step": 6547 }, { "epoch": 0.912422490071762, "grad_norm": 0.20658409595489502, "learning_rate": 2.1884003703933343e-07, "loss": 0.1214, "step": 6548 }, { "epoch": 0.9125618337629764, "grad_norm": 0.14909595251083374, "learning_rate": 2.181509506722046e-07, "loss": 0.1254, "step": 6549 }, { "epoch": 0.9127011774541908, "grad_norm": 0.125221848487854, "learning_rate": 2.1746292672276238e-07, "loss": 0.1161, "step": 6550 }, { "epoch": 0.9128405211454051, "grad_norm": 0.1787428855895996, "learning_rate": 2.1677596534387114e-07, "loss": 0.1462, "step": 6551 }, { "epoch": 0.9129798648366195, "grad_norm": 0.1374456286430359, "learning_rate": 2.1609006668815768e-07, "loss": 0.1214, "step": 6552 }, { "epoch": 0.9131192085278339, "grad_norm": 0.12619252502918243, "learning_rate": 2.1540523090801292e-07, "loss": 0.1187, "step": 6553 }, { "epoch": 0.9132585522190483, "grad_norm": 0.14491215348243713, "learning_rate": 2.1472145815559064e-07, "loss": 0.1174, "step": 6554 }, { "epoch": 0.9133978959102627, "grad_norm": 0.27488306164741516, "learning_rate": 2.1403874858281104e-07, "loss": 0.1532, "step": 6555 }, { "epoch": 0.913537239601477, "grad_norm": 0.11260944604873657, "learning_rate": 2.133571023413572e-07, "loss": 0.1167, "step": 6556 }, { "epoch": 0.9136765832926914, "grad_norm": 0.15427862107753754, "learning_rate": 2.1267651958267298e-07, "loss": 0.1153, "step": 6557 }, { "epoch": 0.9138159269839058, "grad_norm": 0.09572342038154602, "learning_rate": 2.1199700045797077e-07, "loss": 0.0923, "step": 6558 }, { "epoch": 0.9139552706751202, "grad_norm": 0.20281127095222473, "learning_rate": 2.113185451182226e-07, "loss": 0.126, "step": 6559 }, { "epoch": 0.9140946143663345, "grad_norm": 0.14903971552848816, "learning_rate": 2.106411537141656e-07, "loss": 0.1395, "step": 6560 }, { "epoch": 0.9142339580575489, "grad_norm": 0.26790741086006165, "learning_rate": 2.0996482639630167e-07, "loss": 0.1474, "step": 6561 }, { "epoch": 0.9143733017487633, "grad_norm": 0.10959595441818237, "learning_rate": 2.0928956331489558e-07, "loss": 0.1109, "step": 6562 }, { "epoch": 0.9145126454399777, "grad_norm": 0.17851512134075165, "learning_rate": 2.08615364619974e-07, "loss": 0.1292, "step": 6563 }, { "epoch": 0.9146519891311921, "grad_norm": 0.1463884860277176, "learning_rate": 2.079422304613299e-07, "loss": 0.1183, "step": 6564 }, { "epoch": 0.9147913328224064, "grad_norm": 0.1454346925020218, "learning_rate": 2.0727016098851694e-07, "loss": 0.1154, "step": 6565 }, { "epoch": 0.9149306765136208, "grad_norm": 0.1016610786318779, "learning_rate": 2.0659915635085515e-07, "loss": 0.0894, "step": 6566 }, { "epoch": 0.9150700202048352, "grad_norm": 0.19831544160842896, "learning_rate": 2.0592921669742528e-07, "loss": 0.1222, "step": 6567 }, { "epoch": 0.9152093638960496, "grad_norm": 0.101815365254879, "learning_rate": 2.0526034217707213e-07, "loss": 0.0989, "step": 6568 }, { "epoch": 0.9153487075872639, "grad_norm": 0.11244730651378632, "learning_rate": 2.0459253293840632e-07, "loss": 0.1009, "step": 6569 }, { "epoch": 0.9154880512784783, "grad_norm": 0.12925587594509125, "learning_rate": 2.0392578912979853e-07, "loss": 0.1161, "step": 6570 }, { "epoch": 0.9156273949696927, "grad_norm": 0.24192723631858826, "learning_rate": 2.032601108993837e-07, "loss": 0.1404, "step": 6571 }, { "epoch": 0.9157667386609071, "grad_norm": 0.15346890687942505, "learning_rate": 2.0259549839506064e-07, "loss": 0.1286, "step": 6572 }, { "epoch": 0.9159060823521215, "grad_norm": 0.2562263011932373, "learning_rate": 2.0193195176449188e-07, "loss": 0.1717, "step": 6573 }, { "epoch": 0.9160454260433359, "grad_norm": 0.18338719010353088, "learning_rate": 2.0126947115510165e-07, "loss": 0.1291, "step": 6574 }, { "epoch": 0.9161847697345503, "grad_norm": 0.2225518673658371, "learning_rate": 2.006080567140778e-07, "loss": 0.2036, "step": 6575 }, { "epoch": 0.9163241134257647, "grad_norm": 0.1871078908443451, "learning_rate": 1.999477085883711e-07, "loss": 0.1354, "step": 6576 }, { "epoch": 0.9164634571169791, "grad_norm": 0.21300609409809113, "learning_rate": 1.9928842692469752e-07, "loss": 0.1453, "step": 6577 }, { "epoch": 0.9166028008081935, "grad_norm": 0.12078898400068283, "learning_rate": 1.9863021186953268e-07, "loss": 0.1074, "step": 6578 }, { "epoch": 0.9167421444994078, "grad_norm": 0.16306526958942413, "learning_rate": 1.9797306356911793e-07, "loss": 0.141, "step": 6579 }, { "epoch": 0.9168814881906222, "grad_norm": 0.1528390794992447, "learning_rate": 1.973169821694565e-07, "loss": 0.124, "step": 6580 }, { "epoch": 0.9170208318818366, "grad_norm": 0.20225480198860168, "learning_rate": 1.9666196781631453e-07, "loss": 0.1537, "step": 6581 }, { "epoch": 0.917160175573051, "grad_norm": 0.2115693986415863, "learning_rate": 1.9600802065522063e-07, "loss": 0.179, "step": 6582 }, { "epoch": 0.9172995192642653, "grad_norm": 0.08108612149953842, "learning_rate": 1.95355140831468e-07, "loss": 0.0971, "step": 6583 }, { "epoch": 0.9174388629554797, "grad_norm": 0.17843906581401825, "learning_rate": 1.947033284901112e-07, "loss": 0.1213, "step": 6584 }, { "epoch": 0.9175782066466941, "grad_norm": 0.0912824496626854, "learning_rate": 1.9405258377596825e-07, "loss": 0.0922, "step": 6585 }, { "epoch": 0.9177175503379085, "grad_norm": 0.11298087239265442, "learning_rate": 1.9340290683361907e-07, "loss": 0.1224, "step": 6586 }, { "epoch": 0.9178568940291228, "grad_norm": 0.1479002684354782, "learning_rate": 1.9275429780740763e-07, "loss": 0.1334, "step": 6587 }, { "epoch": 0.9179962377203372, "grad_norm": 0.20780964195728302, "learning_rate": 1.921067568414403e-07, "loss": 0.1263, "step": 6588 }, { "epoch": 0.9181355814115516, "grad_norm": 0.15830184519290924, "learning_rate": 1.9146028407958483e-07, "loss": 0.1469, "step": 6589 }, { "epoch": 0.918274925102766, "grad_norm": 0.1601753681898117, "learning_rate": 1.9081487966547407e-07, "loss": 0.1325, "step": 6590 }, { "epoch": 0.9184142687939804, "grad_norm": 0.13922806084156036, "learning_rate": 1.9017054374250111e-07, "loss": 0.1298, "step": 6591 }, { "epoch": 0.9185536124851947, "grad_norm": 0.1494491547346115, "learning_rate": 1.8952727645382307e-07, "loss": 0.0974, "step": 6592 }, { "epoch": 0.9186929561764091, "grad_norm": 0.12946514785289764, "learning_rate": 1.88885077942359e-07, "loss": 0.1313, "step": 6593 }, { "epoch": 0.9188322998676235, "grad_norm": 0.10955154895782471, "learning_rate": 1.8824394835079086e-07, "loss": 0.1078, "step": 6594 }, { "epoch": 0.9189716435588379, "grad_norm": 0.24785830080509186, "learning_rate": 1.8760388782156468e-07, "loss": 0.1488, "step": 6595 }, { "epoch": 0.9191109872500522, "grad_norm": 0.1479240208864212, "learning_rate": 1.8696489649688454e-07, "loss": 0.1303, "step": 6596 }, { "epoch": 0.9192503309412666, "grad_norm": 0.18393687903881073, "learning_rate": 1.8632697451872074e-07, "loss": 0.1425, "step": 6597 }, { "epoch": 0.919389674632481, "grad_norm": 0.1540527045726776, "learning_rate": 1.8569012202880599e-07, "loss": 0.1134, "step": 6598 }, { "epoch": 0.9195290183236954, "grad_norm": 0.1921822726726532, "learning_rate": 1.850543391686327e-07, "loss": 0.1588, "step": 6599 }, { "epoch": 0.9196683620149098, "grad_norm": 0.11655952036380768, "learning_rate": 1.8441962607945786e-07, "loss": 0.1, "step": 6600 }, { "epoch": 0.9198077057061241, "grad_norm": 0.1273251622915268, "learning_rate": 1.83785982902302e-07, "loss": 0.1292, "step": 6601 }, { "epoch": 0.9199470493973385, "grad_norm": 0.10972049832344055, "learning_rate": 1.8315340977794415e-07, "loss": 0.1183, "step": 6602 }, { "epoch": 0.9200863930885529, "grad_norm": 0.11250259727239609, "learning_rate": 1.825219068469275e-07, "loss": 0.1194, "step": 6603 }, { "epoch": 0.9202257367797673, "grad_norm": 0.08449432998895645, "learning_rate": 1.818914742495581e-07, "loss": 0.1087, "step": 6604 }, { "epoch": 0.9203650804709816, "grad_norm": 0.14731870591640472, "learning_rate": 1.8126211212590505e-07, "loss": 0.1297, "step": 6605 }, { "epoch": 0.920504424162196, "grad_norm": 0.09609661996364594, "learning_rate": 1.8063382061579648e-07, "loss": 0.1023, "step": 6606 }, { "epoch": 0.9206437678534104, "grad_norm": 0.17180170118808746, "learning_rate": 1.8000659985882463e-07, "loss": 0.1378, "step": 6607 }, { "epoch": 0.9207831115446248, "grad_norm": 0.12367963790893555, "learning_rate": 1.7938044999434412e-07, "loss": 0.1099, "step": 6608 }, { "epoch": 0.9209224552358392, "grad_norm": 0.22453255951404572, "learning_rate": 1.7875537116147146e-07, "loss": 0.1365, "step": 6609 }, { "epoch": 0.9210617989270535, "grad_norm": 0.30795273184776306, "learning_rate": 1.781313634990839e-07, "loss": 0.1486, "step": 6610 }, { "epoch": 0.9212011426182679, "grad_norm": 0.12338456511497498, "learning_rate": 1.7750842714582272e-07, "loss": 0.1048, "step": 6611 }, { "epoch": 0.9213404863094823, "grad_norm": 0.17347149550914764, "learning_rate": 1.7688656224008893e-07, "loss": 0.1381, "step": 6612 }, { "epoch": 0.9214798300006967, "grad_norm": 0.16924422979354858, "learning_rate": 1.762657689200481e-07, "loss": 0.1358, "step": 6613 }, { "epoch": 0.9216191736919112, "grad_norm": 0.12184307724237442, "learning_rate": 1.7564604732362545e-07, "loss": 0.1194, "step": 6614 }, { "epoch": 0.9217585173831255, "grad_norm": 0.09610896557569504, "learning_rate": 1.7502739758850863e-07, "loss": 0.1003, "step": 6615 }, { "epoch": 0.9218978610743399, "grad_norm": 0.14764457941055298, "learning_rate": 1.7440981985214933e-07, "loss": 0.1344, "step": 6616 }, { "epoch": 0.9220372047655543, "grad_norm": 0.1414009928703308, "learning_rate": 1.7379331425175728e-07, "loss": 0.1301, "step": 6617 }, { "epoch": 0.9221765484567687, "grad_norm": 0.20747123658657074, "learning_rate": 1.7317788092430676e-07, "loss": 0.1472, "step": 6618 }, { "epoch": 0.922315892147983, "grad_norm": 0.1461777687072754, "learning_rate": 1.725635200065323e-07, "loss": 0.1595, "step": 6619 }, { "epoch": 0.9224552358391974, "grad_norm": 0.13290105760097504, "learning_rate": 1.7195023163493253e-07, "loss": 0.122, "step": 6620 }, { "epoch": 0.9225945795304118, "grad_norm": 0.21678200364112854, "learning_rate": 1.7133801594576393e-07, "loss": 0.148, "step": 6621 }, { "epoch": 0.9227339232216262, "grad_norm": 0.19268707931041718, "learning_rate": 1.7072687307504887e-07, "loss": 0.1444, "step": 6622 }, { "epoch": 0.9228732669128406, "grad_norm": 0.12854993343353271, "learning_rate": 1.701168031585676e-07, "loss": 0.1262, "step": 6623 }, { "epoch": 0.9230126106040549, "grad_norm": 0.145283043384552, "learning_rate": 1.695078063318656e-07, "loss": 0.1324, "step": 6624 }, { "epoch": 0.9231519542952693, "grad_norm": 0.1609765887260437, "learning_rate": 1.6889988273024627e-07, "loss": 0.1188, "step": 6625 }, { "epoch": 0.9232912979864837, "grad_norm": 0.13085466623306274, "learning_rate": 1.682930324887766e-07, "loss": 0.1176, "step": 6626 }, { "epoch": 0.9234306416776981, "grad_norm": 0.19287921488285065, "learning_rate": 1.6768725574228706e-07, "loss": 0.1174, "step": 6627 }, { "epoch": 0.9235699853689124, "grad_norm": 0.2034296840429306, "learning_rate": 1.6708255262536443e-07, "loss": 0.1124, "step": 6628 }, { "epoch": 0.9237093290601268, "grad_norm": 0.14930574595928192, "learning_rate": 1.6647892327236125e-07, "loss": 0.125, "step": 6629 }, { "epoch": 0.9238486727513412, "grad_norm": 0.10092657804489136, "learning_rate": 1.658763678173908e-07, "loss": 0.0862, "step": 6630 }, { "epoch": 0.9239880164425556, "grad_norm": 0.09415411204099655, "learning_rate": 1.6527488639432543e-07, "loss": 0.1076, "step": 6631 }, { "epoch": 0.92412736013377, "grad_norm": 0.13372358679771423, "learning_rate": 1.6467447913680268e-07, "loss": 0.128, "step": 6632 }, { "epoch": 0.9242667038249843, "grad_norm": 0.16645704209804535, "learning_rate": 1.6407514617821752e-07, "loss": 0.1282, "step": 6633 }, { "epoch": 0.9244060475161987, "grad_norm": 0.18150050938129425, "learning_rate": 1.6347688765172953e-07, "loss": 0.127, "step": 6634 }, { "epoch": 0.9245453912074131, "grad_norm": 0.15552417933940887, "learning_rate": 1.6287970369025686e-07, "loss": 0.1501, "step": 6635 }, { "epoch": 0.9246847348986275, "grad_norm": 0.1967392861843109, "learning_rate": 1.6228359442648112e-07, "loss": 0.1591, "step": 6636 }, { "epoch": 0.9248240785898418, "grad_norm": 0.11926093697547913, "learning_rate": 1.616885599928436e-07, "loss": 0.1219, "step": 6637 }, { "epoch": 0.9249634222810562, "grad_norm": 0.15131792426109314, "learning_rate": 1.6109460052154802e-07, "loss": 0.1362, "step": 6638 }, { "epoch": 0.9251027659722706, "grad_norm": 0.19244681298732758, "learning_rate": 1.6050171614455712e-07, "loss": 0.1183, "step": 6639 }, { "epoch": 0.925242109663485, "grad_norm": 0.15384416282176971, "learning_rate": 1.5990990699359777e-07, "loss": 0.1444, "step": 6640 }, { "epoch": 0.9253814533546993, "grad_norm": 0.16133426129817963, "learning_rate": 1.593191732001559e-07, "loss": 0.127, "step": 6641 }, { "epoch": 0.9255207970459137, "grad_norm": 0.14685896039009094, "learning_rate": 1.5872951489547926e-07, "loss": 0.1165, "step": 6642 }, { "epoch": 0.9256601407371281, "grad_norm": 0.11348902434110641, "learning_rate": 1.5814093221057647e-07, "loss": 0.1165, "step": 6643 }, { "epoch": 0.9257994844283425, "grad_norm": 0.10940906405448914, "learning_rate": 1.575534252762162e-07, "loss": 0.1221, "step": 6644 }, { "epoch": 0.9259388281195569, "grad_norm": 0.11892852932214737, "learning_rate": 1.5696699422293072e-07, "loss": 0.1182, "step": 6645 }, { "epoch": 0.9260781718107712, "grad_norm": 0.12488257139921188, "learning_rate": 1.5638163918101024e-07, "loss": 0.1246, "step": 6646 }, { "epoch": 0.9262175155019856, "grad_norm": 0.16020524501800537, "learning_rate": 1.5579736028050797e-07, "loss": 0.1341, "step": 6647 }, { "epoch": 0.9263568591932, "grad_norm": 0.13368277251720428, "learning_rate": 1.5521415765123783e-07, "loss": 0.1423, "step": 6648 }, { "epoch": 0.9264962028844144, "grad_norm": 0.17146684229373932, "learning_rate": 1.546320314227734e-07, "loss": 0.128, "step": 6649 }, { "epoch": 0.9266355465756287, "grad_norm": 0.10361114889383316, "learning_rate": 1.5405098172444954e-07, "loss": 0.109, "step": 6650 }, { "epoch": 0.9267748902668431, "grad_norm": 0.1444985270500183, "learning_rate": 1.5347100868536246e-07, "loss": 0.1499, "step": 6651 }, { "epoch": 0.9269142339580575, "grad_norm": 0.23166468739509583, "learning_rate": 1.5289211243436964e-07, "loss": 0.1374, "step": 6652 }, { "epoch": 0.9270535776492719, "grad_norm": 0.15570871531963348, "learning_rate": 1.5231429310008817e-07, "loss": 0.1215, "step": 6653 }, { "epoch": 0.9271929213404864, "grad_norm": 0.1228199154138565, "learning_rate": 1.5173755081089536e-07, "loss": 0.1198, "step": 6654 }, { "epoch": 0.9273322650317007, "grad_norm": 0.10016370564699173, "learning_rate": 1.511618856949315e-07, "loss": 0.1086, "step": 6655 }, { "epoch": 0.9274716087229151, "grad_norm": 0.20269665122032166, "learning_rate": 1.5058729788009597e-07, "loss": 0.1439, "step": 6656 }, { "epoch": 0.9276109524141295, "grad_norm": 0.16291704773902893, "learning_rate": 1.5001378749404883e-07, "loss": 0.121, "step": 6657 }, { "epoch": 0.9277502961053439, "grad_norm": 0.20626986026763916, "learning_rate": 1.4944135466421095e-07, "loss": 0.1501, "step": 6658 }, { "epoch": 0.9278896397965583, "grad_norm": 0.10968320071697235, "learning_rate": 1.4886999951776448e-07, "loss": 0.1118, "step": 6659 }, { "epoch": 0.9280289834877726, "grad_norm": 0.1867453008890152, "learning_rate": 1.4829972218165013e-07, "loss": 0.1268, "step": 6660 }, { "epoch": 0.928168327178987, "grad_norm": 0.1692134290933609, "learning_rate": 1.477305227825715e-07, "loss": 0.1365, "step": 6661 }, { "epoch": 0.9283076708702014, "grad_norm": 0.1991988718509674, "learning_rate": 1.471624014469919e-07, "loss": 0.1479, "step": 6662 }, { "epoch": 0.9284470145614158, "grad_norm": 0.13493205606937408, "learning_rate": 1.4659535830113368e-07, "loss": 0.1292, "step": 6663 }, { "epoch": 0.9285863582526301, "grad_norm": 0.149070143699646, "learning_rate": 1.4602939347098278e-07, "loss": 0.1251, "step": 6664 }, { "epoch": 0.9287257019438445, "grad_norm": 0.13866013288497925, "learning_rate": 1.454645070822819e-07, "loss": 0.1323, "step": 6665 }, { "epoch": 0.9288650456350589, "grad_norm": 0.1594289392232895, "learning_rate": 1.449006992605373e-07, "loss": 0.1269, "step": 6666 }, { "epoch": 0.9290043893262733, "grad_norm": 0.12533074617385864, "learning_rate": 1.443379701310127e-07, "loss": 0.1241, "step": 6667 }, { "epoch": 0.9291437330174876, "grad_norm": 0.13578151166439056, "learning_rate": 1.4377631981873474e-07, "loss": 0.1004, "step": 6668 }, { "epoch": 0.929283076708702, "grad_norm": 0.13123184442520142, "learning_rate": 1.432157484484892e-07, "loss": 0.14, "step": 6669 }, { "epoch": 0.9294224203999164, "grad_norm": 0.16752056777477264, "learning_rate": 1.4265625614482247e-07, "loss": 0.1213, "step": 6670 }, { "epoch": 0.9295617640911308, "grad_norm": 0.12522222101688385, "learning_rate": 1.4209784303203965e-07, "loss": 0.1233, "step": 6671 }, { "epoch": 0.9297011077823452, "grad_norm": 0.22346830368041992, "learning_rate": 1.415405092342087e-07, "loss": 0.1311, "step": 6672 }, { "epoch": 0.9298404514735595, "grad_norm": 0.20007413625717163, "learning_rate": 1.4098425487515665e-07, "loss": 0.1346, "step": 6673 }, { "epoch": 0.9299797951647739, "grad_norm": 0.2195662260055542, "learning_rate": 1.4042908007846912e-07, "loss": 0.1616, "step": 6674 }, { "epoch": 0.9301191388559883, "grad_norm": 0.15796616673469543, "learning_rate": 1.3987498496749463e-07, "loss": 0.1194, "step": 6675 }, { "epoch": 0.9302584825472027, "grad_norm": 0.1165955662727356, "learning_rate": 1.3932196966533972e-07, "loss": 0.1015, "step": 6676 }, { "epoch": 0.930397826238417, "grad_norm": 0.2082407921552658, "learning_rate": 1.3877003429487224e-07, "loss": 0.1505, "step": 6677 }, { "epoch": 0.9305371699296314, "grad_norm": 0.1202576756477356, "learning_rate": 1.3821917897871905e-07, "loss": 0.1217, "step": 6678 }, { "epoch": 0.9306765136208458, "grad_norm": 0.13455936312675476, "learning_rate": 1.3766940383926785e-07, "loss": 0.1315, "step": 6679 }, { "epoch": 0.9308158573120602, "grad_norm": 0.12015584111213684, "learning_rate": 1.3712070899866704e-07, "loss": 0.1259, "step": 6680 }, { "epoch": 0.9309552010032746, "grad_norm": 0.09383749216794968, "learning_rate": 1.3657309457882294e-07, "loss": 0.0928, "step": 6681 }, { "epoch": 0.9310945446944889, "grad_norm": 0.12652915716171265, "learning_rate": 1.3602656070140275e-07, "loss": 0.1037, "step": 6682 }, { "epoch": 0.9312338883857033, "grad_norm": 0.08778814226388931, "learning_rate": 1.3548110748783426e-07, "loss": 0.1039, "step": 6683 }, { "epoch": 0.9313732320769177, "grad_norm": 0.10111068934202194, "learning_rate": 1.349367350593056e-07, "loss": 0.1074, "step": 6684 }, { "epoch": 0.9315125757681321, "grad_norm": 0.24466969072818756, "learning_rate": 1.3439344353676276e-07, "loss": 0.1387, "step": 6685 }, { "epoch": 0.9316519194593464, "grad_norm": 0.0919637605547905, "learning_rate": 1.3385123304091306e-07, "loss": 0.1027, "step": 6686 }, { "epoch": 0.9317912631505608, "grad_norm": 0.14712584018707275, "learning_rate": 1.3331010369222298e-07, "loss": 0.1134, "step": 6687 }, { "epoch": 0.9319306068417752, "grad_norm": 0.15516839921474457, "learning_rate": 1.3277005561092016e-07, "loss": 0.1256, "step": 6688 }, { "epoch": 0.9320699505329896, "grad_norm": 0.19200754165649414, "learning_rate": 1.3223108891698976e-07, "loss": 0.139, "step": 6689 }, { "epoch": 0.932209294224204, "grad_norm": 0.18588247895240784, "learning_rate": 1.316932037301788e-07, "loss": 0.1592, "step": 6690 }, { "epoch": 0.9323486379154183, "grad_norm": 0.20796708762645721, "learning_rate": 1.3115640016999222e-07, "loss": 0.1429, "step": 6691 }, { "epoch": 0.9324879816066327, "grad_norm": 0.1421327441930771, "learning_rate": 1.3062067835569625e-07, "loss": 0.1132, "step": 6692 }, { "epoch": 0.9326273252978471, "grad_norm": 0.13326767086982727, "learning_rate": 1.3008603840631516e-07, "loss": 0.1305, "step": 6693 }, { "epoch": 0.9327666689890615, "grad_norm": 0.17959965765476227, "learning_rate": 1.2955248044063452e-07, "loss": 0.145, "step": 6694 }, { "epoch": 0.932906012680276, "grad_norm": 0.1366632729768753, "learning_rate": 1.2902000457719886e-07, "loss": 0.1024, "step": 6695 }, { "epoch": 0.9330453563714903, "grad_norm": 0.09861917048692703, "learning_rate": 1.2848861093431143e-07, "loss": 0.1059, "step": 6696 }, { "epoch": 0.9331847000627047, "grad_norm": 0.14013627171516418, "learning_rate": 1.2795829963003604e-07, "loss": 0.1204, "step": 6697 }, { "epoch": 0.9333240437539191, "grad_norm": 0.14164237678050995, "learning_rate": 1.274290707821968e-07, "loss": 0.1192, "step": 6698 }, { "epoch": 0.9334633874451335, "grad_norm": 0.22731082141399384, "learning_rate": 1.269009245083741e-07, "loss": 0.1268, "step": 6699 }, { "epoch": 0.9336027311363478, "grad_norm": 0.1324632167816162, "learning_rate": 1.2637386092591187e-07, "loss": 0.1162, "step": 6700 }, { "epoch": 0.9337420748275622, "grad_norm": 0.18160085380077362, "learning_rate": 1.258478801519114e-07, "loss": 0.1217, "step": 6701 }, { "epoch": 0.9338814185187766, "grad_norm": 0.14505448937416077, "learning_rate": 1.2532298230323258e-07, "loss": 0.1367, "step": 6702 }, { "epoch": 0.934020762209991, "grad_norm": 0.14646029472351074, "learning_rate": 1.2479916749649657e-07, "loss": 0.1256, "step": 6703 }, { "epoch": 0.9341601059012054, "grad_norm": 0.1410728394985199, "learning_rate": 1.2427643584808246e-07, "loss": 0.1194, "step": 6704 }, { "epoch": 0.9342994495924197, "grad_norm": 0.11676663160324097, "learning_rate": 1.2375478747413017e-07, "loss": 0.1227, "step": 6705 }, { "epoch": 0.9344387932836341, "grad_norm": 0.11918248981237411, "learning_rate": 1.2323422249053696e-07, "loss": 0.1145, "step": 6706 }, { "epoch": 0.9345781369748485, "grad_norm": 0.13049738109111786, "learning_rate": 1.2271474101296144e-07, "loss": 0.1311, "step": 6707 }, { "epoch": 0.9347174806660629, "grad_norm": 0.17110684514045715, "learning_rate": 1.2219634315681962e-07, "loss": 0.1781, "step": 6708 }, { "epoch": 0.9348568243572772, "grad_norm": 0.13086196780204773, "learning_rate": 1.2167902903728879e-07, "loss": 0.1248, "step": 6709 }, { "epoch": 0.9349961680484916, "grad_norm": 0.1088448166847229, "learning_rate": 1.211627987693037e-07, "loss": 0.1027, "step": 6710 }, { "epoch": 0.935135511739706, "grad_norm": 0.15290898084640503, "learning_rate": 1.206476524675587e-07, "loss": 0.1389, "step": 6711 }, { "epoch": 0.9352748554309204, "grad_norm": 0.1692674458026886, "learning_rate": 1.2013359024650785e-07, "loss": 0.1445, "step": 6712 }, { "epoch": 0.9354141991221347, "grad_norm": 0.11885641515254974, "learning_rate": 1.196206122203647e-07, "loss": 0.1143, "step": 6713 }, { "epoch": 0.9355535428133491, "grad_norm": 0.09727507829666138, "learning_rate": 1.1910871850309979e-07, "loss": 0.1194, "step": 6714 }, { "epoch": 0.9356928865045635, "grad_norm": 0.23487971723079681, "learning_rate": 1.1859790920844494e-07, "loss": 0.1512, "step": 6715 }, { "epoch": 0.9358322301957779, "grad_norm": 0.14726248383522034, "learning_rate": 1.1808818444989046e-07, "loss": 0.1569, "step": 6716 }, { "epoch": 0.9359715738869923, "grad_norm": 0.24043478071689606, "learning_rate": 1.1757954434068574e-07, "loss": 0.1344, "step": 6717 }, { "epoch": 0.9361109175782066, "grad_norm": 0.11703472584486008, "learning_rate": 1.1707198899383875e-07, "loss": 0.1072, "step": 6718 }, { "epoch": 0.936250261269421, "grad_norm": 0.08910316228866577, "learning_rate": 1.1656551852211595e-07, "loss": 0.1111, "step": 6719 }, { "epoch": 0.9363896049606354, "grad_norm": 0.13654769957065582, "learning_rate": 1.1606013303804508e-07, "loss": 0.1452, "step": 6720 }, { "epoch": 0.9365289486518498, "grad_norm": 0.16140833497047424, "learning_rate": 1.1555583265390968e-07, "loss": 0.135, "step": 6721 }, { "epoch": 0.9366682923430641, "grad_norm": 0.17354761064052582, "learning_rate": 1.1505261748175512e-07, "loss": 0.1236, "step": 6722 }, { "epoch": 0.9368076360342785, "grad_norm": 0.1322079598903656, "learning_rate": 1.1455048763338361e-07, "loss": 0.1155, "step": 6723 }, { "epoch": 0.9369469797254929, "grad_norm": 0.1064356192946434, "learning_rate": 1.1404944322035705e-07, "loss": 0.1003, "step": 6724 }, { "epoch": 0.9370863234167073, "grad_norm": 0.19437214732170105, "learning_rate": 1.1354948435399582e-07, "loss": 0.156, "step": 6725 }, { "epoch": 0.9372256671079217, "grad_norm": 0.10611540079116821, "learning_rate": 1.130506111453794e-07, "loss": 0.0982, "step": 6726 }, { "epoch": 0.937365010799136, "grad_norm": 0.08790852129459381, "learning_rate": 1.1255282370534748e-07, "loss": 0.0958, "step": 6727 }, { "epoch": 0.9375043544903504, "grad_norm": 0.11989668756723404, "learning_rate": 1.1205612214449434e-07, "loss": 0.1192, "step": 6728 }, { "epoch": 0.9376436981815648, "grad_norm": 0.1513424813747406, "learning_rate": 1.1156050657317785e-07, "loss": 0.1391, "step": 6729 }, { "epoch": 0.9377830418727792, "grad_norm": 0.0907830074429512, "learning_rate": 1.1106597710151157e-07, "loss": 0.1022, "step": 6730 }, { "epoch": 0.9379223855639935, "grad_norm": 0.13596366345882416, "learning_rate": 1.1057253383936928e-07, "loss": 0.1216, "step": 6731 }, { "epoch": 0.9380617292552079, "grad_norm": 0.1825919896364212, "learning_rate": 1.1008017689638162e-07, "loss": 0.1461, "step": 6732 }, { "epoch": 0.9382010729464223, "grad_norm": 0.16866356134414673, "learning_rate": 1.0958890638194108e-07, "loss": 0.1429, "step": 6733 }, { "epoch": 0.9383404166376367, "grad_norm": 0.22891102731227875, "learning_rate": 1.0909872240519481e-07, "loss": 0.1578, "step": 6734 }, { "epoch": 0.9384797603288512, "grad_norm": 0.12458762526512146, "learning_rate": 1.0860962507505124e-07, "loss": 0.1037, "step": 6735 }, { "epoch": 0.9386191040200655, "grad_norm": 0.17176969349384308, "learning_rate": 1.0812161450017678e-07, "loss": 0.1255, "step": 6736 }, { "epoch": 0.9387584477112799, "grad_norm": 0.16468779742717743, "learning_rate": 1.0763469078899635e-07, "loss": 0.1254, "step": 6737 }, { "epoch": 0.9388977914024943, "grad_norm": 0.17518873512744904, "learning_rate": 1.0714885404969288e-07, "loss": 0.1272, "step": 6738 }, { "epoch": 0.9390371350937087, "grad_norm": 0.15974844992160797, "learning_rate": 1.0666410439020836e-07, "loss": 0.131, "step": 6739 }, { "epoch": 0.939176478784923, "grad_norm": 0.15379218757152557, "learning_rate": 1.0618044191824273e-07, "loss": 0.1605, "step": 6740 }, { "epoch": 0.9393158224761374, "grad_norm": 0.11123926192522049, "learning_rate": 1.056978667412556e-07, "loss": 0.0945, "step": 6741 }, { "epoch": 0.9394551661673518, "grad_norm": 0.4212062358856201, "learning_rate": 1.0521637896646286e-07, "loss": 0.1743, "step": 6742 }, { "epoch": 0.9395945098585662, "grad_norm": 0.15405257046222687, "learning_rate": 1.0473597870084174e-07, "loss": 0.146, "step": 6743 }, { "epoch": 0.9397338535497806, "grad_norm": 0.07112022489309311, "learning_rate": 1.0425666605112516e-07, "loss": 0.0895, "step": 6744 }, { "epoch": 0.9398731972409949, "grad_norm": 0.14654582738876343, "learning_rate": 1.0377844112380575e-07, "loss": 0.1259, "step": 6745 }, { "epoch": 0.9400125409322093, "grad_norm": 0.12451370060443878, "learning_rate": 1.0330130402513406e-07, "loss": 0.123, "step": 6746 }, { "epoch": 0.9401518846234237, "grad_norm": 0.20298267900943756, "learning_rate": 1.028252548611186e-07, "loss": 0.1683, "step": 6747 }, { "epoch": 0.9402912283146381, "grad_norm": 0.12229391932487488, "learning_rate": 1.0235029373752758e-07, "loss": 0.1121, "step": 6748 }, { "epoch": 0.9404305720058524, "grad_norm": 0.09782292693853378, "learning_rate": 1.0187642075988602e-07, "loss": 0.0962, "step": 6749 }, { "epoch": 0.9405699156970668, "grad_norm": 0.1634252816438675, "learning_rate": 1.0140363603347747e-07, "loss": 0.1168, "step": 6750 }, { "epoch": 0.9407092593882812, "grad_norm": 0.15788999199867249, "learning_rate": 1.0093193966334403e-07, "loss": 0.1165, "step": 6751 }, { "epoch": 0.9408486030794956, "grad_norm": 0.09714053571224213, "learning_rate": 1.0046133175428685e-07, "loss": 0.1089, "step": 6752 }, { "epoch": 0.94098794677071, "grad_norm": 0.10381853580474854, "learning_rate": 9.999181241086231e-08, "loss": 0.0957, "step": 6753 }, { "epoch": 0.9411272904619243, "grad_norm": 0.22256873548030853, "learning_rate": 9.952338173738862e-08, "loss": 0.1755, "step": 6754 }, { "epoch": 0.9412666341531387, "grad_norm": 0.17008791863918304, "learning_rate": 9.905603983793921e-08, "loss": 0.1521, "step": 6755 }, { "epoch": 0.9414059778443531, "grad_norm": 0.09267830103635788, "learning_rate": 9.858978681634823e-08, "loss": 0.1004, "step": 6756 }, { "epoch": 0.9415453215355675, "grad_norm": 0.12182842940092087, "learning_rate": 9.81246227762045e-08, "loss": 0.12, "step": 6757 }, { "epoch": 0.9416846652267818, "grad_norm": 0.10233481228351593, "learning_rate": 9.76605478208581e-08, "loss": 0.122, "step": 6758 }, { "epoch": 0.9418240089179962, "grad_norm": 0.1402970403432846, "learning_rate": 9.719756205341658e-08, "loss": 0.1374, "step": 6759 }, { "epoch": 0.9419633526092106, "grad_norm": 0.2155761569738388, "learning_rate": 9.673566557674263e-08, "loss": 0.1287, "step": 6760 }, { "epoch": 0.942102696300425, "grad_norm": 0.1357443630695343, "learning_rate": 9.627485849346085e-08, "loss": 0.1102, "step": 6761 }, { "epoch": 0.9422420399916394, "grad_norm": 0.13691550493240356, "learning_rate": 9.581514090595212e-08, "loss": 0.1254, "step": 6762 }, { "epoch": 0.9423813836828537, "grad_norm": 0.13409912586212158, "learning_rate": 9.535651291635362e-08, "loss": 0.1187, "step": 6763 }, { "epoch": 0.9425207273740681, "grad_norm": 0.11635604500770569, "learning_rate": 9.489897462656383e-08, "loss": 0.1237, "step": 6764 }, { "epoch": 0.9426600710652825, "grad_norm": 0.1706094592809677, "learning_rate": 9.44425261382359e-08, "loss": 0.1118, "step": 6765 }, { "epoch": 0.9427994147564969, "grad_norm": 0.09851279109716415, "learning_rate": 9.39871675527837e-08, "loss": 0.1151, "step": 6766 }, { "epoch": 0.9429387584477112, "grad_norm": 0.126935675740242, "learning_rate": 9.353289897137574e-08, "loss": 0.1119, "step": 6767 }, { "epoch": 0.9430781021389256, "grad_norm": 0.1244821697473526, "learning_rate": 9.30797204949413e-08, "loss": 0.1254, "step": 6768 }, { "epoch": 0.94321744583014, "grad_norm": 0.1358587145805359, "learning_rate": 9.262763222416649e-08, "loss": 0.1336, "step": 6769 }, { "epoch": 0.9433567895213544, "grad_norm": 0.08807959407567978, "learning_rate": 9.217663425949486e-08, "loss": 0.0973, "step": 6770 }, { "epoch": 0.9434961332125688, "grad_norm": 0.1175769567489624, "learning_rate": 9.172672670112681e-08, "loss": 0.1161, "step": 6771 }, { "epoch": 0.9436354769037831, "grad_norm": 0.13317719101905823, "learning_rate": 9.127790964902239e-08, "loss": 0.1106, "step": 6772 }, { "epoch": 0.9437748205949975, "grad_norm": 0.11936827749013901, "learning_rate": 9.083018320289849e-08, "loss": 0.0944, "step": 6773 }, { "epoch": 0.9439141642862119, "grad_norm": 0.15499016642570496, "learning_rate": 9.038354746222999e-08, "loss": 0.1277, "step": 6774 }, { "epoch": 0.9440535079774264, "grad_norm": 0.08780453354120255, "learning_rate": 8.993800252624863e-08, "loss": 0.1072, "step": 6775 }, { "epoch": 0.9441928516686408, "grad_norm": 0.15881088376045227, "learning_rate": 8.94935484939441e-08, "loss": 0.1303, "step": 6776 }, { "epoch": 0.9443321953598551, "grad_norm": 0.15067540109157562, "learning_rate": 8.905018546406519e-08, "loss": 0.133, "step": 6777 }, { "epoch": 0.9444715390510695, "grad_norm": 0.17063868045806885, "learning_rate": 8.860791353511532e-08, "loss": 0.1254, "step": 6778 }, { "epoch": 0.9446108827422839, "grad_norm": 0.09388939291238785, "learning_rate": 8.816673280535815e-08, "loss": 0.121, "step": 6779 }, { "epoch": 0.9447502264334983, "grad_norm": 0.1308419108390808, "learning_rate": 8.772664337281412e-08, "loss": 0.1233, "step": 6780 }, { "epoch": 0.9448895701247126, "grad_norm": 0.17689888179302216, "learning_rate": 8.728764533526112e-08, "loss": 0.152, "step": 6781 }, { "epoch": 0.945028913815927, "grad_norm": 0.1915632039308548, "learning_rate": 8.684973879023395e-08, "loss": 0.1501, "step": 6782 }, { "epoch": 0.9451682575071414, "grad_norm": 0.11210218816995621, "learning_rate": 8.641292383502531e-08, "loss": 0.1085, "step": 6783 }, { "epoch": 0.9453076011983558, "grad_norm": 0.18524280190467834, "learning_rate": 8.597720056668646e-08, "loss": 0.1129, "step": 6784 }, { "epoch": 0.9454469448895702, "grad_norm": 0.1606867015361786, "learning_rate": 8.55425690820244e-08, "loss": 0.1347, "step": 6785 }, { "epoch": 0.9455862885807845, "grad_norm": 0.14855916798114777, "learning_rate": 8.510902947760469e-08, "loss": 0.1307, "step": 6786 }, { "epoch": 0.9457256322719989, "grad_norm": 0.11530008912086487, "learning_rate": 8.467658184974914e-08, "loss": 0.11, "step": 6787 }, { "epoch": 0.9458649759632133, "grad_norm": 0.14945769309997559, "learning_rate": 8.424522629453924e-08, "loss": 0.1118, "step": 6788 }, { "epoch": 0.9460043196544277, "grad_norm": 0.2099258154630661, "learning_rate": 8.381496290781055e-08, "loss": 0.1863, "step": 6789 }, { "epoch": 0.946143663345642, "grad_norm": 0.12857122719287872, "learning_rate": 8.338579178515882e-08, "loss": 0.1316, "step": 6790 }, { "epoch": 0.9462830070368564, "grad_norm": 0.09011514484882355, "learning_rate": 8.295771302193723e-08, "loss": 0.1046, "step": 6791 }, { "epoch": 0.9464223507280708, "grad_norm": 0.13098029792308807, "learning_rate": 8.253072671325246e-08, "loss": 0.1228, "step": 6792 }, { "epoch": 0.9465616944192852, "grad_norm": 0.1164243221282959, "learning_rate": 8.210483295397309e-08, "loss": 0.1056, "step": 6793 }, { "epoch": 0.9467010381104995, "grad_norm": 0.18349571526050568, "learning_rate": 8.168003183872175e-08, "loss": 0.1323, "step": 6794 }, { "epoch": 0.9468403818017139, "grad_norm": 0.10534098744392395, "learning_rate": 8.125632346188073e-08, "loss": 0.0939, "step": 6795 }, { "epoch": 0.9469797254929283, "grad_norm": 0.15247425436973572, "learning_rate": 8.083370791758804e-08, "loss": 0.1202, "step": 6796 }, { "epoch": 0.9471190691841427, "grad_norm": 0.16359460353851318, "learning_rate": 8.04121852997386e-08, "loss": 0.1184, "step": 6797 }, { "epoch": 0.9472584128753571, "grad_norm": 0.1323169469833374, "learning_rate": 7.999175570198526e-08, "loss": 0.112, "step": 6798 }, { "epoch": 0.9473977565665714, "grad_norm": 0.23769442737102509, "learning_rate": 7.957241921773828e-08, "loss": 0.1633, "step": 6799 }, { "epoch": 0.9475371002577858, "grad_norm": 0.18890367448329926, "learning_rate": 7.915417594016428e-08, "loss": 0.1333, "step": 6800 }, { "epoch": 0.9476764439490002, "grad_norm": 0.22114990651607513, "learning_rate": 7.873702596218836e-08, "loss": 0.2027, "step": 6801 }, { "epoch": 0.9478157876402146, "grad_norm": 0.10852761566638947, "learning_rate": 7.83209693764908e-08, "loss": 0.1098, "step": 6802 }, { "epoch": 0.947955131331429, "grad_norm": 0.16941232979297638, "learning_rate": 7.790600627550937e-08, "loss": 0.1602, "step": 6803 }, { "epoch": 0.9480944750226433, "grad_norm": 0.131890207529068, "learning_rate": 7.749213675143974e-08, "loss": 0.1213, "step": 6804 }, { "epoch": 0.9482338187138577, "grad_norm": 0.14049167931079865, "learning_rate": 7.707936089623558e-08, "loss": 0.1201, "step": 6805 }, { "epoch": 0.9483731624050721, "grad_norm": 0.1832914799451828, "learning_rate": 7.666767880160464e-08, "loss": 0.1424, "step": 6806 }, { "epoch": 0.9485125060962865, "grad_norm": 0.15759062767028809, "learning_rate": 7.625709055901375e-08, "loss": 0.1196, "step": 6807 }, { "epoch": 0.9486518497875008, "grad_norm": 0.11708634346723557, "learning_rate": 7.584759625968663e-08, "loss": 0.1009, "step": 6808 }, { "epoch": 0.9487911934787152, "grad_norm": 0.10675448924303055, "learning_rate": 7.543919599460325e-08, "loss": 0.1133, "step": 6809 }, { "epoch": 0.9489305371699296, "grad_norm": 0.12990152835845947, "learning_rate": 7.503188985450105e-08, "loss": 0.1161, "step": 6810 }, { "epoch": 0.949069880861144, "grad_norm": 0.09346571564674377, "learning_rate": 7.462567792987374e-08, "loss": 0.1027, "step": 6811 }, { "epoch": 0.9492092245523583, "grad_norm": 0.163024440407753, "learning_rate": 7.422056031097302e-08, "loss": 0.1268, "step": 6812 }, { "epoch": 0.9493485682435727, "grad_norm": 0.14366398751735687, "learning_rate": 7.381653708780578e-08, "loss": 0.1229, "step": 6813 }, { "epoch": 0.9494879119347871, "grad_norm": 0.07794877886772156, "learning_rate": 7.341360835013745e-08, "loss": 0.0929, "step": 6814 }, { "epoch": 0.9496272556260016, "grad_norm": 0.14343522489070892, "learning_rate": 7.301177418748973e-08, "loss": 0.1374, "step": 6815 }, { "epoch": 0.949766599317216, "grad_norm": 0.1200299933552742, "learning_rate": 7.261103468914066e-08, "loss": 0.1247, "step": 6816 }, { "epoch": 0.9499059430084303, "grad_norm": 0.16971294581890106, "learning_rate": 7.221138994412569e-08, "loss": 0.1481, "step": 6817 }, { "epoch": 0.9500452866996447, "grad_norm": 0.13766005635261536, "learning_rate": 7.181284004123601e-08, "loss": 0.1228, "step": 6818 }, { "epoch": 0.9501846303908591, "grad_norm": 0.11581546068191528, "learning_rate": 7.14153850690208e-08, "loss": 0.1152, "step": 6819 }, { "epoch": 0.9503239740820735, "grad_norm": 0.1270170509815216, "learning_rate": 7.101902511578606e-08, "loss": 0.1039, "step": 6820 }, { "epoch": 0.9504633177732879, "grad_norm": 0.10819533467292786, "learning_rate": 7.062376026959305e-08, "loss": 0.111, "step": 6821 }, { "epoch": 0.9506026614645022, "grad_norm": 0.17112861573696136, "learning_rate": 7.022959061826151e-08, "loss": 0.1307, "step": 6822 }, { "epoch": 0.9507420051557166, "grad_norm": 0.12434942275285721, "learning_rate": 6.983651624936527e-08, "loss": 0.1173, "step": 6823 }, { "epoch": 0.950881348846931, "grad_norm": 0.13780547678470612, "learning_rate": 6.944453725023836e-08, "loss": 0.1306, "step": 6824 }, { "epoch": 0.9510206925381454, "grad_norm": 0.12804801762104034, "learning_rate": 6.905365370796891e-08, "loss": 0.1164, "step": 6825 }, { "epoch": 0.9511600362293597, "grad_norm": 0.16828250885009766, "learning_rate": 6.866386570940132e-08, "loss": 0.161, "step": 6826 }, { "epoch": 0.9512993799205741, "grad_norm": 0.12239574640989304, "learning_rate": 6.827517334113965e-08, "loss": 0.118, "step": 6827 }, { "epoch": 0.9514387236117885, "grad_norm": 0.13181371986865997, "learning_rate": 6.788757668954038e-08, "loss": 0.1054, "step": 6828 }, { "epoch": 0.9515780673030029, "grad_norm": 0.11402889341115952, "learning_rate": 6.750107584071964e-08, "loss": 0.1112, "step": 6829 }, { "epoch": 0.9517174109942172, "grad_norm": 0.13935574889183044, "learning_rate": 6.711567088054927e-08, "loss": 0.1286, "step": 6830 }, { "epoch": 0.9518567546854316, "grad_norm": 0.2029108852148056, "learning_rate": 6.67313618946569e-08, "loss": 0.1538, "step": 6831 }, { "epoch": 0.951996098376646, "grad_norm": 0.12129921466112137, "learning_rate": 6.634814896842757e-08, "loss": 0.1031, "step": 6832 }, { "epoch": 0.9521354420678604, "grad_norm": 0.17487135529518127, "learning_rate": 6.59660321870026e-08, "loss": 0.1479, "step": 6833 }, { "epoch": 0.9522747857590748, "grad_norm": 0.15798601508140564, "learning_rate": 6.558501163527964e-08, "loss": 0.1291, "step": 6834 }, { "epoch": 0.9524141294502891, "grad_norm": 0.18738187849521637, "learning_rate": 6.520508739791153e-08, "loss": 0.1504, "step": 6835 }, { "epoch": 0.9525534731415035, "grad_norm": 0.21563296020030975, "learning_rate": 6.482625955931022e-08, "loss": 0.1577, "step": 6836 }, { "epoch": 0.9526928168327179, "grad_norm": 0.20401333272457123, "learning_rate": 6.444852820364222e-08, "loss": 0.1681, "step": 6837 }, { "epoch": 0.9528321605239323, "grad_norm": 0.14719733595848083, "learning_rate": 6.407189341483044e-08, "loss": 0.1398, "step": 6838 }, { "epoch": 0.9529715042151466, "grad_norm": 0.14022402465343475, "learning_rate": 6.369635527655515e-08, "loss": 0.1166, "step": 6839 }, { "epoch": 0.953110847906361, "grad_norm": 0.10332309454679489, "learning_rate": 6.332191387225128e-08, "loss": 0.1002, "step": 6840 }, { "epoch": 0.9532501915975754, "grad_norm": 0.11811865866184235, "learning_rate": 6.294856928511284e-08, "loss": 0.1116, "step": 6841 }, { "epoch": 0.9533895352887898, "grad_norm": 0.14473891258239746, "learning_rate": 6.257632159808679e-08, "loss": 0.1167, "step": 6842 }, { "epoch": 0.9535288789800042, "grad_norm": 0.11304578185081482, "learning_rate": 6.220517089387867e-08, "loss": 0.1199, "step": 6843 }, { "epoch": 0.9536682226712185, "grad_norm": 0.13530279695987701, "learning_rate": 6.183511725495028e-08, "loss": 0.1513, "step": 6844 }, { "epoch": 0.9538075663624329, "grad_norm": 0.15039947628974915, "learning_rate": 6.146616076351864e-08, "loss": 0.1714, "step": 6845 }, { "epoch": 0.9539469100536473, "grad_norm": 0.14864644408226013, "learning_rate": 6.109830150155705e-08, "loss": 0.1401, "step": 6846 }, { "epoch": 0.9540862537448617, "grad_norm": 0.178122878074646, "learning_rate": 6.07315395507957e-08, "loss": 0.1399, "step": 6847 }, { "epoch": 0.954225597436076, "grad_norm": 0.15710170567035675, "learning_rate": 6.036587499272161e-08, "loss": 0.1227, "step": 6848 }, { "epoch": 0.9543649411272904, "grad_norm": 0.10768492519855499, "learning_rate": 6.000130790857595e-08, "loss": 0.1074, "step": 6849 }, { "epoch": 0.9545042848185048, "grad_norm": 0.17548450827598572, "learning_rate": 5.963783837935722e-08, "loss": 0.1251, "step": 6850 }, { "epoch": 0.9546436285097192, "grad_norm": 0.14196060597896576, "learning_rate": 5.927546648582083e-08, "loss": 0.1176, "step": 6851 }, { "epoch": 0.9547829722009336, "grad_norm": 0.15073686838150024, "learning_rate": 5.8914192308476835e-08, "loss": 0.1166, "step": 6852 }, { "epoch": 0.9549223158921479, "grad_norm": 0.12789157032966614, "learning_rate": 5.855401592759269e-08, "loss": 0.1153, "step": 6853 }, { "epoch": 0.9550616595833623, "grad_norm": 0.16777421534061432, "learning_rate": 5.8194937423191043e-08, "loss": 0.148, "step": 6854 }, { "epoch": 0.9552010032745768, "grad_norm": 0.18403173983097076, "learning_rate": 5.783695687505087e-08, "loss": 0.1257, "step": 6855 }, { "epoch": 0.9553403469657912, "grad_norm": 0.10062601417303085, "learning_rate": 5.7480074362707415e-08, "loss": 0.1079, "step": 6856 }, { "epoch": 0.9554796906570056, "grad_norm": 0.11816637963056564, "learning_rate": 5.712428996545172e-08, "loss": 0.1082, "step": 6857 }, { "epoch": 0.9556190343482199, "grad_norm": 0.1961033046245575, "learning_rate": 5.6769603762331096e-08, "loss": 0.1642, "step": 6858 }, { "epoch": 0.9557583780394343, "grad_norm": 0.2204238623380661, "learning_rate": 5.641601583214862e-08, "loss": 0.1649, "step": 6859 }, { "epoch": 0.9558977217306487, "grad_norm": 0.12086377292871475, "learning_rate": 5.606352625346368e-08, "loss": 0.112, "step": 6860 }, { "epoch": 0.9560370654218631, "grad_norm": 0.2987113296985626, "learning_rate": 5.571213510459084e-08, "loss": 0.1577, "step": 6861 }, { "epoch": 0.9561764091130774, "grad_norm": 0.182044118642807, "learning_rate": 5.53618424636021e-08, "loss": 0.1306, "step": 6862 }, { "epoch": 0.9563157528042918, "grad_norm": 0.16495247185230255, "learning_rate": 5.501264840832299e-08, "loss": 0.126, "step": 6863 }, { "epoch": 0.9564550964955062, "grad_norm": 0.2308102697134018, "learning_rate": 5.466455301633811e-08, "loss": 0.15, "step": 6864 }, { "epoch": 0.9565944401867206, "grad_norm": 0.2627522945404053, "learning_rate": 5.431755636498559e-08, "loss": 0.1403, "step": 6865 }, { "epoch": 0.956733783877935, "grad_norm": 0.14195047318935394, "learning_rate": 5.3971658531360436e-08, "loss": 0.1295, "step": 6866 }, { "epoch": 0.9568731275691493, "grad_norm": 0.2312740683555603, "learning_rate": 5.362685959231284e-08, "loss": 0.1628, "step": 6867 }, { "epoch": 0.9570124712603637, "grad_norm": 0.1325298398733139, "learning_rate": 5.3283159624448745e-08, "loss": 0.0902, "step": 6868 }, { "epoch": 0.9571518149515781, "grad_norm": 0.12661781907081604, "learning_rate": 5.294055870413206e-08, "loss": 0.1221, "step": 6869 }, { "epoch": 0.9572911586427925, "grad_norm": 0.18661393225193024, "learning_rate": 5.2599056907479685e-08, "loss": 0.1449, "step": 6870 }, { "epoch": 0.9574305023340068, "grad_norm": 0.11653266102075577, "learning_rate": 5.2258654310365366e-08, "loss": 0.1143, "step": 6871 }, { "epoch": 0.9575698460252212, "grad_norm": 0.11066276580095291, "learning_rate": 5.1919350988419716e-08, "loss": 0.1073, "step": 6872 }, { "epoch": 0.9577091897164356, "grad_norm": 0.19262228906154633, "learning_rate": 5.1581147017027434e-08, "loss": 0.1708, "step": 6873 }, { "epoch": 0.95784853340765, "grad_norm": 0.12916605174541473, "learning_rate": 5.124404247133008e-08, "loss": 0.1179, "step": 6874 }, { "epoch": 0.9579878770988643, "grad_norm": 0.14786149561405182, "learning_rate": 5.090803742622441e-08, "loss": 0.1266, "step": 6875 }, { "epoch": 0.9581272207900787, "grad_norm": 0.10636044293642044, "learning_rate": 5.057313195636293e-08, "loss": 0.1091, "step": 6876 }, { "epoch": 0.9582665644812931, "grad_norm": 0.176450714468956, "learning_rate": 5.0239326136154454e-08, "loss": 0.1606, "step": 6877 }, { "epoch": 0.9584059081725075, "grad_norm": 0.15113042294979095, "learning_rate": 4.990662003976243e-08, "loss": 0.1463, "step": 6878 }, { "epoch": 0.9585452518637219, "grad_norm": 0.19585737586021423, "learning_rate": 4.957501374110718e-08, "loss": 0.1679, "step": 6879 }, { "epoch": 0.9586845955549362, "grad_norm": 0.10947238653898239, "learning_rate": 4.924450731386365e-08, "loss": 0.1241, "step": 6880 }, { "epoch": 0.9588239392461506, "grad_norm": 0.172127828001976, "learning_rate": 4.8915100831463116e-08, "loss": 0.1472, "step": 6881 }, { "epoch": 0.958963282937365, "grad_norm": 0.09992074966430664, "learning_rate": 4.858679436709201e-08, "loss": 0.1075, "step": 6882 }, { "epoch": 0.9591026266285794, "grad_norm": 0.1636759489774704, "learning_rate": 4.825958799369201e-08, "loss": 0.1303, "step": 6883 }, { "epoch": 0.9592419703197937, "grad_norm": 0.1968097686767578, "learning_rate": 4.7933481783961624e-08, "loss": 0.1405, "step": 6884 }, { "epoch": 0.9593813140110081, "grad_norm": 0.188916876912117, "learning_rate": 4.760847581035399e-08, "loss": 0.1274, "step": 6885 }, { "epoch": 0.9595206577022225, "grad_norm": 0.11778882890939713, "learning_rate": 4.728457014507859e-08, "loss": 0.1212, "step": 6886 }, { "epoch": 0.9596600013934369, "grad_norm": 0.12671098113059998, "learning_rate": 4.69617648600984e-08, "loss": 0.1207, "step": 6887 }, { "epoch": 0.9597993450846513, "grad_norm": 0.09260889887809753, "learning_rate": 4.664006002713495e-08, "loss": 0.0975, "step": 6888 }, { "epoch": 0.9599386887758656, "grad_norm": 0.15603527426719666, "learning_rate": 4.631945571766272e-08, "loss": 0.1509, "step": 6889 }, { "epoch": 0.96007803246708, "grad_norm": 0.14006160199642181, "learning_rate": 4.5999952002912516e-08, "loss": 0.1153, "step": 6890 }, { "epoch": 0.9602173761582944, "grad_norm": 0.09990203380584717, "learning_rate": 4.5681548953872555e-08, "loss": 0.0948, "step": 6891 }, { "epoch": 0.9603567198495088, "grad_norm": 0.12171705067157745, "learning_rate": 4.536424664128236e-08, "loss": 0.1184, "step": 6892 }, { "epoch": 0.9604960635407231, "grad_norm": 0.18753165006637573, "learning_rate": 4.504804513564054e-08, "loss": 0.1201, "step": 6893 }, { "epoch": 0.9606354072319375, "grad_norm": 0.1321374922990799, "learning_rate": 4.473294450719923e-08, "loss": 0.1105, "step": 6894 }, { "epoch": 0.9607747509231519, "grad_norm": 0.2163173109292984, "learning_rate": 4.441894482596743e-08, "loss": 0.1277, "step": 6895 }, { "epoch": 0.9609140946143664, "grad_norm": 0.1281244158744812, "learning_rate": 4.410604616170822e-08, "loss": 0.1258, "step": 6896 }, { "epoch": 0.9610534383055808, "grad_norm": 0.15752463042736053, "learning_rate": 4.379424858394043e-08, "loss": 0.1038, "step": 6897 }, { "epoch": 0.9611927819967951, "grad_norm": 0.14376232028007507, "learning_rate": 4.348355216193867e-08, "loss": 0.1203, "step": 6898 }, { "epoch": 0.9613321256880095, "grad_norm": 0.17412154376506805, "learning_rate": 4.3173956964732145e-08, "loss": 0.1469, "step": 6899 }, { "epoch": 0.9614714693792239, "grad_norm": 0.11819832026958466, "learning_rate": 4.286546306110639e-08, "loss": 0.0943, "step": 6900 }, { "epoch": 0.9616108130704383, "grad_norm": 0.15422524511814117, "learning_rate": 4.2558070519601594e-08, "loss": 0.1391, "step": 6901 }, { "epoch": 0.9617501567616527, "grad_norm": 0.19774022698402405, "learning_rate": 4.2251779408513104e-08, "loss": 0.1426, "step": 6902 }, { "epoch": 0.961889500452867, "grad_norm": 0.1882520467042923, "learning_rate": 4.19465897958915e-08, "loss": 0.1499, "step": 6903 }, { "epoch": 0.9620288441440814, "grad_norm": 0.15457482635974884, "learning_rate": 4.164250174954365e-08, "loss": 0.1191, "step": 6904 }, { "epoch": 0.9621681878352958, "grad_norm": 0.10064776986837387, "learning_rate": 4.133951533703107e-08, "loss": 0.0974, "step": 6905 }, { "epoch": 0.9623075315265102, "grad_norm": 0.10919415950775146, "learning_rate": 4.1037630625669345e-08, "loss": 0.1031, "step": 6906 }, { "epoch": 0.9624468752177245, "grad_norm": 0.15912124514579773, "learning_rate": 4.07368476825315e-08, "loss": 0.137, "step": 6907 }, { "epoch": 0.9625862189089389, "grad_norm": 0.1616646945476532, "learning_rate": 4.043716657444407e-08, "loss": 0.1254, "step": 6908 }, { "epoch": 0.9627255626001533, "grad_norm": 0.20478664338588715, "learning_rate": 4.0138587367989365e-08, "loss": 0.1306, "step": 6909 }, { "epoch": 0.9628649062913677, "grad_norm": 0.15235067903995514, "learning_rate": 3.984111012950487e-08, "loss": 0.1219, "step": 6910 }, { "epoch": 0.963004249982582, "grad_norm": 0.18581010401248932, "learning_rate": 3.9544734925083264e-08, "loss": 0.1332, "step": 6911 }, { "epoch": 0.9631435936737964, "grad_norm": 0.15156550705432892, "learning_rate": 3.924946182057299e-08, "loss": 0.1396, "step": 6912 }, { "epoch": 0.9632829373650108, "grad_norm": 0.15349699556827545, "learning_rate": 3.8955290881576566e-08, "loss": 0.1322, "step": 6913 }, { "epoch": 0.9634222810562252, "grad_norm": 0.14372681081295013, "learning_rate": 3.866222217345117e-08, "loss": 0.1271, "step": 6914 }, { "epoch": 0.9635616247474396, "grad_norm": 0.2841104567050934, "learning_rate": 3.837025576131137e-08, "loss": 0.1562, "step": 6915 }, { "epoch": 0.9637009684386539, "grad_norm": 0.14955875277519226, "learning_rate": 3.807939171002473e-08, "loss": 0.1224, "step": 6916 }, { "epoch": 0.9638403121298683, "grad_norm": 0.15513797104358673, "learning_rate": 3.778963008421455e-08, "loss": 0.1281, "step": 6917 }, { "epoch": 0.9639796558210827, "grad_norm": 0.11175408214330673, "learning_rate": 3.750097094825933e-08, "loss": 0.1078, "step": 6918 }, { "epoch": 0.9641189995122971, "grad_norm": 0.09850236773490906, "learning_rate": 3.721341436629222e-08, "loss": 0.1138, "step": 6919 }, { "epoch": 0.9642583432035114, "grad_norm": 0.1635781228542328, "learning_rate": 3.6926960402202674e-08, "loss": 0.1124, "step": 6920 }, { "epoch": 0.9643976868947258, "grad_norm": 0.15179967880249023, "learning_rate": 3.66416091196331e-08, "loss": 0.1251, "step": 6921 }, { "epoch": 0.9645370305859402, "grad_norm": 0.19826167821884155, "learning_rate": 3.63573605819828e-08, "loss": 0.1509, "step": 6922 }, { "epoch": 0.9646763742771546, "grad_norm": 0.24455687403678894, "learning_rate": 3.6074214852405695e-08, "loss": 0.1466, "step": 6923 }, { "epoch": 0.964815717968369, "grad_norm": 0.2019413411617279, "learning_rate": 3.5792171993809244e-08, "loss": 0.1319, "step": 6924 }, { "epoch": 0.9649550616595833, "grad_norm": 0.11304106563329697, "learning_rate": 3.55112320688572e-08, "loss": 0.1024, "step": 6925 }, { "epoch": 0.9650944053507977, "grad_norm": 0.19163955748081207, "learning_rate": 3.523139513996798e-08, "loss": 0.1368, "step": 6926 }, { "epoch": 0.9652337490420121, "grad_norm": 0.11188310384750366, "learning_rate": 3.495266126931574e-08, "loss": 0.1055, "step": 6927 }, { "epoch": 0.9653730927332265, "grad_norm": 0.0964423194527626, "learning_rate": 3.467503051882815e-08, "loss": 0.1048, "step": 6928 }, { "epoch": 0.9655124364244408, "grad_norm": 0.12493131309747696, "learning_rate": 3.4398502950188096e-08, "loss": 0.1184, "step": 6929 }, { "epoch": 0.9656517801156552, "grad_norm": 0.20796161890029907, "learning_rate": 3.4123078624834214e-08, "loss": 0.1761, "step": 6930 }, { "epoch": 0.9657911238068696, "grad_norm": 0.1570586860179901, "learning_rate": 3.384875760395978e-08, "loss": 0.1179, "step": 6931 }, { "epoch": 0.965930467498084, "grad_norm": 0.18125002086162567, "learning_rate": 3.3575539948511595e-08, "loss": 0.1503, "step": 6932 }, { "epoch": 0.9660698111892984, "grad_norm": 0.17902261018753052, "learning_rate": 3.330342571919332e-08, "loss": 0.1306, "step": 6933 }, { "epoch": 0.9662091548805127, "grad_norm": 0.12836112082004547, "learning_rate": 3.30324149764627e-08, "loss": 0.138, "step": 6934 }, { "epoch": 0.9663484985717271, "grad_norm": 0.22136034071445465, "learning_rate": 3.2762507780531026e-08, "loss": 0.1324, "step": 6935 }, { "epoch": 0.9664878422629416, "grad_norm": 0.1269519180059433, "learning_rate": 3.249370419136644e-08, "loss": 0.1392, "step": 6936 }, { "epoch": 0.966627185954156, "grad_norm": 0.15396739542484283, "learning_rate": 3.2226004268690605e-08, "loss": 0.1557, "step": 6937 }, { "epoch": 0.9667665296453704, "grad_norm": 0.206028014421463, "learning_rate": 3.195940807198039e-08, "loss": 0.1302, "step": 6938 }, { "epoch": 0.9669058733365847, "grad_norm": 0.22223812341690063, "learning_rate": 3.169391566046731e-08, "loss": 0.1522, "step": 6939 }, { "epoch": 0.9670452170277991, "grad_norm": 0.08854779601097107, "learning_rate": 3.142952709313807e-08, "loss": 0.1041, "step": 6940 }, { "epoch": 0.9671845607190135, "grad_norm": 0.1409836709499359, "learning_rate": 3.116624242873345e-08, "loss": 0.1298, "step": 6941 }, { "epoch": 0.9673239044102279, "grad_norm": 0.14152680337429047, "learning_rate": 3.090406172574889e-08, "loss": 0.1264, "step": 6942 }, { "epoch": 0.9674632481014422, "grad_norm": 0.13433843851089478, "learning_rate": 3.064298504243612e-08, "loss": 0.1205, "step": 6943 }, { "epoch": 0.9676025917926566, "grad_norm": 0.11860920488834381, "learning_rate": 3.0383012436799306e-08, "loss": 0.1067, "step": 6944 }, { "epoch": 0.967741935483871, "grad_norm": 0.18640246987342834, "learning_rate": 3.0124143966599464e-08, "loss": 0.1189, "step": 6945 }, { "epoch": 0.9678812791750854, "grad_norm": 0.12741397321224213, "learning_rate": 2.9866379689350024e-08, "loss": 0.1052, "step": 6946 }, { "epoch": 0.9680206228662998, "grad_norm": 0.18025371432304382, "learning_rate": 2.9609719662320735e-08, "loss": 0.1659, "step": 6947 }, { "epoch": 0.9681599665575141, "grad_norm": 0.15594880282878876, "learning_rate": 2.9354163942535983e-08, "loss": 0.1098, "step": 6948 }, { "epoch": 0.9682993102487285, "grad_norm": 0.16491292417049408, "learning_rate": 2.90997125867748e-08, "loss": 0.1429, "step": 6949 }, { "epoch": 0.9684386539399429, "grad_norm": 0.16174697875976562, "learning_rate": 2.8846365651569175e-08, "loss": 0.1408, "step": 6950 }, { "epoch": 0.9685779976311573, "grad_norm": 0.17786408960819244, "learning_rate": 2.8594123193207978e-08, "loss": 0.1516, "step": 6951 }, { "epoch": 0.9687173413223716, "grad_norm": 0.1040836051106453, "learning_rate": 2.83429852677336e-08, "loss": 0.0982, "step": 6952 }, { "epoch": 0.968856685013586, "grad_norm": 0.1264607459306717, "learning_rate": 2.809295193094308e-08, "loss": 0.1092, "step": 6953 }, { "epoch": 0.9689960287048004, "grad_norm": 0.19458557665348053, "learning_rate": 2.7844023238388084e-08, "loss": 0.1458, "step": 6954 }, { "epoch": 0.9691353723960148, "grad_norm": 0.1475791484117508, "learning_rate": 2.759619924537438e-08, "loss": 0.1397, "step": 6955 }, { "epoch": 0.9692747160872291, "grad_norm": 0.10209313780069351, "learning_rate": 2.7349480006964023e-08, "loss": 0.1154, "step": 6956 }, { "epoch": 0.9694140597784435, "grad_norm": 0.1273447573184967, "learning_rate": 2.7103865577970955e-08, "loss": 0.1269, "step": 6957 }, { "epoch": 0.9695534034696579, "grad_norm": 0.12032163888216019, "learning_rate": 2.6859356012965964e-08, "loss": 0.1091, "step": 6958 }, { "epoch": 0.9696927471608723, "grad_norm": 0.1793348789215088, "learning_rate": 2.661595136627393e-08, "loss": 0.1418, "step": 6959 }, { "epoch": 0.9698320908520867, "grad_norm": 0.22364430129528046, "learning_rate": 2.63736516919727e-08, "loss": 0.1264, "step": 6960 }, { "epoch": 0.969971434543301, "grad_norm": 0.11325500905513763, "learning_rate": 2.6132457043896442e-08, "loss": 0.1041, "step": 6961 }, { "epoch": 0.9701107782345154, "grad_norm": 0.14627209305763245, "learning_rate": 2.589236747563284e-08, "loss": 0.1169, "step": 6962 }, { "epoch": 0.9702501219257298, "grad_norm": 0.21801243722438812, "learning_rate": 2.5653383040524228e-08, "loss": 0.1149, "step": 6963 }, { "epoch": 0.9703894656169442, "grad_norm": 0.12575207650661469, "learning_rate": 2.5415503791667573e-08, "loss": 0.1181, "step": 6964 }, { "epoch": 0.9705288093081585, "grad_norm": 0.1537289172410965, "learning_rate": 2.5178729781915046e-08, "loss": 0.1167, "step": 6965 }, { "epoch": 0.9706681529993729, "grad_norm": 0.1472100466489792, "learning_rate": 2.4943061063870678e-08, "loss": 0.1454, "step": 6966 }, { "epoch": 0.9708074966905873, "grad_norm": 0.1317574828863144, "learning_rate": 2.4708497689896472e-08, "loss": 0.1166, "step": 6967 }, { "epoch": 0.9709468403818017, "grad_norm": 0.14253170788288116, "learning_rate": 2.4475039712105742e-08, "loss": 0.1155, "step": 6968 }, { "epoch": 0.9710861840730161, "grad_norm": 0.09183201938867569, "learning_rate": 2.4242687182368106e-08, "loss": 0.0899, "step": 6969 }, { "epoch": 0.9712255277642304, "grad_norm": 0.12155938148498535, "learning_rate": 2.401144015230672e-08, "loss": 0.1238, "step": 6970 }, { "epoch": 0.9713648714554448, "grad_norm": 0.16434361040592194, "learning_rate": 2.3781298673299924e-08, "loss": 0.1344, "step": 6971 }, { "epoch": 0.9715042151466592, "grad_norm": 0.10774355381727219, "learning_rate": 2.3552262796479042e-08, "loss": 0.1141, "step": 6972 }, { "epoch": 0.9716435588378736, "grad_norm": 0.10304692387580872, "learning_rate": 2.33243325727317e-08, "loss": 0.1064, "step": 6973 }, { "epoch": 0.9717829025290879, "grad_norm": 0.10933263599872589, "learning_rate": 2.3097508052697948e-08, "loss": 0.1014, "step": 6974 }, { "epoch": 0.9719222462203023, "grad_norm": 0.12712056934833527, "learning_rate": 2.2871789286773582e-08, "loss": 0.12, "step": 6975 }, { "epoch": 0.9720615899115168, "grad_norm": 0.15073464810848236, "learning_rate": 2.264717632510738e-08, "loss": 0.1218, "step": 6976 }, { "epoch": 0.9722009336027312, "grad_norm": 0.1898847371339798, "learning_rate": 2.2423669217604415e-08, "loss": 0.1456, "step": 6977 }, { "epoch": 0.9723402772939456, "grad_norm": 0.1545005887746811, "learning_rate": 2.220126801392164e-08, "loss": 0.1451, "step": 6978 }, { "epoch": 0.9724796209851599, "grad_norm": 0.4794739782810211, "learning_rate": 2.1979972763471747e-08, "loss": 0.136, "step": 6979 }, { "epoch": 0.9726189646763743, "grad_norm": 0.15764625370502472, "learning_rate": 2.1759783515422074e-08, "loss": 0.1742, "step": 6980 }, { "epoch": 0.9727583083675887, "grad_norm": 0.21232883632183075, "learning_rate": 2.1540700318693487e-08, "loss": 0.1599, "step": 6981 }, { "epoch": 0.9728976520588031, "grad_norm": 0.14936862885951996, "learning_rate": 2.132272322196094e-08, "loss": 0.1228, "step": 6982 }, { "epoch": 0.9730369957500175, "grad_norm": 0.16237099468708038, "learning_rate": 2.110585227365458e-08, "loss": 0.1295, "step": 6983 }, { "epoch": 0.9731763394412318, "grad_norm": 0.14884553849697113, "learning_rate": 2.0890087521957536e-08, "loss": 0.1207, "step": 6984 }, { "epoch": 0.9733156831324462, "grad_norm": 0.1276542991399765, "learning_rate": 2.0675429014807568e-08, "loss": 0.1187, "step": 6985 }, { "epoch": 0.9734550268236606, "grad_norm": 0.13927240669727325, "learning_rate": 2.0461876799898196e-08, "loss": 0.1298, "step": 6986 }, { "epoch": 0.973594370514875, "grad_norm": 0.12250695377588272, "learning_rate": 2.024943092467424e-08, "loss": 0.1133, "step": 6987 }, { "epoch": 0.9737337142060893, "grad_norm": 0.1596132516860962, "learning_rate": 2.0038091436337392e-08, "loss": 0.1373, "step": 6988 }, { "epoch": 0.9738730578973037, "grad_norm": 0.131379634141922, "learning_rate": 1.9827858381842312e-08, "loss": 0.1295, "step": 6989 }, { "epoch": 0.9740124015885181, "grad_norm": 0.1613786220550537, "learning_rate": 1.961873180789775e-08, "loss": 0.1194, "step": 6990 }, { "epoch": 0.9741517452797325, "grad_norm": 0.15140192210674286, "learning_rate": 1.9410711760967092e-08, "loss": 0.11, "step": 6991 }, { "epoch": 0.9742910889709469, "grad_norm": 0.1265321522951126, "learning_rate": 1.920379828726726e-08, "loss": 0.1134, "step": 6992 }, { "epoch": 0.9744304326621612, "grad_norm": 0.17683105170726776, "learning_rate": 1.8997991432769812e-08, "loss": 0.1415, "step": 6993 }, { "epoch": 0.9745697763533756, "grad_norm": 0.12370963394641876, "learning_rate": 1.8793291243200396e-08, "loss": 0.1125, "step": 6994 }, { "epoch": 0.97470912004459, "grad_norm": 0.1345132291316986, "learning_rate": 1.8589697764039295e-08, "loss": 0.1195, "step": 6995 }, { "epoch": 0.9748484637358044, "grad_norm": 0.21562305092811584, "learning_rate": 1.8387211040519216e-08, "loss": 0.136, "step": 6996 }, { "epoch": 0.9749878074270187, "grad_norm": 0.1384534388780594, "learning_rate": 1.818583111762917e-08, "loss": 0.1053, "step": 6997 }, { "epoch": 0.9751271511182331, "grad_norm": 0.40118154883384705, "learning_rate": 1.7985558040110594e-08, "loss": 0.1444, "step": 6998 }, { "epoch": 0.9752664948094475, "grad_norm": 0.1496875137090683, "learning_rate": 1.778639185245956e-08, "loss": 0.1568, "step": 6999 }, { "epoch": 0.9754058385006619, "grad_norm": 0.11296391487121582, "learning_rate": 1.758833259892623e-08, "loss": 0.107, "step": 7000 }, { "epoch": 0.9755451821918762, "grad_norm": 0.15417662262916565, "learning_rate": 1.7391380323515395e-08, "loss": 0.123, "step": 7001 }, { "epoch": 0.9756845258830906, "grad_norm": 0.14952872693538666, "learning_rate": 1.7195535069984838e-08, "loss": 0.1293, "step": 7002 }, { "epoch": 0.975823869574305, "grad_norm": 0.12988384068012238, "learning_rate": 1.700079688184697e-08, "loss": 0.1375, "step": 7003 }, { "epoch": 0.9759632132655194, "grad_norm": 0.17311333119869232, "learning_rate": 1.6807165802368297e-08, "loss": 0.1408, "step": 7004 }, { "epoch": 0.9761025569567338, "grad_norm": 0.12536519765853882, "learning_rate": 1.661464187456885e-08, "loss": 0.1129, "step": 7005 }, { "epoch": 0.9762419006479481, "grad_norm": 0.1575123816728592, "learning_rate": 1.6423225141223854e-08, "loss": 0.1247, "step": 7006 }, { "epoch": 0.9763812443391625, "grad_norm": 0.10477521270513535, "learning_rate": 1.623291564486096e-08, "loss": 0.106, "step": 7007 }, { "epoch": 0.9765205880303769, "grad_norm": 0.16046305000782013, "learning_rate": 1.604371342776301e-08, "loss": 0.1145, "step": 7008 }, { "epoch": 0.9766599317215913, "grad_norm": 0.10977134853601456, "learning_rate": 1.585561853196582e-08, "loss": 0.1099, "step": 7009 }, { "epoch": 0.9767992754128056, "grad_norm": 0.1663929522037506, "learning_rate": 1.5668630999260968e-08, "loss": 0.1394, "step": 7010 }, { "epoch": 0.97693861910402, "grad_norm": 0.10533162951469421, "learning_rate": 1.5482750871191333e-08, "loss": 0.1115, "step": 7011 }, { "epoch": 0.9770779627952344, "grad_norm": 0.2344849854707718, "learning_rate": 1.529797818905665e-08, "loss": 0.1535, "step": 7012 }, { "epoch": 0.9772173064864488, "grad_norm": 0.14663125574588776, "learning_rate": 1.5114312993908532e-08, "loss": 0.1169, "step": 7013 }, { "epoch": 0.9773566501776632, "grad_norm": 0.20288525521755219, "learning_rate": 1.4931755326552667e-08, "loss": 0.1431, "step": 7014 }, { "epoch": 0.9774959938688775, "grad_norm": 0.17539983987808228, "learning_rate": 1.4750305227549943e-08, "loss": 0.1358, "step": 7015 }, { "epoch": 0.977635337560092, "grad_norm": 0.21299755573272705, "learning_rate": 1.4569962737214228e-08, "loss": 0.1619, "step": 7016 }, { "epoch": 0.9777746812513064, "grad_norm": 0.1324947327375412, "learning_rate": 1.4390727895613465e-08, "loss": 0.1318, "step": 7017 }, { "epoch": 0.9779140249425208, "grad_norm": 0.25107085704803467, "learning_rate": 1.4212600742569694e-08, "loss": 0.1493, "step": 7018 }, { "epoch": 0.9780533686337352, "grad_norm": 0.12412504851818085, "learning_rate": 1.4035581317658476e-08, "loss": 0.1096, "step": 7019 }, { "epoch": 0.9781927123249495, "grad_norm": 0.2127981036901474, "learning_rate": 1.3859669660209463e-08, "loss": 0.1728, "step": 7020 }, { "epoch": 0.9783320560161639, "grad_norm": 0.12232914566993713, "learning_rate": 1.368486580930639e-08, "loss": 0.1085, "step": 7021 }, { "epoch": 0.9784713997073783, "grad_norm": 0.11896677315235138, "learning_rate": 1.3511169803786527e-08, "loss": 0.1192, "step": 7022 }, { "epoch": 0.9786107433985927, "grad_norm": 0.16509194672107697, "learning_rate": 1.333858168224178e-08, "loss": 0.1208, "step": 7023 }, { "epoch": 0.978750087089807, "grad_norm": 0.1170729249715805, "learning_rate": 1.3167101483016476e-08, "loss": 0.1082, "step": 7024 }, { "epoch": 0.9788894307810214, "grad_norm": 0.12706859409809113, "learning_rate": 1.2996729244209583e-08, "loss": 0.1105, "step": 7025 }, { "epoch": 0.9790287744722358, "grad_norm": 0.14419320225715637, "learning_rate": 1.282746500367471e-08, "loss": 0.1144, "step": 7026 }, { "epoch": 0.9791681181634502, "grad_norm": 0.1906460076570511, "learning_rate": 1.2659308799017889e-08, "loss": 0.162, "step": 7027 }, { "epoch": 0.9793074618546646, "grad_norm": 0.11507923156023026, "learning_rate": 1.2492260667599232e-08, "loss": 0.1106, "step": 7028 }, { "epoch": 0.9794468055458789, "grad_norm": 0.10462075471878052, "learning_rate": 1.2326320646534051e-08, "loss": 0.1148, "step": 7029 }, { "epoch": 0.9795861492370933, "grad_norm": 0.19811923801898956, "learning_rate": 1.2161488772690077e-08, "loss": 0.1484, "step": 7030 }, { "epoch": 0.9797254929283077, "grad_norm": 0.14029131829738617, "learning_rate": 1.1997765082688573e-08, "loss": 0.1262, "step": 7031 }, { "epoch": 0.9798648366195221, "grad_norm": 0.16090349853038788, "learning_rate": 1.1835149612905438e-08, "loss": 0.1247, "step": 7032 }, { "epoch": 0.9800041803107364, "grad_norm": 0.09736321121454239, "learning_rate": 1.1673642399470663e-08, "loss": 0.1041, "step": 7033 }, { "epoch": 0.9801435240019508, "grad_norm": 0.10908212512731552, "learning_rate": 1.1513243478267211e-08, "loss": 0.1094, "step": 7034 }, { "epoch": 0.9802828676931652, "grad_norm": 0.1884438395500183, "learning_rate": 1.135395288493213e-08, "loss": 0.1516, "step": 7035 }, { "epoch": 0.9804222113843796, "grad_norm": 0.11654314398765564, "learning_rate": 1.1195770654855443e-08, "loss": 0.1138, "step": 7036 }, { "epoch": 0.980561555075594, "grad_norm": 0.13033929467201233, "learning_rate": 1.1038696823182372e-08, "loss": 0.1248, "step": 7037 }, { "epoch": 0.9807008987668083, "grad_norm": 0.12371237576007843, "learning_rate": 1.088273142481111e-08, "loss": 0.1142, "step": 7038 }, { "epoch": 0.9808402424580227, "grad_norm": 0.1383192390203476, "learning_rate": 1.0727874494393386e-08, "loss": 0.1182, "step": 7039 }, { "epoch": 0.9809795861492371, "grad_norm": 0.15148931741714478, "learning_rate": 1.0574126066335011e-08, "loss": 0.1257, "step": 7040 }, { "epoch": 0.9811189298404515, "grad_norm": 0.10305263847112656, "learning_rate": 1.0421486174795326e-08, "loss": 0.1213, "step": 7041 }, { "epoch": 0.9812582735316658, "grad_norm": 0.10960457473993301, "learning_rate": 1.0269954853687202e-08, "loss": 0.1097, "step": 7042 }, { "epoch": 0.9813976172228802, "grad_norm": 0.11957107484340668, "learning_rate": 1.01195321366776e-08, "loss": 0.1152, "step": 7043 }, { "epoch": 0.9815369609140946, "grad_norm": 0.1419266015291214, "learning_rate": 9.970218057187009e-09, "loss": 0.1193, "step": 7044 }, { "epoch": 0.981676304605309, "grad_norm": 0.18829789757728577, "learning_rate": 9.82201264839e-09, "loss": 0.1376, "step": 7045 }, { "epoch": 0.9818156482965233, "grad_norm": 0.12901470065116882, "learning_rate": 9.67491594321357e-09, "loss": 0.094, "step": 7046 }, { "epoch": 0.9819549919877377, "grad_norm": 0.12814587354660034, "learning_rate": 9.528927974339908e-09, "loss": 0.109, "step": 7047 }, { "epoch": 0.9820943356789521, "grad_norm": 0.15216946601867676, "learning_rate": 9.38404877420418e-09, "loss": 0.14, "step": 7048 }, { "epoch": 0.9822336793701665, "grad_norm": 0.1032942458987236, "learning_rate": 9.240278374995637e-09, "loss": 0.0888, "step": 7049 }, { "epoch": 0.9823730230613809, "grad_norm": 0.17252615094184875, "learning_rate": 9.097616808655396e-09, "loss": 0.1448, "step": 7050 }, { "epoch": 0.9825123667525952, "grad_norm": 0.12455969303846359, "learning_rate": 8.95606410688088e-09, "loss": 0.1163, "step": 7051 }, { "epoch": 0.9826517104438096, "grad_norm": 0.12422219663858414, "learning_rate": 8.815620301121375e-09, "loss": 0.1147, "step": 7052 }, { "epoch": 0.982791054135024, "grad_norm": 0.11775907874107361, "learning_rate": 8.676285422580255e-09, "loss": 0.1164, "step": 7053 }, { "epoch": 0.9829303978262384, "grad_norm": 0.10211171954870224, "learning_rate": 8.538059502214979e-09, "loss": 0.1152, "step": 7054 }, { "epoch": 0.9830697415174527, "grad_norm": 0.1588268280029297, "learning_rate": 8.400942570735427e-09, "loss": 0.1357, "step": 7055 }, { "epoch": 0.9832090852086672, "grad_norm": 0.1360844522714615, "learning_rate": 8.264934658606672e-09, "loss": 0.1203, "step": 7056 }, { "epoch": 0.9833484288998816, "grad_norm": 0.13350406289100647, "learning_rate": 8.13003579604621e-09, "loss": 0.1195, "step": 7057 }, { "epoch": 0.983487772591096, "grad_norm": 0.1086258664727211, "learning_rate": 7.996246013025067e-09, "loss": 0.1112, "step": 7058 }, { "epoch": 0.9836271162823104, "grad_norm": 0.12897033989429474, "learning_rate": 7.863565339268908e-09, "loss": 0.123, "step": 7059 }, { "epoch": 0.9837664599735247, "grad_norm": 0.1064828410744667, "learning_rate": 7.731993804256378e-09, "loss": 0.1066, "step": 7060 }, { "epoch": 0.9839058036647391, "grad_norm": 0.10286755114793777, "learning_rate": 7.60153143721909e-09, "loss": 0.0978, "step": 7061 }, { "epoch": 0.9840451473559535, "grad_norm": 0.14710547029972076, "learning_rate": 7.472178267143304e-09, "loss": 0.1156, "step": 7062 }, { "epoch": 0.9841844910471679, "grad_norm": 0.20256370306015015, "learning_rate": 7.343934322767699e-09, "loss": 0.1507, "step": 7063 }, { "epoch": 0.9843238347383823, "grad_norm": 0.14385050535202026, "learning_rate": 7.216799632586147e-09, "loss": 0.1197, "step": 7064 }, { "epoch": 0.9844631784295966, "grad_norm": 0.10926415771245956, "learning_rate": 7.0907742248443875e-09, "loss": 0.104, "step": 7065 }, { "epoch": 0.984602522120811, "grad_norm": 0.11202800273895264, "learning_rate": 6.965858127542247e-09, "loss": 0.1224, "step": 7066 }, { "epoch": 0.9847418658120254, "grad_norm": 0.2019556164741516, "learning_rate": 6.842051368433633e-09, "loss": 0.1428, "step": 7067 }, { "epoch": 0.9848812095032398, "grad_norm": 0.22923563420772552, "learning_rate": 6.719353975025989e-09, "loss": 0.1256, "step": 7068 }, { "epoch": 0.9850205531944541, "grad_norm": 0.15448026359081268, "learning_rate": 6.5977659745786185e-09, "loss": 0.1444, "step": 7069 }, { "epoch": 0.9851598968856685, "grad_norm": 0.150319442152977, "learning_rate": 6.477287394107134e-09, "loss": 0.1492, "step": 7070 }, { "epoch": 0.9852992405768829, "grad_norm": 0.2168394923210144, "learning_rate": 6.357918260377349e-09, "loss": 0.1777, "step": 7071 }, { "epoch": 0.9854385842680973, "grad_norm": 0.16566333174705505, "learning_rate": 6.239658599911935e-09, "loss": 0.1101, "step": 7072 }, { "epoch": 0.9855779279593117, "grad_norm": 0.14883318543434143, "learning_rate": 6.122508438984875e-09, "loss": 0.1187, "step": 7073 }, { "epoch": 0.985717271650526, "grad_norm": 0.26361116766929626, "learning_rate": 6.0064678036242385e-09, "loss": 0.1675, "step": 7074 }, { "epoch": 0.9858566153417404, "grad_norm": 0.12140185385942459, "learning_rate": 5.891536719611624e-09, "loss": 0.1082, "step": 7075 }, { "epoch": 0.9859959590329548, "grad_norm": 0.2504948675632477, "learning_rate": 5.77771521248216e-09, "loss": 0.1626, "step": 7076 }, { "epoch": 0.9861353027241692, "grad_norm": 0.13267774879932404, "learning_rate": 5.665003307524508e-09, "loss": 0.1443, "step": 7077 }, { "epoch": 0.9862746464153835, "grad_norm": 0.10029848664999008, "learning_rate": 5.5534010297803034e-09, "loss": 0.1018, "step": 7078 }, { "epoch": 0.9864139901065979, "grad_norm": 0.14049787819385529, "learning_rate": 5.4429084040452665e-09, "loss": 0.132, "step": 7079 }, { "epoch": 0.9865533337978123, "grad_norm": 0.11960731446743011, "learning_rate": 5.333525454868094e-09, "loss": 0.1107, "step": 7080 }, { "epoch": 0.9866926774890267, "grad_norm": 0.17594057321548462, "learning_rate": 5.225252206551568e-09, "loss": 0.1158, "step": 7081 }, { "epoch": 0.986832021180241, "grad_norm": 0.210640087723732, "learning_rate": 5.118088683151445e-09, "loss": 0.1495, "step": 7082 }, { "epoch": 0.9869713648714554, "grad_norm": 0.20480002462863922, "learning_rate": 5.01203490847646e-09, "loss": 0.1308, "step": 7083 }, { "epoch": 0.9871107085626698, "grad_norm": 0.17420968413352966, "learning_rate": 4.907090906090539e-09, "loss": 0.1337, "step": 7084 }, { "epoch": 0.9872500522538842, "grad_norm": 0.14035765826702118, "learning_rate": 4.803256699308923e-09, "loss": 0.1308, "step": 7085 }, { "epoch": 0.9873893959450986, "grad_norm": 0.1927097886800766, "learning_rate": 4.700532311200934e-09, "loss": 0.1317, "step": 7086 }, { "epoch": 0.9875287396363129, "grad_norm": 0.17255961894989014, "learning_rate": 4.598917764590538e-09, "loss": 0.1452, "step": 7087 }, { "epoch": 0.9876680833275273, "grad_norm": 0.18278169631958008, "learning_rate": 4.498413082053566e-09, "loss": 0.115, "step": 7088 }, { "epoch": 0.9878074270187417, "grad_norm": 0.14554624259471893, "learning_rate": 4.399018285919376e-09, "loss": 0.1457, "step": 7089 }, { "epoch": 0.9879467707099561, "grad_norm": 0.12458629161119461, "learning_rate": 4.300733398272528e-09, "loss": 0.1163, "step": 7090 }, { "epoch": 0.9880861144011704, "grad_norm": 0.25026702880859375, "learning_rate": 4.203558440948885e-09, "loss": 0.1394, "step": 7091 }, { "epoch": 0.9882254580923848, "grad_norm": 0.11663086712360382, "learning_rate": 4.1074934355384015e-09, "loss": 0.1202, "step": 7092 }, { "epoch": 0.9883648017835992, "grad_norm": 0.1582585722208023, "learning_rate": 4.0125384033845586e-09, "loss": 0.1205, "step": 7093 }, { "epoch": 0.9885041454748136, "grad_norm": 0.13658282160758972, "learning_rate": 3.91869336558437e-09, "loss": 0.1435, "step": 7094 }, { "epoch": 0.988643489166028, "grad_norm": 0.14926661550998688, "learning_rate": 3.8259583429883785e-09, "loss": 0.1355, "step": 7095 }, { "epoch": 0.9887828328572423, "grad_norm": 0.10892353951931, "learning_rate": 3.734333356199548e-09, "loss": 0.1087, "step": 7096 }, { "epoch": 0.9889221765484568, "grad_norm": 0.12098748981952667, "learning_rate": 3.643818425575485e-09, "loss": 0.1194, "step": 7097 }, { "epoch": 0.9890615202396712, "grad_norm": 0.1606818586587906, "learning_rate": 3.5544135712262116e-09, "loss": 0.1418, "step": 7098 }, { "epoch": 0.9892008639308856, "grad_norm": 0.18418806791305542, "learning_rate": 3.4661188130147295e-09, "loss": 0.1498, "step": 7099 }, { "epoch": 0.9893402076221, "grad_norm": 0.16036513447761536, "learning_rate": 3.378934170559789e-09, "loss": 0.1513, "step": 7100 }, { "epoch": 0.9894795513133143, "grad_norm": 0.12725898623466492, "learning_rate": 3.292859663230341e-09, "loss": 0.1239, "step": 7101 }, { "epoch": 0.9896188950045287, "grad_norm": 0.13469429314136505, "learning_rate": 3.207895310150533e-09, "loss": 0.1179, "step": 7102 }, { "epoch": 0.9897582386957431, "grad_norm": 0.16055487096309662, "learning_rate": 3.1240411301980413e-09, "loss": 0.1603, "step": 7103 }, { "epoch": 0.9898975823869575, "grad_norm": 0.13022880256175995, "learning_rate": 3.0412971420029636e-09, "loss": 0.1156, "step": 7104 }, { "epoch": 0.9900369260781718, "grad_norm": 0.13623175024986267, "learning_rate": 2.959663363949483e-09, "loss": 0.1208, "step": 7105 }, { "epoch": 0.9901762697693862, "grad_norm": 0.15177221596240997, "learning_rate": 2.8791398141736484e-09, "loss": 0.1273, "step": 7106 }, { "epoch": 0.9903156134606006, "grad_norm": 0.16281788051128387, "learning_rate": 2.799726510567258e-09, "loss": 0.166, "step": 7107 }, { "epoch": 0.990454957151815, "grad_norm": 0.1401706486940384, "learning_rate": 2.721423470773421e-09, "loss": 0.1058, "step": 7108 }, { "epoch": 0.9905943008430294, "grad_norm": 0.17874625325202942, "learning_rate": 2.644230712189888e-09, "loss": 0.1161, "step": 7109 }, { "epoch": 0.9907336445342437, "grad_norm": 0.11475137621164322, "learning_rate": 2.5681482519662736e-09, "loss": 0.1113, "step": 7110 }, { "epoch": 0.9908729882254581, "grad_norm": 0.09178531914949417, "learning_rate": 2.493176107006834e-09, "loss": 0.1071, "step": 7111 }, { "epoch": 0.9910123319166725, "grad_norm": 0.23858186602592468, "learning_rate": 2.4193142939687996e-09, "loss": 0.1288, "step": 7112 }, { "epoch": 0.9911516756078869, "grad_norm": 0.14938920736312866, "learning_rate": 2.3465628292623776e-09, "loss": 0.1257, "step": 7113 }, { "epoch": 0.9912910192991012, "grad_norm": 0.15672951936721802, "learning_rate": 2.2749217290513048e-09, "loss": 0.1294, "step": 7114 }, { "epoch": 0.9914303629903156, "grad_norm": 0.254888117313385, "learning_rate": 2.2043910092522935e-09, "loss": 0.1321, "step": 7115 }, { "epoch": 0.99156970668153, "grad_norm": 0.18220828473567963, "learning_rate": 2.134970685536697e-09, "loss": 0.1836, "step": 7116 }, { "epoch": 0.9917090503727444, "grad_norm": 0.13551747798919678, "learning_rate": 2.066660773326623e-09, "loss": 0.115, "step": 7117 }, { "epoch": 0.9918483940639587, "grad_norm": 0.1631729006767273, "learning_rate": 1.999461287800486e-09, "loss": 0.1344, "step": 7118 }, { "epoch": 0.9919877377551731, "grad_norm": 0.17158003151416779, "learning_rate": 1.9333722438874548e-09, "loss": 0.1221, "step": 7119 }, { "epoch": 0.9921270814463875, "grad_norm": 0.15039563179016113, "learning_rate": 1.868393656271339e-09, "loss": 0.114, "step": 7120 }, { "epoch": 0.9922664251376019, "grad_norm": 0.17857496440410614, "learning_rate": 1.8045255393889238e-09, "loss": 0.1515, "step": 7121 }, { "epoch": 0.9924057688288163, "grad_norm": 0.12159868329763412, "learning_rate": 1.7417679074299698e-09, "loss": 0.118, "step": 7122 }, { "epoch": 0.9925451125200306, "grad_norm": 0.1209770068526268, "learning_rate": 1.680120774338323e-09, "loss": 0.1092, "step": 7123 }, { "epoch": 0.992684456211245, "grad_norm": 0.12668851017951965, "learning_rate": 1.6195841538096947e-09, "loss": 0.1212, "step": 7124 }, { "epoch": 0.9928237999024594, "grad_norm": 0.12252506613731384, "learning_rate": 1.5601580592949916e-09, "loss": 0.1112, "step": 7125 }, { "epoch": 0.9929631435936738, "grad_norm": 0.11665275692939758, "learning_rate": 1.5018425039969864e-09, "loss": 0.1103, "step": 7126 }, { "epoch": 0.9931024872848881, "grad_norm": 0.11144504696130753, "learning_rate": 1.4446375008714264e-09, "loss": 0.1171, "step": 7127 }, { "epoch": 0.9932418309761025, "grad_norm": 0.17457044124603271, "learning_rate": 1.3885430626287e-09, "loss": 0.1309, "step": 7128 }, { "epoch": 0.9933811746673169, "grad_norm": 0.17832200229167938, "learning_rate": 1.3335592017316156e-09, "loss": 0.1493, "step": 7129 }, { "epoch": 0.9935205183585313, "grad_norm": 0.09733439981937408, "learning_rate": 1.2796859303959575e-09, "loss": 0.0967, "step": 7130 }, { "epoch": 0.9936598620497457, "grad_norm": 0.10889638215303421, "learning_rate": 1.2269232605915948e-09, "loss": 0.0862, "step": 7131 }, { "epoch": 0.99379920574096, "grad_norm": 0.2509024143218994, "learning_rate": 1.1752712040408176e-09, "loss": 0.1604, "step": 7132 }, { "epoch": 0.9939385494321744, "grad_norm": 0.1556006371974945, "learning_rate": 1.124729772219446e-09, "loss": 0.1257, "step": 7133 }, { "epoch": 0.9940778931233888, "grad_norm": 0.1988253891468048, "learning_rate": 1.075298976356831e-09, "loss": 0.1318, "step": 7134 }, { "epoch": 0.9942172368146032, "grad_norm": 0.24983550608158112, "learning_rate": 1.026978827435854e-09, "loss": 0.1643, "step": 7135 }, { "epoch": 0.9943565805058175, "grad_norm": 0.14017491042613983, "learning_rate": 9.797693361912607e-10, "loss": 0.1215, "step": 7136 }, { "epoch": 0.994495924197032, "grad_norm": 0.12907375395298004, "learning_rate": 9.33670513112439e-10, "loss": 0.1339, "step": 7137 }, { "epoch": 0.9946352678882464, "grad_norm": 0.1587958186864853, "learning_rate": 8.886823684417512e-10, "loss": 0.1435, "step": 7138 }, { "epoch": 0.9947746115794608, "grad_norm": 0.17496776580810547, "learning_rate": 8.448049121739798e-10, "loss": 0.1298, "step": 7139 }, { "epoch": 0.9949139552706752, "grad_norm": 0.19782046973705292, "learning_rate": 8.020381540579936e-10, "loss": 0.1537, "step": 7140 }, { "epoch": 0.9950532989618895, "grad_norm": 0.13721738755702972, "learning_rate": 7.603821035950809e-10, "loss": 0.1232, "step": 7141 }, { "epoch": 0.9951926426531039, "grad_norm": 0.17052477598190308, "learning_rate": 7.198367700411712e-10, "loss": 0.1233, "step": 7142 }, { "epoch": 0.9953319863443183, "grad_norm": 0.15578196942806244, "learning_rate": 6.80402162403504e-10, "loss": 0.1393, "step": 7143 }, { "epoch": 0.9954713300355327, "grad_norm": 0.17810435593128204, "learning_rate": 6.420782894445144e-10, "loss": 0.1472, "step": 7144 }, { "epoch": 0.995610673726747, "grad_norm": 0.08622164279222488, "learning_rate": 6.048651596785027e-10, "loss": 0.1, "step": 7145 }, { "epoch": 0.9957500174179614, "grad_norm": 0.13365554809570312, "learning_rate": 5.687627813727448e-10, "loss": 0.1003, "step": 7146 }, { "epoch": 0.9958893611091758, "grad_norm": 0.12375444173812866, "learning_rate": 5.337711625497122e-10, "loss": 0.124, "step": 7147 }, { "epoch": 0.9960287048003902, "grad_norm": 0.12334911525249481, "learning_rate": 4.998903109826314e-10, "loss": 0.1105, "step": 7148 }, { "epoch": 0.9961680484916046, "grad_norm": 0.20112960040569305, "learning_rate": 4.671202341993697e-10, "loss": 0.1581, "step": 7149 }, { "epoch": 0.9963073921828189, "grad_norm": 0.20411506295204163, "learning_rate": 4.354609394802145e-10, "loss": 0.1639, "step": 7150 }, { "epoch": 0.9964467358740333, "grad_norm": 0.1745658665895462, "learning_rate": 4.0491243386009403e-10, "loss": 0.1138, "step": 7151 }, { "epoch": 0.9965860795652477, "grad_norm": 0.13933882117271423, "learning_rate": 3.7547472412580167e-10, "loss": 0.1238, "step": 7152 }, { "epoch": 0.9967254232564621, "grad_norm": 0.15122728049755096, "learning_rate": 3.471478168176612e-10, "loss": 0.1274, "step": 7153 }, { "epoch": 0.9968647669476765, "grad_norm": 0.10998280346393585, "learning_rate": 3.19931718229527e-10, "loss": 0.1166, "step": 7154 }, { "epoch": 0.9970041106388908, "grad_norm": 0.142870232462883, "learning_rate": 2.9382643440767354e-10, "loss": 0.1267, "step": 7155 }, { "epoch": 0.9971434543301052, "grad_norm": 0.2051573246717453, "learning_rate": 2.6883197115190606e-10, "loss": 0.1422, "step": 7156 }, { "epoch": 0.9972827980213196, "grad_norm": 0.13109253346920013, "learning_rate": 2.4494833401667027e-10, "loss": 0.1024, "step": 7157 }, { "epoch": 0.997422141712534, "grad_norm": 0.15185578167438507, "learning_rate": 2.2217552830716693e-10, "loss": 0.1157, "step": 7158 }, { "epoch": 0.9975614854037483, "grad_norm": 0.1986595094203949, "learning_rate": 2.0051355908323743e-10, "loss": 0.1342, "step": 7159 }, { "epoch": 0.9977008290949627, "grad_norm": 0.20526815950870514, "learning_rate": 1.7996243115769863e-10, "loss": 0.1468, "step": 7160 }, { "epoch": 0.9978401727861771, "grad_norm": 0.1280101090669632, "learning_rate": 1.605221490968978e-10, "loss": 0.1257, "step": 7161 }, { "epoch": 0.9979795164773915, "grad_norm": 0.1648375391960144, "learning_rate": 1.421927172201576e-10, "loss": 0.1374, "step": 7162 }, { "epoch": 0.9981188601686058, "grad_norm": 0.1675395965576172, "learning_rate": 1.24974139599221e-10, "loss": 0.149, "step": 7163 }, { "epoch": 0.9982582038598202, "grad_norm": 0.2992393672466278, "learning_rate": 1.0886642005991654e-10, "loss": 0.1363, "step": 7164 }, { "epoch": 0.9983975475510346, "grad_norm": 0.10777521878480911, "learning_rate": 9.386956218104815e-11, "loss": 0.0927, "step": 7165 }, { "epoch": 0.998536891242249, "grad_norm": 0.13590432703495026, "learning_rate": 7.998356929439511e-11, "loss": 0.1171, "step": 7166 }, { "epoch": 0.9986762349334634, "grad_norm": 0.12988446652889252, "learning_rate": 6.72084444852672e-11, "loss": 0.1052, "step": 7167 }, { "epoch": 0.9988155786246777, "grad_norm": 0.09811429679393768, "learning_rate": 5.554419059250471e-11, "loss": 0.0911, "step": 7168 }, { "epoch": 0.9989549223158921, "grad_norm": 0.24672143161296844, "learning_rate": 4.499081020681306e-11, "loss": 0.1686, "step": 7169 }, { "epoch": 0.9990942660071065, "grad_norm": 0.2167014330625534, "learning_rate": 3.554830567298328e-11, "loss": 0.1536, "step": 7170 }, { "epoch": 0.9992336096983209, "grad_norm": 0.16085390746593475, "learning_rate": 2.7216679089892008e-11, "loss": 0.119, "step": 7171 }, { "epoch": 0.9993729533895352, "grad_norm": 0.1319650560617447, "learning_rate": 1.9995932307170783e-11, "loss": 0.1084, "step": 7172 }, { "epoch": 0.9995122970807496, "grad_norm": 0.19197918474674225, "learning_rate": 1.3886066930202113e-11, "loss": 0.149, "step": 7173 }, { "epoch": 0.999651640771964, "grad_norm": 0.14723917841911316, "learning_rate": 8.88708431623364e-12, "loss": 0.1061, "step": 7174 }, { "epoch": 0.9997909844631784, "grad_norm": 0.14315886795520782, "learning_rate": 4.998985576043503e-12, "loss": 0.1172, "step": 7175 }, { "epoch": 0.9999303281543928, "grad_norm": 0.1589851826429367, "learning_rate": 2.2217715728301003e-12, "loss": 0.1426, "step": 7176 }, { "epoch": 1.0, "grad_norm": 0.2280510812997818, "learning_rate": 5.554429238774361e-13, "loss": 0.1771, "step": 7177 }, { "epoch": 1.0, "step": 7177, "total_flos": 5.326107668090192e+19, "train_loss": 0.14823374486457874, "train_runtime": 62363.1144, "train_samples_per_second": 29.459, "train_steps_per_second": 0.115 } ], "logging_steps": 1.0, "max_steps": 7177, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.326107668090192e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }