{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9983753046303816, "eval_steps": 500, "global_step": 2460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012185215272136475, "grad_norm": 6.834534852371434, "learning_rate": 4.0650406504065046e-08, "loss": 1.0814, "step": 1 }, { "epoch": 0.002437043054427295, "grad_norm": 6.434639482368721, "learning_rate": 8.130081300813009e-08, "loss": 1.0829, "step": 2 }, { "epoch": 0.0036555645816409425, "grad_norm": 6.863769364155968, "learning_rate": 1.2195121951219514e-07, "loss": 1.1046, "step": 3 }, { "epoch": 0.00487408610885459, "grad_norm": 6.5857528488136925, "learning_rate": 1.6260162601626018e-07, "loss": 1.0853, "step": 4 }, { "epoch": 0.006092607636068237, "grad_norm": 6.521091645053231, "learning_rate": 2.0325203252032523e-07, "loss": 1.0773, "step": 5 }, { "epoch": 0.007311129163281885, "grad_norm": 6.29456329359191, "learning_rate": 2.439024390243903e-07, "loss": 1.0569, "step": 6 }, { "epoch": 0.008529650690495532, "grad_norm": 6.6897100511742575, "learning_rate": 2.845528455284553e-07, "loss": 1.0615, "step": 7 }, { "epoch": 0.00974817221770918, "grad_norm": 6.645918686532055, "learning_rate": 3.2520325203252037e-07, "loss": 1.11, "step": 8 }, { "epoch": 0.010966693744922826, "grad_norm": 6.5860967765132505, "learning_rate": 3.6585365853658536e-07, "loss": 1.0968, "step": 9 }, { "epoch": 0.012185215272136474, "grad_norm": 6.316550148573923, "learning_rate": 4.0650406504065046e-07, "loss": 1.0402, "step": 10 }, { "epoch": 0.013403736799350122, "grad_norm": 6.589233946221034, "learning_rate": 4.471544715447155e-07, "loss": 1.0883, "step": 11 }, { "epoch": 0.01462225832656377, "grad_norm": 5.923561630066145, "learning_rate": 4.878048780487805e-07, "loss": 1.0577, "step": 12 }, { "epoch": 0.015840779853777416, "grad_norm": 6.107921911359583, "learning_rate": 5.284552845528456e-07, "loss": 1.0624, "step": 13 }, { "epoch": 0.017059301380991064, "grad_norm": 5.923788461868563, "learning_rate": 5.691056910569106e-07, "loss": 1.0736, "step": 14 }, { "epoch": 0.018277822908204712, "grad_norm": 5.874057009305477, "learning_rate": 6.097560975609757e-07, "loss": 1.0447, "step": 15 }, { "epoch": 0.01949634443541836, "grad_norm": 5.031342070027601, "learning_rate": 6.504065040650407e-07, "loss": 1.0433, "step": 16 }, { "epoch": 0.020714865962632008, "grad_norm": 4.869478502925938, "learning_rate": 6.910569105691058e-07, "loss": 1.0411, "step": 17 }, { "epoch": 0.021933387489845652, "grad_norm": 4.534685569961031, "learning_rate": 7.317073170731707e-07, "loss": 0.9874, "step": 18 }, { "epoch": 0.0231519090170593, "grad_norm": 4.357854034553876, "learning_rate": 7.723577235772359e-07, "loss": 1.0014, "step": 19 }, { "epoch": 0.024370430544272948, "grad_norm": 4.315329088535952, "learning_rate": 8.130081300813009e-07, "loss": 0.9925, "step": 20 }, { "epoch": 0.025588952071486596, "grad_norm": 3.3054166230275337, "learning_rate": 8.53658536585366e-07, "loss": 1.003, "step": 21 }, { "epoch": 0.026807473598700244, "grad_norm": 2.726952834638993, "learning_rate": 8.94308943089431e-07, "loss": 0.9692, "step": 22 }, { "epoch": 0.028025995125913892, "grad_norm": 2.7243029466264, "learning_rate": 9.349593495934959e-07, "loss": 1.0212, "step": 23 }, { "epoch": 0.02924451665312754, "grad_norm": 2.5896267051583313, "learning_rate": 9.75609756097561e-07, "loss": 0.9757, "step": 24 }, { "epoch": 0.030463038180341188, "grad_norm": 2.6706226110708498, "learning_rate": 1.0162601626016261e-06, "loss": 0.9801, "step": 25 }, { "epoch": 0.03168155970755483, "grad_norm": 2.437924378367497, "learning_rate": 1.0569105691056912e-06, "loss": 0.9768, "step": 26 }, { "epoch": 0.03290008123476848, "grad_norm": 2.5803827822332397, "learning_rate": 1.0975609756097562e-06, "loss": 0.9895, "step": 27 }, { "epoch": 0.03411860276198213, "grad_norm": 2.203043465720474, "learning_rate": 1.1382113821138213e-06, "loss": 0.9726, "step": 28 }, { "epoch": 0.035337124289195776, "grad_norm": 1.805085101507344, "learning_rate": 1.1788617886178863e-06, "loss": 0.9237, "step": 29 }, { "epoch": 0.036555645816409424, "grad_norm": 2.1165116479858215, "learning_rate": 1.2195121951219514e-06, "loss": 0.9338, "step": 30 }, { "epoch": 0.03777416734362307, "grad_norm": 2.3435725524236735, "learning_rate": 1.2601626016260162e-06, "loss": 0.9365, "step": 31 }, { "epoch": 0.03899268887083672, "grad_norm": 2.2985182006567326, "learning_rate": 1.3008130081300815e-06, "loss": 0.9135, "step": 32 }, { "epoch": 0.04021121039805037, "grad_norm": 2.1356048436615036, "learning_rate": 1.3414634146341465e-06, "loss": 0.9196, "step": 33 }, { "epoch": 0.041429731925264016, "grad_norm": 2.028116965668269, "learning_rate": 1.3821138211382116e-06, "loss": 0.9074, "step": 34 }, { "epoch": 0.042648253452477664, "grad_norm": 1.7266339650438713, "learning_rate": 1.4227642276422766e-06, "loss": 0.8969, "step": 35 }, { "epoch": 0.043866774979691305, "grad_norm": 1.548462619619361, "learning_rate": 1.4634146341463414e-06, "loss": 0.9009, "step": 36 }, { "epoch": 0.04508529650690495, "grad_norm": 1.2325234503287605, "learning_rate": 1.5040650406504067e-06, "loss": 0.8942, "step": 37 }, { "epoch": 0.0463038180341186, "grad_norm": 1.119008985117944, "learning_rate": 1.5447154471544717e-06, "loss": 0.8869, "step": 38 }, { "epoch": 0.04752233956133225, "grad_norm": 1.232748685157114, "learning_rate": 1.5853658536585368e-06, "loss": 0.8542, "step": 39 }, { "epoch": 0.048740861088545896, "grad_norm": 1.6677472249438465, "learning_rate": 1.6260162601626018e-06, "loss": 0.8594, "step": 40 }, { "epoch": 0.049959382615759544, "grad_norm": 1.5964345757340976, "learning_rate": 1.6666666666666667e-06, "loss": 0.8481, "step": 41 }, { "epoch": 0.05117790414297319, "grad_norm": 1.5241421915515152, "learning_rate": 1.707317073170732e-06, "loss": 0.8478, "step": 42 }, { "epoch": 0.05239642567018684, "grad_norm": 1.350659230101709, "learning_rate": 1.747967479674797e-06, "loss": 0.8669, "step": 43 }, { "epoch": 0.05361494719740049, "grad_norm": 1.0459886477510316, "learning_rate": 1.788617886178862e-06, "loss": 0.8229, "step": 44 }, { "epoch": 0.054833468724614136, "grad_norm": 0.9790437025811132, "learning_rate": 1.8292682926829268e-06, "loss": 0.8214, "step": 45 }, { "epoch": 0.056051990251827784, "grad_norm": 1.1555035661921353, "learning_rate": 1.8699186991869919e-06, "loss": 0.8106, "step": 46 }, { "epoch": 0.05727051177904143, "grad_norm": 1.246916947603362, "learning_rate": 1.9105691056910574e-06, "loss": 0.8193, "step": 47 }, { "epoch": 0.05848903330625508, "grad_norm": 1.1905103203002494, "learning_rate": 1.951219512195122e-06, "loss": 0.8075, "step": 48 }, { "epoch": 0.05970755483346873, "grad_norm": 0.918493036902164, "learning_rate": 1.991869918699187e-06, "loss": 0.7842, "step": 49 }, { "epoch": 0.060926076360682375, "grad_norm": 0.961427277586569, "learning_rate": 2.0325203252032523e-06, "loss": 0.8206, "step": 50 }, { "epoch": 0.062144597887896016, "grad_norm": 0.7135293393398392, "learning_rate": 2.073170731707317e-06, "loss": 0.7784, "step": 51 }, { "epoch": 0.06336311941510966, "grad_norm": 0.8687421281930399, "learning_rate": 2.1138211382113824e-06, "loss": 0.7953, "step": 52 }, { "epoch": 0.06458164094232331, "grad_norm": 0.8575700781368814, "learning_rate": 2.154471544715447e-06, "loss": 0.7926, "step": 53 }, { "epoch": 0.06580016246953696, "grad_norm": 0.9435599171162209, "learning_rate": 2.1951219512195125e-06, "loss": 0.7766, "step": 54 }, { "epoch": 0.06701868399675061, "grad_norm": 0.7215369508734659, "learning_rate": 2.2357723577235773e-06, "loss": 0.7686, "step": 55 }, { "epoch": 0.06823720552396426, "grad_norm": 0.6329822213923535, "learning_rate": 2.2764227642276426e-06, "loss": 0.7868, "step": 56 }, { "epoch": 0.0694557270511779, "grad_norm": 0.6559439915679887, "learning_rate": 2.317073170731708e-06, "loss": 0.7672, "step": 57 }, { "epoch": 0.07067424857839155, "grad_norm": 0.6424064072265188, "learning_rate": 2.3577235772357727e-06, "loss": 0.7876, "step": 58 }, { "epoch": 0.0718927701056052, "grad_norm": 0.5670684781739027, "learning_rate": 2.3983739837398375e-06, "loss": 0.7545, "step": 59 }, { "epoch": 0.07311129163281885, "grad_norm": 0.62119888744641, "learning_rate": 2.4390243902439027e-06, "loss": 0.7778, "step": 60 }, { "epoch": 0.0743298131600325, "grad_norm": 0.5945888357559133, "learning_rate": 2.4796747967479676e-06, "loss": 0.7593, "step": 61 }, { "epoch": 0.07554833468724614, "grad_norm": 0.5566344615882963, "learning_rate": 2.5203252032520324e-06, "loss": 0.7783, "step": 62 }, { "epoch": 0.07676685621445979, "grad_norm": 0.6010029344681969, "learning_rate": 2.5609756097560977e-06, "loss": 0.7824, "step": 63 }, { "epoch": 0.07798537774167344, "grad_norm": 0.5620208665027641, "learning_rate": 2.601626016260163e-06, "loss": 0.7538, "step": 64 }, { "epoch": 0.07920389926888709, "grad_norm": 0.629839488738847, "learning_rate": 2.6422764227642278e-06, "loss": 0.7478, "step": 65 }, { "epoch": 0.08042242079610074, "grad_norm": 0.5843721125393191, "learning_rate": 2.682926829268293e-06, "loss": 0.7551, "step": 66 }, { "epoch": 0.08164094232331438, "grad_norm": 0.6807297637633912, "learning_rate": 2.723577235772358e-06, "loss": 0.7643, "step": 67 }, { "epoch": 0.08285946385052803, "grad_norm": 0.5337293638802343, "learning_rate": 2.764227642276423e-06, "loss": 0.7581, "step": 68 }, { "epoch": 0.08407798537774168, "grad_norm": 0.5557859370743894, "learning_rate": 2.8048780487804884e-06, "loss": 0.7485, "step": 69 }, { "epoch": 0.08529650690495533, "grad_norm": 0.5518435051656209, "learning_rate": 2.845528455284553e-06, "loss": 0.7503, "step": 70 }, { "epoch": 0.08651502843216897, "grad_norm": 0.5585475097227505, "learning_rate": 2.8861788617886185e-06, "loss": 0.7456, "step": 71 }, { "epoch": 0.08773354995938261, "grad_norm": 0.5842062776799712, "learning_rate": 2.926829268292683e-06, "loss": 0.7457, "step": 72 }, { "epoch": 0.08895207148659626, "grad_norm": 0.5691413037940362, "learning_rate": 2.967479674796748e-06, "loss": 0.7316, "step": 73 }, { "epoch": 0.0901705930138099, "grad_norm": 0.592000953180703, "learning_rate": 3.0081300813008134e-06, "loss": 0.7379, "step": 74 }, { "epoch": 0.09138911454102355, "grad_norm": 0.4986095202064355, "learning_rate": 3.0487804878048782e-06, "loss": 0.7026, "step": 75 }, { "epoch": 0.0926076360682372, "grad_norm": 0.5610598503101445, "learning_rate": 3.0894308943089435e-06, "loss": 0.7366, "step": 76 }, { "epoch": 0.09382615759545085, "grad_norm": 0.5189286785140359, "learning_rate": 3.1300813008130083e-06, "loss": 0.7343, "step": 77 }, { "epoch": 0.0950446791226645, "grad_norm": 0.5352050364602269, "learning_rate": 3.1707317073170736e-06, "loss": 0.7128, "step": 78 }, { "epoch": 0.09626320064987814, "grad_norm": 0.589544520130887, "learning_rate": 3.211382113821139e-06, "loss": 0.7377, "step": 79 }, { "epoch": 0.09748172217709179, "grad_norm": 0.5170292516124821, "learning_rate": 3.2520325203252037e-06, "loss": 0.751, "step": 80 }, { "epoch": 0.09870024370430544, "grad_norm": 0.5178115988752247, "learning_rate": 3.292682926829269e-06, "loss": 0.7263, "step": 81 }, { "epoch": 0.09991876523151909, "grad_norm": 0.5758324455305359, "learning_rate": 3.3333333333333333e-06, "loss": 0.7204, "step": 82 }, { "epoch": 0.10113728675873274, "grad_norm": 0.5191922407454059, "learning_rate": 3.3739837398373986e-06, "loss": 0.7323, "step": 83 }, { "epoch": 0.10235580828594638, "grad_norm": 0.5706404216543195, "learning_rate": 3.414634146341464e-06, "loss": 0.7343, "step": 84 }, { "epoch": 0.10357432981316003, "grad_norm": 0.5166974408338545, "learning_rate": 3.4552845528455287e-06, "loss": 0.7347, "step": 85 }, { "epoch": 0.10479285134037368, "grad_norm": 0.575076347441057, "learning_rate": 3.495934959349594e-06, "loss": 0.729, "step": 86 }, { "epoch": 0.10601137286758733, "grad_norm": 0.5503219216241421, "learning_rate": 3.5365853658536588e-06, "loss": 0.7247, "step": 87 }, { "epoch": 0.10722989439480098, "grad_norm": 0.5315644262103328, "learning_rate": 3.577235772357724e-06, "loss": 0.7188, "step": 88 }, { "epoch": 0.10844841592201462, "grad_norm": 0.5283688627559194, "learning_rate": 3.6178861788617893e-06, "loss": 0.7328, "step": 89 }, { "epoch": 0.10966693744922827, "grad_norm": 0.5657078936164937, "learning_rate": 3.6585365853658537e-06, "loss": 0.7317, "step": 90 }, { "epoch": 0.11088545897644192, "grad_norm": 0.5285271136308272, "learning_rate": 3.699186991869919e-06, "loss": 0.7259, "step": 91 }, { "epoch": 0.11210398050365557, "grad_norm": 0.5665339591374581, "learning_rate": 3.7398373983739838e-06, "loss": 0.71, "step": 92 }, { "epoch": 0.11332250203086922, "grad_norm": 0.5408789367861271, "learning_rate": 3.780487804878049e-06, "loss": 0.7287, "step": 93 }, { "epoch": 0.11454102355808286, "grad_norm": 0.530024765222071, "learning_rate": 3.821138211382115e-06, "loss": 0.7158, "step": 94 }, { "epoch": 0.11575954508529651, "grad_norm": 0.525972820727265, "learning_rate": 3.861788617886179e-06, "loss": 0.6953, "step": 95 }, { "epoch": 0.11697806661251016, "grad_norm": 0.5538892198758983, "learning_rate": 3.902439024390244e-06, "loss": 0.7213, "step": 96 }, { "epoch": 0.11819658813972381, "grad_norm": 0.544929946076996, "learning_rate": 3.943089430894309e-06, "loss": 0.6996, "step": 97 }, { "epoch": 0.11941510966693746, "grad_norm": 0.5734775021447369, "learning_rate": 3.983739837398374e-06, "loss": 0.7154, "step": 98 }, { "epoch": 0.1206336311941511, "grad_norm": 0.5045397032963369, "learning_rate": 4.024390243902439e-06, "loss": 0.7024, "step": 99 }, { "epoch": 0.12185215272136475, "grad_norm": 0.5236114609222794, "learning_rate": 4.0650406504065046e-06, "loss": 0.7226, "step": 100 }, { "epoch": 0.12307067424857839, "grad_norm": 0.5641036987903533, "learning_rate": 4.10569105691057e-06, "loss": 0.7054, "step": 101 }, { "epoch": 0.12428919577579203, "grad_norm": 0.508465869676476, "learning_rate": 4.146341463414634e-06, "loss": 0.7003, "step": 102 }, { "epoch": 0.1255077173030057, "grad_norm": 0.5202630797257376, "learning_rate": 4.1869918699186995e-06, "loss": 0.7204, "step": 103 }, { "epoch": 0.12672623883021933, "grad_norm": 0.5552933325377176, "learning_rate": 4.227642276422765e-06, "loss": 0.7279, "step": 104 }, { "epoch": 0.127944760357433, "grad_norm": 0.5416012915563714, "learning_rate": 4.268292682926829e-06, "loss": 0.7093, "step": 105 }, { "epoch": 0.12916328188464662, "grad_norm": 0.5104004921064896, "learning_rate": 4.308943089430894e-06, "loss": 0.7013, "step": 106 }, { "epoch": 0.1303818034118603, "grad_norm": 0.5765782104977603, "learning_rate": 4.34959349593496e-06, "loss": 0.7045, "step": 107 }, { "epoch": 0.13160032493907392, "grad_norm": 0.5017287673543831, "learning_rate": 4.390243902439025e-06, "loss": 0.6997, "step": 108 }, { "epoch": 0.13281884646628758, "grad_norm": 0.48808935518722807, "learning_rate": 4.43089430894309e-06, "loss": 0.7184, "step": 109 }, { "epoch": 0.13403736799350122, "grad_norm": 0.531216252027469, "learning_rate": 4.471544715447155e-06, "loss": 0.7069, "step": 110 }, { "epoch": 0.13525588952071488, "grad_norm": 0.5149543102282697, "learning_rate": 4.51219512195122e-06, "loss": 0.7023, "step": 111 }, { "epoch": 0.1364744110479285, "grad_norm": 0.5291257352871406, "learning_rate": 4.552845528455285e-06, "loss": 0.7185, "step": 112 }, { "epoch": 0.13769293257514217, "grad_norm": 0.47858897961333036, "learning_rate": 4.59349593495935e-06, "loss": 0.7149, "step": 113 }, { "epoch": 0.1389114541023558, "grad_norm": 0.5359903721383661, "learning_rate": 4.634146341463416e-06, "loss": 0.7118, "step": 114 }, { "epoch": 0.14012997562956944, "grad_norm": 0.5023325811416011, "learning_rate": 4.67479674796748e-06, "loss": 0.6986, "step": 115 }, { "epoch": 0.1413484971567831, "grad_norm": 0.507102678949565, "learning_rate": 4.715447154471545e-06, "loss": 0.7053, "step": 116 }, { "epoch": 0.14256701868399674, "grad_norm": 0.5162377887307996, "learning_rate": 4.75609756097561e-06, "loss": 0.6971, "step": 117 }, { "epoch": 0.1437855402112104, "grad_norm": 0.5228780554370066, "learning_rate": 4.796747967479675e-06, "loss": 0.6915, "step": 118 }, { "epoch": 0.14500406173842403, "grad_norm": 0.5539538888660016, "learning_rate": 4.83739837398374e-06, "loss": 0.6979, "step": 119 }, { "epoch": 0.1462225832656377, "grad_norm": 0.6135340785022244, "learning_rate": 4.8780487804878055e-06, "loss": 0.7197, "step": 120 }, { "epoch": 0.14744110479285133, "grad_norm": 0.5261935119823417, "learning_rate": 4.918699186991871e-06, "loss": 0.6957, "step": 121 }, { "epoch": 0.148659626320065, "grad_norm": 0.5941876044718514, "learning_rate": 4.959349593495935e-06, "loss": 0.7031, "step": 122 }, { "epoch": 0.14987814784727863, "grad_norm": 0.5436866255986976, "learning_rate": 5e-06, "loss": 0.7068, "step": 123 }, { "epoch": 0.1510966693744923, "grad_norm": 0.5295736343510782, "learning_rate": 5.040650406504065e-06, "loss": 0.686, "step": 124 }, { "epoch": 0.15231519090170592, "grad_norm": 0.5536691790810129, "learning_rate": 5.081300813008131e-06, "loss": 0.681, "step": 125 }, { "epoch": 0.15353371242891958, "grad_norm": 0.6057295035493935, "learning_rate": 5.121951219512195e-06, "loss": 0.7195, "step": 126 }, { "epoch": 0.15475223395613322, "grad_norm": 0.49006287650569935, "learning_rate": 5.162601626016261e-06, "loss": 0.6846, "step": 127 }, { "epoch": 0.15597075548334688, "grad_norm": 0.5312531193234717, "learning_rate": 5.203252032520326e-06, "loss": 0.7038, "step": 128 }, { "epoch": 0.1571892770105605, "grad_norm": 0.5581018411430491, "learning_rate": 5.243902439024391e-06, "loss": 0.7184, "step": 129 }, { "epoch": 0.15840779853777417, "grad_norm": 0.5133234429893759, "learning_rate": 5.2845528455284555e-06, "loss": 0.6792, "step": 130 }, { "epoch": 0.1596263200649878, "grad_norm": 0.5191238918744202, "learning_rate": 5.32520325203252e-06, "loss": 0.6921, "step": 131 }, { "epoch": 0.16084484159220147, "grad_norm": 0.6134860482308477, "learning_rate": 5.365853658536586e-06, "loss": 0.6764, "step": 132 }, { "epoch": 0.1620633631194151, "grad_norm": 0.6271296104201189, "learning_rate": 5.4065040650406504e-06, "loss": 0.6829, "step": 133 }, { "epoch": 0.16328188464662877, "grad_norm": 0.6385973016730124, "learning_rate": 5.447154471544716e-06, "loss": 0.6957, "step": 134 }, { "epoch": 0.1645004061738424, "grad_norm": 0.5686127235373731, "learning_rate": 5.487804878048781e-06, "loss": 0.698, "step": 135 }, { "epoch": 0.16571892770105606, "grad_norm": 0.6833156687934043, "learning_rate": 5.528455284552846e-06, "loss": 0.6851, "step": 136 }, { "epoch": 0.1669374492282697, "grad_norm": 0.700308205780514, "learning_rate": 5.569105691056911e-06, "loss": 0.6806, "step": 137 }, { "epoch": 0.16815597075548336, "grad_norm": 0.5462617932277235, "learning_rate": 5.609756097560977e-06, "loss": 0.7091, "step": 138 }, { "epoch": 0.169374492282697, "grad_norm": 0.7852807393490868, "learning_rate": 5.650406504065041e-06, "loss": 0.7123, "step": 139 }, { "epoch": 0.17059301380991065, "grad_norm": 0.6057177569541422, "learning_rate": 5.691056910569106e-06, "loss": 0.7078, "step": 140 }, { "epoch": 0.1718115353371243, "grad_norm": 0.6623630085315475, "learning_rate": 5.731707317073171e-06, "loss": 0.6685, "step": 141 }, { "epoch": 0.17303005686433795, "grad_norm": 0.6456913396682378, "learning_rate": 5.772357723577237e-06, "loss": 0.7076, "step": 142 }, { "epoch": 0.17424857839155158, "grad_norm": 0.5490338737139909, "learning_rate": 5.813008130081301e-06, "loss": 0.6679, "step": 143 }, { "epoch": 0.17546709991876522, "grad_norm": 0.6684035267057585, "learning_rate": 5.853658536585366e-06, "loss": 0.6862, "step": 144 }, { "epoch": 0.17668562144597888, "grad_norm": 0.5217072648450134, "learning_rate": 5.894308943089432e-06, "loss": 0.6846, "step": 145 }, { "epoch": 0.17790414297319251, "grad_norm": 0.5999701195487821, "learning_rate": 5.934959349593496e-06, "loss": 0.6971, "step": 146 }, { "epoch": 0.17912266450040618, "grad_norm": 0.6672016415742844, "learning_rate": 5.9756097560975615e-06, "loss": 0.7023, "step": 147 }, { "epoch": 0.1803411860276198, "grad_norm": 0.5638253505945849, "learning_rate": 6.016260162601627e-06, "loss": 0.6718, "step": 148 }, { "epoch": 0.18155970755483347, "grad_norm": 0.5443164616899534, "learning_rate": 6.056910569105692e-06, "loss": 0.678, "step": 149 }, { "epoch": 0.1827782290820471, "grad_norm": 0.5515636708718161, "learning_rate": 6.0975609756097564e-06, "loss": 0.6914, "step": 150 }, { "epoch": 0.18399675060926077, "grad_norm": 0.6385969983161707, "learning_rate": 6.138211382113821e-06, "loss": 0.6796, "step": 151 }, { "epoch": 0.1852152721364744, "grad_norm": 0.6113406592810082, "learning_rate": 6.178861788617887e-06, "loss": 0.682, "step": 152 }, { "epoch": 0.18643379366368806, "grad_norm": 0.6906350808865743, "learning_rate": 6.219512195121951e-06, "loss": 0.6671, "step": 153 }, { "epoch": 0.1876523151909017, "grad_norm": 0.7020113339328089, "learning_rate": 6.260162601626017e-06, "loss": 0.6835, "step": 154 }, { "epoch": 0.18887083671811536, "grad_norm": 0.5548828056807938, "learning_rate": 6.300813008130082e-06, "loss": 0.6809, "step": 155 }, { "epoch": 0.190089358245329, "grad_norm": 0.8352572415357199, "learning_rate": 6.341463414634147e-06, "loss": 0.6809, "step": 156 }, { "epoch": 0.19130787977254265, "grad_norm": 0.6517742914384106, "learning_rate": 6.3821138211382115e-06, "loss": 0.6791, "step": 157 }, { "epoch": 0.1925264012997563, "grad_norm": 0.6204344146843959, "learning_rate": 6.422764227642278e-06, "loss": 0.6766, "step": 158 }, { "epoch": 0.19374492282696995, "grad_norm": 0.8219899409754744, "learning_rate": 6.463414634146342e-06, "loss": 0.6726, "step": 159 }, { "epoch": 0.19496344435418358, "grad_norm": 0.6541183549209502, "learning_rate": 6.504065040650407e-06, "loss": 0.6781, "step": 160 }, { "epoch": 0.19618196588139725, "grad_norm": 0.566310879262149, "learning_rate": 6.544715447154472e-06, "loss": 0.6702, "step": 161 }, { "epoch": 0.19740048740861088, "grad_norm": 0.775755089339994, "learning_rate": 6.585365853658538e-06, "loss": 0.6905, "step": 162 }, { "epoch": 0.19861900893582454, "grad_norm": 0.6288678821845954, "learning_rate": 6.626016260162602e-06, "loss": 0.6881, "step": 163 }, { "epoch": 0.19983753046303818, "grad_norm": 0.7640676261377178, "learning_rate": 6.666666666666667e-06, "loss": 0.6737, "step": 164 }, { "epoch": 0.20105605199025184, "grad_norm": 0.5731637259066372, "learning_rate": 6.707317073170733e-06, "loss": 0.674, "step": 165 }, { "epoch": 0.20227457351746547, "grad_norm": 0.7761516718399211, "learning_rate": 6.747967479674797e-06, "loss": 0.6928, "step": 166 }, { "epoch": 0.20349309504467913, "grad_norm": 0.7100095841961804, "learning_rate": 6.788617886178862e-06, "loss": 0.6727, "step": 167 }, { "epoch": 0.20471161657189277, "grad_norm": 0.569930635478734, "learning_rate": 6.829268292682928e-06, "loss": 0.689, "step": 168 }, { "epoch": 0.20593013809910643, "grad_norm": 0.7691268212355195, "learning_rate": 6.869918699186993e-06, "loss": 0.6973, "step": 169 }, { "epoch": 0.20714865962632006, "grad_norm": 0.560097362553805, "learning_rate": 6.910569105691057e-06, "loss": 0.6681, "step": 170 }, { "epoch": 0.20836718115353373, "grad_norm": 0.6849837037178143, "learning_rate": 6.951219512195122e-06, "loss": 0.6592, "step": 171 }, { "epoch": 0.20958570268074736, "grad_norm": 0.7951681541297303, "learning_rate": 6.991869918699188e-06, "loss": 0.6812, "step": 172 }, { "epoch": 0.210804224207961, "grad_norm": 0.5428585266109707, "learning_rate": 7.032520325203252e-06, "loss": 0.696, "step": 173 }, { "epoch": 0.21202274573517466, "grad_norm": 0.7462142080092842, "learning_rate": 7.0731707317073175e-06, "loss": 0.6793, "step": 174 }, { "epoch": 0.2132412672623883, "grad_norm": 0.6370138105851062, "learning_rate": 7.113821138211383e-06, "loss": 0.6717, "step": 175 }, { "epoch": 0.21445978878960195, "grad_norm": 0.566025113423941, "learning_rate": 7.154471544715448e-06, "loss": 0.6694, "step": 176 }, { "epoch": 0.21567831031681559, "grad_norm": 0.6632467338949928, "learning_rate": 7.1951219512195125e-06, "loss": 0.679, "step": 177 }, { "epoch": 0.21689683184402925, "grad_norm": 0.5775437329049822, "learning_rate": 7.2357723577235786e-06, "loss": 0.6738, "step": 178 }, { "epoch": 0.21811535337124288, "grad_norm": 0.6763254821774859, "learning_rate": 7.276422764227643e-06, "loss": 0.6885, "step": 179 }, { "epoch": 0.21933387489845654, "grad_norm": 0.6525555364778458, "learning_rate": 7.317073170731707e-06, "loss": 0.6829, "step": 180 }, { "epoch": 0.22055239642567018, "grad_norm": 0.6376488223620492, "learning_rate": 7.357723577235773e-06, "loss": 0.6611, "step": 181 }, { "epoch": 0.22177091795288384, "grad_norm": 0.6135443136132807, "learning_rate": 7.398373983739838e-06, "loss": 0.6875, "step": 182 }, { "epoch": 0.22298943948009747, "grad_norm": 0.6616707267536054, "learning_rate": 7.439024390243903e-06, "loss": 0.6637, "step": 183 }, { "epoch": 0.22420796100731114, "grad_norm": 0.6601543949811752, "learning_rate": 7.4796747967479676e-06, "loss": 0.6714, "step": 184 }, { "epoch": 0.22542648253452477, "grad_norm": 0.689531862633905, "learning_rate": 7.520325203252034e-06, "loss": 0.6717, "step": 185 }, { "epoch": 0.22664500406173843, "grad_norm": 0.6693067594219624, "learning_rate": 7.560975609756098e-06, "loss": 0.6538, "step": 186 }, { "epoch": 0.22786352558895206, "grad_norm": 0.6877489613732909, "learning_rate": 7.601626016260163e-06, "loss": 0.6791, "step": 187 }, { "epoch": 0.22908204711616573, "grad_norm": 0.6102935004924294, "learning_rate": 7.64227642276423e-06, "loss": 0.6697, "step": 188 }, { "epoch": 0.23030056864337936, "grad_norm": 0.7109056322063843, "learning_rate": 7.682926829268293e-06, "loss": 0.679, "step": 189 }, { "epoch": 0.23151909017059302, "grad_norm": 0.6183616410187914, "learning_rate": 7.723577235772358e-06, "loss": 0.662, "step": 190 }, { "epoch": 0.23273761169780666, "grad_norm": 0.6117992409555106, "learning_rate": 7.764227642276424e-06, "loss": 0.6671, "step": 191 }, { "epoch": 0.23395613322502032, "grad_norm": 0.7288006220266883, "learning_rate": 7.804878048780489e-06, "loss": 0.7049, "step": 192 }, { "epoch": 0.23517465475223395, "grad_norm": 0.6795369812377434, "learning_rate": 7.845528455284554e-06, "loss": 0.6638, "step": 193 }, { "epoch": 0.23639317627944761, "grad_norm": 0.6336366896960686, "learning_rate": 7.886178861788618e-06, "loss": 0.6738, "step": 194 }, { "epoch": 0.23761169780666125, "grad_norm": 0.7184491761674651, "learning_rate": 7.926829268292685e-06, "loss": 0.6628, "step": 195 }, { "epoch": 0.2388302193338749, "grad_norm": 0.659177288266525, "learning_rate": 7.967479674796748e-06, "loss": 0.6805, "step": 196 }, { "epoch": 0.24004874086108854, "grad_norm": 0.578465157473097, "learning_rate": 8.008130081300813e-06, "loss": 0.6894, "step": 197 }, { "epoch": 0.2412672623883022, "grad_norm": 0.6868825593189901, "learning_rate": 8.048780487804879e-06, "loss": 0.6847, "step": 198 }, { "epoch": 0.24248578391551584, "grad_norm": 0.6185564483778565, "learning_rate": 8.089430894308944e-06, "loss": 0.6667, "step": 199 }, { "epoch": 0.2437043054427295, "grad_norm": 0.7195682220690447, "learning_rate": 8.130081300813009e-06, "loss": 0.6681, "step": 200 }, { "epoch": 0.24492282696994314, "grad_norm": 0.5988887592571761, "learning_rate": 8.170731707317073e-06, "loss": 0.663, "step": 201 }, { "epoch": 0.24614134849715677, "grad_norm": 0.6728160874756142, "learning_rate": 8.21138211382114e-06, "loss": 0.6688, "step": 202 }, { "epoch": 0.24735987002437043, "grad_norm": 0.6167676512934452, "learning_rate": 8.252032520325203e-06, "loss": 0.6597, "step": 203 }, { "epoch": 0.24857839155158407, "grad_norm": 0.6963420894322225, "learning_rate": 8.292682926829268e-06, "loss": 0.6739, "step": 204 }, { "epoch": 0.24979691307879773, "grad_norm": 0.6374482813383724, "learning_rate": 8.333333333333334e-06, "loss": 0.6782, "step": 205 }, { "epoch": 0.2510154346060114, "grad_norm": 0.7061607202293944, "learning_rate": 8.373983739837399e-06, "loss": 0.6784, "step": 206 }, { "epoch": 0.252233956133225, "grad_norm": 0.6588145062673179, "learning_rate": 8.414634146341464e-06, "loss": 0.6785, "step": 207 }, { "epoch": 0.25345247766043866, "grad_norm": 0.5575233234513415, "learning_rate": 8.45528455284553e-06, "loss": 0.659, "step": 208 }, { "epoch": 0.2546709991876523, "grad_norm": 0.660881658687861, "learning_rate": 8.495934959349595e-06, "loss": 0.672, "step": 209 }, { "epoch": 0.255889520714866, "grad_norm": 0.8528802704630485, "learning_rate": 8.536585365853658e-06, "loss": 0.6758, "step": 210 }, { "epoch": 0.2571080422420796, "grad_norm": 0.5423728405429434, "learning_rate": 8.577235772357724e-06, "loss": 0.6526, "step": 211 }, { "epoch": 0.25832656376929325, "grad_norm": 0.7828577081027186, "learning_rate": 8.617886178861789e-06, "loss": 0.6619, "step": 212 }, { "epoch": 0.2595450852965069, "grad_norm": 0.683409796151077, "learning_rate": 8.658536585365854e-06, "loss": 0.6609, "step": 213 }, { "epoch": 0.2607636068237206, "grad_norm": 0.7965663856791516, "learning_rate": 8.69918699186992e-06, "loss": 0.6771, "step": 214 }, { "epoch": 0.2619821283509342, "grad_norm": 0.5296355193224814, "learning_rate": 8.739837398373985e-06, "loss": 0.6631, "step": 215 }, { "epoch": 0.26320064987814784, "grad_norm": 0.6996343517682662, "learning_rate": 8.78048780487805e-06, "loss": 0.6778, "step": 216 }, { "epoch": 0.2644191714053615, "grad_norm": 0.5685534992796657, "learning_rate": 8.821138211382113e-06, "loss": 0.6542, "step": 217 }, { "epoch": 0.26563769293257516, "grad_norm": 0.5812188798996168, "learning_rate": 8.86178861788618e-06, "loss": 0.6665, "step": 218 }, { "epoch": 0.2668562144597888, "grad_norm": 0.5872098802829594, "learning_rate": 8.902439024390244e-06, "loss": 0.6793, "step": 219 }, { "epoch": 0.26807473598700243, "grad_norm": 0.6138295440628797, "learning_rate": 8.94308943089431e-06, "loss": 0.6845, "step": 220 }, { "epoch": 0.26929325751421607, "grad_norm": 0.5555943793492026, "learning_rate": 8.983739837398374e-06, "loss": 0.6456, "step": 221 }, { "epoch": 0.27051177904142976, "grad_norm": 0.5716563368438368, "learning_rate": 9.02439024390244e-06, "loss": 0.6881, "step": 222 }, { "epoch": 0.2717303005686434, "grad_norm": 0.5657678117161605, "learning_rate": 9.065040650406505e-06, "loss": 0.6721, "step": 223 }, { "epoch": 0.272948822095857, "grad_norm": 0.6494717196083141, "learning_rate": 9.10569105691057e-06, "loss": 0.6856, "step": 224 }, { "epoch": 0.27416734362307066, "grad_norm": 0.5769888402008441, "learning_rate": 9.146341463414635e-06, "loss": 0.65, "step": 225 }, { "epoch": 0.27538586515028435, "grad_norm": 0.6372976291357912, "learning_rate": 9.1869918699187e-06, "loss": 0.679, "step": 226 }, { "epoch": 0.276604386677498, "grad_norm": 0.61020413240267, "learning_rate": 9.227642276422764e-06, "loss": 0.6462, "step": 227 }, { "epoch": 0.2778229082047116, "grad_norm": 0.7347279477946881, "learning_rate": 9.268292682926831e-06, "loss": 0.6502, "step": 228 }, { "epoch": 0.27904142973192525, "grad_norm": 0.6513241840116742, "learning_rate": 9.308943089430895e-06, "loss": 0.6707, "step": 229 }, { "epoch": 0.2802599512591389, "grad_norm": 0.6578226137183896, "learning_rate": 9.34959349593496e-06, "loss": 0.6559, "step": 230 }, { "epoch": 0.2814784727863526, "grad_norm": 0.6443126179924461, "learning_rate": 9.390243902439025e-06, "loss": 0.6815, "step": 231 }, { "epoch": 0.2826969943135662, "grad_norm": 0.5681908489104979, "learning_rate": 9.43089430894309e-06, "loss": 0.6483, "step": 232 }, { "epoch": 0.28391551584077984, "grad_norm": 0.638868530973396, "learning_rate": 9.471544715447156e-06, "loss": 0.6663, "step": 233 }, { "epoch": 0.2851340373679935, "grad_norm": 0.5345735736702238, "learning_rate": 9.51219512195122e-06, "loss": 0.6507, "step": 234 }, { "epoch": 0.28635255889520717, "grad_norm": 0.6170557049684545, "learning_rate": 9.552845528455286e-06, "loss": 0.6533, "step": 235 }, { "epoch": 0.2875710804224208, "grad_norm": 0.6282001318911594, "learning_rate": 9.59349593495935e-06, "loss": 0.6715, "step": 236 }, { "epoch": 0.28878960194963443, "grad_norm": 0.548783110101442, "learning_rate": 9.634146341463415e-06, "loss": 0.6536, "step": 237 }, { "epoch": 0.29000812347684807, "grad_norm": 0.6300302160047813, "learning_rate": 9.67479674796748e-06, "loss": 0.657, "step": 238 }, { "epoch": 0.29122664500406176, "grad_norm": 0.5955216072274768, "learning_rate": 9.715447154471546e-06, "loss": 0.6767, "step": 239 }, { "epoch": 0.2924451665312754, "grad_norm": 0.6216921714562351, "learning_rate": 9.756097560975611e-06, "loss": 0.6492, "step": 240 }, { "epoch": 0.293663688058489, "grad_norm": 0.6909539613975563, "learning_rate": 9.796747967479675e-06, "loss": 0.6618, "step": 241 }, { "epoch": 0.29488220958570266, "grad_norm": 0.8137292747107515, "learning_rate": 9.837398373983741e-06, "loss": 0.6614, "step": 242 }, { "epoch": 0.29610073111291635, "grad_norm": 0.5855911517789665, "learning_rate": 9.878048780487805e-06, "loss": 0.6561, "step": 243 }, { "epoch": 0.29731925264013, "grad_norm": 0.8851136874577217, "learning_rate": 9.91869918699187e-06, "loss": 0.6498, "step": 244 }, { "epoch": 0.2985377741673436, "grad_norm": 0.57227502230073, "learning_rate": 9.959349593495936e-06, "loss": 0.6606, "step": 245 }, { "epoch": 0.29975629569455725, "grad_norm": 0.9576157821693805, "learning_rate": 1e-05, "loss": 0.648, "step": 246 }, { "epoch": 0.30097481722177094, "grad_norm": 0.574426873878406, "learning_rate": 9.999994966333388e-06, "loss": 0.6543, "step": 247 }, { "epoch": 0.3021933387489846, "grad_norm": 0.7230465083023617, "learning_rate": 9.99997986534369e-06, "loss": 0.6654, "step": 248 }, { "epoch": 0.3034118602761982, "grad_norm": 0.5421626680587527, "learning_rate": 9.999954697061305e-06, "loss": 0.6343, "step": 249 }, { "epoch": 0.30463038180341184, "grad_norm": 0.6129301937842085, "learning_rate": 9.999919461536915e-06, "loss": 0.6449, "step": 250 }, { "epoch": 0.30584890333062553, "grad_norm": 0.563497786259594, "learning_rate": 9.999874158841462e-06, "loss": 0.66, "step": 251 }, { "epoch": 0.30706742485783917, "grad_norm": 0.6709530297921161, "learning_rate": 9.999818789066164e-06, "loss": 0.6575, "step": 252 }, { "epoch": 0.3082859463850528, "grad_norm": 0.6033112191541231, "learning_rate": 9.999753352322502e-06, "loss": 0.6745, "step": 253 }, { "epoch": 0.30950446791226643, "grad_norm": 0.7085418197042371, "learning_rate": 9.999677848742238e-06, "loss": 0.645, "step": 254 }, { "epoch": 0.3107229894394801, "grad_norm": 0.6149439429340515, "learning_rate": 9.999592278477389e-06, "loss": 0.6553, "step": 255 }, { "epoch": 0.31194151096669376, "grad_norm": 0.5361824485289747, "learning_rate": 9.999496641700249e-06, "loss": 0.6394, "step": 256 }, { "epoch": 0.3131600324939074, "grad_norm": 0.7876266919973667, "learning_rate": 9.99939093860338e-06, "loss": 0.651, "step": 257 }, { "epoch": 0.314378554021121, "grad_norm": 0.5240336550865616, "learning_rate": 9.999275169399614e-06, "loss": 0.6445, "step": 258 }, { "epoch": 0.31559707554833466, "grad_norm": 0.9003012478867778, "learning_rate": 9.999149334322047e-06, "loss": 0.6759, "step": 259 }, { "epoch": 0.31681559707554835, "grad_norm": 0.520552428762164, "learning_rate": 9.999013433624042e-06, "loss": 0.6656, "step": 260 }, { "epoch": 0.318034118602762, "grad_norm": 0.8451285058918907, "learning_rate": 9.998867467579234e-06, "loss": 0.6393, "step": 261 }, { "epoch": 0.3192526401299756, "grad_norm": 0.6368634173244008, "learning_rate": 9.998711436481519e-06, "loss": 0.6544, "step": 262 }, { "epoch": 0.32047116165718925, "grad_norm": 0.690099709138949, "learning_rate": 9.998545340645058e-06, "loss": 0.6609, "step": 263 }, { "epoch": 0.32168968318440294, "grad_norm": 0.7144861500132949, "learning_rate": 9.998369180404283e-06, "loss": 0.6647, "step": 264 }, { "epoch": 0.3229082047116166, "grad_norm": 0.6362319514002672, "learning_rate": 9.998182956113885e-06, "loss": 0.6533, "step": 265 }, { "epoch": 0.3241267262388302, "grad_norm": 0.6488964510495924, "learning_rate": 9.99798666814882e-06, "loss": 0.6504, "step": 266 }, { "epoch": 0.32534524776604384, "grad_norm": 0.6063198470537309, "learning_rate": 9.99778031690431e-06, "loss": 0.6563, "step": 267 }, { "epoch": 0.32656376929325753, "grad_norm": 0.5938533025522102, "learning_rate": 9.997563902795834e-06, "loss": 0.6675, "step": 268 }, { "epoch": 0.32778229082047117, "grad_norm": 0.7515871090930308, "learning_rate": 9.997337426259134e-06, "loss": 0.6792, "step": 269 }, { "epoch": 0.3290008123476848, "grad_norm": 0.703279934707329, "learning_rate": 9.997100887750215e-06, "loss": 0.6635, "step": 270 }, { "epoch": 0.33021933387489844, "grad_norm": 0.695544945955001, "learning_rate": 9.996854287745337e-06, "loss": 0.645, "step": 271 }, { "epoch": 0.3314378554021121, "grad_norm": 0.7462833994362996, "learning_rate": 9.996597626741023e-06, "loss": 0.6478, "step": 272 }, { "epoch": 0.33265637692932576, "grad_norm": 0.6876699055946316, "learning_rate": 9.99633090525405e-06, "loss": 0.6495, "step": 273 }, { "epoch": 0.3338748984565394, "grad_norm": 0.6161949269900944, "learning_rate": 9.996054123821455e-06, "loss": 0.6477, "step": 274 }, { "epoch": 0.335093419983753, "grad_norm": 0.6992818714334844, "learning_rate": 9.995767283000526e-06, "loss": 0.6471, "step": 275 }, { "epoch": 0.3363119415109667, "grad_norm": 0.6649545633189144, "learning_rate": 9.995470383368808e-06, "loss": 0.6526, "step": 276 }, { "epoch": 0.33753046303818035, "grad_norm": 0.7069772548058584, "learning_rate": 9.995163425524097e-06, "loss": 0.6622, "step": 277 }, { "epoch": 0.338748984565394, "grad_norm": 0.7343365884623839, "learning_rate": 9.994846410084447e-06, "loss": 0.6401, "step": 278 }, { "epoch": 0.3399675060926076, "grad_norm": 0.7666383023534878, "learning_rate": 9.994519337688152e-06, "loss": 0.6351, "step": 279 }, { "epoch": 0.3411860276198213, "grad_norm": 0.7101687784996984, "learning_rate": 9.994182208993766e-06, "loss": 0.6686, "step": 280 }, { "epoch": 0.34240454914703494, "grad_norm": 0.794098416336116, "learning_rate": 9.993835024680084e-06, "loss": 0.6534, "step": 281 }, { "epoch": 0.3436230706742486, "grad_norm": 0.6476191969862704, "learning_rate": 9.993477785446151e-06, "loss": 0.6321, "step": 282 }, { "epoch": 0.3448415922014622, "grad_norm": 0.7027462161925977, "learning_rate": 9.993110492011256e-06, "loss": 0.6677, "step": 283 }, { "epoch": 0.3460601137286759, "grad_norm": 0.7368948502336647, "learning_rate": 9.992733145114932e-06, "loss": 0.6332, "step": 284 }, { "epoch": 0.34727863525588953, "grad_norm": 0.769793462172428, "learning_rate": 9.992345745516954e-06, "loss": 0.6627, "step": 285 }, { "epoch": 0.34849715678310317, "grad_norm": 0.6391657112532801, "learning_rate": 9.99194829399734e-06, "loss": 0.6364, "step": 286 }, { "epoch": 0.3497156783103168, "grad_norm": 0.8671328129231476, "learning_rate": 9.991540791356342e-06, "loss": 0.6558, "step": 287 }, { "epoch": 0.35093419983753044, "grad_norm": 0.6143371180878986, "learning_rate": 9.991123238414455e-06, "loss": 0.6725, "step": 288 }, { "epoch": 0.3521527213647441, "grad_norm": 0.7114612477683598, "learning_rate": 9.99069563601241e-06, "loss": 0.6386, "step": 289 }, { "epoch": 0.35337124289195776, "grad_norm": 0.5910112375855043, "learning_rate": 9.990257985011168e-06, "loss": 0.6648, "step": 290 }, { "epoch": 0.3545897644191714, "grad_norm": 0.6709399619542642, "learning_rate": 9.989810286291923e-06, "loss": 0.6641, "step": 291 }, { "epoch": 0.35580828594638503, "grad_norm": 0.5876086675256037, "learning_rate": 9.989352540756103e-06, "loss": 0.6519, "step": 292 }, { "epoch": 0.3570268074735987, "grad_norm": 0.4993245470857056, "learning_rate": 9.988884749325366e-06, "loss": 0.6409, "step": 293 }, { "epoch": 0.35824532900081235, "grad_norm": 0.6361394412220084, "learning_rate": 9.988406912941591e-06, "loss": 0.6543, "step": 294 }, { "epoch": 0.359463850528026, "grad_norm": 0.5972665446098098, "learning_rate": 9.987919032566885e-06, "loss": 0.6379, "step": 295 }, { "epoch": 0.3606823720552396, "grad_norm": 0.5332779981117456, "learning_rate": 9.987421109183581e-06, "loss": 0.6362, "step": 296 }, { "epoch": 0.3619008935824533, "grad_norm": 0.6057994457076236, "learning_rate": 9.986913143794232e-06, "loss": 0.6455, "step": 297 }, { "epoch": 0.36311941510966694, "grad_norm": 0.6075132715056499, "learning_rate": 9.986395137421607e-06, "loss": 0.6624, "step": 298 }, { "epoch": 0.3643379366368806, "grad_norm": 0.5258247408109219, "learning_rate": 9.985867091108697e-06, "loss": 0.638, "step": 299 }, { "epoch": 0.3655564581640942, "grad_norm": 0.5267906230313797, "learning_rate": 9.985329005918702e-06, "loss": 0.6362, "step": 300 }, { "epoch": 0.3667749796913079, "grad_norm": 0.5638416352250496, "learning_rate": 9.984780882935043e-06, "loss": 0.6301, "step": 301 }, { "epoch": 0.36799350121852153, "grad_norm": 0.545011579464239, "learning_rate": 9.984222723261344e-06, "loss": 0.6599, "step": 302 }, { "epoch": 0.36921202274573517, "grad_norm": 0.5606014722546357, "learning_rate": 9.983654528021442e-06, "loss": 0.6542, "step": 303 }, { "epoch": 0.3704305442729488, "grad_norm": 0.6018343388636366, "learning_rate": 9.98307629835938e-06, "loss": 0.6368, "step": 304 }, { "epoch": 0.3716490658001625, "grad_norm": 0.6118602452372705, "learning_rate": 9.982488035439401e-06, "loss": 0.6513, "step": 305 }, { "epoch": 0.3728675873273761, "grad_norm": 0.6022653337990805, "learning_rate": 9.981889740445958e-06, "loss": 0.6496, "step": 306 }, { "epoch": 0.37408610885458976, "grad_norm": 0.569004250440184, "learning_rate": 9.981281414583693e-06, "loss": 0.6598, "step": 307 }, { "epoch": 0.3753046303818034, "grad_norm": 0.5713014740165444, "learning_rate": 9.980663059077453e-06, "loss": 0.6613, "step": 308 }, { "epoch": 0.3765231519090171, "grad_norm": 0.6154580840564017, "learning_rate": 9.980034675172274e-06, "loss": 0.6442, "step": 309 }, { "epoch": 0.3777416734362307, "grad_norm": 0.5917553562402863, "learning_rate": 9.979396264133388e-06, "loss": 0.6431, "step": 310 }, { "epoch": 0.37896019496344435, "grad_norm": 0.578864320620872, "learning_rate": 9.978747827246214e-06, "loss": 0.6589, "step": 311 }, { "epoch": 0.380178716490658, "grad_norm": 0.6460070122884725, "learning_rate": 9.978089365816357e-06, "loss": 0.6267, "step": 312 }, { "epoch": 0.3813972380178717, "grad_norm": 0.6165901634865715, "learning_rate": 9.977420881169607e-06, "loss": 0.6357, "step": 313 }, { "epoch": 0.3826157595450853, "grad_norm": 0.6862027434641219, "learning_rate": 9.976742374651936e-06, "loss": 0.6607, "step": 314 }, { "epoch": 0.38383428107229894, "grad_norm": 0.6447789605505084, "learning_rate": 9.976053847629496e-06, "loss": 0.6464, "step": 315 }, { "epoch": 0.3850528025995126, "grad_norm": 0.597882927094437, "learning_rate": 9.97535530148861e-06, "loss": 0.6337, "step": 316 }, { "epoch": 0.3862713241267262, "grad_norm": 0.6296819593414332, "learning_rate": 9.974646737635781e-06, "loss": 0.6474, "step": 317 }, { "epoch": 0.3874898456539399, "grad_norm": 0.6313838311506389, "learning_rate": 9.973928157497675e-06, "loss": 0.6289, "step": 318 }, { "epoch": 0.38870836718115354, "grad_norm": 0.6255452790127047, "learning_rate": 9.97319956252113e-06, "loss": 0.6418, "step": 319 }, { "epoch": 0.38992688870836717, "grad_norm": 0.501125482187719, "learning_rate": 9.972460954173149e-06, "loss": 0.6469, "step": 320 }, { "epoch": 0.3911454102355808, "grad_norm": 0.5644277137540713, "learning_rate": 9.971712333940896e-06, "loss": 0.6431, "step": 321 }, { "epoch": 0.3923639317627945, "grad_norm": 0.5401625089221826, "learning_rate": 9.970953703331692e-06, "loss": 0.6399, "step": 322 }, { "epoch": 0.3935824532900081, "grad_norm": 0.6126970579614653, "learning_rate": 9.970185063873012e-06, "loss": 0.6312, "step": 323 }, { "epoch": 0.39480097481722176, "grad_norm": 0.6237167355625934, "learning_rate": 9.969406417112489e-06, "loss": 0.6492, "step": 324 }, { "epoch": 0.3960194963444354, "grad_norm": 0.6083530680570769, "learning_rate": 9.9686177646179e-06, "loss": 0.6404, "step": 325 }, { "epoch": 0.3972380178716491, "grad_norm": 0.6156210783234582, "learning_rate": 9.967819107977175e-06, "loss": 0.626, "step": 326 }, { "epoch": 0.3984565393988627, "grad_norm": 0.6913246389420981, "learning_rate": 9.967010448798376e-06, "loss": 0.6464, "step": 327 }, { "epoch": 0.39967506092607635, "grad_norm": 0.6430895031047548, "learning_rate": 9.966191788709716e-06, "loss": 0.6482, "step": 328 }, { "epoch": 0.40089358245329, "grad_norm": 0.670581453307023, "learning_rate": 9.965363129359537e-06, "loss": 0.649, "step": 329 }, { "epoch": 0.4021121039805037, "grad_norm": 0.6373745499675882, "learning_rate": 9.964524472416319e-06, "loss": 0.6231, "step": 330 }, { "epoch": 0.4033306255077173, "grad_norm": 0.5729524017518108, "learning_rate": 9.96367581956867e-06, "loss": 0.639, "step": 331 }, { "epoch": 0.40454914703493094, "grad_norm": 0.60528048612915, "learning_rate": 9.962817172525323e-06, "loss": 0.6412, "step": 332 }, { "epoch": 0.4057676685621446, "grad_norm": 0.5439146819119978, "learning_rate": 9.961948533015135e-06, "loss": 0.6463, "step": 333 }, { "epoch": 0.40698619008935827, "grad_norm": 0.6696342043794363, "learning_rate": 9.961069902787082e-06, "loss": 0.6559, "step": 334 }, { "epoch": 0.4082047116165719, "grad_norm": 0.6137113821251218, "learning_rate": 9.96018128361026e-06, "loss": 0.6186, "step": 335 }, { "epoch": 0.40942323314378554, "grad_norm": 0.7521896228588043, "learning_rate": 9.959282677273869e-06, "loss": 0.6585, "step": 336 }, { "epoch": 0.41064175467099917, "grad_norm": 0.6161644621872354, "learning_rate": 9.958374085587228e-06, "loss": 0.6511, "step": 337 }, { "epoch": 0.41186027619821286, "grad_norm": 0.6232166791838529, "learning_rate": 9.957455510379753e-06, "loss": 0.6421, "step": 338 }, { "epoch": 0.4130787977254265, "grad_norm": 0.6575837363786434, "learning_rate": 9.956526953500965e-06, "loss": 0.6288, "step": 339 }, { "epoch": 0.41429731925264013, "grad_norm": 0.624761687952515, "learning_rate": 9.955588416820482e-06, "loss": 0.6397, "step": 340 }, { "epoch": 0.41551584077985376, "grad_norm": 0.6332930756907055, "learning_rate": 9.954639902228018e-06, "loss": 0.6444, "step": 341 }, { "epoch": 0.41673436230706745, "grad_norm": 0.5746664206825376, "learning_rate": 9.953681411633376e-06, "loss": 0.6414, "step": 342 }, { "epoch": 0.4179528838342811, "grad_norm": 0.6762777021979247, "learning_rate": 9.952712946966441e-06, "loss": 0.6306, "step": 343 }, { "epoch": 0.4191714053614947, "grad_norm": 0.6244129529931802, "learning_rate": 9.951734510177187e-06, "loss": 0.6366, "step": 344 }, { "epoch": 0.42038992688870835, "grad_norm": 0.6226787509569254, "learning_rate": 9.950746103235663e-06, "loss": 0.6302, "step": 345 }, { "epoch": 0.421608448415922, "grad_norm": 0.6520199261370837, "learning_rate": 9.949747728131994e-06, "loss": 0.6816, "step": 346 }, { "epoch": 0.4228269699431357, "grad_norm": 0.6026134976644628, "learning_rate": 9.948739386876376e-06, "loss": 0.6385, "step": 347 }, { "epoch": 0.4240454914703493, "grad_norm": 0.6012466224483265, "learning_rate": 9.947721081499068e-06, "loss": 0.6458, "step": 348 }, { "epoch": 0.42526401299756295, "grad_norm": 0.5524226925373649, "learning_rate": 9.946692814050396e-06, "loss": 0.6281, "step": 349 }, { "epoch": 0.4264825345247766, "grad_norm": 0.6055953304742949, "learning_rate": 9.945654586600741e-06, "loss": 0.6467, "step": 350 }, { "epoch": 0.42770105605199027, "grad_norm": 0.586137745210729, "learning_rate": 9.944606401240538e-06, "loss": 0.6379, "step": 351 }, { "epoch": 0.4289195775792039, "grad_norm": 0.5125599093697626, "learning_rate": 9.943548260080277e-06, "loss": 0.6523, "step": 352 }, { "epoch": 0.43013809910641754, "grad_norm": 0.6305973658118967, "learning_rate": 9.942480165250487e-06, "loss": 0.6389, "step": 353 }, { "epoch": 0.43135662063363117, "grad_norm": 0.5220411272087411, "learning_rate": 9.941402118901743e-06, "loss": 0.6425, "step": 354 }, { "epoch": 0.43257514216084486, "grad_norm": 0.5753441957701829, "learning_rate": 9.940314123204656e-06, "loss": 0.6441, "step": 355 }, { "epoch": 0.4337936636880585, "grad_norm": 0.584328279121849, "learning_rate": 9.939216180349864e-06, "loss": 0.6359, "step": 356 }, { "epoch": 0.43501218521527213, "grad_norm": 0.6135441335146246, "learning_rate": 9.938108292548044e-06, "loss": 0.6267, "step": 357 }, { "epoch": 0.43623070674248576, "grad_norm": 0.5429972724232678, "learning_rate": 9.93699046202989e-06, "loss": 0.611, "step": 358 }, { "epoch": 0.43744922826969945, "grad_norm": 0.6487815842031103, "learning_rate": 9.935862691046114e-06, "loss": 0.6395, "step": 359 }, { "epoch": 0.4386677497969131, "grad_norm": 0.5638558609882317, "learning_rate": 9.934724981867447e-06, "loss": 0.6398, "step": 360 }, { "epoch": 0.4398862713241267, "grad_norm": 0.7915256825394801, "learning_rate": 9.93357733678463e-06, "loss": 0.6275, "step": 361 }, { "epoch": 0.44110479285134035, "grad_norm": 0.6072564790199728, "learning_rate": 9.932419758108403e-06, "loss": 0.6313, "step": 362 }, { "epoch": 0.44232331437855404, "grad_norm": 0.7829204972438968, "learning_rate": 9.931252248169518e-06, "loss": 0.6334, "step": 363 }, { "epoch": 0.4435418359057677, "grad_norm": 0.6029448727505217, "learning_rate": 9.930074809318714e-06, "loss": 0.6469, "step": 364 }, { "epoch": 0.4447603574329813, "grad_norm": 0.6793840267075067, "learning_rate": 9.928887443926725e-06, "loss": 0.6334, "step": 365 }, { "epoch": 0.44597887896019495, "grad_norm": 0.5488302948299049, "learning_rate": 9.927690154384273e-06, "loss": 0.6213, "step": 366 }, { "epoch": 0.44719740048740864, "grad_norm": 0.7346734434148855, "learning_rate": 9.92648294310206e-06, "loss": 0.6295, "step": 367 }, { "epoch": 0.44841592201462227, "grad_norm": 0.7457059967309784, "learning_rate": 9.925265812510767e-06, "loss": 0.6379, "step": 368 }, { "epoch": 0.4496344435418359, "grad_norm": 0.621543177481449, "learning_rate": 9.924038765061042e-06, "loss": 0.641, "step": 369 }, { "epoch": 0.45085296506904954, "grad_norm": 0.8188643504363363, "learning_rate": 9.922801803223506e-06, "loss": 0.6481, "step": 370 }, { "epoch": 0.45207148659626323, "grad_norm": 0.6040894853255576, "learning_rate": 9.921554929488741e-06, "loss": 0.6493, "step": 371 }, { "epoch": 0.45329000812347686, "grad_norm": 0.8455545003582287, "learning_rate": 9.920298146367287e-06, "loss": 0.6436, "step": 372 }, { "epoch": 0.4545085296506905, "grad_norm": 0.626392939964308, "learning_rate": 9.919031456389632e-06, "loss": 0.6303, "step": 373 }, { "epoch": 0.45572705117790413, "grad_norm": 0.7483260656404666, "learning_rate": 9.917754862106216e-06, "loss": 0.6306, "step": 374 }, { "epoch": 0.45694557270511776, "grad_norm": 0.6122181327172058, "learning_rate": 9.916468366087418e-06, "loss": 0.6409, "step": 375 }, { "epoch": 0.45816409423233145, "grad_norm": 0.5593648989087618, "learning_rate": 9.915171970923556e-06, "loss": 0.6583, "step": 376 }, { "epoch": 0.4593826157595451, "grad_norm": 0.7626157086282944, "learning_rate": 9.913865679224876e-06, "loss": 0.648, "step": 377 }, { "epoch": 0.4606011372867587, "grad_norm": 0.5027545868887003, "learning_rate": 9.912549493621555e-06, "loss": 0.6378, "step": 378 }, { "epoch": 0.46181965881397236, "grad_norm": 0.6593540069533284, "learning_rate": 9.911223416763689e-06, "loss": 0.6487, "step": 379 }, { "epoch": 0.46303818034118605, "grad_norm": 0.7507657782021496, "learning_rate": 9.909887451321288e-06, "loss": 0.6628, "step": 380 }, { "epoch": 0.4642567018683997, "grad_norm": 0.5963371403892291, "learning_rate": 9.908541599984276e-06, "loss": 0.6304, "step": 381 }, { "epoch": 0.4654752233956133, "grad_norm": 0.7456866534587581, "learning_rate": 9.907185865462476e-06, "loss": 0.6362, "step": 382 }, { "epoch": 0.46669374492282695, "grad_norm": 0.5547991254906135, "learning_rate": 9.905820250485619e-06, "loss": 0.631, "step": 383 }, { "epoch": 0.46791226645004064, "grad_norm": 0.7089080919365149, "learning_rate": 9.904444757803322e-06, "loss": 0.6281, "step": 384 }, { "epoch": 0.46913078797725427, "grad_norm": 0.5003916403857714, "learning_rate": 9.903059390185093e-06, "loss": 0.6412, "step": 385 }, { "epoch": 0.4703493095044679, "grad_norm": 0.6729850093918749, "learning_rate": 9.901664150420328e-06, "loss": 0.6329, "step": 386 }, { "epoch": 0.47156783103168154, "grad_norm": 0.5557718878026181, "learning_rate": 9.90025904131829e-06, "loss": 0.6226, "step": 387 }, { "epoch": 0.47278635255889523, "grad_norm": 0.6260971706778755, "learning_rate": 9.898844065708121e-06, "loss": 0.6257, "step": 388 }, { "epoch": 0.47400487408610886, "grad_norm": 0.5411961675821981, "learning_rate": 9.89741922643883e-06, "loss": 0.6517, "step": 389 }, { "epoch": 0.4752233956133225, "grad_norm": 0.5597130938499267, "learning_rate": 9.895984526379282e-06, "loss": 0.6157, "step": 390 }, { "epoch": 0.47644191714053613, "grad_norm": 0.58052501455543, "learning_rate": 9.894539968418195e-06, "loss": 0.6322, "step": 391 }, { "epoch": 0.4776604386677498, "grad_norm": 0.5211161945377233, "learning_rate": 9.893085555464143e-06, "loss": 0.6089, "step": 392 }, { "epoch": 0.47887896019496345, "grad_norm": 0.6838111314182518, "learning_rate": 9.891621290445534e-06, "loss": 0.632, "step": 393 }, { "epoch": 0.4800974817221771, "grad_norm": 0.5785699696283433, "learning_rate": 9.890147176310618e-06, "loss": 0.623, "step": 394 }, { "epoch": 0.4813160032493907, "grad_norm": 0.6260781225868985, "learning_rate": 9.888663216027477e-06, "loss": 0.6433, "step": 395 }, { "epoch": 0.4825345247766044, "grad_norm": 0.5634389513735794, "learning_rate": 9.887169412584012e-06, "loss": 0.6359, "step": 396 }, { "epoch": 0.48375304630381805, "grad_norm": 0.576861556797157, "learning_rate": 9.885665768987947e-06, "loss": 0.6289, "step": 397 }, { "epoch": 0.4849715678310317, "grad_norm": 0.5991685983326442, "learning_rate": 9.88415228826682e-06, "loss": 0.6345, "step": 398 }, { "epoch": 0.4861900893582453, "grad_norm": 0.5331826337156919, "learning_rate": 9.882628973467972e-06, "loss": 0.6282, "step": 399 }, { "epoch": 0.487408610885459, "grad_norm": 0.5052439699487477, "learning_rate": 9.881095827658548e-06, "loss": 0.629, "step": 400 }, { "epoch": 0.48862713241267264, "grad_norm": 0.5842564825983466, "learning_rate": 9.879552853925486e-06, "loss": 0.6518, "step": 401 }, { "epoch": 0.48984565393988627, "grad_norm": 0.5538659465643975, "learning_rate": 9.878000055375512e-06, "loss": 0.6333, "step": 402 }, { "epoch": 0.4910641754670999, "grad_norm": 0.5200827864775698, "learning_rate": 9.876437435135133e-06, "loss": 0.6348, "step": 403 }, { "epoch": 0.49228269699431354, "grad_norm": 0.6043127912027646, "learning_rate": 9.874864996350633e-06, "loss": 0.6136, "step": 404 }, { "epoch": 0.49350121852152723, "grad_norm": 0.4948272003142496, "learning_rate": 9.873282742188066e-06, "loss": 0.6301, "step": 405 }, { "epoch": 0.49471974004874086, "grad_norm": 0.5983030540970795, "learning_rate": 9.871690675833248e-06, "loss": 0.6354, "step": 406 }, { "epoch": 0.4959382615759545, "grad_norm": 0.5309927588463559, "learning_rate": 9.87008880049175e-06, "loss": 0.6316, "step": 407 }, { "epoch": 0.49715678310316813, "grad_norm": 0.46510544628039285, "learning_rate": 9.868477119388897e-06, "loss": 0.641, "step": 408 }, { "epoch": 0.4983753046303818, "grad_norm": 0.4745237655389145, "learning_rate": 9.866855635769753e-06, "loss": 0.6484, "step": 409 }, { "epoch": 0.49959382615759546, "grad_norm": 0.562173043770555, "learning_rate": 9.86522435289912e-06, "loss": 0.6263, "step": 410 }, { "epoch": 0.5008123476848091, "grad_norm": 0.5419982591023096, "learning_rate": 9.863583274061535e-06, "loss": 0.6197, "step": 411 }, { "epoch": 0.5020308692120228, "grad_norm": 0.5709095665576734, "learning_rate": 9.861932402561253e-06, "loss": 0.6253, "step": 412 }, { "epoch": 0.5032493907392364, "grad_norm": 0.5575561882923015, "learning_rate": 9.86027174172225e-06, "loss": 0.6257, "step": 413 }, { "epoch": 0.50446791226645, "grad_norm": 0.5818761313113621, "learning_rate": 9.858601294888212e-06, "loss": 0.6375, "step": 414 }, { "epoch": 0.5056864337936637, "grad_norm": 0.55560278003152, "learning_rate": 9.856921065422527e-06, "loss": 0.6327, "step": 415 }, { "epoch": 0.5069049553208773, "grad_norm": 0.5142680238787152, "learning_rate": 9.855231056708281e-06, "loss": 0.6347, "step": 416 }, { "epoch": 0.508123476848091, "grad_norm": 0.5468260799033448, "learning_rate": 9.853531272148248e-06, "loss": 0.6165, "step": 417 }, { "epoch": 0.5093419983753046, "grad_norm": 0.5366215405716666, "learning_rate": 9.851821715164891e-06, "loss": 0.6232, "step": 418 }, { "epoch": 0.5105605199025183, "grad_norm": 0.6815769917483668, "learning_rate": 9.850102389200346e-06, "loss": 0.6375, "step": 419 }, { "epoch": 0.511779041429732, "grad_norm": 0.5766636790628379, "learning_rate": 9.848373297716414e-06, "loss": 0.6411, "step": 420 }, { "epoch": 0.5129975629569455, "grad_norm": 0.6508434213004275, "learning_rate": 9.846634444194568e-06, "loss": 0.6277, "step": 421 }, { "epoch": 0.5142160844841592, "grad_norm": 0.5654811023161467, "learning_rate": 9.844885832135928e-06, "loss": 0.6192, "step": 422 }, { "epoch": 0.5154346060113729, "grad_norm": 0.6220408843438429, "learning_rate": 9.84312746506127e-06, "loss": 0.6254, "step": 423 }, { "epoch": 0.5166531275385865, "grad_norm": 0.5550144456923615, "learning_rate": 9.841359346511004e-06, "loss": 0.6288, "step": 424 }, { "epoch": 0.5178716490658002, "grad_norm": 0.5804117244385404, "learning_rate": 9.83958148004518e-06, "loss": 0.6244, "step": 425 }, { "epoch": 0.5190901705930138, "grad_norm": 0.6245742605810847, "learning_rate": 9.837793869243468e-06, "loss": 0.6209, "step": 426 }, { "epoch": 0.5203086921202275, "grad_norm": 0.5661037548895256, "learning_rate": 9.83599651770517e-06, "loss": 0.6279, "step": 427 }, { "epoch": 0.5215272136474411, "grad_norm": 0.5358603369119569, "learning_rate": 9.834189429049188e-06, "loss": 0.6307, "step": 428 }, { "epoch": 0.5227457351746547, "grad_norm": 0.6122007731857034, "learning_rate": 9.832372606914038e-06, "loss": 0.6158, "step": 429 }, { "epoch": 0.5239642567018684, "grad_norm": 0.5972271574369769, "learning_rate": 9.830546054957828e-06, "loss": 0.6204, "step": 430 }, { "epoch": 0.525182778229082, "grad_norm": 0.5443858161988891, "learning_rate": 9.82870977685826e-06, "loss": 0.621, "step": 431 }, { "epoch": 0.5264012997562957, "grad_norm": 0.6250123596443754, "learning_rate": 9.826863776312621e-06, "loss": 0.6408, "step": 432 }, { "epoch": 0.5276198212835094, "grad_norm": 0.5933038352389216, "learning_rate": 9.825008057037769e-06, "loss": 0.6588, "step": 433 }, { "epoch": 0.528838342810723, "grad_norm": 0.6567920347058966, "learning_rate": 9.823142622770135e-06, "loss": 0.625, "step": 434 }, { "epoch": 0.5300568643379366, "grad_norm": 0.5779776066299945, "learning_rate": 9.821267477265705e-06, "loss": 0.6387, "step": 435 }, { "epoch": 0.5312753858651503, "grad_norm": 0.570082080677981, "learning_rate": 9.819382624300027e-06, "loss": 0.6324, "step": 436 }, { "epoch": 0.5324939073923639, "grad_norm": 0.5818606827175574, "learning_rate": 9.817488067668186e-06, "loss": 0.644, "step": 437 }, { "epoch": 0.5337124289195776, "grad_norm": 0.5476824827124901, "learning_rate": 9.815583811184809e-06, "loss": 0.6189, "step": 438 }, { "epoch": 0.5349309504467912, "grad_norm": 0.5768267508522074, "learning_rate": 9.813669858684054e-06, "loss": 0.6222, "step": 439 }, { "epoch": 0.5361494719740049, "grad_norm": 0.5120867453918215, "learning_rate": 9.8117462140196e-06, "loss": 0.6204, "step": 440 }, { "epoch": 0.5373679935012186, "grad_norm": 0.5186146607717382, "learning_rate": 9.80981288106464e-06, "loss": 0.6195, "step": 441 }, { "epoch": 0.5385865150284321, "grad_norm": 0.5895698622661449, "learning_rate": 9.807869863711878e-06, "loss": 0.6205, "step": 442 }, { "epoch": 0.5398050365556458, "grad_norm": 0.5421346973971489, "learning_rate": 9.805917165873515e-06, "loss": 0.6303, "step": 443 }, { "epoch": 0.5410235580828595, "grad_norm": 0.5227058266380313, "learning_rate": 9.803954791481239e-06, "loss": 0.6196, "step": 444 }, { "epoch": 0.5422420796100731, "grad_norm": 0.4665750459631165, "learning_rate": 9.801982744486229e-06, "loss": 0.628, "step": 445 }, { "epoch": 0.5434606011372868, "grad_norm": 0.5340051839015313, "learning_rate": 9.800001028859135e-06, "loss": 0.6321, "step": 446 }, { "epoch": 0.5446791226645004, "grad_norm": 0.49569443009344233, "learning_rate": 9.798009648590073e-06, "loss": 0.6295, "step": 447 }, { "epoch": 0.545897644191714, "grad_norm": 0.5589394978947685, "learning_rate": 9.796008607688624e-06, "loss": 0.6458, "step": 448 }, { "epoch": 0.5471161657189277, "grad_norm": 0.5349825334411198, "learning_rate": 9.793997910183815e-06, "loss": 0.6348, "step": 449 }, { "epoch": 0.5483346872461413, "grad_norm": 0.5406756193824626, "learning_rate": 9.79197756012412e-06, "loss": 0.6352, "step": 450 }, { "epoch": 0.549553208773355, "grad_norm": 0.5590939249326192, "learning_rate": 9.789947561577445e-06, "loss": 0.6345, "step": 451 }, { "epoch": 0.5507717303005687, "grad_norm": 0.5138272981689205, "learning_rate": 9.787907918631125e-06, "loss": 0.6457, "step": 452 }, { "epoch": 0.5519902518277823, "grad_norm": 0.5967975071520696, "learning_rate": 9.785858635391913e-06, "loss": 0.6059, "step": 453 }, { "epoch": 0.553208773354996, "grad_norm": 0.4912288949887055, "learning_rate": 9.783799715985973e-06, "loss": 0.6254, "step": 454 }, { "epoch": 0.5544272948822095, "grad_norm": 0.5903941074513651, "learning_rate": 9.78173116455887e-06, "loss": 0.6108, "step": 455 }, { "epoch": 0.5556458164094232, "grad_norm": 0.5632794329839387, "learning_rate": 9.779652985275562e-06, "loss": 0.6187, "step": 456 }, { "epoch": 0.5568643379366369, "grad_norm": 0.5941486268629673, "learning_rate": 9.777565182320396e-06, "loss": 0.6184, "step": 457 }, { "epoch": 0.5580828594638505, "grad_norm": 0.6416650599158464, "learning_rate": 9.775467759897092e-06, "loss": 0.6331, "step": 458 }, { "epoch": 0.5593013809910642, "grad_norm": 0.5651281069823211, "learning_rate": 9.773360722228742e-06, "loss": 0.6307, "step": 459 }, { "epoch": 0.5605199025182778, "grad_norm": 0.6620891236551917, "learning_rate": 9.771244073557792e-06, "loss": 0.6078, "step": 460 }, { "epoch": 0.5617384240454915, "grad_norm": 0.6015785675867341, "learning_rate": 9.769117818146048e-06, "loss": 0.6237, "step": 461 }, { "epoch": 0.5629569455727051, "grad_norm": 0.8038047522794796, "learning_rate": 9.766981960274653e-06, "loss": 0.6173, "step": 462 }, { "epoch": 0.5641754670999187, "grad_norm": 0.6163269598618792, "learning_rate": 9.764836504244086e-06, "loss": 0.6264, "step": 463 }, { "epoch": 0.5653939886271324, "grad_norm": 0.6244153487192251, "learning_rate": 9.762681454374148e-06, "loss": 0.6112, "step": 464 }, { "epoch": 0.5666125101543461, "grad_norm": 0.724456218504814, "learning_rate": 9.760516815003965e-06, "loss": 0.6255, "step": 465 }, { "epoch": 0.5678310316815597, "grad_norm": 0.580652096091434, "learning_rate": 9.758342590491961e-06, "loss": 0.6342, "step": 466 }, { "epoch": 0.5690495532087734, "grad_norm": 0.6644456071205537, "learning_rate": 9.756158785215866e-06, "loss": 0.6127, "step": 467 }, { "epoch": 0.570268074735987, "grad_norm": 0.5736293156748269, "learning_rate": 9.753965403572703e-06, "loss": 0.6313, "step": 468 }, { "epoch": 0.5714865962632006, "grad_norm": 0.6178186373387958, "learning_rate": 9.751762449978767e-06, "loss": 0.643, "step": 469 }, { "epoch": 0.5727051177904143, "grad_norm": 0.584712916385393, "learning_rate": 9.749549928869636e-06, "loss": 0.5948, "step": 470 }, { "epoch": 0.5739236393176279, "grad_norm": 0.6116917271773714, "learning_rate": 9.747327844700147e-06, "loss": 0.6297, "step": 471 }, { "epoch": 0.5751421608448416, "grad_norm": 0.4903955649085751, "learning_rate": 9.745096201944391e-06, "loss": 0.6251, "step": 472 }, { "epoch": 0.5763606823720553, "grad_norm": 0.6968313476556924, "learning_rate": 9.742855005095706e-06, "loss": 0.6117, "step": 473 }, { "epoch": 0.5775792038992689, "grad_norm": 0.48897486873959584, "learning_rate": 9.740604258666668e-06, "loss": 0.6058, "step": 474 }, { "epoch": 0.5787977254264826, "grad_norm": 0.7217629239411762, "learning_rate": 9.73834396718908e-06, "loss": 0.6265, "step": 475 }, { "epoch": 0.5800162469536961, "grad_norm": 0.613354162646377, "learning_rate": 9.736074135213962e-06, "loss": 0.6399, "step": 476 }, { "epoch": 0.5812347684809098, "grad_norm": 0.6447055703309105, "learning_rate": 9.733794767311545e-06, "loss": 0.6335, "step": 477 }, { "epoch": 0.5824532900081235, "grad_norm": 0.5823448015058018, "learning_rate": 9.731505868071262e-06, "loss": 0.6262, "step": 478 }, { "epoch": 0.5836718115353371, "grad_norm": 0.5125864553684497, "learning_rate": 9.729207442101736e-06, "loss": 0.6101, "step": 479 }, { "epoch": 0.5848903330625508, "grad_norm": 0.6147081791430226, "learning_rate": 9.726899494030768e-06, "loss": 0.6411, "step": 480 }, { "epoch": 0.5861088545897645, "grad_norm": 0.5467046907537908, "learning_rate": 9.724582028505336e-06, "loss": 0.6203, "step": 481 }, { "epoch": 0.587327376116978, "grad_norm": 0.5741960101018327, "learning_rate": 9.72225505019158e-06, "loss": 0.624, "step": 482 }, { "epoch": 0.5885458976441917, "grad_norm": 0.6709034274446143, "learning_rate": 9.719918563774793e-06, "loss": 0.6316, "step": 483 }, { "epoch": 0.5897644191714053, "grad_norm": 0.5633926121392079, "learning_rate": 9.71757257395941e-06, "loss": 0.6205, "step": 484 }, { "epoch": 0.590982940698619, "grad_norm": 0.5752003286544818, "learning_rate": 9.715217085469009e-06, "loss": 0.601, "step": 485 }, { "epoch": 0.5922014622258327, "grad_norm": 0.6676085473844594, "learning_rate": 9.712852103046281e-06, "loss": 0.6425, "step": 486 }, { "epoch": 0.5934199837530463, "grad_norm": 0.43714860457984767, "learning_rate": 9.710477631453044e-06, "loss": 0.6264, "step": 487 }, { "epoch": 0.59463850528026, "grad_norm": 0.7834186015627101, "learning_rate": 9.708093675470214e-06, "loss": 0.6294, "step": 488 }, { "epoch": 0.5958570268074735, "grad_norm": 0.5229823852593044, "learning_rate": 9.705700239897809e-06, "loss": 0.6253, "step": 489 }, { "epoch": 0.5970755483346872, "grad_norm": 0.6641427142623177, "learning_rate": 9.70329732955493e-06, "loss": 0.6208, "step": 490 }, { "epoch": 0.5982940698619009, "grad_norm": 0.5777300627058165, "learning_rate": 9.70088494927976e-06, "loss": 0.62, "step": 491 }, { "epoch": 0.5995125913891145, "grad_norm": 0.47427848956457735, "learning_rate": 9.698463103929542e-06, "loss": 0.6168, "step": 492 }, { "epoch": 0.6007311129163282, "grad_norm": 0.6176694192284208, "learning_rate": 9.696031798380586e-06, "loss": 0.6192, "step": 493 }, { "epoch": 0.6019496344435419, "grad_norm": 0.5380294280704867, "learning_rate": 9.693591037528239e-06, "loss": 0.6324, "step": 494 }, { "epoch": 0.6031681559707555, "grad_norm": 0.5270092433580651, "learning_rate": 9.691140826286893e-06, "loss": 0.6275, "step": 495 }, { "epoch": 0.6043866774979691, "grad_norm": 0.5928211370503502, "learning_rate": 9.688681169589971e-06, "loss": 0.6295, "step": 496 }, { "epoch": 0.6056051990251827, "grad_norm": 0.487281690093329, "learning_rate": 9.686212072389904e-06, "loss": 0.6157, "step": 497 }, { "epoch": 0.6068237205523964, "grad_norm": 0.5179266059337351, "learning_rate": 9.68373353965814e-06, "loss": 0.6098, "step": 498 }, { "epoch": 0.6080422420796101, "grad_norm": 0.5314913870970437, "learning_rate": 9.68124557638512e-06, "loss": 0.6173, "step": 499 }, { "epoch": 0.6092607636068237, "grad_norm": 0.4844744555610714, "learning_rate": 9.678748187580278e-06, "loss": 0.6186, "step": 500 }, { "epoch": 0.6104792851340374, "grad_norm": 0.5188776477142794, "learning_rate": 9.676241378272022e-06, "loss": 0.6168, "step": 501 }, { "epoch": 0.6116978066612511, "grad_norm": 0.49668970689497427, "learning_rate": 9.673725153507727e-06, "loss": 0.6128, "step": 502 }, { "epoch": 0.6129163281884646, "grad_norm": 0.5049088012633238, "learning_rate": 9.67119951835373e-06, "loss": 0.6204, "step": 503 }, { "epoch": 0.6141348497156783, "grad_norm": 0.5286755135827618, "learning_rate": 9.66866447789531e-06, "loss": 0.6321, "step": 504 }, { "epoch": 0.6153533712428919, "grad_norm": 0.5414829955250333, "learning_rate": 9.666120037236692e-06, "loss": 0.6073, "step": 505 }, { "epoch": 0.6165718927701056, "grad_norm": 0.5929807296645003, "learning_rate": 9.663566201501017e-06, "loss": 0.6219, "step": 506 }, { "epoch": 0.6177904142973193, "grad_norm": 0.565513002212362, "learning_rate": 9.66100297583035e-06, "loss": 0.6218, "step": 507 }, { "epoch": 0.6190089358245329, "grad_norm": 0.48043459347807704, "learning_rate": 9.65843036538566e-06, "loss": 0.607, "step": 508 }, { "epoch": 0.6202274573517466, "grad_norm": 0.6289509926942585, "learning_rate": 9.655848375346812e-06, "loss": 0.6396, "step": 509 }, { "epoch": 0.6214459788789602, "grad_norm": 0.5609440147588081, "learning_rate": 9.65325701091256e-06, "loss": 0.6303, "step": 510 }, { "epoch": 0.6226645004061738, "grad_norm": 0.5893573188478602, "learning_rate": 9.650656277300525e-06, "loss": 0.6166, "step": 511 }, { "epoch": 0.6238830219333875, "grad_norm": 0.5628137809478111, "learning_rate": 9.6480461797472e-06, "loss": 0.6291, "step": 512 }, { "epoch": 0.6251015434606011, "grad_norm": 0.5493464215154626, "learning_rate": 9.645426723507929e-06, "loss": 0.6222, "step": 513 }, { "epoch": 0.6263200649878148, "grad_norm": 0.5629698357909129, "learning_rate": 9.6427979138569e-06, "loss": 0.6317, "step": 514 }, { "epoch": 0.6275385865150285, "grad_norm": 0.6664927672498832, "learning_rate": 9.640159756087136e-06, "loss": 0.6382, "step": 515 }, { "epoch": 0.628757108042242, "grad_norm": 0.5522749634660304, "learning_rate": 9.637512255510475e-06, "loss": 0.6143, "step": 516 }, { "epoch": 0.6299756295694557, "grad_norm": 0.5532267628661862, "learning_rate": 9.63485541745757e-06, "loss": 0.6374, "step": 517 }, { "epoch": 0.6311941510966693, "grad_norm": 0.6876124654936631, "learning_rate": 9.632189247277885e-06, "loss": 0.6392, "step": 518 }, { "epoch": 0.632412672623883, "grad_norm": 0.653192030137328, "learning_rate": 9.629513750339656e-06, "loss": 0.6146, "step": 519 }, { "epoch": 0.6336311941510967, "grad_norm": 0.5264590327684809, "learning_rate": 9.626828932029907e-06, "loss": 0.6187, "step": 520 }, { "epoch": 0.6348497156783103, "grad_norm": 0.6140627235902801, "learning_rate": 9.624134797754437e-06, "loss": 0.5948, "step": 521 }, { "epoch": 0.636068237205524, "grad_norm": 0.715948251788629, "learning_rate": 9.62143135293779e-06, "loss": 0.6221, "step": 522 }, { "epoch": 0.6372867587327377, "grad_norm": 0.6814424426040064, "learning_rate": 9.618718603023261e-06, "loss": 0.6279, "step": 523 }, { "epoch": 0.6385052802599512, "grad_norm": 0.600168318088034, "learning_rate": 9.615996553472885e-06, "loss": 0.6267, "step": 524 }, { "epoch": 0.6397238017871649, "grad_norm": 0.5619413500131725, "learning_rate": 9.613265209767417e-06, "loss": 0.6288, "step": 525 }, { "epoch": 0.6409423233143785, "grad_norm": 0.5903652755615201, "learning_rate": 9.610524577406325e-06, "loss": 0.6305, "step": 526 }, { "epoch": 0.6421608448415922, "grad_norm": 0.5087861988940737, "learning_rate": 9.607774661907783e-06, "loss": 0.6192, "step": 527 }, { "epoch": 0.6433793663688059, "grad_norm": 0.6555944853088764, "learning_rate": 9.605015468808651e-06, "loss": 0.6255, "step": 528 }, { "epoch": 0.6445978878960195, "grad_norm": 0.6123139168204214, "learning_rate": 9.602247003664476e-06, "loss": 0.6185, "step": 529 }, { "epoch": 0.6458164094232332, "grad_norm": 0.5503960050113602, "learning_rate": 9.599469272049468e-06, "loss": 0.6385, "step": 530 }, { "epoch": 0.6470349309504468, "grad_norm": 0.5823472571150912, "learning_rate": 9.596682279556499e-06, "loss": 0.6241, "step": 531 }, { "epoch": 0.6482534524776604, "grad_norm": 0.5840631388468679, "learning_rate": 9.593886031797081e-06, "loss": 0.625, "step": 532 }, { "epoch": 0.6494719740048741, "grad_norm": 0.5622117171111194, "learning_rate": 9.591080534401371e-06, "loss": 0.6192, "step": 533 }, { "epoch": 0.6506904955320877, "grad_norm": 0.5707745901206253, "learning_rate": 9.588265793018141e-06, "loss": 0.6391, "step": 534 }, { "epoch": 0.6519090170593014, "grad_norm": 0.5896800585312665, "learning_rate": 9.58544181331478e-06, "loss": 0.6339, "step": 535 }, { "epoch": 0.6531275385865151, "grad_norm": 0.5209906229065117, "learning_rate": 9.582608600977276e-06, "loss": 0.601, "step": 536 }, { "epoch": 0.6543460601137286, "grad_norm": 0.5155011577582275, "learning_rate": 9.579766161710209e-06, "loss": 0.6015, "step": 537 }, { "epoch": 0.6555645816409423, "grad_norm": 0.48807425767261786, "learning_rate": 9.576914501236734e-06, "loss": 0.6167, "step": 538 }, { "epoch": 0.656783103168156, "grad_norm": 0.5579148908182612, "learning_rate": 9.574053625298577e-06, "loss": 0.6193, "step": 539 }, { "epoch": 0.6580016246953696, "grad_norm": 0.5287053319535842, "learning_rate": 9.571183539656011e-06, "loss": 0.6291, "step": 540 }, { "epoch": 0.6592201462225833, "grad_norm": 0.6191360016551267, "learning_rate": 9.568304250087864e-06, "loss": 0.6139, "step": 541 }, { "epoch": 0.6604386677497969, "grad_norm": 0.5099069268786582, "learning_rate": 9.565415762391485e-06, "loss": 0.6013, "step": 542 }, { "epoch": 0.6616571892770106, "grad_norm": 0.5421293076141, "learning_rate": 9.562518082382751e-06, "loss": 0.5907, "step": 543 }, { "epoch": 0.6628757108042242, "grad_norm": 0.5498541039203616, "learning_rate": 9.559611215896041e-06, "loss": 0.627, "step": 544 }, { "epoch": 0.6640942323314378, "grad_norm": 0.5680961983046815, "learning_rate": 9.556695168784236e-06, "loss": 0.5952, "step": 545 }, { "epoch": 0.6653127538586515, "grad_norm": 0.5218060004228549, "learning_rate": 9.553769946918698e-06, "loss": 0.6228, "step": 546 }, { "epoch": 0.6665312753858651, "grad_norm": 0.5543031912725007, "learning_rate": 9.550835556189264e-06, "loss": 0.6338, "step": 547 }, { "epoch": 0.6677497969130788, "grad_norm": 0.5668524593324846, "learning_rate": 9.547892002504233e-06, "loss": 0.6219, "step": 548 }, { "epoch": 0.6689683184402925, "grad_norm": 0.5873694380478705, "learning_rate": 9.544939291790352e-06, "loss": 0.624, "step": 549 }, { "epoch": 0.670186839967506, "grad_norm": 0.5399986226537774, "learning_rate": 9.541977429992803e-06, "loss": 0.6385, "step": 550 }, { "epoch": 0.6714053614947197, "grad_norm": 0.7171400926799747, "learning_rate": 9.5390064230752e-06, "loss": 0.621, "step": 551 }, { "epoch": 0.6726238830219334, "grad_norm": 0.6092647452638789, "learning_rate": 9.536026277019562e-06, "loss": 0.6223, "step": 552 }, { "epoch": 0.673842404549147, "grad_norm": 0.683988747327427, "learning_rate": 9.533036997826315e-06, "loss": 0.6199, "step": 553 }, { "epoch": 0.6750609260763607, "grad_norm": 0.5791819914636441, "learning_rate": 9.530038591514275e-06, "loss": 0.6328, "step": 554 }, { "epoch": 0.6762794476035743, "grad_norm": 0.6782628719672897, "learning_rate": 9.527031064120632e-06, "loss": 0.6127, "step": 555 }, { "epoch": 0.677497969130788, "grad_norm": 0.6767775073979123, "learning_rate": 9.524014421700942e-06, "loss": 0.6186, "step": 556 }, { "epoch": 0.6787164906580017, "grad_norm": 0.5114857558759379, "learning_rate": 9.520988670329114e-06, "loss": 0.63, "step": 557 }, { "epoch": 0.6799350121852152, "grad_norm": 0.5501380880007342, "learning_rate": 9.517953816097396e-06, "loss": 0.5915, "step": 558 }, { "epoch": 0.6811535337124289, "grad_norm": 0.6714746829201106, "learning_rate": 9.514909865116368e-06, "loss": 0.6067, "step": 559 }, { "epoch": 0.6823720552396426, "grad_norm": 0.5375092336126965, "learning_rate": 9.511856823514924e-06, "loss": 0.596, "step": 560 }, { "epoch": 0.6835905767668562, "grad_norm": 0.6176188040728243, "learning_rate": 9.508794697440257e-06, "loss": 0.6333, "step": 561 }, { "epoch": 0.6848090982940699, "grad_norm": 0.6212303271054956, "learning_rate": 9.505723493057862e-06, "loss": 0.6178, "step": 562 }, { "epoch": 0.6860276198212835, "grad_norm": 0.5377188134801542, "learning_rate": 9.502643216551502e-06, "loss": 0.6017, "step": 563 }, { "epoch": 0.6872461413484972, "grad_norm": 0.6362000539969834, "learning_rate": 9.499553874123213e-06, "loss": 0.6392, "step": 564 }, { "epoch": 0.6884646628757108, "grad_norm": 0.5480382319562058, "learning_rate": 9.496455471993284e-06, "loss": 0.6113, "step": 565 }, { "epoch": 0.6896831844029244, "grad_norm": 0.6994517506614581, "learning_rate": 9.49334801640024e-06, "loss": 0.6327, "step": 566 }, { "epoch": 0.6909017059301381, "grad_norm": 0.5335729160289857, "learning_rate": 9.490231513600842e-06, "loss": 0.6218, "step": 567 }, { "epoch": 0.6921202274573518, "grad_norm": 0.6063268804347564, "learning_rate": 9.487105969870068e-06, "loss": 0.6174, "step": 568 }, { "epoch": 0.6933387489845654, "grad_norm": 0.6267394635949436, "learning_rate": 9.48397139150109e-06, "loss": 0.605, "step": 569 }, { "epoch": 0.6945572705117791, "grad_norm": 0.48229350211609867, "learning_rate": 9.480827784805278e-06, "loss": 0.6138, "step": 570 }, { "epoch": 0.6957757920389926, "grad_norm": 0.6094361236823382, "learning_rate": 9.477675156112183e-06, "loss": 0.616, "step": 571 }, { "epoch": 0.6969943135662063, "grad_norm": 0.5646668548267415, "learning_rate": 9.474513511769513e-06, "loss": 0.6257, "step": 572 }, { "epoch": 0.69821283509342, "grad_norm": 0.5605266691062354, "learning_rate": 9.47134285814314e-06, "loss": 0.623, "step": 573 }, { "epoch": 0.6994313566206336, "grad_norm": 0.5976205093855237, "learning_rate": 9.468163201617063e-06, "loss": 0.6182, "step": 574 }, { "epoch": 0.7006498781478473, "grad_norm": 0.5736754942220608, "learning_rate": 9.464974548593415e-06, "loss": 0.5973, "step": 575 }, { "epoch": 0.7018683996750609, "grad_norm": 0.5782971035374301, "learning_rate": 9.461776905492446e-06, "loss": 0.6021, "step": 576 }, { "epoch": 0.7030869212022746, "grad_norm": 0.5094228164183464, "learning_rate": 9.458570278752501e-06, "loss": 0.6028, "step": 577 }, { "epoch": 0.7043054427294883, "grad_norm": 0.5803305530484321, "learning_rate": 9.455354674830016e-06, "loss": 0.6224, "step": 578 }, { "epoch": 0.7055239642567018, "grad_norm": 0.5229464149205902, "learning_rate": 9.452130100199504e-06, "loss": 0.6157, "step": 579 }, { "epoch": 0.7067424857839155, "grad_norm": 0.5965075801420928, "learning_rate": 9.448896561353536e-06, "loss": 0.6062, "step": 580 }, { "epoch": 0.7079610073111292, "grad_norm": 0.5275236801559984, "learning_rate": 9.445654064802738e-06, "loss": 0.611, "step": 581 }, { "epoch": 0.7091795288383428, "grad_norm": 0.511555457965572, "learning_rate": 9.442402617075765e-06, "loss": 0.6263, "step": 582 }, { "epoch": 0.7103980503655565, "grad_norm": 0.5490562182756723, "learning_rate": 9.439142224719302e-06, "loss": 0.6236, "step": 583 }, { "epoch": 0.7116165718927701, "grad_norm": 0.5258200584782562, "learning_rate": 9.435872894298037e-06, "loss": 0.6106, "step": 584 }, { "epoch": 0.7128350934199837, "grad_norm": 0.5189357566107585, "learning_rate": 9.43259463239466e-06, "loss": 0.636, "step": 585 }, { "epoch": 0.7140536149471974, "grad_norm": 0.5097577073371684, "learning_rate": 9.429307445609841e-06, "loss": 0.6337, "step": 586 }, { "epoch": 0.715272136474411, "grad_norm": 0.6069103268356187, "learning_rate": 9.426011340562222e-06, "loss": 0.6177, "step": 587 }, { "epoch": 0.7164906580016247, "grad_norm": 0.48842546371203027, "learning_rate": 9.422706323888398e-06, "loss": 0.6011, "step": 588 }, { "epoch": 0.7177091795288384, "grad_norm": 0.5365657101299985, "learning_rate": 9.419392402242912e-06, "loss": 0.6007, "step": 589 }, { "epoch": 0.718927701056052, "grad_norm": 0.5101507591790149, "learning_rate": 9.416069582298236e-06, "loss": 0.6175, "step": 590 }, { "epoch": 0.7201462225832657, "grad_norm": 0.4516555710559031, "learning_rate": 9.412737870744752e-06, "loss": 0.6107, "step": 591 }, { "epoch": 0.7213647441104792, "grad_norm": 0.4881759934731241, "learning_rate": 9.409397274290756e-06, "loss": 0.6224, "step": 592 }, { "epoch": 0.7225832656376929, "grad_norm": 0.45459978443672416, "learning_rate": 9.406047799662426e-06, "loss": 0.6089, "step": 593 }, { "epoch": 0.7238017871649066, "grad_norm": 0.505751917086364, "learning_rate": 9.402689453603815e-06, "loss": 0.6244, "step": 594 }, { "epoch": 0.7250203086921202, "grad_norm": 0.5110751597586063, "learning_rate": 9.399322242876843e-06, "loss": 0.601, "step": 595 }, { "epoch": 0.7262388302193339, "grad_norm": 0.504579475445371, "learning_rate": 9.395946174261274e-06, "loss": 0.6216, "step": 596 }, { "epoch": 0.7274573517465476, "grad_norm": 0.534595723022526, "learning_rate": 9.392561254554712e-06, "loss": 0.6067, "step": 597 }, { "epoch": 0.7286758732737612, "grad_norm": 0.5583009202449097, "learning_rate": 9.38916749057258e-06, "loss": 0.6249, "step": 598 }, { "epoch": 0.7298943948009748, "grad_norm": 0.5059716144312469, "learning_rate": 9.385764889148107e-06, "loss": 0.6115, "step": 599 }, { "epoch": 0.7311129163281884, "grad_norm": 0.6121449401534393, "learning_rate": 9.382353457132318e-06, "loss": 0.6077, "step": 600 }, { "epoch": 0.7323314378554021, "grad_norm": 0.4829522546788395, "learning_rate": 9.378933201394019e-06, "loss": 0.6216, "step": 601 }, { "epoch": 0.7335499593826158, "grad_norm": 0.5436028145378481, "learning_rate": 9.375504128819779e-06, "loss": 0.6185, "step": 602 }, { "epoch": 0.7347684809098294, "grad_norm": 0.5172970082009579, "learning_rate": 9.372066246313922e-06, "loss": 0.644, "step": 603 }, { "epoch": 0.7359870024370431, "grad_norm": 0.4738987982796835, "learning_rate": 9.368619560798511e-06, "loss": 0.6246, "step": 604 }, { "epoch": 0.7372055239642566, "grad_norm": 0.4525040495867516, "learning_rate": 9.36516407921333e-06, "loss": 0.6109, "step": 605 }, { "epoch": 0.7384240454914703, "grad_norm": 0.5076237716007553, "learning_rate": 9.361699808515877e-06, "loss": 0.6151, "step": 606 }, { "epoch": 0.739642567018684, "grad_norm": 0.5074655977130175, "learning_rate": 9.358226755681342e-06, "loss": 0.6082, "step": 607 }, { "epoch": 0.7408610885458976, "grad_norm": 0.4840933107276308, "learning_rate": 9.354744927702607e-06, "loss": 0.615, "step": 608 }, { "epoch": 0.7420796100731113, "grad_norm": 0.5219253787729252, "learning_rate": 9.351254331590216e-06, "loss": 0.5996, "step": 609 }, { "epoch": 0.743298131600325, "grad_norm": 0.5601150249253273, "learning_rate": 9.347754974372365e-06, "loss": 0.6032, "step": 610 }, { "epoch": 0.7445166531275386, "grad_norm": 0.4986838680038737, "learning_rate": 9.344246863094893e-06, "loss": 0.5976, "step": 611 }, { "epoch": 0.7457351746547523, "grad_norm": 0.4948788586568317, "learning_rate": 9.340730004821266e-06, "loss": 0.6085, "step": 612 }, { "epoch": 0.7469536961819658, "grad_norm": 0.5238689007424114, "learning_rate": 9.33720440663256e-06, "loss": 0.6129, "step": 613 }, { "epoch": 0.7481722177091795, "grad_norm": 0.47607891045094536, "learning_rate": 9.33367007562745e-06, "loss": 0.6199, "step": 614 }, { "epoch": 0.7493907392363932, "grad_norm": 0.4955962984701164, "learning_rate": 9.330127018922195e-06, "loss": 0.5949, "step": 615 }, { "epoch": 0.7506092607636068, "grad_norm": 0.6100851359106775, "learning_rate": 9.326575243650618e-06, "loss": 0.6143, "step": 616 }, { "epoch": 0.7518277822908205, "grad_norm": 0.48084331485799453, "learning_rate": 9.323014756964104e-06, "loss": 0.6064, "step": 617 }, { "epoch": 0.7530463038180342, "grad_norm": 0.6768728956598579, "learning_rate": 9.31944556603157e-06, "loss": 0.6229, "step": 618 }, { "epoch": 0.7542648253452477, "grad_norm": 0.6664441895394185, "learning_rate": 9.315867678039469e-06, "loss": 0.631, "step": 619 }, { "epoch": 0.7554833468724614, "grad_norm": 0.6265982250759069, "learning_rate": 9.312281100191752e-06, "loss": 0.63, "step": 620 }, { "epoch": 0.756701868399675, "grad_norm": 0.6297592873763573, "learning_rate": 9.308685839709878e-06, "loss": 0.6264, "step": 621 }, { "epoch": 0.7579203899268887, "grad_norm": 0.5583877292859594, "learning_rate": 9.305081903832784e-06, "loss": 0.5974, "step": 622 }, { "epoch": 0.7591389114541024, "grad_norm": 0.5001555304308823, "learning_rate": 9.301469299816874e-06, "loss": 0.6117, "step": 623 }, { "epoch": 0.760357432981316, "grad_norm": 0.5390093336249369, "learning_rate": 9.297848034936007e-06, "loss": 0.6088, "step": 624 }, { "epoch": 0.7615759545085297, "grad_norm": 0.5678848176997396, "learning_rate": 9.294218116481476e-06, "loss": 0.6018, "step": 625 }, { "epoch": 0.7627944760357434, "grad_norm": 0.5844799796481355, "learning_rate": 9.290579551762002e-06, "loss": 0.604, "step": 626 }, { "epoch": 0.7640129975629569, "grad_norm": 0.5159143134307803, "learning_rate": 9.286932348103716e-06, "loss": 0.6083, "step": 627 }, { "epoch": 0.7652315190901706, "grad_norm": 0.5326620021016965, "learning_rate": 9.283276512850137e-06, "loss": 0.6206, "step": 628 }, { "epoch": 0.7664500406173842, "grad_norm": 0.5963411548189359, "learning_rate": 9.27961205336217e-06, "loss": 0.6108, "step": 629 }, { "epoch": 0.7676685621445979, "grad_norm": 0.5014319447503888, "learning_rate": 9.275938977018082e-06, "loss": 0.6034, "step": 630 }, { "epoch": 0.7688870836718116, "grad_norm": 0.5126870488620024, "learning_rate": 9.272257291213488e-06, "loss": 0.6176, "step": 631 }, { "epoch": 0.7701056051990252, "grad_norm": 0.4787184158365945, "learning_rate": 9.268567003361341e-06, "loss": 0.607, "step": 632 }, { "epoch": 0.7713241267262388, "grad_norm": 0.557057771330538, "learning_rate": 9.264868120891913e-06, "loss": 0.6318, "step": 633 }, { "epoch": 0.7725426482534524, "grad_norm": 0.535409561474859, "learning_rate": 9.261160651252778e-06, "loss": 0.62, "step": 634 }, { "epoch": 0.7737611697806661, "grad_norm": 0.4814507650875912, "learning_rate": 9.257444601908806e-06, "loss": 0.6074, "step": 635 }, { "epoch": 0.7749796913078798, "grad_norm": 0.6101990877396614, "learning_rate": 9.253719980342134e-06, "loss": 0.6208, "step": 636 }, { "epoch": 0.7761982128350934, "grad_norm": 0.5403900228621851, "learning_rate": 9.249986794052168e-06, "loss": 0.5968, "step": 637 }, { "epoch": 0.7774167343623071, "grad_norm": 0.5703352381203307, "learning_rate": 9.24624505055555e-06, "loss": 0.626, "step": 638 }, { "epoch": 0.7786352558895208, "grad_norm": 0.5241053254774348, "learning_rate": 9.24249475738616e-06, "loss": 0.5959, "step": 639 }, { "epoch": 0.7798537774167343, "grad_norm": 0.5780889050780196, "learning_rate": 9.238735922095083e-06, "loss": 0.5783, "step": 640 }, { "epoch": 0.781072298943948, "grad_norm": 0.5164354758896532, "learning_rate": 9.234968552250612e-06, "loss": 0.6192, "step": 641 }, { "epoch": 0.7822908204711616, "grad_norm": 0.5672667605052139, "learning_rate": 9.231192655438222e-06, "loss": 0.6003, "step": 642 }, { "epoch": 0.7835093419983753, "grad_norm": 0.5135255221881695, "learning_rate": 9.22740823926055e-06, "loss": 0.6082, "step": 643 }, { "epoch": 0.784727863525589, "grad_norm": 0.5584536390516718, "learning_rate": 9.223615311337395e-06, "loss": 0.614, "step": 644 }, { "epoch": 0.7859463850528026, "grad_norm": 0.5216134140261057, "learning_rate": 9.219813879305692e-06, "loss": 0.6012, "step": 645 }, { "epoch": 0.7871649065800163, "grad_norm": 0.5736410922364097, "learning_rate": 9.216003950819497e-06, "loss": 0.6194, "step": 646 }, { "epoch": 0.7883834281072299, "grad_norm": 0.5049300976776431, "learning_rate": 9.21218553354997e-06, "loss": 0.6115, "step": 647 }, { "epoch": 0.7896019496344435, "grad_norm": 0.5596092247163901, "learning_rate": 9.208358635185372e-06, "loss": 0.6002, "step": 648 }, { "epoch": 0.7908204711616572, "grad_norm": 0.6492697062225624, "learning_rate": 9.204523263431034e-06, "loss": 0.6087, "step": 649 }, { "epoch": 0.7920389926888708, "grad_norm": 0.5493287831302429, "learning_rate": 9.200679426009347e-06, "loss": 0.6134, "step": 650 }, { "epoch": 0.7932575142160845, "grad_norm": 0.5393423473357866, "learning_rate": 9.196827130659752e-06, "loss": 0.6077, "step": 651 }, { "epoch": 0.7944760357432982, "grad_norm": 0.4822437257768845, "learning_rate": 9.192966385138714e-06, "loss": 0.6206, "step": 652 }, { "epoch": 0.7956945572705117, "grad_norm": 0.5489723911011465, "learning_rate": 9.189097197219718e-06, "loss": 0.6237, "step": 653 }, { "epoch": 0.7969130787977254, "grad_norm": 0.465446021569481, "learning_rate": 9.185219574693242e-06, "loss": 0.5969, "step": 654 }, { "epoch": 0.7981316003249391, "grad_norm": 0.5608574163560325, "learning_rate": 9.181333525366756e-06, "loss": 0.6116, "step": 655 }, { "epoch": 0.7993501218521527, "grad_norm": 0.47338894132856235, "learning_rate": 9.177439057064684e-06, "loss": 0.5898, "step": 656 }, { "epoch": 0.8005686433793664, "grad_norm": 0.5538432939088667, "learning_rate": 9.17353617762841e-06, "loss": 0.6042, "step": 657 }, { "epoch": 0.80178716490658, "grad_norm": 0.5129997268787104, "learning_rate": 9.169624894916252e-06, "loss": 0.6045, "step": 658 }, { "epoch": 0.8030056864337937, "grad_norm": 0.491484979669411, "learning_rate": 9.165705216803446e-06, "loss": 0.6159, "step": 659 }, { "epoch": 0.8042242079610074, "grad_norm": 0.4865407913972347, "learning_rate": 9.161777151182137e-06, "loss": 0.6095, "step": 660 }, { "epoch": 0.8054427294882209, "grad_norm": 0.5482167186016993, "learning_rate": 9.15784070596135e-06, "loss": 0.6063, "step": 661 }, { "epoch": 0.8066612510154346, "grad_norm": 0.4899874123032885, "learning_rate": 9.153895889066988e-06, "loss": 0.5993, "step": 662 }, { "epoch": 0.8078797725426482, "grad_norm": 0.4971658879090838, "learning_rate": 9.149942708441808e-06, "loss": 0.6349, "step": 663 }, { "epoch": 0.8090982940698619, "grad_norm": 0.4774943646678603, "learning_rate": 9.145981172045407e-06, "loss": 0.5937, "step": 664 }, { "epoch": 0.8103168155970756, "grad_norm": 0.5239506111079297, "learning_rate": 9.142011287854206e-06, "loss": 0.596, "step": 665 }, { "epoch": 0.8115353371242892, "grad_norm": 0.49171964255133527, "learning_rate": 9.138033063861436e-06, "loss": 0.5866, "step": 666 }, { "epoch": 0.8127538586515028, "grad_norm": 0.5198610207245239, "learning_rate": 9.134046508077116e-06, "loss": 0.6022, "step": 667 }, { "epoch": 0.8139723801787165, "grad_norm": 0.4768598644726109, "learning_rate": 9.130051628528046e-06, "loss": 0.6057, "step": 668 }, { "epoch": 0.8151909017059301, "grad_norm": 0.539806947114795, "learning_rate": 9.12604843325778e-06, "loss": 0.6175, "step": 669 }, { "epoch": 0.8164094232331438, "grad_norm": 0.49480984634291075, "learning_rate": 9.122036930326618e-06, "loss": 0.6214, "step": 670 }, { "epoch": 0.8176279447603574, "grad_norm": 0.5006857848218066, "learning_rate": 9.118017127811591e-06, "loss": 0.6084, "step": 671 }, { "epoch": 0.8188464662875711, "grad_norm": 0.4713529456554149, "learning_rate": 9.113989033806434e-06, "loss": 0.6177, "step": 672 }, { "epoch": 0.8200649878147848, "grad_norm": 0.5234744664186434, "learning_rate": 9.10995265642158e-06, "loss": 0.623, "step": 673 }, { "epoch": 0.8212835093419983, "grad_norm": 0.46959588708419714, "learning_rate": 9.105908003784142e-06, "loss": 0.6223, "step": 674 }, { "epoch": 0.822502030869212, "grad_norm": 0.483130564646199, "learning_rate": 9.101855084037893e-06, "loss": 0.6079, "step": 675 }, { "epoch": 0.8237205523964257, "grad_norm": 0.4707432015389284, "learning_rate": 9.097793905343251e-06, "loss": 0.6246, "step": 676 }, { "epoch": 0.8249390739236393, "grad_norm": 0.5109208158836949, "learning_rate": 9.093724475877262e-06, "loss": 0.6223, "step": 677 }, { "epoch": 0.826157595450853, "grad_norm": 0.524528742300806, "learning_rate": 9.089646803833589e-06, "loss": 0.6054, "step": 678 }, { "epoch": 0.8273761169780666, "grad_norm": 0.48479589382874644, "learning_rate": 9.085560897422487e-06, "loss": 0.5978, "step": 679 }, { "epoch": 0.8285946385052803, "grad_norm": 0.520310530932384, "learning_rate": 9.081466764870795e-06, "loss": 0.6141, "step": 680 }, { "epoch": 0.829813160032494, "grad_norm": 0.5320998645898771, "learning_rate": 9.07736441442191e-06, "loss": 0.5952, "step": 681 }, { "epoch": 0.8310316815597075, "grad_norm": 0.522944143229052, "learning_rate": 9.073253854335777e-06, "loss": 0.5966, "step": 682 }, { "epoch": 0.8322502030869212, "grad_norm": 0.5438608445694643, "learning_rate": 9.069135092888874e-06, "loss": 0.6036, "step": 683 }, { "epoch": 0.8334687246141349, "grad_norm": 0.4929729088140395, "learning_rate": 9.06500813837419e-06, "loss": 0.603, "step": 684 }, { "epoch": 0.8346872461413485, "grad_norm": 0.5376420120613337, "learning_rate": 9.060872999101206e-06, "loss": 0.6151, "step": 685 }, { "epoch": 0.8359057676685622, "grad_norm": 0.52471690520972, "learning_rate": 9.056729683395892e-06, "loss": 0.581, "step": 686 }, { "epoch": 0.8371242891957758, "grad_norm": 0.49865247625736375, "learning_rate": 9.052578199600675e-06, "loss": 0.6067, "step": 687 }, { "epoch": 0.8383428107229894, "grad_norm": 0.5035636474694776, "learning_rate": 9.048418556074425e-06, "loss": 0.605, "step": 688 }, { "epoch": 0.8395613322502031, "grad_norm": 0.5460518150855164, "learning_rate": 9.04425076119245e-06, "loss": 0.6008, "step": 689 }, { "epoch": 0.8407798537774167, "grad_norm": 0.5154326591857874, "learning_rate": 9.040074823346466e-06, "loss": 0.612, "step": 690 }, { "epoch": 0.8419983753046304, "grad_norm": 0.41895451726050503, "learning_rate": 9.035890750944583e-06, "loss": 0.5947, "step": 691 }, { "epoch": 0.843216896831844, "grad_norm": 0.49674088276174516, "learning_rate": 9.03169855241129e-06, "loss": 0.625, "step": 692 }, { "epoch": 0.8444354183590577, "grad_norm": 0.5650371934623263, "learning_rate": 9.02749823618744e-06, "loss": 0.5954, "step": 693 }, { "epoch": 0.8456539398862714, "grad_norm": 0.5010709938981562, "learning_rate": 9.02328981073023e-06, "loss": 0.6071, "step": 694 }, { "epoch": 0.8468724614134849, "grad_norm": 0.5831039880668286, "learning_rate": 9.019073284513184e-06, "loss": 0.5989, "step": 695 }, { "epoch": 0.8480909829406986, "grad_norm": 0.5796544622455602, "learning_rate": 9.014848666026138e-06, "loss": 0.6328, "step": 696 }, { "epoch": 0.8493095044679123, "grad_norm": 0.5898423233515925, "learning_rate": 9.01061596377522e-06, "loss": 0.6316, "step": 697 }, { "epoch": 0.8505280259951259, "grad_norm": 0.576717321636104, "learning_rate": 9.006375186282832e-06, "loss": 0.6129, "step": 698 }, { "epoch": 0.8517465475223396, "grad_norm": 0.5274725251295577, "learning_rate": 9.002126342087643e-06, "loss": 0.6103, "step": 699 }, { "epoch": 0.8529650690495532, "grad_norm": 0.5405289062395403, "learning_rate": 8.997869439744555e-06, "loss": 0.6252, "step": 700 }, { "epoch": 0.8541835905767668, "grad_norm": 0.5521347732238037, "learning_rate": 8.993604487824701e-06, "loss": 0.6008, "step": 701 }, { "epoch": 0.8554021121039805, "grad_norm": 0.5196724445810474, "learning_rate": 8.989331494915417e-06, "loss": 0.6185, "step": 702 }, { "epoch": 0.8566206336311941, "grad_norm": 0.5683878673891257, "learning_rate": 8.985050469620236e-06, "loss": 0.6245, "step": 703 }, { "epoch": 0.8578391551584078, "grad_norm": 0.5407694973000146, "learning_rate": 8.980761420558855e-06, "loss": 0.6142, "step": 704 }, { "epoch": 0.8590576766856215, "grad_norm": 0.5649995760138024, "learning_rate": 8.976464356367133e-06, "loss": 0.5985, "step": 705 }, { "epoch": 0.8602761982128351, "grad_norm": 0.4922853729727254, "learning_rate": 8.972159285697066e-06, "loss": 0.6128, "step": 706 }, { "epoch": 0.8614947197400488, "grad_norm": 0.5653149236554849, "learning_rate": 8.967846217216771e-06, "loss": 0.6085, "step": 707 }, { "epoch": 0.8627132412672623, "grad_norm": 0.5367471044143063, "learning_rate": 8.963525159610465e-06, "loss": 0.6148, "step": 708 }, { "epoch": 0.863931762794476, "grad_norm": 0.6165337631503633, "learning_rate": 8.959196121578455e-06, "loss": 0.6152, "step": 709 }, { "epoch": 0.8651502843216897, "grad_norm": 0.4805242301641202, "learning_rate": 8.954859111837115e-06, "loss": 0.6012, "step": 710 }, { "epoch": 0.8663688058489033, "grad_norm": 0.5673830583367931, "learning_rate": 8.950514139118868e-06, "loss": 0.6137, "step": 711 }, { "epoch": 0.867587327376117, "grad_norm": 0.6116666852593193, "learning_rate": 8.946161212172172e-06, "loss": 0.6067, "step": 712 }, { "epoch": 0.8688058489033307, "grad_norm": 0.4787324171983748, "learning_rate": 8.941800339761503e-06, "loss": 0.6229, "step": 713 }, { "epoch": 0.8700243704305443, "grad_norm": 0.5603801815973803, "learning_rate": 8.937431530667329e-06, "loss": 0.6105, "step": 714 }, { "epoch": 0.871242891957758, "grad_norm": 0.5681506397184968, "learning_rate": 8.933054793686102e-06, "loss": 0.6196, "step": 715 }, { "epoch": 0.8724614134849715, "grad_norm": 0.4745590461881841, "learning_rate": 8.928670137630236e-06, "loss": 0.6041, "step": 716 }, { "epoch": 0.8736799350121852, "grad_norm": 0.5290850478804046, "learning_rate": 8.924277571328091e-06, "loss": 0.5968, "step": 717 }, { "epoch": 0.8748984565393989, "grad_norm": 0.4724468577981056, "learning_rate": 8.919877103623949e-06, "loss": 0.5888, "step": 718 }, { "epoch": 0.8761169780666125, "grad_norm": 0.4710021425585232, "learning_rate": 8.915468743378009e-06, "loss": 0.6039, "step": 719 }, { "epoch": 0.8773354995938262, "grad_norm": 0.5615817507996624, "learning_rate": 8.911052499466358e-06, "loss": 0.611, "step": 720 }, { "epoch": 0.8785540211210398, "grad_norm": 0.5372617716587773, "learning_rate": 8.906628380780951e-06, "loss": 0.5853, "step": 721 }, { "epoch": 0.8797725426482534, "grad_norm": 0.4671881493526463, "learning_rate": 8.902196396229605e-06, "loss": 0.6135, "step": 722 }, { "epoch": 0.8809910641754671, "grad_norm": 0.6571538751607443, "learning_rate": 8.897756554735976e-06, "loss": 0.6166, "step": 723 }, { "epoch": 0.8822095857026807, "grad_norm": 0.5407143640334066, "learning_rate": 8.893308865239536e-06, "loss": 0.5946, "step": 724 }, { "epoch": 0.8834281072298944, "grad_norm": 0.53845654868447, "learning_rate": 8.888853336695558e-06, "loss": 0.6056, "step": 725 }, { "epoch": 0.8846466287571081, "grad_norm": 0.5501103328024185, "learning_rate": 8.884389978075098e-06, "loss": 0.5983, "step": 726 }, { "epoch": 0.8858651502843217, "grad_norm": 0.5308109296782529, "learning_rate": 8.879918798364984e-06, "loss": 0.5777, "step": 727 }, { "epoch": 0.8870836718115354, "grad_norm": 0.5017325039220928, "learning_rate": 8.875439806567786e-06, "loss": 0.6045, "step": 728 }, { "epoch": 0.8883021933387489, "grad_norm": 0.5901206372277947, "learning_rate": 8.870953011701804e-06, "loss": 0.604, "step": 729 }, { "epoch": 0.8895207148659626, "grad_norm": 0.45439896535640995, "learning_rate": 8.866458422801048e-06, "loss": 0.6073, "step": 730 }, { "epoch": 0.8907392363931763, "grad_norm": 0.5577426986098635, "learning_rate": 8.861956048915225e-06, "loss": 0.5915, "step": 731 }, { "epoch": 0.8919577579203899, "grad_norm": 0.6016567936834477, "learning_rate": 8.857445899109716e-06, "loss": 0.6046, "step": 732 }, { "epoch": 0.8931762794476036, "grad_norm": 0.5445868957449489, "learning_rate": 8.852927982465553e-06, "loss": 0.6106, "step": 733 }, { "epoch": 0.8943948009748173, "grad_norm": 0.74687623190731, "learning_rate": 8.848402308079415e-06, "loss": 0.6106, "step": 734 }, { "epoch": 0.8956133225020309, "grad_norm": 0.5720296451679941, "learning_rate": 8.843868885063594e-06, "loss": 0.6051, "step": 735 }, { "epoch": 0.8968318440292445, "grad_norm": 0.6556133763306434, "learning_rate": 8.839327722545985e-06, "loss": 0.6167, "step": 736 }, { "epoch": 0.8980503655564581, "grad_norm": 0.564067584928174, "learning_rate": 8.83477882967007e-06, "loss": 0.5994, "step": 737 }, { "epoch": 0.8992688870836718, "grad_norm": 0.7349456844478599, "learning_rate": 8.83022221559489e-06, "loss": 0.6114, "step": 738 }, { "epoch": 0.9004874086108855, "grad_norm": 0.5690040907358448, "learning_rate": 8.82565788949504e-06, "loss": 0.5881, "step": 739 }, { "epoch": 0.9017059301380991, "grad_norm": 0.6984688918514965, "learning_rate": 8.821085860560633e-06, "loss": 0.5983, "step": 740 }, { "epoch": 0.9029244516653128, "grad_norm": 0.5870268436598589, "learning_rate": 8.8165061379973e-06, "loss": 0.6158, "step": 741 }, { "epoch": 0.9041429731925265, "grad_norm": 0.730806962459982, "learning_rate": 8.81191873102616e-06, "loss": 0.6058, "step": 742 }, { "epoch": 0.90536149471974, "grad_norm": 0.5520509944838993, "learning_rate": 8.807323648883802e-06, "loss": 0.6076, "step": 743 }, { "epoch": 0.9065800162469537, "grad_norm": 0.5674479495642151, "learning_rate": 8.80272090082227e-06, "loss": 0.6017, "step": 744 }, { "epoch": 0.9077985377741673, "grad_norm": 0.6471015570221698, "learning_rate": 8.798110496109047e-06, "loss": 0.6114, "step": 745 }, { "epoch": 0.909017059301381, "grad_norm": 0.5077905529540144, "learning_rate": 8.793492444027027e-06, "loss": 0.6086, "step": 746 }, { "epoch": 0.9102355808285947, "grad_norm": 0.5684591151412205, "learning_rate": 8.788866753874504e-06, "loss": 0.5939, "step": 747 }, { "epoch": 0.9114541023558083, "grad_norm": 0.5373473945369368, "learning_rate": 8.784233434965149e-06, "loss": 0.605, "step": 748 }, { "epoch": 0.912672623883022, "grad_norm": 0.4922150085749876, "learning_rate": 8.779592496627998e-06, "loss": 0.6016, "step": 749 }, { "epoch": 0.9138911454102355, "grad_norm": 0.5346368247367626, "learning_rate": 8.774943948207427e-06, "loss": 0.5894, "step": 750 }, { "epoch": 0.9151096669374492, "grad_norm": 0.5910293461390073, "learning_rate": 8.770287799063128e-06, "loss": 0.5928, "step": 751 }, { "epoch": 0.9163281884646629, "grad_norm": 0.45941353467858154, "learning_rate": 8.765624058570106e-06, "loss": 0.606, "step": 752 }, { "epoch": 0.9175467099918765, "grad_norm": 0.5187731411231332, "learning_rate": 8.760952736118645e-06, "loss": 0.6128, "step": 753 }, { "epoch": 0.9187652315190902, "grad_norm": 0.5257713049314863, "learning_rate": 8.756273841114297e-06, "loss": 0.5954, "step": 754 }, { "epoch": 0.9199837530463039, "grad_norm": 0.5158216045537021, "learning_rate": 8.751587382977862e-06, "loss": 0.6016, "step": 755 }, { "epoch": 0.9212022745735174, "grad_norm": 0.48265635843326626, "learning_rate": 8.746893371145367e-06, "loss": 0.6023, "step": 756 }, { "epoch": 0.9224207961007311, "grad_norm": 0.56635290896361, "learning_rate": 8.742191815068048e-06, "loss": 0.6168, "step": 757 }, { "epoch": 0.9236393176279447, "grad_norm": 0.5246869149929032, "learning_rate": 8.737482724212331e-06, "loss": 0.6073, "step": 758 }, { "epoch": 0.9248578391551584, "grad_norm": 0.5675558144411569, "learning_rate": 8.732766108059814e-06, "loss": 0.6089, "step": 759 }, { "epoch": 0.9260763606823721, "grad_norm": 0.5373680000020842, "learning_rate": 8.728041976107247e-06, "loss": 0.6229, "step": 760 }, { "epoch": 0.9272948822095857, "grad_norm": 0.4781724675355625, "learning_rate": 8.723310337866508e-06, "loss": 0.6109, "step": 761 }, { "epoch": 0.9285134037367994, "grad_norm": 0.5425148864348092, "learning_rate": 8.718571202864598e-06, "loss": 0.6135, "step": 762 }, { "epoch": 0.929731925264013, "grad_norm": 0.5848574183660457, "learning_rate": 8.713824580643606e-06, "loss": 0.5856, "step": 763 }, { "epoch": 0.9309504467912266, "grad_norm": 0.5359644668976268, "learning_rate": 8.709070480760696e-06, "loss": 0.6005, "step": 764 }, { "epoch": 0.9321689683184403, "grad_norm": 0.620026762890768, "learning_rate": 8.70430891278809e-06, "loss": 0.6068, "step": 765 }, { "epoch": 0.9333874898456539, "grad_norm": 0.47117448230839937, "learning_rate": 8.699539886313047e-06, "loss": 0.6252, "step": 766 }, { "epoch": 0.9346060113728676, "grad_norm": 0.5057879313386596, "learning_rate": 8.69476341093784e-06, "loss": 0.6043, "step": 767 }, { "epoch": 0.9358245329000813, "grad_norm": 0.5719864673466165, "learning_rate": 8.689979496279747e-06, "loss": 0.6021, "step": 768 }, { "epoch": 0.9370430544272949, "grad_norm": 0.4550279435061135, "learning_rate": 8.685188151971018e-06, "loss": 0.5903, "step": 769 }, { "epoch": 0.9382615759545085, "grad_norm": 0.5815823584929373, "learning_rate": 8.680389387658866e-06, "loss": 0.5994, "step": 770 }, { "epoch": 0.9394800974817222, "grad_norm": 0.5037028317625714, "learning_rate": 8.675583213005443e-06, "loss": 0.619, "step": 771 }, { "epoch": 0.9406986190089358, "grad_norm": 0.5242690261886358, "learning_rate": 8.67076963768782e-06, "loss": 0.6048, "step": 772 }, { "epoch": 0.9419171405361495, "grad_norm": 0.6218367099845817, "learning_rate": 8.66594867139797e-06, "loss": 0.5839, "step": 773 }, { "epoch": 0.9431356620633631, "grad_norm": 0.47012822627564055, "learning_rate": 8.661120323842751e-06, "loss": 0.5901, "step": 774 }, { "epoch": 0.9443541835905768, "grad_norm": 0.5922308137676237, "learning_rate": 8.656284604743877e-06, "loss": 0.5949, "step": 775 }, { "epoch": 0.9455727051177905, "grad_norm": 0.5371260230634575, "learning_rate": 8.651441523837908e-06, "loss": 0.623, "step": 776 }, { "epoch": 0.946791226645004, "grad_norm": 0.5773759686297267, "learning_rate": 8.646591090876225e-06, "loss": 0.6234, "step": 777 }, { "epoch": 0.9480097481722177, "grad_norm": 0.5887590407239388, "learning_rate": 8.641733315625014e-06, "loss": 0.6111, "step": 778 }, { "epoch": 0.9492282696994313, "grad_norm": 0.5226241995561731, "learning_rate": 8.636868207865244e-06, "loss": 0.6206, "step": 779 }, { "epoch": 0.950446791226645, "grad_norm": 0.6014897765265561, "learning_rate": 8.631995777392645e-06, "loss": 0.6098, "step": 780 }, { "epoch": 0.9516653127538587, "grad_norm": 0.4728664792789181, "learning_rate": 8.627116034017697e-06, "loss": 0.6175, "step": 781 }, { "epoch": 0.9528838342810723, "grad_norm": 0.5599521599776955, "learning_rate": 8.622228987565597e-06, "loss": 0.6121, "step": 782 }, { "epoch": 0.954102355808286, "grad_norm": 0.45561297167703785, "learning_rate": 8.61733464787625e-06, "loss": 0.585, "step": 783 }, { "epoch": 0.9553208773354996, "grad_norm": 0.4965712938546266, "learning_rate": 8.612433024804246e-06, "loss": 0.5844, "step": 784 }, { "epoch": 0.9565393988627132, "grad_norm": 0.49923609484853176, "learning_rate": 8.607524128218842e-06, "loss": 0.6056, "step": 785 }, { "epoch": 0.9577579203899269, "grad_norm": 0.5194489854997212, "learning_rate": 8.602607968003935e-06, "loss": 0.6157, "step": 786 }, { "epoch": 0.9589764419171405, "grad_norm": 0.45374807644787585, "learning_rate": 8.597684554058053e-06, "loss": 0.6131, "step": 787 }, { "epoch": 0.9601949634443542, "grad_norm": 0.48980331599376176, "learning_rate": 8.59275389629432e-06, "loss": 0.6277, "step": 788 }, { "epoch": 0.9614134849715679, "grad_norm": 0.512984376262805, "learning_rate": 8.587816004640456e-06, "loss": 0.6079, "step": 789 }, { "epoch": 0.9626320064987814, "grad_norm": 0.46938679490869983, "learning_rate": 8.58287088903874e-06, "loss": 0.6024, "step": 790 }, { "epoch": 0.9638505280259951, "grad_norm": 0.5727370279954419, "learning_rate": 8.577918559445994e-06, "loss": 0.6133, "step": 791 }, { "epoch": 0.9650690495532088, "grad_norm": 0.46813355754433694, "learning_rate": 8.572959025833573e-06, "loss": 0.6091, "step": 792 }, { "epoch": 0.9662875710804224, "grad_norm": 0.5352006872401892, "learning_rate": 8.56799229818733e-06, "loss": 0.5926, "step": 793 }, { "epoch": 0.9675060926076361, "grad_norm": 0.5423797070420179, "learning_rate": 8.563018386507607e-06, "loss": 0.6055, "step": 794 }, { "epoch": 0.9687246141348497, "grad_norm": 0.5598760717169532, "learning_rate": 8.558037300809209e-06, "loss": 0.601, "step": 795 }, { "epoch": 0.9699431356620634, "grad_norm": 0.5899307915518814, "learning_rate": 8.553049051121383e-06, "loss": 0.5925, "step": 796 }, { "epoch": 0.971161657189277, "grad_norm": 0.5817700253793735, "learning_rate": 8.548053647487808e-06, "loss": 0.5794, "step": 797 }, { "epoch": 0.9723801787164906, "grad_norm": 0.6684891953193655, "learning_rate": 8.543051099966558e-06, "loss": 0.6158, "step": 798 }, { "epoch": 0.9735987002437043, "grad_norm": 0.6186641627844115, "learning_rate": 8.538041418630099e-06, "loss": 0.6045, "step": 799 }, { "epoch": 0.974817221770918, "grad_norm": 0.5620245115548018, "learning_rate": 8.533024613565256e-06, "loss": 0.6074, "step": 800 }, { "epoch": 0.9760357432981316, "grad_norm": 0.5360734619477909, "learning_rate": 8.5280006948732e-06, "loss": 0.5781, "step": 801 }, { "epoch": 0.9772542648253453, "grad_norm": 0.5649861774930516, "learning_rate": 8.522969672669419e-06, "loss": 0.603, "step": 802 }, { "epoch": 0.9784727863525589, "grad_norm": 0.5524388375136041, "learning_rate": 8.517931557083713e-06, "loss": 0.5927, "step": 803 }, { "epoch": 0.9796913078797725, "grad_norm": 0.5048333497363491, "learning_rate": 8.512886358260162e-06, "loss": 0.6218, "step": 804 }, { "epoch": 0.9809098294069862, "grad_norm": 0.5532699810235799, "learning_rate": 8.5078340863571e-06, "loss": 0.5935, "step": 805 }, { "epoch": 0.9821283509341998, "grad_norm": 0.482227106626454, "learning_rate": 8.502774751547108e-06, "loss": 0.5946, "step": 806 }, { "epoch": 0.9833468724614135, "grad_norm": 0.5612628853741157, "learning_rate": 8.49770836401699e-06, "loss": 0.6174, "step": 807 }, { "epoch": 0.9845653939886271, "grad_norm": 0.5165079876207431, "learning_rate": 8.492634933967749e-06, "loss": 0.586, "step": 808 }, { "epoch": 0.9857839155158408, "grad_norm": 0.5243350260461674, "learning_rate": 8.487554471614568e-06, "loss": 0.598, "step": 809 }, { "epoch": 0.9870024370430545, "grad_norm": 0.5334138693548346, "learning_rate": 8.482466987186785e-06, "loss": 0.6156, "step": 810 }, { "epoch": 0.988220958570268, "grad_norm": 0.5183630888601999, "learning_rate": 8.477372490927882e-06, "loss": 0.6043, "step": 811 }, { "epoch": 0.9894394800974817, "grad_norm": 0.5064511107410842, "learning_rate": 8.47227099309546e-06, "loss": 0.618, "step": 812 }, { "epoch": 0.9906580016246954, "grad_norm": 0.502910387382079, "learning_rate": 8.467162503961209e-06, "loss": 0.5921, "step": 813 }, { "epoch": 0.991876523151909, "grad_norm": 0.6360985673189292, "learning_rate": 8.462047033810906e-06, "loss": 0.6196, "step": 814 }, { "epoch": 0.9930950446791227, "grad_norm": 0.48804000994343705, "learning_rate": 8.456924592944377e-06, "loss": 0.5874, "step": 815 }, { "epoch": 0.9943135662063363, "grad_norm": 0.5525784026778128, "learning_rate": 8.451795191675488e-06, "loss": 0.6121, "step": 816 }, { "epoch": 0.99553208773355, "grad_norm": 0.6244758885512404, "learning_rate": 8.446658840332115e-06, "loss": 0.6117, "step": 817 }, { "epoch": 0.9967506092607636, "grad_norm": 0.5125354504575084, "learning_rate": 8.441515549256134e-06, "loss": 0.6029, "step": 818 }, { "epoch": 0.9979691307879772, "grad_norm": 0.48689738688414835, "learning_rate": 8.436365328803386e-06, "loss": 0.6118, "step": 819 }, { "epoch": 0.9991876523151909, "grad_norm": 0.6498259018985348, "learning_rate": 8.43120818934367e-06, "loss": 0.6102, "step": 820 }, { "epoch": 1.0008123476848092, "grad_norm": 0.9638337013283915, "learning_rate": 8.426044141260712e-06, "loss": 0.9573, "step": 821 }, { "epoch": 1.0020308692120228, "grad_norm": 0.49843778312392245, "learning_rate": 8.420873194952153e-06, "loss": 0.5312, "step": 822 }, { "epoch": 1.0032493907392364, "grad_norm": 0.5736142039977695, "learning_rate": 8.415695360829521e-06, "loss": 0.5481, "step": 823 }, { "epoch": 1.00446791226645, "grad_norm": 0.5588125856539439, "learning_rate": 8.410510649318211e-06, "loss": 0.6112, "step": 824 }, { "epoch": 1.0056864337936637, "grad_norm": 0.5238730088532109, "learning_rate": 8.405319070857466e-06, "loss": 0.5738, "step": 825 }, { "epoch": 1.0069049553208773, "grad_norm": 0.5729923093577028, "learning_rate": 8.40012063590036e-06, "loss": 0.563, "step": 826 }, { "epoch": 1.008123476848091, "grad_norm": 0.542308645982848, "learning_rate": 8.394915354913763e-06, "loss": 0.5825, "step": 827 }, { "epoch": 1.0093419983753047, "grad_norm": 0.5635399800755453, "learning_rate": 8.38970323837834e-06, "loss": 0.5596, "step": 828 }, { "epoch": 1.0105605199025183, "grad_norm": 0.5412240812641438, "learning_rate": 8.384484296788509e-06, "loss": 0.583, "step": 829 }, { "epoch": 1.0117790414297319, "grad_norm": 0.4985722523246039, "learning_rate": 8.379258540652438e-06, "loss": 0.5269, "step": 830 }, { "epoch": 1.0129975629569457, "grad_norm": 0.5577073237880519, "learning_rate": 8.37402598049201e-06, "loss": 0.5971, "step": 831 }, { "epoch": 1.0142160844841592, "grad_norm": 0.5397320632233633, "learning_rate": 8.368786626842815e-06, "loss": 0.576, "step": 832 }, { "epoch": 1.0154346060113728, "grad_norm": 0.5446374373068642, "learning_rate": 8.363540490254111e-06, "loss": 0.5604, "step": 833 }, { "epoch": 1.0166531275385866, "grad_norm": 0.5916157265480478, "learning_rate": 8.358287581288824e-06, "loss": 0.5977, "step": 834 }, { "epoch": 1.0178716490658002, "grad_norm": 0.44317757053413465, "learning_rate": 8.353027910523506e-06, "loss": 0.5386, "step": 835 }, { "epoch": 1.0190901705930138, "grad_norm": 0.5306644900080113, "learning_rate": 8.347761488548334e-06, "loss": 0.5685, "step": 836 }, { "epoch": 1.0203086921202273, "grad_norm": 0.554634789156319, "learning_rate": 8.342488325967068e-06, "loss": 0.5906, "step": 837 }, { "epoch": 1.0215272136474411, "grad_norm": 0.46926134132806735, "learning_rate": 8.337208433397051e-06, "loss": 0.5518, "step": 838 }, { "epoch": 1.0227457351746547, "grad_norm": 0.5223237573306092, "learning_rate": 8.331921821469164e-06, "loss": 0.5482, "step": 839 }, { "epoch": 1.0239642567018683, "grad_norm": 0.6456110639127597, "learning_rate": 8.326628500827826e-06, "loss": 0.5533, "step": 840 }, { "epoch": 1.025182778229082, "grad_norm": 0.49817045727119846, "learning_rate": 8.321328482130967e-06, "loss": 0.5828, "step": 841 }, { "epoch": 1.0264012997562957, "grad_norm": 0.6439926526967455, "learning_rate": 8.31602177604999e-06, "loss": 0.5445, "step": 842 }, { "epoch": 1.0276198212835093, "grad_norm": 0.5597217590326287, "learning_rate": 8.310708393269773e-06, "loss": 0.5919, "step": 843 }, { "epoch": 1.028838342810723, "grad_norm": 0.5067484108191753, "learning_rate": 8.305388344488636e-06, "loss": 0.5119, "step": 844 }, { "epoch": 1.0300568643379366, "grad_norm": 0.6138111359383427, "learning_rate": 8.300061640418322e-06, "loss": 0.5819, "step": 845 }, { "epoch": 1.0312753858651502, "grad_norm": 0.5228439245226578, "learning_rate": 8.294728291783967e-06, "loss": 0.5488, "step": 846 }, { "epoch": 1.032493907392364, "grad_norm": 0.5069119735333029, "learning_rate": 8.289388309324094e-06, "loss": 0.5531, "step": 847 }, { "epoch": 1.0337124289195776, "grad_norm": 0.6055259774711721, "learning_rate": 8.284041703790578e-06, "loss": 0.6323, "step": 848 }, { "epoch": 1.0349309504467912, "grad_norm": 0.40577407124920994, "learning_rate": 8.278688485948634e-06, "loss": 0.5171, "step": 849 }, { "epoch": 1.036149471974005, "grad_norm": 0.5480653507617855, "learning_rate": 8.273328666576783e-06, "loss": 0.5708, "step": 850 }, { "epoch": 1.0373679935012186, "grad_norm": 0.5332307457846426, "learning_rate": 8.267962256466845e-06, "loss": 0.5802, "step": 851 }, { "epoch": 1.0385865150284321, "grad_norm": 0.45617231239236866, "learning_rate": 8.262589266423908e-06, "loss": 0.5367, "step": 852 }, { "epoch": 1.0398050365556457, "grad_norm": 0.4487718203264924, "learning_rate": 8.257209707266308e-06, "loss": 0.5412, "step": 853 }, { "epoch": 1.0410235580828595, "grad_norm": 0.49617901681065096, "learning_rate": 8.251823589825608e-06, "loss": 0.582, "step": 854 }, { "epoch": 1.042242079610073, "grad_norm": 0.47465221989539974, "learning_rate": 8.246430924946575e-06, "loss": 0.5377, "step": 855 }, { "epoch": 1.0434606011372867, "grad_norm": 0.4988725203576914, "learning_rate": 8.24103172348716e-06, "loss": 0.6148, "step": 856 }, { "epoch": 1.0446791226645005, "grad_norm": 0.4769299659284957, "learning_rate": 8.235625996318475e-06, "loss": 0.5376, "step": 857 }, { "epoch": 1.045897644191714, "grad_norm": 0.5418879737499556, "learning_rate": 8.230213754324773e-06, "loss": 0.5688, "step": 858 }, { "epoch": 1.0471161657189276, "grad_norm": 0.4361367720716124, "learning_rate": 8.22479500840342e-06, "loss": 0.5337, "step": 859 }, { "epoch": 1.0483346872461414, "grad_norm": 0.5323815827851344, "learning_rate": 8.219369769464883e-06, "loss": 0.6055, "step": 860 }, { "epoch": 1.049553208773355, "grad_norm": 0.5879673529136081, "learning_rate": 8.213938048432697e-06, "loss": 0.5415, "step": 861 }, { "epoch": 1.0507717303005686, "grad_norm": 0.4684259408064238, "learning_rate": 8.208499856243453e-06, "loss": 0.5515, "step": 862 }, { "epoch": 1.0519902518277824, "grad_norm": 0.5196995774290054, "learning_rate": 8.20305520384677e-06, "loss": 0.5934, "step": 863 }, { "epoch": 1.053208773354996, "grad_norm": 0.555821404956024, "learning_rate": 8.19760410220527e-06, "loss": 0.5608, "step": 864 }, { "epoch": 1.0544272948822095, "grad_norm": 0.49067810902195214, "learning_rate": 8.19214656229457e-06, "loss": 0.5338, "step": 865 }, { "epoch": 1.0556458164094233, "grad_norm": 0.5035110725818862, "learning_rate": 8.186682595103241e-06, "loss": 0.579, "step": 866 }, { "epoch": 1.056864337936637, "grad_norm": 0.5005979772843533, "learning_rate": 8.1812122116328e-06, "loss": 0.5824, "step": 867 }, { "epoch": 1.0580828594638505, "grad_norm": 0.5504829458164456, "learning_rate": 8.175735422897682e-06, "loss": 0.5574, "step": 868 }, { "epoch": 1.059301380991064, "grad_norm": 0.5207101568397476, "learning_rate": 8.170252239925215e-06, "loss": 0.5894, "step": 869 }, { "epoch": 1.0605199025182779, "grad_norm": 0.41793216877614997, "learning_rate": 8.16476267375561e-06, "loss": 0.509, "step": 870 }, { "epoch": 1.0617384240454915, "grad_norm": 0.5270083025323902, "learning_rate": 8.159266735441922e-06, "loss": 0.584, "step": 871 }, { "epoch": 1.062956945572705, "grad_norm": 0.4966922910229618, "learning_rate": 8.15376443605004e-06, "loss": 0.5269, "step": 872 }, { "epoch": 1.0641754670999188, "grad_norm": 0.4961677071135526, "learning_rate": 8.148255786658661e-06, "loss": 0.6035, "step": 873 }, { "epoch": 1.0653939886271324, "grad_norm": 0.4946533201405728, "learning_rate": 8.142740798359268e-06, "loss": 0.5932, "step": 874 }, { "epoch": 1.066612510154346, "grad_norm": 0.49312465250267673, "learning_rate": 8.137219482256102e-06, "loss": 0.5337, "step": 875 }, { "epoch": 1.0678310316815598, "grad_norm": 0.5074238436289318, "learning_rate": 8.131691849466154e-06, "loss": 0.5536, "step": 876 }, { "epoch": 1.0690495532087734, "grad_norm": 0.5179722934326702, "learning_rate": 8.126157911119124e-06, "loss": 0.5781, "step": 877 }, { "epoch": 1.070268074735987, "grad_norm": 0.42106727984073683, "learning_rate": 8.120617678357415e-06, "loss": 0.5364, "step": 878 }, { "epoch": 1.0714865962632008, "grad_norm": 0.5619541047984238, "learning_rate": 8.115071162336099e-06, "loss": 0.6302, "step": 879 }, { "epoch": 1.0727051177904143, "grad_norm": 0.48218497269212, "learning_rate": 8.109518374222902e-06, "loss": 0.5081, "step": 880 }, { "epoch": 1.073923639317628, "grad_norm": 0.5288776434466912, "learning_rate": 8.103959325198178e-06, "loss": 0.6161, "step": 881 }, { "epoch": 1.0751421608448415, "grad_norm": 0.4396305550189922, "learning_rate": 8.098394026454886e-06, "loss": 0.5269, "step": 882 }, { "epoch": 1.0763606823720553, "grad_norm": 0.5705187563085431, "learning_rate": 8.09282248919857e-06, "loss": 0.5918, "step": 883 }, { "epoch": 1.0775792038992689, "grad_norm": 0.5173394574008403, "learning_rate": 8.087244724647333e-06, "loss": 0.55, "step": 884 }, { "epoch": 1.0787977254264824, "grad_norm": 0.5259195540857357, "learning_rate": 8.081660744031818e-06, "loss": 0.5587, "step": 885 }, { "epoch": 1.0800162469536962, "grad_norm": 0.5013768900277689, "learning_rate": 8.076070558595188e-06, "loss": 0.5847, "step": 886 }, { "epoch": 1.0812347684809098, "grad_norm": 0.5113716323758455, "learning_rate": 8.070474179593088e-06, "loss": 0.5841, "step": 887 }, { "epoch": 1.0824532900081234, "grad_norm": 0.4304893769830929, "learning_rate": 8.064871618293647e-06, "loss": 0.474, "step": 888 }, { "epoch": 1.0836718115353372, "grad_norm": 0.5581590870053381, "learning_rate": 8.05926288597743e-06, "loss": 0.5883, "step": 889 }, { "epoch": 1.0848903330625508, "grad_norm": 0.5966885478295298, "learning_rate": 8.053647993937436e-06, "loss": 0.6114, "step": 890 }, { "epoch": 1.0861088545897644, "grad_norm": 0.45798182910038504, "learning_rate": 8.048026953479062e-06, "loss": 0.5349, "step": 891 }, { "epoch": 1.0873273761169782, "grad_norm": 0.5977190234288519, "learning_rate": 8.042399775920084e-06, "loss": 0.5822, "step": 892 }, { "epoch": 1.0885458976441917, "grad_norm": 0.5579549068887683, "learning_rate": 8.036766472590636e-06, "loss": 0.5892, "step": 893 }, { "epoch": 1.0897644191714053, "grad_norm": 0.5035624965150097, "learning_rate": 8.031127054833192e-06, "loss": 0.5278, "step": 894 }, { "epoch": 1.090982940698619, "grad_norm": 0.569184764093924, "learning_rate": 8.025481534002524e-06, "loss": 0.5904, "step": 895 }, { "epoch": 1.0922014622258327, "grad_norm": 0.47339482033152885, "learning_rate": 8.019829921465703e-06, "loss": 0.5598, "step": 896 }, { "epoch": 1.0934199837530463, "grad_norm": 0.4510131001279952, "learning_rate": 8.014172228602063e-06, "loss": 0.5218, "step": 897 }, { "epoch": 1.0946385052802599, "grad_norm": 0.5778676271124781, "learning_rate": 8.00850846680318e-06, "loss": 0.6047, "step": 898 }, { "epoch": 1.0958570268074737, "grad_norm": 0.437095810398411, "learning_rate": 8.002838647472848e-06, "loss": 0.5497, "step": 899 }, { "epoch": 1.0970755483346872, "grad_norm": 0.5562520913467127, "learning_rate": 7.997162782027061e-06, "loss": 0.5555, "step": 900 }, { "epoch": 1.0982940698619008, "grad_norm": 0.49447252137766545, "learning_rate": 7.991480881893982e-06, "loss": 0.5282, "step": 901 }, { "epoch": 1.0995125913891146, "grad_norm": 0.5223776301957348, "learning_rate": 7.985792958513932e-06, "loss": 0.5936, "step": 902 }, { "epoch": 1.1007311129163282, "grad_norm": 0.43743454592876513, "learning_rate": 7.98009902333935e-06, "loss": 0.5209, "step": 903 }, { "epoch": 1.1019496344435418, "grad_norm": 0.48630293369462313, "learning_rate": 7.974399087834786e-06, "loss": 0.5629, "step": 904 }, { "epoch": 1.1031681559707556, "grad_norm": 0.4518898797022784, "learning_rate": 7.968693163476872e-06, "loss": 0.5469, "step": 905 }, { "epoch": 1.1043866774979691, "grad_norm": 0.5599257334925746, "learning_rate": 7.962981261754295e-06, "loss": 0.6093, "step": 906 }, { "epoch": 1.1056051990251827, "grad_norm": 0.508379851023288, "learning_rate": 7.957263394167778e-06, "loss": 0.5502, "step": 907 }, { "epoch": 1.1068237205523965, "grad_norm": 0.46905549399423435, "learning_rate": 7.951539572230058e-06, "loss": 0.5498, "step": 908 }, { "epoch": 1.10804224207961, "grad_norm": 0.5331570716206057, "learning_rate": 7.945809807465857e-06, "loss": 0.5936, "step": 909 }, { "epoch": 1.1092607636068237, "grad_norm": 0.43447287523932976, "learning_rate": 7.940074111411869e-06, "loss": 0.5205, "step": 910 }, { "epoch": 1.1104792851340373, "grad_norm": 0.4675250634574423, "learning_rate": 7.934332495616723e-06, "loss": 0.5921, "step": 911 }, { "epoch": 1.111697806661251, "grad_norm": 0.5710382430607513, "learning_rate": 7.928584971640974e-06, "loss": 0.5528, "step": 912 }, { "epoch": 1.1129163281884646, "grad_norm": 0.43616129376419555, "learning_rate": 7.922831551057068e-06, "loss": 0.5304, "step": 913 }, { "epoch": 1.1141348497156782, "grad_norm": 0.4931780007557348, "learning_rate": 7.917072245449327e-06, "loss": 0.5667, "step": 914 }, { "epoch": 1.115353371242892, "grad_norm": 0.46266355232192513, "learning_rate": 7.91130706641392e-06, "loss": 0.557, "step": 915 }, { "epoch": 1.1165718927701056, "grad_norm": 0.4769121004651534, "learning_rate": 7.90553602555884e-06, "loss": 0.5761, "step": 916 }, { "epoch": 1.1177904142973192, "grad_norm": 0.4543130942521957, "learning_rate": 7.899759134503888e-06, "loss": 0.5667, "step": 917 }, { "epoch": 1.119008935824533, "grad_norm": 0.4622820207175306, "learning_rate": 7.893976404880643e-06, "loss": 0.5217, "step": 918 }, { "epoch": 1.1202274573517466, "grad_norm": 0.45946359941638926, "learning_rate": 7.888187848332434e-06, "loss": 0.552, "step": 919 }, { "epoch": 1.1214459788789601, "grad_norm": 0.5221530283186372, "learning_rate": 7.88239347651433e-06, "loss": 0.6037, "step": 920 }, { "epoch": 1.122664500406174, "grad_norm": 0.490304437758209, "learning_rate": 7.876593301093104e-06, "loss": 0.5435, "step": 921 }, { "epoch": 1.1238830219333875, "grad_norm": 0.5353872887084351, "learning_rate": 7.870787333747216e-06, "loss": 0.5465, "step": 922 }, { "epoch": 1.125101543460601, "grad_norm": 0.5305459219097892, "learning_rate": 7.864975586166788e-06, "loss": 0.5401, "step": 923 }, { "epoch": 1.126320064987815, "grad_norm": 0.4522121891276298, "learning_rate": 7.859158070053578e-06, "loss": 0.56, "step": 924 }, { "epoch": 1.1275385865150285, "grad_norm": 0.5400674612069138, "learning_rate": 7.853334797120961e-06, "loss": 0.5938, "step": 925 }, { "epoch": 1.128757108042242, "grad_norm": 0.4735679351556697, "learning_rate": 7.847505779093906e-06, "loss": 0.5517, "step": 926 }, { "epoch": 1.1299756295694556, "grad_norm": 0.48850903658646466, "learning_rate": 7.841671027708945e-06, "loss": 0.5805, "step": 927 }, { "epoch": 1.1311941510966694, "grad_norm": 0.4465079826503964, "learning_rate": 7.835830554714153e-06, "loss": 0.5332, "step": 928 }, { "epoch": 1.132412672623883, "grad_norm": 0.5630070888376983, "learning_rate": 7.82998437186913e-06, "loss": 0.5744, "step": 929 }, { "epoch": 1.1336311941510966, "grad_norm": 0.4850227941162986, "learning_rate": 7.824132490944968e-06, "loss": 0.5284, "step": 930 }, { "epoch": 1.1348497156783104, "grad_norm": 0.5473017535296978, "learning_rate": 7.818274923724237e-06, "loss": 0.5853, "step": 931 }, { "epoch": 1.136068237205524, "grad_norm": 0.6180360857968815, "learning_rate": 7.81241168200095e-06, "loss": 0.6005, "step": 932 }, { "epoch": 1.1372867587327375, "grad_norm": 0.606221772548701, "learning_rate": 7.80654277758055e-06, "loss": 0.5534, "step": 933 }, { "epoch": 1.1385052802599513, "grad_norm": 0.4683974906247182, "learning_rate": 7.80066822227988e-06, "loss": 0.5557, "step": 934 }, { "epoch": 1.139723801787165, "grad_norm": 0.5733918926578689, "learning_rate": 7.794788027927165e-06, "loss": 0.5617, "step": 935 }, { "epoch": 1.1409423233143785, "grad_norm": 0.5394769205967501, "learning_rate": 7.788902206361974e-06, "loss": 0.5949, "step": 936 }, { "epoch": 1.1421608448415923, "grad_norm": 0.4616046919338432, "learning_rate": 7.783010769435216e-06, "loss": 0.5173, "step": 937 }, { "epoch": 1.1433793663688059, "grad_norm": 0.5796955213884182, "learning_rate": 7.7771137290091e-06, "loss": 0.5924, "step": 938 }, { "epoch": 1.1445978878960195, "grad_norm": 0.5847720129488866, "learning_rate": 7.771211096957125e-06, "loss": 0.5562, "step": 939 }, { "epoch": 1.145816409423233, "grad_norm": 0.5171314095714995, "learning_rate": 7.765302885164038e-06, "loss": 0.5548, "step": 940 }, { "epoch": 1.1470349309504468, "grad_norm": 0.49901458608547633, "learning_rate": 7.759389105525832e-06, "loss": 0.5725, "step": 941 }, { "epoch": 1.1482534524776604, "grad_norm": 0.5352472484551857, "learning_rate": 7.753469769949701e-06, "loss": 0.5582, "step": 942 }, { "epoch": 1.149471974004874, "grad_norm": 0.6669984026812862, "learning_rate": 7.747544890354031e-06, "loss": 0.6313, "step": 943 }, { "epoch": 1.1506904955320878, "grad_norm": 0.4640017618478166, "learning_rate": 7.74161447866837e-06, "loss": 0.5275, "step": 944 }, { "epoch": 1.1519090170593014, "grad_norm": 0.5032260303359475, "learning_rate": 7.735678546833403e-06, "loss": 0.5405, "step": 945 }, { "epoch": 1.153127538586515, "grad_norm": 0.545384651096698, "learning_rate": 7.729737106800932e-06, "loss": 0.5856, "step": 946 }, { "epoch": 1.1543460601137288, "grad_norm": 0.5735240939112272, "learning_rate": 7.723790170533848e-06, "loss": 0.571, "step": 947 }, { "epoch": 1.1555645816409423, "grad_norm": 0.4552234746793405, "learning_rate": 7.717837750006106e-06, "loss": 0.5067, "step": 948 }, { "epoch": 1.156783103168156, "grad_norm": 0.49406048197174507, "learning_rate": 7.71187985720271e-06, "loss": 0.592, "step": 949 }, { "epoch": 1.1580016246953697, "grad_norm": 0.5489847996831881, "learning_rate": 7.705916504119679e-06, "loss": 0.5716, "step": 950 }, { "epoch": 1.1592201462225833, "grad_norm": 0.48074624532511123, "learning_rate": 7.699947702764021e-06, "loss": 0.5287, "step": 951 }, { "epoch": 1.1604386677497969, "grad_norm": 0.4833115004977427, "learning_rate": 7.693973465153724e-06, "loss": 0.5667, "step": 952 }, { "epoch": 1.1616571892770104, "grad_norm": 0.5472052571967937, "learning_rate": 7.68799380331771e-06, "loss": 0.5806, "step": 953 }, { "epoch": 1.1628757108042242, "grad_norm": 0.4381241429842595, "learning_rate": 7.682008729295834e-06, "loss": 0.5448, "step": 954 }, { "epoch": 1.1640942323314378, "grad_norm": 0.6129536550799662, "learning_rate": 7.676018255138841e-06, "loss": 0.6091, "step": 955 }, { "epoch": 1.1653127538586514, "grad_norm": 0.524234969513479, "learning_rate": 7.67002239290835e-06, "loss": 0.5363, "step": 956 }, { "epoch": 1.1665312753858652, "grad_norm": 0.43755065750263256, "learning_rate": 7.664021154676828e-06, "loss": 0.5683, "step": 957 }, { "epoch": 1.1677497969130788, "grad_norm": 0.4767439220213808, "learning_rate": 7.658014552527572e-06, "loss": 0.5201, "step": 958 }, { "epoch": 1.1689683184402924, "grad_norm": 0.6051473086713034, "learning_rate": 7.652002598554675e-06, "loss": 0.6148, "step": 959 }, { "epoch": 1.1701868399675062, "grad_norm": 0.442810424258257, "learning_rate": 7.645985304863004e-06, "loss": 0.5089, "step": 960 }, { "epoch": 1.1714053614947197, "grad_norm": 0.5212534237408961, "learning_rate": 7.639962683568178e-06, "loss": 0.6398, "step": 961 }, { "epoch": 1.1726238830219333, "grad_norm": 0.4782128214916858, "learning_rate": 7.633934746796545e-06, "loss": 0.5247, "step": 962 }, { "epoch": 1.1738424045491471, "grad_norm": 0.555997733569589, "learning_rate": 7.627901506685157e-06, "loss": 0.57, "step": 963 }, { "epoch": 1.1750609260763607, "grad_norm": 0.4524690440478936, "learning_rate": 7.621862975381739e-06, "loss": 0.5032, "step": 964 }, { "epoch": 1.1762794476035743, "grad_norm": 0.5558207018565952, "learning_rate": 7.615819165044671e-06, "loss": 0.6055, "step": 965 }, { "epoch": 1.1774979691307879, "grad_norm": 0.5285401986639633, "learning_rate": 7.609770087842969e-06, "loss": 0.5232, "step": 966 }, { "epoch": 1.1787164906580017, "grad_norm": 0.4906926197877719, "learning_rate": 7.603715755956243e-06, "loss": 0.6184, "step": 967 }, { "epoch": 1.1799350121852152, "grad_norm": 0.5453800647325697, "learning_rate": 7.597656181574691e-06, "loss": 0.5449, "step": 968 }, { "epoch": 1.181153533712429, "grad_norm": 0.532023507332386, "learning_rate": 7.5915913768990615e-06, "loss": 0.574, "step": 969 }, { "epoch": 1.1823720552396426, "grad_norm": 0.46068002444123424, "learning_rate": 7.585521354140638e-06, "loss": 0.5616, "step": 970 }, { "epoch": 1.1835905767668562, "grad_norm": 0.45366600351939207, "learning_rate": 7.57944612552121e-06, "loss": 0.5576, "step": 971 }, { "epoch": 1.1848090982940698, "grad_norm": 0.5035963241142227, "learning_rate": 7.573365703273045e-06, "loss": 0.5842, "step": 972 }, { "epoch": 1.1860276198212836, "grad_norm": 0.46429524269523453, "learning_rate": 7.567280099638874e-06, "loss": 0.5603, "step": 973 }, { "epoch": 1.1872461413484972, "grad_norm": 0.4391995658392802, "learning_rate": 7.561189326871854e-06, "loss": 0.5483, "step": 974 }, { "epoch": 1.1884646628757107, "grad_norm": 0.5688078918566764, "learning_rate": 7.555093397235553e-06, "loss": 0.6145, "step": 975 }, { "epoch": 1.1896831844029245, "grad_norm": 0.4535069143341333, "learning_rate": 7.548992323003923e-06, "loss": 0.529, "step": 976 }, { "epoch": 1.190901705930138, "grad_norm": 0.5610828923463264, "learning_rate": 7.542886116461272e-06, "loss": 0.5604, "step": 977 }, { "epoch": 1.1921202274573517, "grad_norm": 0.49771566362561265, "learning_rate": 7.536774789902246e-06, "loss": 0.5339, "step": 978 }, { "epoch": 1.1933387489845655, "grad_norm": 0.5055933911391732, "learning_rate": 7.530658355631795e-06, "loss": 0.5307, "step": 979 }, { "epoch": 1.194557270511779, "grad_norm": 0.5075577294535538, "learning_rate": 7.524536825965154e-06, "loss": 0.5604, "step": 980 }, { "epoch": 1.1957757920389926, "grad_norm": 0.5520230309503728, "learning_rate": 7.518410213227823e-06, "loss": 0.6162, "step": 981 }, { "epoch": 1.1969943135662064, "grad_norm": 0.5218152039597276, "learning_rate": 7.512278529755529e-06, "loss": 0.5613, "step": 982 }, { "epoch": 1.19821283509342, "grad_norm": 0.4971095496314555, "learning_rate": 7.506141787894214e-06, "loss": 0.5643, "step": 983 }, { "epoch": 1.1994313566206336, "grad_norm": 0.5351931771239321, "learning_rate": 7.500000000000001e-06, "loss": 0.5365, "step": 984 }, { "epoch": 1.2006498781478472, "grad_norm": 0.49713221603010127, "learning_rate": 7.493853178439177e-06, "loss": 0.5276, "step": 985 }, { "epoch": 1.201868399675061, "grad_norm": 0.49687942243856253, "learning_rate": 7.48770133558816e-06, "loss": 0.5705, "step": 986 }, { "epoch": 1.2030869212022746, "grad_norm": 0.4638420387813551, "learning_rate": 7.481544483833485e-06, "loss": 0.5143, "step": 987 }, { "epoch": 1.2043054427294881, "grad_norm": 0.5737984880330318, "learning_rate": 7.475382635571761e-06, "loss": 0.6105, "step": 988 }, { "epoch": 1.205523964256702, "grad_norm": 0.4548720894167483, "learning_rate": 7.4692158032096706e-06, "loss": 0.5409, "step": 989 }, { "epoch": 1.2067424857839155, "grad_norm": 0.49711497244164915, "learning_rate": 7.463043999163919e-06, "loss": 0.5803, "step": 990 }, { "epoch": 1.207961007311129, "grad_norm": 0.47268020267724503, "learning_rate": 7.456867235861231e-06, "loss": 0.563, "step": 991 }, { "epoch": 1.209179528838343, "grad_norm": 0.4431695796449243, "learning_rate": 7.450685525738315e-06, "loss": 0.5458, "step": 992 }, { "epoch": 1.2103980503655565, "grad_norm": 0.5514220959709781, "learning_rate": 7.444498881241835e-06, "loss": 0.5719, "step": 993 }, { "epoch": 1.21161657189277, "grad_norm": 0.48730651156910637, "learning_rate": 7.4383073148283945e-06, "loss": 0.5547, "step": 994 }, { "epoch": 1.2128350934199839, "grad_norm": 0.48026701020561735, "learning_rate": 7.432110838964508e-06, "loss": 0.5446, "step": 995 }, { "epoch": 1.2140536149471974, "grad_norm": 0.49526550877005804, "learning_rate": 7.4259094661265685e-06, "loss": 0.5539, "step": 996 }, { "epoch": 1.215272136474411, "grad_norm": 0.5033075517007225, "learning_rate": 7.419703208800839e-06, "loss": 0.5885, "step": 997 }, { "epoch": 1.2164906580016246, "grad_norm": 0.4591330610679407, "learning_rate": 7.413492079483405e-06, "loss": 0.4958, "step": 998 }, { "epoch": 1.2177091795288384, "grad_norm": 0.5435516527726211, "learning_rate": 7.407276090680173e-06, "loss": 0.5941, "step": 999 }, { "epoch": 1.218927701056052, "grad_norm": 0.5014818934661753, "learning_rate": 7.401055254906829e-06, "loss": 0.5674, "step": 1000 }, { "epoch": 1.2201462225832655, "grad_norm": 0.5506374382220622, "learning_rate": 7.394829584688816e-06, "loss": 0.5623, "step": 1001 }, { "epoch": 1.2213647441104794, "grad_norm": 0.47988582460651985, "learning_rate": 7.388599092561315e-06, "loss": 0.579, "step": 1002 }, { "epoch": 1.222583265637693, "grad_norm": 0.5116646928435937, "learning_rate": 7.382363791069214e-06, "loss": 0.5789, "step": 1003 }, { "epoch": 1.2238017871649065, "grad_norm": 0.5815639981335669, "learning_rate": 7.376123692767084e-06, "loss": 0.5306, "step": 1004 }, { "epoch": 1.2250203086921203, "grad_norm": 0.47545875532554605, "learning_rate": 7.369878810219154e-06, "loss": 0.574, "step": 1005 }, { "epoch": 1.2262388302193339, "grad_norm": 0.5843762256050973, "learning_rate": 7.363629155999289e-06, "loss": 0.5835, "step": 1006 }, { "epoch": 1.2274573517465475, "grad_norm": 0.49038029629420044, "learning_rate": 7.357374742690956e-06, "loss": 0.5277, "step": 1007 }, { "epoch": 1.2286758732737613, "grad_norm": 0.4825203440227731, "learning_rate": 7.351115582887212e-06, "loss": 0.5749, "step": 1008 }, { "epoch": 1.2298943948009748, "grad_norm": 0.5230621508499962, "learning_rate": 7.344851689190662e-06, "loss": 0.5494, "step": 1009 }, { "epoch": 1.2311129163281884, "grad_norm": 0.49942387299855917, "learning_rate": 7.33858307421345e-06, "loss": 0.5684, "step": 1010 }, { "epoch": 1.232331437855402, "grad_norm": 0.5550781071831415, "learning_rate": 7.3323097505772225e-06, "loss": 0.5552, "step": 1011 }, { "epoch": 1.2335499593826158, "grad_norm": 0.5160851429477965, "learning_rate": 7.326031730913107e-06, "loss": 0.5365, "step": 1012 }, { "epoch": 1.2347684809098294, "grad_norm": 0.5594132080926748, "learning_rate": 7.319749027861687e-06, "loss": 0.5805, "step": 1013 }, { "epoch": 1.235987002437043, "grad_norm": 0.5035664881102385, "learning_rate": 7.313461654072974e-06, "loss": 0.5572, "step": 1014 }, { "epoch": 1.2372055239642568, "grad_norm": 0.5011647298301126, "learning_rate": 7.3071696222063874e-06, "loss": 0.5736, "step": 1015 }, { "epoch": 1.2384240454914703, "grad_norm": 0.5003447796526637, "learning_rate": 7.300872944930724e-06, "loss": 0.5724, "step": 1016 }, { "epoch": 1.239642567018684, "grad_norm": 0.4488541730554654, "learning_rate": 7.2945716349241305e-06, "loss": 0.5271, "step": 1017 }, { "epoch": 1.2408610885458977, "grad_norm": 0.48397897498100484, "learning_rate": 7.288265704874089e-06, "loss": 0.5702, "step": 1018 }, { "epoch": 1.2420796100731113, "grad_norm": 0.46076984680494393, "learning_rate": 7.281955167477372e-06, "loss": 0.5235, "step": 1019 }, { "epoch": 1.2432981316003249, "grad_norm": 0.46851694123351845, "learning_rate": 7.2756400354400445e-06, "loss": 0.5237, "step": 1020 }, { "epoch": 1.2445166531275387, "grad_norm": 0.48677378118465786, "learning_rate": 7.2693203214774084e-06, "loss": 0.6109, "step": 1021 }, { "epoch": 1.2457351746547523, "grad_norm": 0.4780766187805638, "learning_rate": 7.262996038314001e-06, "loss": 0.5765, "step": 1022 }, { "epoch": 1.2469536961819658, "grad_norm": 0.4640167779478858, "learning_rate": 7.2566671986835515e-06, "loss": 0.5642, "step": 1023 }, { "epoch": 1.2481722177091794, "grad_norm": 0.48778459720464146, "learning_rate": 7.25033381532897e-06, "loss": 0.4946, "step": 1024 }, { "epoch": 1.2493907392363932, "grad_norm": 0.4659728876017271, "learning_rate": 7.243995901002312e-06, "loss": 0.5638, "step": 1025 }, { "epoch": 1.2506092607636068, "grad_norm": 0.4038916973792116, "learning_rate": 7.237653468464756e-06, "loss": 0.5607, "step": 1026 }, { "epoch": 1.2518277822908206, "grad_norm": 0.5567339438269147, "learning_rate": 7.231306530486579e-06, "loss": 0.5561, "step": 1027 }, { "epoch": 1.2530463038180342, "grad_norm": 0.4641852200663108, "learning_rate": 7.224955099847129e-06, "loss": 0.6096, "step": 1028 }, { "epoch": 1.2542648253452477, "grad_norm": 0.4411515265169084, "learning_rate": 7.218599189334799e-06, "loss": 0.4709, "step": 1029 }, { "epoch": 1.2554833468724613, "grad_norm": 0.5058133934757223, "learning_rate": 7.212238811747003e-06, "loss": 0.5904, "step": 1030 }, { "epoch": 1.2567018683996751, "grad_norm": 0.41291563737696013, "learning_rate": 7.205873979890151e-06, "loss": 0.5436, "step": 1031 }, { "epoch": 1.2579203899268887, "grad_norm": 0.4994662597356207, "learning_rate": 7.199504706579617e-06, "loss": 0.6102, "step": 1032 }, { "epoch": 1.2591389114541023, "grad_norm": 0.419031706073167, "learning_rate": 7.193131004639722e-06, "loss": 0.5104, "step": 1033 }, { "epoch": 1.260357432981316, "grad_norm": 0.4373098819276125, "learning_rate": 7.186752886903702e-06, "loss": 0.5539, "step": 1034 }, { "epoch": 1.2615759545085297, "grad_norm": 0.42312469752099624, "learning_rate": 7.180370366213684e-06, "loss": 0.5685, "step": 1035 }, { "epoch": 1.2627944760357432, "grad_norm": 0.4976440200214435, "learning_rate": 7.173983455420659e-06, "loss": 0.5886, "step": 1036 }, { "epoch": 1.2640129975629568, "grad_norm": 0.4458571719063019, "learning_rate": 7.167592167384461e-06, "loss": 0.5481, "step": 1037 }, { "epoch": 1.2652315190901706, "grad_norm": 0.5011046191959967, "learning_rate": 7.161196514973735e-06, "loss": 0.591, "step": 1038 }, { "epoch": 1.2664500406173842, "grad_norm": 0.49133842958144974, "learning_rate": 7.154796511065914e-06, "loss": 0.5523, "step": 1039 }, { "epoch": 1.267668562144598, "grad_norm": 0.47022131838731085, "learning_rate": 7.148392168547191e-06, "loss": 0.5736, "step": 1040 }, { "epoch": 1.2688870836718116, "grad_norm": 0.41386960779050597, "learning_rate": 7.141983500312498e-06, "loss": 0.5529, "step": 1041 }, { "epoch": 1.2701056051990252, "grad_norm": 0.44977069875020453, "learning_rate": 7.135570519265473e-06, "loss": 0.548, "step": 1042 }, { "epoch": 1.2713241267262387, "grad_norm": 0.505607270978524, "learning_rate": 7.129153238318441e-06, "loss": 0.5685, "step": 1043 }, { "epoch": 1.2725426482534525, "grad_norm": 0.4473291490790123, "learning_rate": 7.122731670392381e-06, "loss": 0.5914, "step": 1044 }, { "epoch": 1.2737611697806661, "grad_norm": 0.42761462653683685, "learning_rate": 7.116305828416907e-06, "loss": 0.5596, "step": 1045 }, { "epoch": 1.2749796913078797, "grad_norm": 0.5367569602527996, "learning_rate": 7.109875725330239e-06, "loss": 0.5705, "step": 1046 }, { "epoch": 1.2761982128350935, "grad_norm": 0.4239534982631823, "learning_rate": 7.1034413740791705e-06, "loss": 0.4988, "step": 1047 }, { "epoch": 1.277416734362307, "grad_norm": 0.5193109373280052, "learning_rate": 7.097002787619059e-06, "loss": 0.5812, "step": 1048 }, { "epoch": 1.2786352558895206, "grad_norm": 0.5147411712979314, "learning_rate": 7.090559978913781e-06, "loss": 0.5916, "step": 1049 }, { "epoch": 1.2798537774167342, "grad_norm": 0.4224143215053458, "learning_rate": 7.0841129609357165e-06, "loss": 0.4905, "step": 1050 }, { "epoch": 1.281072298943948, "grad_norm": 0.47217055541643876, "learning_rate": 7.0776617466657196e-06, "loss": 0.5592, "step": 1051 }, { "epoch": 1.2822908204711616, "grad_norm": 0.4826081486423026, "learning_rate": 7.071206349093097e-06, "loss": 0.5822, "step": 1052 }, { "epoch": 1.2835093419983754, "grad_norm": 0.42489592319050484, "learning_rate": 7.064746781215578e-06, "loss": 0.539, "step": 1053 }, { "epoch": 1.284727863525589, "grad_norm": 0.4378036437269882, "learning_rate": 7.058283056039283e-06, "loss": 0.5224, "step": 1054 }, { "epoch": 1.2859463850528026, "grad_norm": 0.5090205584091956, "learning_rate": 7.051815186578711e-06, "loss": 0.6022, "step": 1055 }, { "epoch": 1.2871649065800161, "grad_norm": 0.421460820182392, "learning_rate": 7.045343185856701e-06, "loss": 0.5371, "step": 1056 }, { "epoch": 1.28838342810723, "grad_norm": 0.45568572401745694, "learning_rate": 7.038867066904407e-06, "loss": 0.5549, "step": 1057 }, { "epoch": 1.2896019496344435, "grad_norm": 0.4249363344861208, "learning_rate": 7.032386842761282e-06, "loss": 0.5434, "step": 1058 }, { "epoch": 1.2908204711616573, "grad_norm": 0.4562034562178344, "learning_rate": 7.025902526475039e-06, "loss": 0.5494, "step": 1059 }, { "epoch": 1.292038992688871, "grad_norm": 0.5341880271433396, "learning_rate": 7.0194141311016336e-06, "loss": 0.613, "step": 1060 }, { "epoch": 1.2932575142160845, "grad_norm": 0.4504428137448532, "learning_rate": 7.0129216697052345e-06, "loss": 0.5016, "step": 1061 }, { "epoch": 1.294476035743298, "grad_norm": 0.48710310604219204, "learning_rate": 7.006425155358195e-06, "loss": 0.5966, "step": 1062 }, { "epoch": 1.2956945572705119, "grad_norm": 0.4178638324054384, "learning_rate": 6.99992460114103e-06, "loss": 0.518, "step": 1063 }, { "epoch": 1.2969130787977254, "grad_norm": 0.4592904842250764, "learning_rate": 6.993420020142389e-06, "loss": 0.5731, "step": 1064 }, { "epoch": 1.298131600324939, "grad_norm": 0.44542709276757847, "learning_rate": 6.986911425459028e-06, "loss": 0.5713, "step": 1065 }, { "epoch": 1.2993501218521528, "grad_norm": 0.43271038208431817, "learning_rate": 6.980398830195785e-06, "loss": 0.5394, "step": 1066 }, { "epoch": 1.3005686433793664, "grad_norm": 0.42858083262689106, "learning_rate": 6.9738822474655555e-06, "loss": 0.5593, "step": 1067 }, { "epoch": 1.30178716490658, "grad_norm": 0.45958843226910784, "learning_rate": 6.967361690389258e-06, "loss": 0.6054, "step": 1068 }, { "epoch": 1.3030056864337936, "grad_norm": 0.4289960695158536, "learning_rate": 6.960837172095822e-06, "loss": 0.5548, "step": 1069 }, { "epoch": 1.3042242079610074, "grad_norm": 0.47468738466334404, "learning_rate": 6.954308705722142e-06, "loss": 0.572, "step": 1070 }, { "epoch": 1.305442729488221, "grad_norm": 0.47013938140744177, "learning_rate": 6.947776304413072e-06, "loss": 0.5705, "step": 1071 }, { "epoch": 1.3066612510154347, "grad_norm": 0.42486037624655837, "learning_rate": 6.941239981321379e-06, "loss": 0.5541, "step": 1072 }, { "epoch": 1.3078797725426483, "grad_norm": 0.49246997027712336, "learning_rate": 6.9346997496077365e-06, "loss": 0.5955, "step": 1073 }, { "epoch": 1.309098294069862, "grad_norm": 0.4472253157123058, "learning_rate": 6.92815562244068e-06, "loss": 0.5347, "step": 1074 }, { "epoch": 1.3103168155970755, "grad_norm": 0.4795845777067209, "learning_rate": 6.921607612996591e-06, "loss": 0.544, "step": 1075 }, { "epoch": 1.3115353371242893, "grad_norm": 0.4858592748082412, "learning_rate": 6.915055734459669e-06, "loss": 0.5825, "step": 1076 }, { "epoch": 1.3127538586515028, "grad_norm": 0.440529958757846, "learning_rate": 6.908500000021905e-06, "loss": 0.4894, "step": 1077 }, { "epoch": 1.3139723801787164, "grad_norm": 0.49777302193386763, "learning_rate": 6.9019404228830465e-06, "loss": 0.6143, "step": 1078 }, { "epoch": 1.3151909017059302, "grad_norm": 0.42490987305110145, "learning_rate": 6.895377016250589e-06, "loss": 0.5383, "step": 1079 }, { "epoch": 1.3164094232331438, "grad_norm": 0.4250241686101231, "learning_rate": 6.888809793339729e-06, "loss": 0.5343, "step": 1080 }, { "epoch": 1.3176279447603574, "grad_norm": 0.47908573640303, "learning_rate": 6.882238767373352e-06, "loss": 0.5766, "step": 1081 }, { "epoch": 1.318846466287571, "grad_norm": 0.4393923199378749, "learning_rate": 6.875663951582e-06, "loss": 0.518, "step": 1082 }, { "epoch": 1.3200649878147848, "grad_norm": 0.5298761025962999, "learning_rate": 6.869085359203844e-06, "loss": 0.5687, "step": 1083 }, { "epoch": 1.3212835093419983, "grad_norm": 0.4742825873608696, "learning_rate": 6.862503003484662e-06, "loss": 0.5804, "step": 1084 }, { "epoch": 1.3225020308692121, "grad_norm": 0.4633225475847929, "learning_rate": 6.855916897677806e-06, "loss": 0.556, "step": 1085 }, { "epoch": 1.3237205523964257, "grad_norm": 0.5225783981999679, "learning_rate": 6.849327055044182e-06, "loss": 0.5814, "step": 1086 }, { "epoch": 1.3249390739236393, "grad_norm": 0.4288152429153542, "learning_rate": 6.842733488852218e-06, "loss": 0.5576, "step": 1087 }, { "epoch": 1.3261575954508529, "grad_norm": 0.5221719185878941, "learning_rate": 6.836136212377839e-06, "loss": 0.5535, "step": 1088 }, { "epoch": 1.3273761169780667, "grad_norm": 0.5296939222461858, "learning_rate": 6.82953523890444e-06, "loss": 0.5367, "step": 1089 }, { "epoch": 1.3285946385052803, "grad_norm": 0.4975997883807605, "learning_rate": 6.822930581722864e-06, "loss": 0.5888, "step": 1090 }, { "epoch": 1.3298131600324938, "grad_norm": 0.5680495533922292, "learning_rate": 6.8163222541313646e-06, "loss": 0.5797, "step": 1091 }, { "epoch": 1.3310316815597076, "grad_norm": 0.4587905010305772, "learning_rate": 6.80971026943559e-06, "loss": 0.5202, "step": 1092 }, { "epoch": 1.3322502030869212, "grad_norm": 0.551574996506335, "learning_rate": 6.803094640948553e-06, "loss": 0.5777, "step": 1093 }, { "epoch": 1.3334687246141348, "grad_norm": 0.5703735684360373, "learning_rate": 6.796475381990598e-06, "loss": 0.5764, "step": 1094 }, { "epoch": 1.3346872461413484, "grad_norm": 0.4925036270565778, "learning_rate": 6.789852505889384e-06, "loss": 0.528, "step": 1095 }, { "epoch": 1.3359057676685622, "grad_norm": 0.47585637004253845, "learning_rate": 6.78322602597985e-06, "loss": 0.5379, "step": 1096 }, { "epoch": 1.3371242891957758, "grad_norm": 0.5098349120934949, "learning_rate": 6.776595955604192e-06, "loss": 0.5564, "step": 1097 }, { "epoch": 1.3383428107229896, "grad_norm": 0.45580609051116194, "learning_rate": 6.769962308111839e-06, "loss": 0.5753, "step": 1098 }, { "epoch": 1.3395613322502031, "grad_norm": 0.5171674920493432, "learning_rate": 6.7633250968594145e-06, "loss": 0.5949, "step": 1099 }, { "epoch": 1.3407798537774167, "grad_norm": 0.4877120256762604, "learning_rate": 6.756684335210724e-06, "loss": 0.515, "step": 1100 }, { "epoch": 1.3419983753046303, "grad_norm": 0.4814845112101113, "learning_rate": 6.750040036536718e-06, "loss": 0.5684, "step": 1101 }, { "epoch": 1.343216896831844, "grad_norm": 0.5705372014720597, "learning_rate": 6.743392214215473e-06, "loss": 0.6171, "step": 1102 }, { "epoch": 1.3444354183590577, "grad_norm": 0.41955386315882853, "learning_rate": 6.736740881632156e-06, "loss": 0.5509, "step": 1103 }, { "epoch": 1.3456539398862712, "grad_norm": 0.5028763983027598, "learning_rate": 6.7300860521790034e-06, "loss": 0.5519, "step": 1104 }, { "epoch": 1.346872461413485, "grad_norm": 0.4751712922206779, "learning_rate": 6.723427739255291e-06, "loss": 0.5871, "step": 1105 }, { "epoch": 1.3480909829406986, "grad_norm": 0.44250278427343415, "learning_rate": 6.716765956267313e-06, "loss": 0.5563, "step": 1106 }, { "epoch": 1.3493095044679122, "grad_norm": 0.42447760271061347, "learning_rate": 6.710100716628345e-06, "loss": 0.5246, "step": 1107 }, { "epoch": 1.3505280259951258, "grad_norm": 0.4884332973463199, "learning_rate": 6.7034320337586236e-06, "loss": 0.5906, "step": 1108 }, { "epoch": 1.3517465475223396, "grad_norm": 0.47868995347975324, "learning_rate": 6.696759921085321e-06, "loss": 0.56, "step": 1109 }, { "epoch": 1.3529650690495532, "grad_norm": 0.4891201962969247, "learning_rate": 6.690084392042514e-06, "loss": 0.5387, "step": 1110 }, { "epoch": 1.354183590576767, "grad_norm": 0.4799279297524152, "learning_rate": 6.683405460071158e-06, "loss": 0.5584, "step": 1111 }, { "epoch": 1.3554021121039805, "grad_norm": 0.468730614409241, "learning_rate": 6.676723138619056e-06, "loss": 0.5639, "step": 1112 }, { "epoch": 1.3566206336311941, "grad_norm": 0.4691028535874034, "learning_rate": 6.670037441140844e-06, "loss": 0.5249, "step": 1113 }, { "epoch": 1.3578391551584077, "grad_norm": 0.5055139224683095, "learning_rate": 6.663348381097949e-06, "loss": 0.5668, "step": 1114 }, { "epoch": 1.3590576766856215, "grad_norm": 0.4641835440622289, "learning_rate": 6.656655971958569e-06, "loss": 0.5168, "step": 1115 }, { "epoch": 1.360276198212835, "grad_norm": 0.5446202821644559, "learning_rate": 6.649960227197648e-06, "loss": 0.613, "step": 1116 }, { "epoch": 1.3614947197400489, "grad_norm": 0.4947120887114883, "learning_rate": 6.6432611602968445e-06, "loss": 0.5567, "step": 1117 }, { "epoch": 1.3627132412672625, "grad_norm": 0.43139439093199355, "learning_rate": 6.636558784744507e-06, "loss": 0.5242, "step": 1118 }, { "epoch": 1.363931762794476, "grad_norm": 0.5614131203778131, "learning_rate": 6.629853114035643e-06, "loss": 0.5333, "step": 1119 }, { "epoch": 1.3651502843216896, "grad_norm": 0.47984259019139797, "learning_rate": 6.623144161671899e-06, "loss": 0.6073, "step": 1120 }, { "epoch": 1.3663688058489034, "grad_norm": 0.48772092734746075, "learning_rate": 6.616431941161525e-06, "loss": 0.519, "step": 1121 }, { "epoch": 1.367587327376117, "grad_norm": 0.4944650047147664, "learning_rate": 6.609716466019356e-06, "loss": 0.5982, "step": 1122 }, { "epoch": 1.3688058489033306, "grad_norm": 0.4514730750801606, "learning_rate": 6.602997749766773e-06, "loss": 0.5215, "step": 1123 }, { "epoch": 1.3700243704305444, "grad_norm": 0.4806270554361702, "learning_rate": 6.596275805931691e-06, "loss": 0.6507, "step": 1124 }, { "epoch": 1.371242891957758, "grad_norm": 0.42879599863826967, "learning_rate": 6.589550648048517e-06, "loss": 0.5263, "step": 1125 }, { "epoch": 1.3724614134849715, "grad_norm": 0.5002076010149914, "learning_rate": 6.582822289658134e-06, "loss": 0.544, "step": 1126 }, { "epoch": 1.373679935012185, "grad_norm": 0.49996651647577767, "learning_rate": 6.576090744307866e-06, "loss": 0.6115, "step": 1127 }, { "epoch": 1.374898456539399, "grad_norm": 0.47192523752862847, "learning_rate": 6.569356025551454e-06, "loss": 0.5044, "step": 1128 }, { "epoch": 1.3761169780666125, "grad_norm": 0.5486850848812702, "learning_rate": 6.562618146949033e-06, "loss": 0.5963, "step": 1129 }, { "epoch": 1.3773354995938263, "grad_norm": 0.44516782959863155, "learning_rate": 6.5558771220670935e-06, "loss": 0.5424, "step": 1130 }, { "epoch": 1.3785540211210399, "grad_norm": 0.49271550503516953, "learning_rate": 6.5491329644784655e-06, "loss": 0.5241, "step": 1131 }, { "epoch": 1.3797725426482534, "grad_norm": 0.5660845065509308, "learning_rate": 6.542385687762287e-06, "loss": 0.6154, "step": 1132 }, { "epoch": 1.380991064175467, "grad_norm": 0.4271740206518289, "learning_rate": 6.53563530550397e-06, "loss": 0.4689, "step": 1133 }, { "epoch": 1.3822095857026808, "grad_norm": 0.5195908868358481, "learning_rate": 6.5288818312951886e-06, "loss": 0.5462, "step": 1134 }, { "epoch": 1.3834281072298944, "grad_norm": 0.5034196032593611, "learning_rate": 6.5221252787338365e-06, "loss": 0.587, "step": 1135 }, { "epoch": 1.384646628757108, "grad_norm": 0.5196583715973591, "learning_rate": 6.515365661424007e-06, "loss": 0.577, "step": 1136 }, { "epoch": 1.3858651502843218, "grad_norm": 0.47148796040432117, "learning_rate": 6.508602992975963e-06, "loss": 0.5516, "step": 1137 }, { "epoch": 1.3870836718115354, "grad_norm": 0.47240263639853314, "learning_rate": 6.501837287006112e-06, "loss": 0.5017, "step": 1138 }, { "epoch": 1.388302193338749, "grad_norm": 0.4848195827079731, "learning_rate": 6.495068557136979e-06, "loss": 0.6068, "step": 1139 }, { "epoch": 1.3895207148659625, "grad_norm": 0.464916968432065, "learning_rate": 6.4882968169971734e-06, "loss": 0.5114, "step": 1140 }, { "epoch": 1.3907392363931763, "grad_norm": 0.4672169290921844, "learning_rate": 6.4815220802213705e-06, "loss": 0.571, "step": 1141 }, { "epoch": 1.39195775792039, "grad_norm": 0.45354629086847004, "learning_rate": 6.474744360450274e-06, "loss": 0.559, "step": 1142 }, { "epoch": 1.3931762794476037, "grad_norm": 0.49697460412752753, "learning_rate": 6.467963671330602e-06, "loss": 0.5712, "step": 1143 }, { "epoch": 1.3943948009748173, "grad_norm": 0.42597106705666193, "learning_rate": 6.461180026515038e-06, "loss": 0.4836, "step": 1144 }, { "epoch": 1.3956133225020309, "grad_norm": 0.5696838757187256, "learning_rate": 6.45439343966223e-06, "loss": 0.6293, "step": 1145 }, { "epoch": 1.3968318440292444, "grad_norm": 0.44015111009766694, "learning_rate": 6.447603924436744e-06, "loss": 0.5672, "step": 1146 }, { "epoch": 1.3980503655564582, "grad_norm": 0.5171923824405892, "learning_rate": 6.44081149450904e-06, "loss": 0.543, "step": 1147 }, { "epoch": 1.3992688870836718, "grad_norm": 0.4861104307146921, "learning_rate": 6.434016163555452e-06, "loss": 0.5536, "step": 1148 }, { "epoch": 1.4004874086108854, "grad_norm": 0.4672428316707098, "learning_rate": 6.4272179452581505e-06, "loss": 0.5513, "step": 1149 }, { "epoch": 1.4017059301380992, "grad_norm": 0.5041699642923018, "learning_rate": 6.42041685330512e-06, "loss": 0.5579, "step": 1150 }, { "epoch": 1.4029244516653128, "grad_norm": 0.5689659183656529, "learning_rate": 6.413612901390136e-06, "loss": 0.5171, "step": 1151 }, { "epoch": 1.4041429731925263, "grad_norm": 0.4852858521398993, "learning_rate": 6.406806103212725e-06, "loss": 0.619, "step": 1152 }, { "epoch": 1.40536149471974, "grad_norm": 0.5193110473839417, "learning_rate": 6.39999647247815e-06, "loss": 0.549, "step": 1153 }, { "epoch": 1.4065800162469537, "grad_norm": 0.4769214566829931, "learning_rate": 6.393184022897375e-06, "loss": 0.526, "step": 1154 }, { "epoch": 1.4077985377741673, "grad_norm": 0.4334565428180597, "learning_rate": 6.38636876818704e-06, "loss": 0.5511, "step": 1155 }, { "epoch": 1.409017059301381, "grad_norm": 0.7019468412425452, "learning_rate": 6.3795507220694335e-06, "loss": 0.6058, "step": 1156 }, { "epoch": 1.4102355808285947, "grad_norm": 0.4559746858499104, "learning_rate": 6.372729898272463e-06, "loss": 0.5623, "step": 1157 }, { "epoch": 1.4114541023558083, "grad_norm": 0.4873072450531043, "learning_rate": 6.365906310529631e-06, "loss": 0.526, "step": 1158 }, { "epoch": 1.4126726238830218, "grad_norm": 0.4930877948197653, "learning_rate": 6.359079972580001e-06, "loss": 0.5417, "step": 1159 }, { "epoch": 1.4138911454102356, "grad_norm": 0.4585436187425684, "learning_rate": 6.352250898168181e-06, "loss": 0.5558, "step": 1160 }, { "epoch": 1.4151096669374492, "grad_norm": 0.4843507493218003, "learning_rate": 6.345419101044281e-06, "loss": 0.6178, "step": 1161 }, { "epoch": 1.4163281884646628, "grad_norm": 0.4103238225687281, "learning_rate": 6.338584594963898e-06, "loss": 0.486, "step": 1162 }, { "epoch": 1.4175467099918766, "grad_norm": 0.4339709319145015, "learning_rate": 6.3317473936880814e-06, "loss": 0.5516, "step": 1163 }, { "epoch": 1.4187652315190902, "grad_norm": 0.5006035176222503, "learning_rate": 6.32490751098331e-06, "loss": 0.5893, "step": 1164 }, { "epoch": 1.4199837530463038, "grad_norm": 0.43944118536778887, "learning_rate": 6.318064960621456e-06, "loss": 0.554, "step": 1165 }, { "epoch": 1.4212022745735173, "grad_norm": 0.4205988668702698, "learning_rate": 6.31121975637977e-06, "loss": 0.5705, "step": 1166 }, { "epoch": 1.4224207961007311, "grad_norm": 0.42492946208091176, "learning_rate": 6.30437191204084e-06, "loss": 0.5382, "step": 1167 }, { "epoch": 1.4236393176279447, "grad_norm": 0.4782081072972405, "learning_rate": 6.297521441392572e-06, "loss": 0.6081, "step": 1168 }, { "epoch": 1.4248578391551585, "grad_norm": 0.4056428801301219, "learning_rate": 6.290668358228162e-06, "loss": 0.5448, "step": 1169 }, { "epoch": 1.426076360682372, "grad_norm": 0.4346131086300656, "learning_rate": 6.2838126763460635e-06, "loss": 0.5339, "step": 1170 }, { "epoch": 1.4272948822095857, "grad_norm": 0.4104447709327887, "learning_rate": 6.276954409549963e-06, "loss": 0.5399, "step": 1171 }, { "epoch": 1.4285134037367992, "grad_norm": 0.46444896204069186, "learning_rate": 6.270093571648752e-06, "loss": 0.5941, "step": 1172 }, { "epoch": 1.429731925264013, "grad_norm": 0.4451786529645794, "learning_rate": 6.263230176456497e-06, "loss": 0.5384, "step": 1173 }, { "epoch": 1.4309504467912266, "grad_norm": 0.47981749578622157, "learning_rate": 6.256364237792419e-06, "loss": 0.5765, "step": 1174 }, { "epoch": 1.4321689683184404, "grad_norm": 0.4367054717344673, "learning_rate": 6.249495769480856e-06, "loss": 0.5124, "step": 1175 }, { "epoch": 1.433387489845654, "grad_norm": 0.42899069684022384, "learning_rate": 6.2426247853512355e-06, "loss": 0.5524, "step": 1176 }, { "epoch": 1.4346060113728676, "grad_norm": 0.4904917718170387, "learning_rate": 6.23575129923806e-06, "loss": 0.5613, "step": 1177 }, { "epoch": 1.4358245329000812, "grad_norm": 0.7624825038153906, "learning_rate": 6.228875324980862e-06, "loss": 0.5469, "step": 1178 }, { "epoch": 1.437043054427295, "grad_norm": 0.48032758007828885, "learning_rate": 6.221996876424186e-06, "loss": 0.6088, "step": 1179 }, { "epoch": 1.4382615759545085, "grad_norm": 0.4261676949483954, "learning_rate": 6.21511596741756e-06, "loss": 0.5269, "step": 1180 }, { "epoch": 1.4394800974817221, "grad_norm": 0.44938704101588606, "learning_rate": 6.208232611815463e-06, "loss": 0.5497, "step": 1181 }, { "epoch": 1.440698619008936, "grad_norm": 0.47843420481431187, "learning_rate": 6.2013468234773034e-06, "loss": 0.5673, "step": 1182 }, { "epoch": 1.4419171405361495, "grad_norm": 0.4143118724051908, "learning_rate": 6.194458616267388e-06, "loss": 0.5561, "step": 1183 }, { "epoch": 1.443135662063363, "grad_norm": 0.4687400706518928, "learning_rate": 6.187568004054888e-06, "loss": 0.5599, "step": 1184 }, { "epoch": 1.4443541835905767, "grad_norm": 0.43117586360472987, "learning_rate": 6.180675000713825e-06, "loss": 0.5579, "step": 1185 }, { "epoch": 1.4455727051177905, "grad_norm": 0.4677168526332838, "learning_rate": 6.173779620123028e-06, "loss": 0.5377, "step": 1186 }, { "epoch": 1.446791226645004, "grad_norm": 0.4684613773900322, "learning_rate": 6.166881876166119e-06, "loss": 0.5505, "step": 1187 }, { "epoch": 1.4480097481722178, "grad_norm": 0.45099302981330264, "learning_rate": 6.1599817827314744e-06, "loss": 0.5349, "step": 1188 }, { "epoch": 1.4492282696994314, "grad_norm": 0.44725643758516653, "learning_rate": 6.153079353712201e-06, "loss": 0.5445, "step": 1189 }, { "epoch": 1.450446791226645, "grad_norm": 0.509822041445509, "learning_rate": 6.14617460300611e-06, "loss": 0.6048, "step": 1190 }, { "epoch": 1.4516653127538586, "grad_norm": 0.48251767820083963, "learning_rate": 6.139267544515689e-06, "loss": 0.5214, "step": 1191 }, { "epoch": 1.4528838342810724, "grad_norm": 0.462469865969966, "learning_rate": 6.132358192148065e-06, "loss": 0.5628, "step": 1192 }, { "epoch": 1.454102355808286, "grad_norm": 0.42592720114566546, "learning_rate": 6.125446559814994e-06, "loss": 0.4844, "step": 1193 }, { "epoch": 1.4553208773354995, "grad_norm": 0.49275532261036237, "learning_rate": 6.118532661432812e-06, "loss": 0.5944, "step": 1194 }, { "epoch": 1.4565393988627133, "grad_norm": 0.4649906751266784, "learning_rate": 6.111616510922426e-06, "loss": 0.5493, "step": 1195 }, { "epoch": 1.457757920389927, "grad_norm": 0.46291320623399196, "learning_rate": 6.104698122209274e-06, "loss": 0.5172, "step": 1196 }, { "epoch": 1.4589764419171405, "grad_norm": 0.5426739834419568, "learning_rate": 6.097777509223299e-06, "loss": 0.5666, "step": 1197 }, { "epoch": 1.460194963444354, "grad_norm": 0.45093365966871296, "learning_rate": 6.090854685898928e-06, "loss": 0.5357, "step": 1198 }, { "epoch": 1.4614134849715679, "grad_norm": 0.46357917186858966, "learning_rate": 6.083929666175031e-06, "loss": 0.5102, "step": 1199 }, { "epoch": 1.4626320064987814, "grad_norm": 0.42735860218881255, "learning_rate": 6.077002463994908e-06, "loss": 0.5353, "step": 1200 }, { "epoch": 1.4638505280259952, "grad_norm": 0.48773472737225, "learning_rate": 6.070073093306246e-06, "loss": 0.5969, "step": 1201 }, { "epoch": 1.4650690495532088, "grad_norm": 0.45583834308371346, "learning_rate": 6.063141568061104e-06, "loss": 0.5501, "step": 1202 }, { "epoch": 1.4662875710804224, "grad_norm": 0.48230795906015783, "learning_rate": 6.056207902215874e-06, "loss": 0.5943, "step": 1203 }, { "epoch": 1.467506092607636, "grad_norm": 0.48530024356797447, "learning_rate": 6.049272109731266e-06, "loss": 0.535, "step": 1204 }, { "epoch": 1.4687246141348498, "grad_norm": 0.39847364405399893, "learning_rate": 6.042334204572261e-06, "loss": 0.5088, "step": 1205 }, { "epoch": 1.4699431356620634, "grad_norm": 0.4192802944065179, "learning_rate": 6.035394200708104e-06, "loss": 0.5541, "step": 1206 }, { "epoch": 1.471161657189277, "grad_norm": 0.5095459416726968, "learning_rate": 6.02845211211226e-06, "loss": 0.6044, "step": 1207 }, { "epoch": 1.4723801787164907, "grad_norm": 0.4834365213328995, "learning_rate": 6.021507952762392e-06, "loss": 0.5698, "step": 1208 }, { "epoch": 1.4735987002437043, "grad_norm": 0.43629510532697163, "learning_rate": 6.014561736640334e-06, "loss": 0.536, "step": 1209 }, { "epoch": 1.474817221770918, "grad_norm": 0.469188019208721, "learning_rate": 6.007613477732061e-06, "loss": 0.5495, "step": 1210 }, { "epoch": 1.4760357432981315, "grad_norm": 0.4901471440756352, "learning_rate": 6.000663190027658e-06, "loss": 0.5661, "step": 1211 }, { "epoch": 1.4772542648253453, "grad_norm": 0.4686562631964871, "learning_rate": 5.993710887521302e-06, "loss": 0.5812, "step": 1212 }, { "epoch": 1.4784727863525589, "grad_norm": 0.48734085024012297, "learning_rate": 5.986756584211217e-06, "loss": 0.5335, "step": 1213 }, { "epoch": 1.4796913078797727, "grad_norm": 0.5326878131009583, "learning_rate": 5.979800294099666e-06, "loss": 0.5689, "step": 1214 }, { "epoch": 1.4809098294069862, "grad_norm": 0.4253596342133157, "learning_rate": 5.972842031192901e-06, "loss": 0.5265, "step": 1215 }, { "epoch": 1.4821283509341998, "grad_norm": 0.4985627825685433, "learning_rate": 5.965881809501158e-06, "loss": 0.5632, "step": 1216 }, { "epoch": 1.4833468724614134, "grad_norm": 0.45204140138324095, "learning_rate": 5.958919643038609e-06, "loss": 0.5569, "step": 1217 }, { "epoch": 1.4845653939886272, "grad_norm": 0.4483567522219748, "learning_rate": 5.951955545823342e-06, "loss": 0.5731, "step": 1218 }, { "epoch": 1.4857839155158408, "grad_norm": 0.4426846776302582, "learning_rate": 5.944989531877337e-06, "loss": 0.528, "step": 1219 }, { "epoch": 1.4870024370430543, "grad_norm": 0.44399265576382146, "learning_rate": 5.938021615226431e-06, "loss": 0.5489, "step": 1220 }, { "epoch": 1.4882209585702681, "grad_norm": 0.4581264239244066, "learning_rate": 5.93105180990029e-06, "loss": 0.5794, "step": 1221 }, { "epoch": 1.4894394800974817, "grad_norm": 0.4198932355319627, "learning_rate": 5.924080129932386e-06, "loss": 0.5179, "step": 1222 }, { "epoch": 1.4906580016246953, "grad_norm": 0.47789225286091797, "learning_rate": 5.9171065893599625e-06, "loss": 0.5638, "step": 1223 }, { "epoch": 1.4918765231519089, "grad_norm": 0.4321558944637844, "learning_rate": 5.910131202224011e-06, "loss": 0.5057, "step": 1224 }, { "epoch": 1.4930950446791227, "grad_norm": 0.4450307808888705, "learning_rate": 5.903153982569243e-06, "loss": 0.5421, "step": 1225 }, { "epoch": 1.4943135662063363, "grad_norm": 0.5226652256866916, "learning_rate": 5.8961749444440555e-06, "loss": 0.576, "step": 1226 }, { "epoch": 1.49553208773355, "grad_norm": 0.42973467257152986, "learning_rate": 5.8891941019005095e-06, "loss": 0.6013, "step": 1227 }, { "epoch": 1.4967506092607636, "grad_norm": 0.40820805235816976, "learning_rate": 5.882211468994299e-06, "loss": 0.5175, "step": 1228 }, { "epoch": 1.4979691307879772, "grad_norm": 0.4801201178398426, "learning_rate": 5.87522705978472e-06, "loss": 0.5833, "step": 1229 }, { "epoch": 1.4991876523151908, "grad_norm": 0.45266942815985767, "learning_rate": 5.8682408883346535e-06, "loss": 0.5284, "step": 1230 }, { "epoch": 1.5004061738424046, "grad_norm": 0.4353303644229792, "learning_rate": 5.8612529687105156e-06, "loss": 0.526, "step": 1231 }, { "epoch": 1.5016246953696182, "grad_norm": 0.48827760996687436, "learning_rate": 5.854263314982252e-06, "loss": 0.5955, "step": 1232 }, { "epoch": 1.502843216896832, "grad_norm": 0.4255509928321056, "learning_rate": 5.847271941223301e-06, "loss": 0.5442, "step": 1233 }, { "epoch": 1.5040617384240456, "grad_norm": 0.43808076740492513, "learning_rate": 5.840278861510555e-06, "loss": 0.5433, "step": 1234 }, { "epoch": 1.5052802599512591, "grad_norm": 0.4760892817675488, "learning_rate": 5.83328408992435e-06, "loss": 0.5702, "step": 1235 }, { "epoch": 1.5064987814784727, "grad_norm": 0.4481600783932247, "learning_rate": 5.826287640548425e-06, "loss": 0.5946, "step": 1236 }, { "epoch": 1.5077173030056863, "grad_norm": 0.42699536589107157, "learning_rate": 5.819289527469897e-06, "loss": 0.5642, "step": 1237 }, { "epoch": 1.5089358245329, "grad_norm": 0.4633211620631564, "learning_rate": 5.812289764779232e-06, "loss": 0.4845, "step": 1238 }, { "epoch": 1.510154346060114, "grad_norm": 0.4571770115639661, "learning_rate": 5.80528836657022e-06, "loss": 0.5513, "step": 1239 }, { "epoch": 1.5113728675873275, "grad_norm": 0.4701604947751102, "learning_rate": 5.798285346939942e-06, "loss": 0.559, "step": 1240 }, { "epoch": 1.512591389114541, "grad_norm": 0.4865903026982408, "learning_rate": 5.791280719988747e-06, "loss": 0.5878, "step": 1241 }, { "epoch": 1.5138099106417546, "grad_norm": 0.4412282841651163, "learning_rate": 5.784274499820214e-06, "loss": 0.5197, "step": 1242 }, { "epoch": 1.5150284321689682, "grad_norm": 0.5747443137859876, "learning_rate": 5.777266700541134e-06, "loss": 0.6011, "step": 1243 }, { "epoch": 1.516246953696182, "grad_norm": 0.4564303892112499, "learning_rate": 5.770257336261482e-06, "loss": 0.5279, "step": 1244 }, { "epoch": 1.5174654752233956, "grad_norm": 0.45997367471162187, "learning_rate": 5.763246421094373e-06, "loss": 0.5255, "step": 1245 }, { "epoch": 1.5186839967506094, "grad_norm": 0.4695480650402549, "learning_rate": 5.7562339691560556e-06, "loss": 0.5885, "step": 1246 }, { "epoch": 1.519902518277823, "grad_norm": 0.5356612979245375, "learning_rate": 5.749219994565863e-06, "loss": 0.5569, "step": 1247 }, { "epoch": 1.5211210398050365, "grad_norm": 0.5813954013182587, "learning_rate": 5.742204511446203e-06, "loss": 0.5544, "step": 1248 }, { "epoch": 1.5223395613322501, "grad_norm": 0.43618938610834346, "learning_rate": 5.7351875339225164e-06, "loss": 0.5374, "step": 1249 }, { "epoch": 1.5235580828594637, "grad_norm": 0.4937073666394837, "learning_rate": 5.7281690761232515e-06, "loss": 0.5162, "step": 1250 }, { "epoch": 1.5247766043866775, "grad_norm": 0.4780704238400619, "learning_rate": 5.72114915217984e-06, "loss": 0.542, "step": 1251 }, { "epoch": 1.5259951259138913, "grad_norm": 0.458787226662822, "learning_rate": 5.714127776226667e-06, "loss": 0.5708, "step": 1252 }, { "epoch": 1.5272136474411049, "grad_norm": 0.4727970564003603, "learning_rate": 5.707104962401034e-06, "loss": 0.5678, "step": 1253 }, { "epoch": 1.5284321689683185, "grad_norm": 0.42019947987975415, "learning_rate": 5.7000807248431466e-06, "loss": 0.4449, "step": 1254 }, { "epoch": 1.529650690495532, "grad_norm": 0.5313576192243948, "learning_rate": 5.693055077696069e-06, "loss": 0.62, "step": 1255 }, { "epoch": 1.5308692120227456, "grad_norm": 0.4133150947222481, "learning_rate": 5.686028035105711e-06, "loss": 0.5446, "step": 1256 }, { "epoch": 1.5320877335499594, "grad_norm": 0.5182558216138413, "learning_rate": 5.6789996112207865e-06, "loss": 0.5589, "step": 1257 }, { "epoch": 1.533306255077173, "grad_norm": 0.5235310986043601, "learning_rate": 5.671969820192794e-06, "loss": 0.5516, "step": 1258 }, { "epoch": 1.5345247766043868, "grad_norm": 0.43543733715186, "learning_rate": 5.664938676175982e-06, "loss": 0.5463, "step": 1259 }, { "epoch": 1.5357432981316004, "grad_norm": 0.542523163223611, "learning_rate": 5.657906193327325e-06, "loss": 0.5289, "step": 1260 }, { "epoch": 1.536961819658814, "grad_norm": 0.6705586961902954, "learning_rate": 5.650872385806492e-06, "loss": 0.6, "step": 1261 }, { "epoch": 1.5381803411860275, "grad_norm": 0.4252405119039053, "learning_rate": 5.64383726777582e-06, "loss": 0.5558, "step": 1262 }, { "epoch": 1.5393988627132411, "grad_norm": 0.5168792668343379, "learning_rate": 5.636800853400285e-06, "loss": 0.5427, "step": 1263 }, { "epoch": 1.540617384240455, "grad_norm": 0.56808607878734, "learning_rate": 5.6297631568474705e-06, "loss": 0.5785, "step": 1264 }, { "epoch": 1.5418359057676687, "grad_norm": 0.4194312889852155, "learning_rate": 5.622724192287548e-06, "loss": 0.5061, "step": 1265 }, { "epoch": 1.5430544272948823, "grad_norm": 0.46739113422443607, "learning_rate": 5.615683973893235e-06, "loss": 0.5543, "step": 1266 }, { "epoch": 1.5442729488220959, "grad_norm": 0.4711436329274137, "learning_rate": 5.608642515839777e-06, "loss": 0.5468, "step": 1267 }, { "epoch": 1.5454914703493094, "grad_norm": 0.45925976085714865, "learning_rate": 5.601599832304915e-06, "loss": 0.5533, "step": 1268 }, { "epoch": 1.546709991876523, "grad_norm": 0.4629430310984532, "learning_rate": 5.594555937468856e-06, "loss": 0.6238, "step": 1269 }, { "epoch": 1.5479285134037368, "grad_norm": 0.4430875583116207, "learning_rate": 5.587510845514249e-06, "loss": 0.5334, "step": 1270 }, { "epoch": 1.5491470349309504, "grad_norm": 0.4964005647402626, "learning_rate": 5.5804645706261515e-06, "loss": 0.5563, "step": 1271 }, { "epoch": 1.5503655564581642, "grad_norm": 0.47908339446690995, "learning_rate": 5.573417126992004e-06, "loss": 0.5761, "step": 1272 }, { "epoch": 1.5515840779853778, "grad_norm": 0.4320719099995596, "learning_rate": 5.5663685288015955e-06, "loss": 0.5519, "step": 1273 }, { "epoch": 1.5528025995125914, "grad_norm": 0.45893470814872167, "learning_rate": 5.5593187902470465e-06, "loss": 0.5122, "step": 1274 }, { "epoch": 1.554021121039805, "grad_norm": 0.47949830848407404, "learning_rate": 5.55226792552277e-06, "loss": 0.5839, "step": 1275 }, { "epoch": 1.5552396425670185, "grad_norm": 0.415731009852519, "learning_rate": 5.545215948825447e-06, "loss": 0.5378, "step": 1276 }, { "epoch": 1.5564581640942323, "grad_norm": 0.466056698108541, "learning_rate": 5.538162874353994e-06, "loss": 0.4983, "step": 1277 }, { "epoch": 1.5576766856214461, "grad_norm": 0.5916240577351891, "learning_rate": 5.5311087163095475e-06, "loss": 0.6251, "step": 1278 }, { "epoch": 1.5588952071486597, "grad_norm": 0.44367509738450317, "learning_rate": 5.524053488895413e-06, "loss": 0.5488, "step": 1279 }, { "epoch": 1.5601137286758733, "grad_norm": 0.47062048808194906, "learning_rate": 5.516997206317061e-06, "loss": 0.5563, "step": 1280 }, { "epoch": 1.5613322502030869, "grad_norm": 0.5420478722656378, "learning_rate": 5.509939882782077e-06, "loss": 0.5416, "step": 1281 }, { "epoch": 1.5625507717303004, "grad_norm": 0.5222284367927739, "learning_rate": 5.502881532500149e-06, "loss": 0.5965, "step": 1282 }, { "epoch": 1.5637692932575142, "grad_norm": 0.42208342526415327, "learning_rate": 5.49582216968303e-06, "loss": 0.5467, "step": 1283 }, { "epoch": 1.5649878147847278, "grad_norm": 0.4294650898913376, "learning_rate": 5.4887618085445094e-06, "loss": 0.5287, "step": 1284 }, { "epoch": 1.5662063363119416, "grad_norm": 0.46855647055671784, "learning_rate": 5.48170046330039e-06, "loss": 0.5628, "step": 1285 }, { "epoch": 1.5674248578391552, "grad_norm": 0.4699651333558714, "learning_rate": 5.474638148168456e-06, "loss": 0.5574, "step": 1286 }, { "epoch": 1.5686433793663688, "grad_norm": 0.5135379339848296, "learning_rate": 5.467574877368441e-06, "loss": 0.547, "step": 1287 }, { "epoch": 1.5698619008935824, "grad_norm": 0.4810680839376017, "learning_rate": 5.460510665122007e-06, "loss": 0.557, "step": 1288 }, { "epoch": 1.5710804224207962, "grad_norm": 0.4098166771088161, "learning_rate": 5.453445525652711e-06, "loss": 0.5418, "step": 1289 }, { "epoch": 1.5722989439480097, "grad_norm": 0.450215288957951, "learning_rate": 5.446379473185972e-06, "loss": 0.5357, "step": 1290 }, { "epoch": 1.5735174654752235, "grad_norm": 0.5294521431799539, "learning_rate": 5.4393125219490536e-06, "loss": 0.5643, "step": 1291 }, { "epoch": 1.574735987002437, "grad_norm": 0.4592328236388863, "learning_rate": 5.432244686171025e-06, "loss": 0.5579, "step": 1292 }, { "epoch": 1.5759545085296507, "grad_norm": 0.43283051916010107, "learning_rate": 5.42517598008274e-06, "loss": 0.5045, "step": 1293 }, { "epoch": 1.5771730300568643, "grad_norm": 0.5659434705667795, "learning_rate": 5.418106417916799e-06, "loss": 0.6214, "step": 1294 }, { "epoch": 1.5783915515840778, "grad_norm": 0.43767902318474483, "learning_rate": 5.411036013907534e-06, "loss": 0.4785, "step": 1295 }, { "epoch": 1.5796100731112916, "grad_norm": 0.49107247160929135, "learning_rate": 5.403964782290962e-06, "loss": 0.6033, "step": 1296 }, { "epoch": 1.5808285946385054, "grad_norm": 0.4941184832970728, "learning_rate": 5.396892737304779e-06, "loss": 0.5625, "step": 1297 }, { "epoch": 1.582047116165719, "grad_norm": 0.45207210705440176, "learning_rate": 5.389819893188304e-06, "loss": 0.5955, "step": 1298 }, { "epoch": 1.5832656376929326, "grad_norm": 0.41624551022025036, "learning_rate": 5.38274626418248e-06, "loss": 0.4859, "step": 1299 }, { "epoch": 1.5844841592201462, "grad_norm": 0.5355211526596017, "learning_rate": 5.375671864529817e-06, "loss": 0.5847, "step": 1300 }, { "epoch": 1.5857026807473598, "grad_norm": 0.4975201469339488, "learning_rate": 5.368596708474388e-06, "loss": 0.5338, "step": 1301 }, { "epoch": 1.5869212022745736, "grad_norm": 0.4863357216575736, "learning_rate": 5.361520810261779e-06, "loss": 0.5535, "step": 1302 }, { "epoch": 1.5881397238017871, "grad_norm": 0.4458515473467672, "learning_rate": 5.354444184139077e-06, "loss": 0.5457, "step": 1303 }, { "epoch": 1.589358245329001, "grad_norm": 0.4614906452198629, "learning_rate": 5.347366844354833e-06, "loss": 0.5398, "step": 1304 }, { "epoch": 1.5905767668562145, "grad_norm": 0.4685010422012627, "learning_rate": 5.340288805159037e-06, "loss": 0.5407, "step": 1305 }, { "epoch": 1.591795288383428, "grad_norm": 0.48804182586096323, "learning_rate": 5.33321008080308e-06, "loss": 0.547, "step": 1306 }, { "epoch": 1.5930138099106417, "grad_norm": 0.44694564705893386, "learning_rate": 5.3261306855397395e-06, "loss": 0.5459, "step": 1307 }, { "epoch": 1.5942323314378553, "grad_norm": 0.4139859944920655, "learning_rate": 5.319050633623141e-06, "loss": 0.5519, "step": 1308 }, { "epoch": 1.595450852965069, "grad_norm": 0.5097755056565069, "learning_rate": 5.311969939308736e-06, "loss": 0.5901, "step": 1309 }, { "epoch": 1.5966693744922829, "grad_norm": 0.47592489399723925, "learning_rate": 5.304888616853265e-06, "loss": 0.5324, "step": 1310 }, { "epoch": 1.5978878960194964, "grad_norm": 0.4276883892776071, "learning_rate": 5.297806680514731e-06, "loss": 0.5106, "step": 1311 }, { "epoch": 1.59910641754671, "grad_norm": 0.4681244968477927, "learning_rate": 5.290724144552379e-06, "loss": 0.6054, "step": 1312 }, { "epoch": 1.6003249390739236, "grad_norm": 0.4896701927637777, "learning_rate": 5.283641023226661e-06, "loss": 0.5455, "step": 1313 }, { "epoch": 1.6015434606011372, "grad_norm": 0.4245053792739156, "learning_rate": 5.276557330799203e-06, "loss": 0.5471, "step": 1314 }, { "epoch": 1.602761982128351, "grad_norm": 0.4874649206218259, "learning_rate": 5.269473081532785e-06, "loss": 0.5782, "step": 1315 }, { "epoch": 1.6039805036555645, "grad_norm": 0.47549962008011226, "learning_rate": 5.262388289691303e-06, "loss": 0.575, "step": 1316 }, { "epoch": 1.6051990251827783, "grad_norm": 0.42642213924678707, "learning_rate": 5.255302969539753e-06, "loss": 0.5805, "step": 1317 }, { "epoch": 1.606417546709992, "grad_norm": 0.42684200856960786, "learning_rate": 5.248217135344191e-06, "loss": 0.5072, "step": 1318 }, { "epoch": 1.6076360682372055, "grad_norm": 0.4365701459872912, "learning_rate": 5.241130801371704e-06, "loss": 0.5658, "step": 1319 }, { "epoch": 1.608854589764419, "grad_norm": 0.42471390001052695, "learning_rate": 5.234043981890395e-06, "loss": 0.5698, "step": 1320 }, { "epoch": 1.6100731112916327, "grad_norm": 0.4535238587027896, "learning_rate": 5.226956691169332e-06, "loss": 0.5839, "step": 1321 }, { "epoch": 1.6112916328188465, "grad_norm": 0.4247946464572348, "learning_rate": 5.219868943478542e-06, "loss": 0.5577, "step": 1322 }, { "epoch": 1.6125101543460603, "grad_norm": 0.43376338736220743, "learning_rate": 5.212780753088968e-06, "loss": 0.5449, "step": 1323 }, { "epoch": 1.6137286758732738, "grad_norm": 0.4061841147634886, "learning_rate": 5.205692134272445e-06, "loss": 0.5179, "step": 1324 }, { "epoch": 1.6149471974004874, "grad_norm": 0.4596996267175098, "learning_rate": 5.1986031013016706e-06, "loss": 0.5818, "step": 1325 }, { "epoch": 1.616165718927701, "grad_norm": 0.43123766272618486, "learning_rate": 5.191513668450178e-06, "loss": 0.5687, "step": 1326 }, { "epoch": 1.6173842404549146, "grad_norm": 0.4329937345499755, "learning_rate": 5.184423849992299e-06, "loss": 0.5348, "step": 1327 }, { "epoch": 1.6186027619821284, "grad_norm": 0.49663961496101067, "learning_rate": 5.177333660203153e-06, "loss": 0.5956, "step": 1328 }, { "epoch": 1.619821283509342, "grad_norm": 0.3924685962518714, "learning_rate": 5.170243113358594e-06, "loss": 0.5125, "step": 1329 }, { "epoch": 1.6210398050365558, "grad_norm": 0.4856207429888876, "learning_rate": 5.163152223735206e-06, "loss": 0.5778, "step": 1330 }, { "epoch": 1.6222583265637693, "grad_norm": 0.45002527423182, "learning_rate": 5.156061005610258e-06, "loss": 0.5584, "step": 1331 }, { "epoch": 1.623476848090983, "grad_norm": 0.4310106517218945, "learning_rate": 5.1489694732616805e-06, "loss": 0.5377, "step": 1332 }, { "epoch": 1.6246953696181965, "grad_norm": 0.49448879444066074, "learning_rate": 5.141877640968037e-06, "loss": 0.623, "step": 1333 }, { "epoch": 1.62591389114541, "grad_norm": 0.40362533961876157, "learning_rate": 5.134785523008496e-06, "loss": 0.5014, "step": 1334 }, { "epoch": 1.6271324126726239, "grad_norm": 0.4269483197368071, "learning_rate": 5.127693133662801e-06, "loss": 0.573, "step": 1335 }, { "epoch": 1.6283509341998377, "grad_norm": 0.4258879503760348, "learning_rate": 5.12060048721124e-06, "loss": 0.5314, "step": 1336 }, { "epoch": 1.6295694557270513, "grad_norm": 0.44120462268057764, "learning_rate": 5.11350759793462e-06, "loss": 0.5373, "step": 1337 }, { "epoch": 1.6307879772542648, "grad_norm": 0.4276083907367786, "learning_rate": 5.106414480114238e-06, "loss": 0.5276, "step": 1338 }, { "epoch": 1.6320064987814784, "grad_norm": 0.4517524664721021, "learning_rate": 5.099321148031851e-06, "loss": 0.5504, "step": 1339 }, { "epoch": 1.633225020308692, "grad_norm": 0.44913374968040776, "learning_rate": 5.092227615969643e-06, "loss": 0.553, "step": 1340 }, { "epoch": 1.6344435418359058, "grad_norm": 0.49845971138611844, "learning_rate": 5.085133898210208e-06, "loss": 0.5653, "step": 1341 }, { "epoch": 1.6356620633631194, "grad_norm": 0.4427260322632497, "learning_rate": 5.078040009036509e-06, "loss": 0.5213, "step": 1342 }, { "epoch": 1.6368805848903332, "grad_norm": 0.4177253316358593, "learning_rate": 5.070945962731854e-06, "loss": 0.5397, "step": 1343 }, { "epoch": 1.6380991064175467, "grad_norm": 0.47651126334983296, "learning_rate": 5.06385177357987e-06, "loss": 0.5595, "step": 1344 }, { "epoch": 1.6393176279447603, "grad_norm": 0.5627892918210755, "learning_rate": 5.056757455864469e-06, "loss": 0.6096, "step": 1345 }, { "epoch": 1.640536149471974, "grad_norm": 0.44180856064958623, "learning_rate": 5.049663023869824e-06, "loss": 0.5025, "step": 1346 }, { "epoch": 1.6417546709991877, "grad_norm": 0.460979656039155, "learning_rate": 5.042568491880338e-06, "loss": 0.5982, "step": 1347 }, { "epoch": 1.6429731925264013, "grad_norm": 0.4821324897781787, "learning_rate": 5.035473874180612e-06, "loss": 0.5598, "step": 1348 }, { "epoch": 1.644191714053615, "grad_norm": 0.45517260087056105, "learning_rate": 5.028379185055424e-06, "loss": 0.5246, "step": 1349 }, { "epoch": 1.6454102355808287, "grad_norm": 0.4413055629736707, "learning_rate": 5.021284438789694e-06, "loss": 0.5341, "step": 1350 }, { "epoch": 1.6466287571080422, "grad_norm": 0.4614955719864221, "learning_rate": 5.014189649668456e-06, "loss": 0.5578, "step": 1351 }, { "epoch": 1.6478472786352558, "grad_norm": 0.4953936356649888, "learning_rate": 5.007094831976832e-06, "loss": 0.5765, "step": 1352 }, { "epoch": 1.6490658001624694, "grad_norm": 0.39648893153136167, "learning_rate": 5e-06, "loss": 0.5342, "step": 1353 }, { "epoch": 1.6502843216896832, "grad_norm": 0.43855043725681864, "learning_rate": 4.992905168023169e-06, "loss": 0.543, "step": 1354 }, { "epoch": 1.6515028432168968, "grad_norm": 0.5301209980205615, "learning_rate": 4.985810350331544e-06, "loss": 0.6293, "step": 1355 }, { "epoch": 1.6527213647441106, "grad_norm": 0.38590596359640195, "learning_rate": 4.9787155612103076e-06, "loss": 0.5296, "step": 1356 }, { "epoch": 1.6539398862713242, "grad_norm": 0.42738095322238806, "learning_rate": 4.9716208149445776e-06, "loss": 0.5308, "step": 1357 }, { "epoch": 1.6551584077985377, "grad_norm": 0.4555041349632123, "learning_rate": 4.96452612581939e-06, "loss": 0.5788, "step": 1358 }, { "epoch": 1.6563769293257513, "grad_norm": 0.4558921081759917, "learning_rate": 4.9574315081196634e-06, "loss": 0.5609, "step": 1359 }, { "epoch": 1.6575954508529651, "grad_norm": 0.4503929824518257, "learning_rate": 4.950336976130176e-06, "loss": 0.5341, "step": 1360 }, { "epoch": 1.6588139723801787, "grad_norm": 0.43711031728275695, "learning_rate": 4.9432425441355334e-06, "loss": 0.5793, "step": 1361 }, { "epoch": 1.6600324939073925, "grad_norm": 0.39568528756580684, "learning_rate": 4.936148226420133e-06, "loss": 0.5069, "step": 1362 }, { "epoch": 1.661251015434606, "grad_norm": 0.4309659404250017, "learning_rate": 4.929054037268147e-06, "loss": 0.5872, "step": 1363 }, { "epoch": 1.6624695369618196, "grad_norm": 0.482908985444469, "learning_rate": 4.921959990963493e-06, "loss": 0.5583, "step": 1364 }, { "epoch": 1.6636880584890332, "grad_norm": 0.4133363420753277, "learning_rate": 4.914866101789793e-06, "loss": 0.484, "step": 1365 }, { "epoch": 1.6649065800162468, "grad_norm": 0.46336848283664533, "learning_rate": 4.907772384030357e-06, "loss": 0.6055, "step": 1366 }, { "epoch": 1.6661251015434606, "grad_norm": 0.4021280914849084, "learning_rate": 4.900678851968152e-06, "loss": 0.4953, "step": 1367 }, { "epoch": 1.6673436230706744, "grad_norm": 0.4496122068891948, "learning_rate": 4.893585519885764e-06, "loss": 0.5631, "step": 1368 }, { "epoch": 1.668562144597888, "grad_norm": 0.4386416975070193, "learning_rate": 4.886492402065381e-06, "loss": 0.5632, "step": 1369 }, { "epoch": 1.6697806661251016, "grad_norm": 0.4335033691327931, "learning_rate": 4.8793995127887615e-06, "loss": 0.5377, "step": 1370 }, { "epoch": 1.6709991876523151, "grad_norm": 0.4639132609070873, "learning_rate": 4.8723068663372005e-06, "loss": 0.5658, "step": 1371 }, { "epoch": 1.6722177091795287, "grad_norm": 0.4186533135703324, "learning_rate": 4.865214476991506e-06, "loss": 0.538, "step": 1372 }, { "epoch": 1.6734362307067425, "grad_norm": 0.5100673554858591, "learning_rate": 4.858122359031964e-06, "loss": 0.5977, "step": 1373 }, { "epoch": 1.674654752233956, "grad_norm": 0.4284001466166066, "learning_rate": 4.851030526738321e-06, "loss": 0.5325, "step": 1374 }, { "epoch": 1.67587327376117, "grad_norm": 0.4048773843920905, "learning_rate": 4.843938994389744e-06, "loss": 0.4975, "step": 1375 }, { "epoch": 1.6770917952883835, "grad_norm": 0.4074001135895807, "learning_rate": 4.836847776264794e-06, "loss": 0.5762, "step": 1376 }, { "epoch": 1.678310316815597, "grad_norm": 0.41740364142746117, "learning_rate": 4.829756886641408e-06, "loss": 0.5731, "step": 1377 }, { "epoch": 1.6795288383428106, "grad_norm": 0.4812773839220182, "learning_rate": 4.82266633979685e-06, "loss": 0.5849, "step": 1378 }, { "epoch": 1.6807473598700242, "grad_norm": 0.39560445425868235, "learning_rate": 4.815576150007702e-06, "loss": 0.4699, "step": 1379 }, { "epoch": 1.681965881397238, "grad_norm": 0.4414471548591453, "learning_rate": 4.808486331549824e-06, "loss": 0.5626, "step": 1380 }, { "epoch": 1.6831844029244518, "grad_norm": 0.38187499198826846, "learning_rate": 4.801396898698329e-06, "loss": 0.5071, "step": 1381 }, { "epoch": 1.6844029244516654, "grad_norm": 0.4892251033230591, "learning_rate": 4.794307865727555e-06, "loss": 0.5552, "step": 1382 }, { "epoch": 1.685621445978879, "grad_norm": 0.482903388217794, "learning_rate": 4.787219246911034e-06, "loss": 0.5492, "step": 1383 }, { "epoch": 1.6868399675060926, "grad_norm": 0.45801551724996875, "learning_rate": 4.78013105652146e-06, "loss": 0.5838, "step": 1384 }, { "epoch": 1.6880584890333061, "grad_norm": 0.42866796779932836, "learning_rate": 4.77304330883067e-06, "loss": 0.5085, "step": 1385 }, { "epoch": 1.68927701056052, "grad_norm": 0.4475021559493066, "learning_rate": 4.765956018109607e-06, "loss": 0.5505, "step": 1386 }, { "epoch": 1.6904955320877335, "grad_norm": 0.4697192585313218, "learning_rate": 4.758869198628296e-06, "loss": 0.5479, "step": 1387 }, { "epoch": 1.6917140536149473, "grad_norm": 0.465791753930643, "learning_rate": 4.7517828646558115e-06, "loss": 0.56, "step": 1388 }, { "epoch": 1.692932575142161, "grad_norm": 0.4015038202394012, "learning_rate": 4.744697030460248e-06, "loss": 0.5492, "step": 1389 }, { "epoch": 1.6941510966693745, "grad_norm": 0.5232226854854597, "learning_rate": 4.7376117103086974e-06, "loss": 0.5464, "step": 1390 }, { "epoch": 1.695369618196588, "grad_norm": 0.4518351945360455, "learning_rate": 4.730526918467217e-06, "loss": 0.533, "step": 1391 }, { "epoch": 1.6965881397238016, "grad_norm": 0.4769324521614458, "learning_rate": 4.7234426692007985e-06, "loss": 0.6265, "step": 1392 }, { "epoch": 1.6978066612510154, "grad_norm": 0.39722631525643654, "learning_rate": 4.716358976773342e-06, "loss": 0.4616, "step": 1393 }, { "epoch": 1.6990251827782292, "grad_norm": 0.5143439560883679, "learning_rate": 4.7092758554476215e-06, "loss": 0.5927, "step": 1394 }, { "epoch": 1.7002437043054428, "grad_norm": 0.4893384326011186, "learning_rate": 4.702193319485271e-06, "loss": 0.581, "step": 1395 }, { "epoch": 1.7014622258326564, "grad_norm": 0.40330674171655206, "learning_rate": 4.695111383146738e-06, "loss": 0.5152, "step": 1396 }, { "epoch": 1.70268074735987, "grad_norm": 0.4812638116299566, "learning_rate": 4.688030060691264e-06, "loss": 0.6068, "step": 1397 }, { "epoch": 1.7038992688870835, "grad_norm": 0.42808075960070036, "learning_rate": 4.680949366376858e-06, "loss": 0.5232, "step": 1398 }, { "epoch": 1.7051177904142973, "grad_norm": 0.4186184139760809, "learning_rate": 4.673869314460262e-06, "loss": 0.5375, "step": 1399 }, { "epoch": 1.706336311941511, "grad_norm": 0.4351340155979422, "learning_rate": 4.666789919196923e-06, "loss": 0.5493, "step": 1400 }, { "epoch": 1.7075548334687247, "grad_norm": 0.5600164408896984, "learning_rate": 4.659711194840964e-06, "loss": 0.587, "step": 1401 }, { "epoch": 1.7087733549959383, "grad_norm": 0.43365827783641364, "learning_rate": 4.6526331556451674e-06, "loss": 0.519, "step": 1402 }, { "epoch": 1.7099918765231519, "grad_norm": 0.44015645831753214, "learning_rate": 4.645555815860923e-06, "loss": 0.5523, "step": 1403 }, { "epoch": 1.7112103980503655, "grad_norm": 0.4552471646368589, "learning_rate": 4.638479189738224e-06, "loss": 0.5404, "step": 1404 }, { "epoch": 1.7124289195775793, "grad_norm": 0.4535728417437257, "learning_rate": 4.631403291525615e-06, "loss": 0.5368, "step": 1405 }, { "epoch": 1.7136474411047928, "grad_norm": 0.4734624014107752, "learning_rate": 4.624328135470184e-06, "loss": 0.5778, "step": 1406 }, { "epoch": 1.7148659626320066, "grad_norm": 0.4934447889274217, "learning_rate": 4.617253735817522e-06, "loss": 0.5476, "step": 1407 }, { "epoch": 1.7160844841592202, "grad_norm": 0.4984539363836997, "learning_rate": 4.610180106811696e-06, "loss": 0.5649, "step": 1408 }, { "epoch": 1.7173030056864338, "grad_norm": 0.4848858968212611, "learning_rate": 4.603107262695225e-06, "loss": 0.5111, "step": 1409 }, { "epoch": 1.7185215272136474, "grad_norm": 0.47036832640121645, "learning_rate": 4.596035217709039e-06, "loss": 0.5948, "step": 1410 }, { "epoch": 1.719740048740861, "grad_norm": 0.44168165703224904, "learning_rate": 4.588963986092468e-06, "loss": 0.5941, "step": 1411 }, { "epoch": 1.7209585702680747, "grad_norm": 0.39666220117961165, "learning_rate": 4.5818935820832014e-06, "loss": 0.4913, "step": 1412 }, { "epoch": 1.7221770917952883, "grad_norm": 0.5025801254491269, "learning_rate": 4.574824019917262e-06, "loss": 0.5932, "step": 1413 }, { "epoch": 1.7233956133225021, "grad_norm": 0.3845664023510723, "learning_rate": 4.5677553138289764e-06, "loss": 0.5369, "step": 1414 }, { "epoch": 1.7246141348497157, "grad_norm": 0.42320355598590065, "learning_rate": 4.560687478050947e-06, "loss": 0.5294, "step": 1415 }, { "epoch": 1.7258326563769293, "grad_norm": 0.4096157422530506, "learning_rate": 4.553620526814029e-06, "loss": 0.519, "step": 1416 }, { "epoch": 1.7270511779041429, "grad_norm": 0.48631875630001814, "learning_rate": 4.546554474347291e-06, "loss": 0.6101, "step": 1417 }, { "epoch": 1.7282696994313567, "grad_norm": 0.4768787594020578, "learning_rate": 4.539489334877992e-06, "loss": 0.5629, "step": 1418 }, { "epoch": 1.7294882209585702, "grad_norm": 0.41978448851594347, "learning_rate": 4.532425122631559e-06, "loss": 0.5365, "step": 1419 }, { "epoch": 1.730706742485784, "grad_norm": 0.4298141402145644, "learning_rate": 4.5253618518315455e-06, "loss": 0.5346, "step": 1420 }, { "epoch": 1.7319252640129976, "grad_norm": 0.43330287443239485, "learning_rate": 4.5182995366996115e-06, "loss": 0.565, "step": 1421 }, { "epoch": 1.7331437855402112, "grad_norm": 0.4618063094916825, "learning_rate": 4.511238191455491e-06, "loss": 0.5669, "step": 1422 }, { "epoch": 1.7343623070674248, "grad_norm": 0.4330349372337, "learning_rate": 4.504177830316971e-06, "loss": 0.5563, "step": 1423 }, { "epoch": 1.7355808285946384, "grad_norm": 0.4061046490367817, "learning_rate": 4.497118467499852e-06, "loss": 0.5371, "step": 1424 }, { "epoch": 1.7367993501218522, "grad_norm": 0.4524064658816882, "learning_rate": 4.490060117217925e-06, "loss": 0.5273, "step": 1425 }, { "epoch": 1.738017871649066, "grad_norm": 0.4153684807216417, "learning_rate": 4.483002793682941e-06, "loss": 0.5202, "step": 1426 }, { "epoch": 1.7392363931762795, "grad_norm": 0.5126499568306361, "learning_rate": 4.475946511104588e-06, "loss": 0.5964, "step": 1427 }, { "epoch": 1.7404549147034931, "grad_norm": 0.442175693450011, "learning_rate": 4.468891283690454e-06, "loss": 0.514, "step": 1428 }, { "epoch": 1.7416734362307067, "grad_norm": 0.421309384527005, "learning_rate": 4.461837125646007e-06, "loss": 0.6091, "step": 1429 }, { "epoch": 1.7428919577579203, "grad_norm": 0.4380243684629681, "learning_rate": 4.4547840511745565e-06, "loss": 0.4913, "step": 1430 }, { "epoch": 1.744110479285134, "grad_norm": 0.4812216276097867, "learning_rate": 4.447732074477233e-06, "loss": 0.5582, "step": 1431 }, { "epoch": 1.7453290008123477, "grad_norm": 0.40488056766666325, "learning_rate": 4.440681209752955e-06, "loss": 0.5758, "step": 1432 }, { "epoch": 1.7465475223395615, "grad_norm": 0.4732265653920416, "learning_rate": 4.433631471198406e-06, "loss": 0.5962, "step": 1433 }, { "epoch": 1.747766043866775, "grad_norm": 0.42539261148413177, "learning_rate": 4.426582873007999e-06, "loss": 0.4769, "step": 1434 }, { "epoch": 1.7489845653939886, "grad_norm": 0.512036705158376, "learning_rate": 4.4195354293738484e-06, "loss": 0.582, "step": 1435 }, { "epoch": 1.7502030869212022, "grad_norm": 0.4305096055128761, "learning_rate": 4.412489154485752e-06, "loss": 0.5326, "step": 1436 }, { "epoch": 1.7514216084484158, "grad_norm": 0.5036201316708941, "learning_rate": 4.405444062531145e-06, "loss": 0.579, "step": 1437 }, { "epoch": 1.7526401299756296, "grad_norm": 0.42814700978676606, "learning_rate": 4.3984001676950875e-06, "loss": 0.5706, "step": 1438 }, { "epoch": 1.7538586515028434, "grad_norm": 0.4336700472324628, "learning_rate": 4.391357484160223e-06, "loss": 0.5429, "step": 1439 }, { "epoch": 1.755077173030057, "grad_norm": 0.4197620066836796, "learning_rate": 4.384316026106766e-06, "loss": 0.5312, "step": 1440 }, { "epoch": 1.7562956945572705, "grad_norm": 0.4358185412850227, "learning_rate": 4.377275807712453e-06, "loss": 0.5601, "step": 1441 }, { "epoch": 1.757514216084484, "grad_norm": 0.4593898380941711, "learning_rate": 4.37023684315253e-06, "loss": 0.5522, "step": 1442 }, { "epoch": 1.7587327376116977, "grad_norm": 0.41694136338662585, "learning_rate": 4.363199146599717e-06, "loss": 0.5436, "step": 1443 }, { "epoch": 1.7599512591389115, "grad_norm": 0.41051974887386045, "learning_rate": 4.3561627322241815e-06, "loss": 0.5484, "step": 1444 }, { "epoch": 1.761169780666125, "grad_norm": 0.49654495683052513, "learning_rate": 4.34912761419351e-06, "loss": 0.5471, "step": 1445 }, { "epoch": 1.7623883021933389, "grad_norm": 0.46755105267929675, "learning_rate": 4.342093806672678e-06, "loss": 0.5675, "step": 1446 }, { "epoch": 1.7636068237205524, "grad_norm": 0.4560949973440655, "learning_rate": 4.335061323824019e-06, "loss": 0.5921, "step": 1447 }, { "epoch": 1.764825345247766, "grad_norm": 0.4254462067059595, "learning_rate": 4.328030179807207e-06, "loss": 0.4801, "step": 1448 }, { "epoch": 1.7660438667749796, "grad_norm": 0.43590945113760904, "learning_rate": 4.321000388779214e-06, "loss": 0.55, "step": 1449 }, { "epoch": 1.7672623883021932, "grad_norm": 0.45385792985801476, "learning_rate": 4.313971964894289e-06, "loss": 0.5936, "step": 1450 }, { "epoch": 1.768480909829407, "grad_norm": 0.45173148922198614, "learning_rate": 4.306944922303932e-06, "loss": 0.5198, "step": 1451 }, { "epoch": 1.7696994313566208, "grad_norm": 0.4738870866999846, "learning_rate": 4.299919275156857e-06, "loss": 0.5695, "step": 1452 }, { "epoch": 1.7709179528838344, "grad_norm": 0.4308222859015806, "learning_rate": 4.292895037598968e-06, "loss": 0.5302, "step": 1453 }, { "epoch": 1.772136474411048, "grad_norm": 0.43194034641229945, "learning_rate": 4.285872223773336e-06, "loss": 0.5277, "step": 1454 }, { "epoch": 1.7733549959382615, "grad_norm": 0.44969920461261864, "learning_rate": 4.278850847820161e-06, "loss": 0.5552, "step": 1455 }, { "epoch": 1.774573517465475, "grad_norm": 0.45716879761679, "learning_rate": 4.2718309238767485e-06, "loss": 0.5785, "step": 1456 }, { "epoch": 1.775792038992689, "grad_norm": 0.4340108500215334, "learning_rate": 4.264812466077486e-06, "loss": 0.5973, "step": 1457 }, { "epoch": 1.7770105605199025, "grad_norm": 0.40605446125162264, "learning_rate": 4.2577954885537985e-06, "loss": 0.5262, "step": 1458 }, { "epoch": 1.7782290820471163, "grad_norm": 0.4862703366986213, "learning_rate": 4.2507800054341385e-06, "loss": 0.576, "step": 1459 }, { "epoch": 1.7794476035743299, "grad_norm": 0.48359582578678745, "learning_rate": 4.243766030843947e-06, "loss": 0.5998, "step": 1460 }, { "epoch": 1.7806661251015434, "grad_norm": 0.3661204928776939, "learning_rate": 4.236753578905627e-06, "loss": 0.4968, "step": 1461 }, { "epoch": 1.781884646628757, "grad_norm": 0.43106534453803774, "learning_rate": 4.229742663738521e-06, "loss": 0.5418, "step": 1462 }, { "epoch": 1.7831031681559708, "grad_norm": 0.48202616303627543, "learning_rate": 4.2227332994588666e-06, "loss": 0.5486, "step": 1463 }, { "epoch": 1.7843216896831844, "grad_norm": 0.46787213059943966, "learning_rate": 4.215725500179788e-06, "loss": 0.5394, "step": 1464 }, { "epoch": 1.7855402112103982, "grad_norm": 0.4786631164932734, "learning_rate": 4.208719280011255e-06, "loss": 0.6512, "step": 1465 }, { "epoch": 1.7867587327376118, "grad_norm": 0.5007334603891063, "learning_rate": 4.2017146530600585e-06, "loss": 0.5262, "step": 1466 }, { "epoch": 1.7879772542648253, "grad_norm": 0.4766688726563304, "learning_rate": 4.194711633429782e-06, "loss": 0.4996, "step": 1467 }, { "epoch": 1.789195775792039, "grad_norm": 0.46491040345633633, "learning_rate": 4.1877102352207695e-06, "loss": 0.5968, "step": 1468 }, { "epoch": 1.7904142973192525, "grad_norm": 0.4085352403505702, "learning_rate": 4.180710472530105e-06, "loss": 0.5262, "step": 1469 }, { "epoch": 1.7916328188464663, "grad_norm": 0.43126540204858976, "learning_rate": 4.173712359451576e-06, "loss": 0.5407, "step": 1470 }, { "epoch": 1.7928513403736799, "grad_norm": 0.5202009737302775, "learning_rate": 4.16671591007565e-06, "loss": 0.5644, "step": 1471 }, { "epoch": 1.7940698619008937, "grad_norm": 0.43231572065106405, "learning_rate": 4.159721138489445e-06, "loss": 0.5143, "step": 1472 }, { "epoch": 1.7952883834281073, "grad_norm": 0.4626044446914442, "learning_rate": 4.152728058776701e-06, "loss": 0.5853, "step": 1473 }, { "epoch": 1.7965069049553208, "grad_norm": 0.43407883748754916, "learning_rate": 4.145736685017749e-06, "loss": 0.5239, "step": 1474 }, { "epoch": 1.7977254264825344, "grad_norm": 0.4267857290356126, "learning_rate": 4.138747031289485e-06, "loss": 0.5558, "step": 1475 }, { "epoch": 1.7989439480097482, "grad_norm": 0.4315508799133083, "learning_rate": 4.131759111665349e-06, "loss": 0.5807, "step": 1476 }, { "epoch": 1.8001624695369618, "grad_norm": 0.4048772175153014, "learning_rate": 4.124772940215279e-06, "loss": 0.508, "step": 1477 }, { "epoch": 1.8013809910641756, "grad_norm": 0.4268392177126804, "learning_rate": 4.1177885310057045e-06, "loss": 0.552, "step": 1478 }, { "epoch": 1.8025995125913892, "grad_norm": 0.4495564163997895, "learning_rate": 4.110805898099492e-06, "loss": 0.5669, "step": 1479 }, { "epoch": 1.8038180341186028, "grad_norm": 0.4570284740109343, "learning_rate": 4.103825055555947e-06, "loss": 0.5503, "step": 1480 }, { "epoch": 1.8050365556458163, "grad_norm": 0.45712926339273185, "learning_rate": 4.096846017430758e-06, "loss": 0.5861, "step": 1481 }, { "epoch": 1.80625507717303, "grad_norm": 0.4363450699012883, "learning_rate": 4.0898687977759895e-06, "loss": 0.5698, "step": 1482 }, { "epoch": 1.8074735987002437, "grad_norm": 0.36642253412778386, "learning_rate": 4.08289341064004e-06, "loss": 0.4882, "step": 1483 }, { "epoch": 1.8086921202274575, "grad_norm": 0.4626576143609871, "learning_rate": 4.075919870067617e-06, "loss": 0.5695, "step": 1484 }, { "epoch": 1.809910641754671, "grad_norm": 0.46018408439267183, "learning_rate": 4.068948190099711e-06, "loss": 0.5529, "step": 1485 }, { "epoch": 1.8111291632818847, "grad_norm": 0.4119449731431994, "learning_rate": 4.06197838477357e-06, "loss": 0.5024, "step": 1486 }, { "epoch": 1.8123476848090982, "grad_norm": 0.4015730766144408, "learning_rate": 4.0550104681226635e-06, "loss": 0.5656, "step": 1487 }, { "epoch": 1.8135662063363118, "grad_norm": 0.4503343260237984, "learning_rate": 4.048044454176658e-06, "loss": 0.5661, "step": 1488 }, { "epoch": 1.8147847278635256, "grad_norm": 0.43240190245880916, "learning_rate": 4.041080356961393e-06, "loss": 0.4974, "step": 1489 }, { "epoch": 1.8160032493907392, "grad_norm": 0.4734473361008657, "learning_rate": 4.034118190498843e-06, "loss": 0.5663, "step": 1490 }, { "epoch": 1.817221770917953, "grad_norm": 0.43362890265223014, "learning_rate": 4.0271579688071e-06, "loss": 0.5531, "step": 1491 }, { "epoch": 1.8184402924451666, "grad_norm": 0.46894586845233727, "learning_rate": 4.020199705900335e-06, "loss": 0.5534, "step": 1492 }, { "epoch": 1.8196588139723802, "grad_norm": 0.5328522267534698, "learning_rate": 4.013243415788783e-06, "loss": 0.6018, "step": 1493 }, { "epoch": 1.8208773354995937, "grad_norm": 0.41829831723127575, "learning_rate": 4.0062891124787e-06, "loss": 0.5562, "step": 1494 }, { "epoch": 1.8220958570268073, "grad_norm": 0.45791268251247896, "learning_rate": 3.999336809972343e-06, "loss": 0.5226, "step": 1495 }, { "epoch": 1.8233143785540211, "grad_norm": 0.52749121116633, "learning_rate": 3.99238652226794e-06, "loss": 0.5885, "step": 1496 }, { "epoch": 1.824532900081235, "grad_norm": 0.4102080465654976, "learning_rate": 3.985438263359667e-06, "loss": 0.4996, "step": 1497 }, { "epoch": 1.8257514216084485, "grad_norm": 0.453636908099918, "learning_rate": 3.978492047237608e-06, "loss": 0.568, "step": 1498 }, { "epoch": 1.826969943135662, "grad_norm": 0.49160181331071584, "learning_rate": 3.971547887887742e-06, "loss": 0.574, "step": 1499 }, { "epoch": 1.8281884646628757, "grad_norm": 0.4128170671886108, "learning_rate": 3.964605799291897e-06, "loss": 0.4792, "step": 1500 }, { "epoch": 1.8294069861900892, "grad_norm": 0.4714468421227976, "learning_rate": 3.9576657954277406e-06, "loss": 0.5527, "step": 1501 }, { "epoch": 1.830625507717303, "grad_norm": 0.4759788646029719, "learning_rate": 3.950727890268736e-06, "loss": 0.564, "step": 1502 }, { "epoch": 1.8318440292445166, "grad_norm": 0.4269752606026449, "learning_rate": 3.943792097784126e-06, "loss": 0.5733, "step": 1503 }, { "epoch": 1.8330625507717304, "grad_norm": 0.4407801675115479, "learning_rate": 3.936858431938899e-06, "loss": 0.501, "step": 1504 }, { "epoch": 1.834281072298944, "grad_norm": 0.417785240435009, "learning_rate": 3.929926906693757e-06, "loss": 0.5292, "step": 1505 }, { "epoch": 1.8354995938261576, "grad_norm": 0.4954357818413886, "learning_rate": 3.922997536005094e-06, "loss": 0.5834, "step": 1506 }, { "epoch": 1.8367181153533712, "grad_norm": 0.4520246987276718, "learning_rate": 3.91607033382497e-06, "loss": 0.601, "step": 1507 }, { "epoch": 1.8379366368805847, "grad_norm": 0.41894543258809586, "learning_rate": 3.909145314101074e-06, "loss": 0.5201, "step": 1508 }, { "epoch": 1.8391551584077985, "grad_norm": 0.48207218741173996, "learning_rate": 3.9022224907767e-06, "loss": 0.5478, "step": 1509 }, { "epoch": 1.8403736799350123, "grad_norm": 0.45664125938234335, "learning_rate": 3.895301877790728e-06, "loss": 0.5646, "step": 1510 }, { "epoch": 1.841592201462226, "grad_norm": 0.4171767656165807, "learning_rate": 3.888383489077576e-06, "loss": 0.511, "step": 1511 }, { "epoch": 1.8428107229894395, "grad_norm": 0.43354385082610986, "learning_rate": 3.88146733856719e-06, "loss": 0.526, "step": 1512 }, { "epoch": 1.844029244516653, "grad_norm": 0.4920523523977966, "learning_rate": 3.874553440185008e-06, "loss": 0.5767, "step": 1513 }, { "epoch": 1.8452477660438666, "grad_norm": 0.46705637995124133, "learning_rate": 3.867641807851935e-06, "loss": 0.5835, "step": 1514 }, { "epoch": 1.8464662875710804, "grad_norm": 0.4461828469934018, "learning_rate": 3.860732455484314e-06, "loss": 0.4961, "step": 1515 }, { "epoch": 1.847684809098294, "grad_norm": 0.4633901834358105, "learning_rate": 3.853825396993891e-06, "loss": 0.5811, "step": 1516 }, { "epoch": 1.8489033306255078, "grad_norm": 0.4438313726356196, "learning_rate": 3.8469206462878e-06, "loss": 0.5655, "step": 1517 }, { "epoch": 1.8501218521527214, "grad_norm": 0.4546281191367206, "learning_rate": 3.840018217268527e-06, "loss": 0.5556, "step": 1518 }, { "epoch": 1.851340373679935, "grad_norm": 0.39692829259522316, "learning_rate": 3.833118123833881e-06, "loss": 0.5083, "step": 1519 }, { "epoch": 1.8525588952071486, "grad_norm": 0.4367611773412124, "learning_rate": 3.826220379876974e-06, "loss": 0.5621, "step": 1520 }, { "epoch": 1.8537774167343624, "grad_norm": 0.46250207668673327, "learning_rate": 3.819324999286177e-06, "loss": 0.5502, "step": 1521 }, { "epoch": 1.854995938261576, "grad_norm": 0.42252748085937586, "learning_rate": 3.8124319959451133e-06, "loss": 0.5428, "step": 1522 }, { "epoch": 1.8562144597887897, "grad_norm": 0.42520604252823624, "learning_rate": 3.8055413837326133e-06, "loss": 0.5484, "step": 1523 }, { "epoch": 1.8574329813160033, "grad_norm": 0.41242969185302275, "learning_rate": 3.7986531765226965e-06, "loss": 0.521, "step": 1524 }, { "epoch": 1.858651502843217, "grad_norm": 0.43001901614946786, "learning_rate": 3.7917673881845373e-06, "loss": 0.5943, "step": 1525 }, { "epoch": 1.8598700243704305, "grad_norm": 0.4097185174805625, "learning_rate": 3.7848840325824428e-06, "loss": 0.5407, "step": 1526 }, { "epoch": 1.861088545897644, "grad_norm": 0.4509948723964594, "learning_rate": 3.778003123575815e-06, "loss": 0.5526, "step": 1527 }, { "epoch": 1.8623070674248579, "grad_norm": 0.458525992527633, "learning_rate": 3.77112467501914e-06, "loss": 0.5546, "step": 1528 }, { "epoch": 1.8635255889520714, "grad_norm": 0.407821722457764, "learning_rate": 3.7642487007619417e-06, "loss": 0.5205, "step": 1529 }, { "epoch": 1.8647441104792852, "grad_norm": 0.4630986805415289, "learning_rate": 3.757375214648764e-06, "loss": 0.5733, "step": 1530 }, { "epoch": 1.8659626320064988, "grad_norm": 0.46336209457236627, "learning_rate": 3.7505042305191463e-06, "loss": 0.5653, "step": 1531 }, { "epoch": 1.8671811535337124, "grad_norm": 0.39311369649530103, "learning_rate": 3.743635762207582e-06, "loss": 0.5342, "step": 1532 }, { "epoch": 1.868399675060926, "grad_norm": 0.42025451422654897, "learning_rate": 3.7367698235435036e-06, "loss": 0.5474, "step": 1533 }, { "epoch": 1.8696181965881398, "grad_norm": 0.42303104824107784, "learning_rate": 3.72990642835125e-06, "loss": 0.52, "step": 1534 }, { "epoch": 1.8708367181153533, "grad_norm": 0.40431884043673944, "learning_rate": 3.7230455904500385e-06, "loss": 0.5468, "step": 1535 }, { "epoch": 1.8720552396425671, "grad_norm": 0.4312111249145443, "learning_rate": 3.716187323653939e-06, "loss": 0.5888, "step": 1536 }, { "epoch": 1.8732737611697807, "grad_norm": 0.41823157797464644, "learning_rate": 3.7093316417718407e-06, "loss": 0.5638, "step": 1537 }, { "epoch": 1.8744922826969943, "grad_norm": 0.42521945264285177, "learning_rate": 3.702478558607429e-06, "loss": 0.5357, "step": 1538 }, { "epoch": 1.8757108042242079, "grad_norm": 0.4652010212406273, "learning_rate": 3.695628087959162e-06, "loss": 0.5809, "step": 1539 }, { "epoch": 1.8769293257514215, "grad_norm": 0.399398106227539, "learning_rate": 3.6887802436202307e-06, "loss": 0.5233, "step": 1540 }, { "epoch": 1.8781478472786353, "grad_norm": 0.40874149994794573, "learning_rate": 3.6819350393785445e-06, "loss": 0.5534, "step": 1541 }, { "epoch": 1.879366368805849, "grad_norm": 0.4553334343530874, "learning_rate": 3.675092489016693e-06, "loss": 0.5369, "step": 1542 }, { "epoch": 1.8805848903330626, "grad_norm": 0.40028666583281924, "learning_rate": 3.6682526063119206e-06, "loss": 0.5209, "step": 1543 }, { "epoch": 1.8818034118602762, "grad_norm": 0.41838120396321116, "learning_rate": 3.661415405036103e-06, "loss": 0.5752, "step": 1544 }, { "epoch": 1.8830219333874898, "grad_norm": 0.4136128207763801, "learning_rate": 3.654580898955721e-06, "loss": 0.5277, "step": 1545 }, { "epoch": 1.8842404549147034, "grad_norm": 0.4024921294917515, "learning_rate": 3.647749101831821e-06, "loss": 0.5239, "step": 1546 }, { "epoch": 1.8854589764419172, "grad_norm": 0.4141269485652032, "learning_rate": 3.640920027420001e-06, "loss": 0.5508, "step": 1547 }, { "epoch": 1.8866774979691308, "grad_norm": 0.440719562642394, "learning_rate": 3.6340936894703717e-06, "loss": 0.5702, "step": 1548 }, { "epoch": 1.8878960194963446, "grad_norm": 0.4786206622488289, "learning_rate": 3.6272701017275385e-06, "loss": 0.5721, "step": 1549 }, { "epoch": 1.8891145410235581, "grad_norm": 0.4129960293133917, "learning_rate": 3.6204492779305678e-06, "loss": 0.5382, "step": 1550 }, { "epoch": 1.8903330625507717, "grad_norm": 0.425696733632996, "learning_rate": 3.61363123181296e-06, "loss": 0.546, "step": 1551 }, { "epoch": 1.8915515840779853, "grad_norm": 0.4653679268493305, "learning_rate": 3.6068159771026267e-06, "loss": 0.5789, "step": 1552 }, { "epoch": 1.8927701056051989, "grad_norm": 0.4522003365392251, "learning_rate": 3.6000035275218515e-06, "loss": 0.5224, "step": 1553 }, { "epoch": 1.8939886271324127, "grad_norm": 0.3920499944414549, "learning_rate": 3.593193896787277e-06, "loss": 0.4976, "step": 1554 }, { "epoch": 1.8952071486596265, "grad_norm": 0.45058258754829983, "learning_rate": 3.5863870986098655e-06, "loss": 0.5745, "step": 1555 }, { "epoch": 1.89642567018684, "grad_norm": 0.4244815509498337, "learning_rate": 3.5795831466948805e-06, "loss": 0.5414, "step": 1556 }, { "epoch": 1.8976441917140536, "grad_norm": 0.4331016099950002, "learning_rate": 3.5727820547418525e-06, "loss": 0.539, "step": 1557 }, { "epoch": 1.8988627132412672, "grad_norm": 0.417995629833821, "learning_rate": 3.5659838364445505e-06, "loss": 0.5156, "step": 1558 }, { "epoch": 1.9000812347684808, "grad_norm": 0.4734474494296379, "learning_rate": 3.5591885054909605e-06, "loss": 0.5925, "step": 1559 }, { "epoch": 1.9012997562956946, "grad_norm": 0.46115486081361745, "learning_rate": 3.5523960755632573e-06, "loss": 0.5091, "step": 1560 }, { "epoch": 1.9025182778229082, "grad_norm": 0.40875274875883305, "learning_rate": 3.5456065603377697e-06, "loss": 0.5567, "step": 1561 }, { "epoch": 1.903736799350122, "grad_norm": 0.45386074829816486, "learning_rate": 3.5388199734849626e-06, "loss": 0.5578, "step": 1562 }, { "epoch": 1.9049553208773355, "grad_norm": 0.38709828752403175, "learning_rate": 3.5320363286694015e-06, "loss": 0.5179, "step": 1563 }, { "epoch": 1.9061738424045491, "grad_norm": 0.42735900716697534, "learning_rate": 3.5252556395497274e-06, "loss": 0.5712, "step": 1564 }, { "epoch": 1.9073923639317627, "grad_norm": 0.4181700954501109, "learning_rate": 3.518477919778631e-06, "loss": 0.5781, "step": 1565 }, { "epoch": 1.9086108854589763, "grad_norm": 0.421517198534864, "learning_rate": 3.5117031830028274e-06, "loss": 0.5214, "step": 1566 }, { "epoch": 1.90982940698619, "grad_norm": 0.44082135382564575, "learning_rate": 3.504931442863023e-06, "loss": 0.5929, "step": 1567 }, { "epoch": 1.9110479285134039, "grad_norm": 0.3829707161093217, "learning_rate": 3.49816271299389e-06, "loss": 0.4973, "step": 1568 }, { "epoch": 1.9122664500406175, "grad_norm": 0.4241401054720212, "learning_rate": 3.4913970070240388e-06, "loss": 0.5694, "step": 1569 }, { "epoch": 1.913484971567831, "grad_norm": 0.4287983948624245, "learning_rate": 3.484634338575995e-06, "loss": 0.5123, "step": 1570 }, { "epoch": 1.9147034930950446, "grad_norm": 0.40950888500677163, "learning_rate": 3.4778747212661647e-06, "loss": 0.5595, "step": 1571 }, { "epoch": 1.9159220146222582, "grad_norm": 0.4272739781741268, "learning_rate": 3.4711181687048114e-06, "loss": 0.5609, "step": 1572 }, { "epoch": 1.917140536149472, "grad_norm": 0.41564421693161757, "learning_rate": 3.464364694496031e-06, "loss": 0.5336, "step": 1573 }, { "epoch": 1.9183590576766856, "grad_norm": 0.4359864387668293, "learning_rate": 3.457614312237716e-06, "loss": 0.5371, "step": 1574 }, { "epoch": 1.9195775792038994, "grad_norm": 0.4799831871173569, "learning_rate": 3.450867035521536e-06, "loss": 0.5299, "step": 1575 }, { "epoch": 1.920796100731113, "grad_norm": 0.4453426614702043, "learning_rate": 3.4441228779329073e-06, "loss": 0.5502, "step": 1576 }, { "epoch": 1.9220146222583265, "grad_norm": 0.4238694285973907, "learning_rate": 3.4373818530509686e-06, "loss": 0.5275, "step": 1577 }, { "epoch": 1.9232331437855401, "grad_norm": 0.41917397616804203, "learning_rate": 3.4306439744485453e-06, "loss": 0.5761, "step": 1578 }, { "epoch": 1.924451665312754, "grad_norm": 0.425933885933589, "learning_rate": 3.423909255692137e-06, "loss": 0.515, "step": 1579 }, { "epoch": 1.9256701868399675, "grad_norm": 0.4456890458176112, "learning_rate": 3.417177710341868e-06, "loss": 0.5522, "step": 1580 }, { "epoch": 1.9268887083671813, "grad_norm": 0.41653994234849523, "learning_rate": 3.4104493519514844e-06, "loss": 0.5675, "step": 1581 }, { "epoch": 1.9281072298943949, "grad_norm": 0.4065472343645043, "learning_rate": 3.40372419406831e-06, "loss": 0.5016, "step": 1582 }, { "epoch": 1.9293257514216084, "grad_norm": 0.4554171323483848, "learning_rate": 3.3970022502332273e-06, "loss": 0.5919, "step": 1583 }, { "epoch": 1.930544272948822, "grad_norm": 0.4247465501155384, "learning_rate": 3.3902835339806463e-06, "loss": 0.565, "step": 1584 }, { "epoch": 1.9317627944760356, "grad_norm": 0.44295772231878283, "learning_rate": 3.3835680588384767e-06, "loss": 0.5046, "step": 1585 }, { "epoch": 1.9329813160032494, "grad_norm": 0.4268447826543325, "learning_rate": 3.3768558383281024e-06, "loss": 0.5193, "step": 1586 }, { "epoch": 1.934199837530463, "grad_norm": 0.44799827489237776, "learning_rate": 3.3701468859643583e-06, "loss": 0.5631, "step": 1587 }, { "epoch": 1.9354183590576768, "grad_norm": 0.4332023943083594, "learning_rate": 3.363441215255495e-06, "loss": 0.5724, "step": 1588 }, { "epoch": 1.9366368805848904, "grad_norm": 0.3996520998473681, "learning_rate": 3.356738839703158e-06, "loss": 0.5255, "step": 1589 }, { "epoch": 1.937855402112104, "grad_norm": 0.44409520862327806, "learning_rate": 3.3500397728023536e-06, "loss": 0.5425, "step": 1590 }, { "epoch": 1.9390739236393175, "grad_norm": 0.4405722390495154, "learning_rate": 3.343344028041433e-06, "loss": 0.6053, "step": 1591 }, { "epoch": 1.9402924451665313, "grad_norm": 0.40826303676438436, "learning_rate": 3.336651618902054e-06, "loss": 0.5324, "step": 1592 }, { "epoch": 1.941510966693745, "grad_norm": 0.3851707449193732, "learning_rate": 3.3299625588591568e-06, "loss": 0.5088, "step": 1593 }, { "epoch": 1.9427294882209587, "grad_norm": 0.40859643518575894, "learning_rate": 3.3232768613809453e-06, "loss": 0.581, "step": 1594 }, { "epoch": 1.9439480097481723, "grad_norm": 0.3722542671220263, "learning_rate": 3.316594539928845e-06, "loss": 0.4977, "step": 1595 }, { "epoch": 1.9451665312753859, "grad_norm": 0.4383598182132238, "learning_rate": 3.309915607957487e-06, "loss": 0.6137, "step": 1596 }, { "epoch": 1.9463850528025994, "grad_norm": 0.4011690274677538, "learning_rate": 3.303240078914679e-06, "loss": 0.563, "step": 1597 }, { "epoch": 1.947603574329813, "grad_norm": 0.36889683412600394, "learning_rate": 3.2965679662413772e-06, "loss": 0.4968, "step": 1598 }, { "epoch": 1.9488220958570268, "grad_norm": 0.43411779671306283, "learning_rate": 3.289899283371657e-06, "loss": 0.5802, "step": 1599 }, { "epoch": 1.9500406173842406, "grad_norm": 0.3803158460715809, "learning_rate": 3.283234043732689e-06, "loss": 0.5093, "step": 1600 }, { "epoch": 1.9512591389114542, "grad_norm": 0.4391648613289349, "learning_rate": 3.276572260744709e-06, "loss": 0.565, "step": 1601 }, { "epoch": 1.9524776604386678, "grad_norm": 0.40723374350566227, "learning_rate": 3.2699139478209987e-06, "loss": 0.514, "step": 1602 }, { "epoch": 1.9536961819658814, "grad_norm": 0.454313203169353, "learning_rate": 3.263259118367845e-06, "loss": 0.6135, "step": 1603 }, { "epoch": 1.954914703493095, "grad_norm": 0.4178433726812962, "learning_rate": 3.256607785784527e-06, "loss": 0.5301, "step": 1604 }, { "epoch": 1.9561332250203087, "grad_norm": 0.42422765865196377, "learning_rate": 3.249959963463283e-06, "loss": 0.5278, "step": 1605 }, { "epoch": 1.9573517465475223, "grad_norm": 0.4507513531340492, "learning_rate": 3.2433156647892784e-06, "loss": 0.5154, "step": 1606 }, { "epoch": 1.958570268074736, "grad_norm": 0.4155636470893334, "learning_rate": 3.2366749031405875e-06, "loss": 0.5627, "step": 1607 }, { "epoch": 1.9597887896019497, "grad_norm": 0.41760402999256324, "learning_rate": 3.2300376918881628e-06, "loss": 0.5779, "step": 1608 }, { "epoch": 1.9610073111291633, "grad_norm": 0.4262673425446038, "learning_rate": 3.223404044395808e-06, "loss": 0.5939, "step": 1609 }, { "epoch": 1.9622258326563768, "grad_norm": 0.39002137904137807, "learning_rate": 3.216773974020152e-06, "loss": 0.4796, "step": 1610 }, { "epoch": 1.9634443541835904, "grad_norm": 0.48341233875628054, "learning_rate": 3.210147494110618e-06, "loss": 0.5623, "step": 1611 }, { "epoch": 1.9646628757108042, "grad_norm": 0.4263996506849499, "learning_rate": 3.203524618009403e-06, "loss": 0.5771, "step": 1612 }, { "epoch": 1.965881397238018, "grad_norm": 0.39778372039996124, "learning_rate": 3.1969053590514487e-06, "loss": 0.5291, "step": 1613 }, { "epoch": 1.9670999187652316, "grad_norm": 0.4303659167460693, "learning_rate": 3.19028973056441e-06, "loss": 0.5488, "step": 1614 }, { "epoch": 1.9683184402924452, "grad_norm": 0.4137612167679553, "learning_rate": 3.1836777458686363e-06, "loss": 0.5619, "step": 1615 }, { "epoch": 1.9695369618196588, "grad_norm": 0.36766081180510835, "learning_rate": 3.177069418277139e-06, "loss": 0.4946, "step": 1616 }, { "epoch": 1.9707554833468723, "grad_norm": 0.4182717579515874, "learning_rate": 3.1704647610955618e-06, "loss": 0.5297, "step": 1617 }, { "epoch": 1.9719740048740861, "grad_norm": 0.4584959850560608, "learning_rate": 3.163863787622162e-06, "loss": 0.6143, "step": 1618 }, { "epoch": 1.9731925264012997, "grad_norm": 0.4458263983902489, "learning_rate": 3.157266511147783e-06, "loss": 0.5079, "step": 1619 }, { "epoch": 1.9744110479285135, "grad_norm": 0.43613330489917596, "learning_rate": 3.150672944955818e-06, "loss": 0.5714, "step": 1620 }, { "epoch": 1.975629569455727, "grad_norm": 0.3858901721024831, "learning_rate": 3.1440831023221952e-06, "loss": 0.5283, "step": 1621 }, { "epoch": 1.9768480909829407, "grad_norm": 0.40043306380620164, "learning_rate": 3.137496996515339e-06, "loss": 0.5618, "step": 1622 }, { "epoch": 1.9780666125101543, "grad_norm": 0.4155561403542389, "learning_rate": 3.1309146407961565e-06, "loss": 0.5793, "step": 1623 }, { "epoch": 1.9792851340373678, "grad_norm": 0.48452491106537393, "learning_rate": 3.1243360484180012e-06, "loss": 0.5955, "step": 1624 }, { "epoch": 1.9805036555645816, "grad_norm": 0.4052625054606517, "learning_rate": 3.117761232626648e-06, "loss": 0.5113, "step": 1625 }, { "epoch": 1.9817221770917954, "grad_norm": 0.42003854375542504, "learning_rate": 3.111190206660273e-06, "loss": 0.5462, "step": 1626 }, { "epoch": 1.982940698619009, "grad_norm": 0.425058799574285, "learning_rate": 3.1046229837494123e-06, "loss": 0.5244, "step": 1627 }, { "epoch": 1.9841592201462226, "grad_norm": 0.4113830672023175, "learning_rate": 3.0980595771169543e-06, "loss": 0.5297, "step": 1628 }, { "epoch": 1.9853777416734362, "grad_norm": 0.4015669403567964, "learning_rate": 3.091499999978097e-06, "loss": 0.5261, "step": 1629 }, { "epoch": 1.9865962632006497, "grad_norm": 0.4283955622994288, "learning_rate": 3.0849442655403315e-06, "loss": 0.5755, "step": 1630 }, { "epoch": 1.9878147847278635, "grad_norm": 0.41898536957171045, "learning_rate": 3.0783923870034094e-06, "loss": 0.5468, "step": 1631 }, { "epoch": 1.9890333062550771, "grad_norm": 0.39218815049699407, "learning_rate": 3.0718443775593233e-06, "loss": 0.5094, "step": 1632 }, { "epoch": 1.990251827782291, "grad_norm": 0.4279916922662056, "learning_rate": 3.065300250392265e-06, "loss": 0.5914, "step": 1633 }, { "epoch": 1.9914703493095045, "grad_norm": 0.41267484908021385, "learning_rate": 3.058760018678622e-06, "loss": 0.5182, "step": 1634 }, { "epoch": 1.992688870836718, "grad_norm": 0.44135796853573733, "learning_rate": 3.0522236955869293e-06, "loss": 0.5306, "step": 1635 }, { "epoch": 1.9939073923639317, "grad_norm": 0.47924737111572846, "learning_rate": 3.0456912942778585e-06, "loss": 0.5286, "step": 1636 }, { "epoch": 1.9951259138911455, "grad_norm": 0.42225974588467396, "learning_rate": 3.0391628279041797e-06, "loss": 0.5143, "step": 1637 }, { "epoch": 1.996344435418359, "grad_norm": 0.443505986438843, "learning_rate": 3.0326383096107424e-06, "loss": 0.603, "step": 1638 }, { "epoch": 1.9975629569455728, "grad_norm": 0.43300461200802, "learning_rate": 3.0261177525344458e-06, "loss": 0.529, "step": 1639 }, { "epoch": 1.9987814784727864, "grad_norm": 0.44310730809971327, "learning_rate": 3.019601169804216e-06, "loss": 0.5712, "step": 1640 }, { "epoch": 2.0004061738424044, "grad_norm": 0.9468576561635692, "learning_rate": 3.0130885745409744e-06, "loss": 0.9149, "step": 1641 }, { "epoch": 2.0016246953696184, "grad_norm": 0.4706438333540491, "learning_rate": 3.0065799798576146e-06, "loss": 0.4931, "step": 1642 }, { "epoch": 2.002843216896832, "grad_norm": 0.4720816823665595, "learning_rate": 3.0000753988589717e-06, "loss": 0.4837, "step": 1643 }, { "epoch": 2.0040617384240456, "grad_norm": 0.47408797235071, "learning_rate": 2.993574844641807e-06, "loss": 0.4923, "step": 1644 }, { "epoch": 2.005280259951259, "grad_norm": 0.4519771428113875, "learning_rate": 2.987078330294767e-06, "loss": 0.5211, "step": 1645 }, { "epoch": 2.0064987814784727, "grad_norm": 0.519886504494184, "learning_rate": 2.9805858688983656e-06, "loss": 0.5746, "step": 1646 }, { "epoch": 2.0077173030056863, "grad_norm": 0.42316406477644747, "learning_rate": 2.9740974735249627e-06, "loss": 0.4762, "step": 1647 }, { "epoch": 2.0089358245329, "grad_norm": 0.4459084131070543, "learning_rate": 2.96761315723872e-06, "loss": 0.517, "step": 1648 }, { "epoch": 2.010154346060114, "grad_norm": 0.4787475311586957, "learning_rate": 2.961132933095595e-06, "loss": 0.5475, "step": 1649 }, { "epoch": 2.0113728675873275, "grad_norm": 0.44287904587306054, "learning_rate": 2.9546568141433007e-06, "loss": 0.513, "step": 1650 }, { "epoch": 2.012591389114541, "grad_norm": 0.3984732881743886, "learning_rate": 2.94818481342129e-06, "loss": 0.5093, "step": 1651 }, { "epoch": 2.0138099106417546, "grad_norm": 0.4399121723080827, "learning_rate": 2.941716943960716e-06, "loss": 0.511, "step": 1652 }, { "epoch": 2.015028432168968, "grad_norm": 0.44831302014249225, "learning_rate": 2.9352532187844254e-06, "loss": 0.4984, "step": 1653 }, { "epoch": 2.016246953696182, "grad_norm": 0.42996322667286735, "learning_rate": 2.9287936509069036e-06, "loss": 0.5191, "step": 1654 }, { "epoch": 2.017465475223396, "grad_norm": 0.4183124555440696, "learning_rate": 2.9223382533342825e-06, "loss": 0.545, "step": 1655 }, { "epoch": 2.0186839967506094, "grad_norm": 0.40446852524401855, "learning_rate": 2.915887039064287e-06, "loss": 0.503, "step": 1656 }, { "epoch": 2.019902518277823, "grad_norm": 0.46166747184685086, "learning_rate": 2.9094400210862206e-06, "loss": 0.5397, "step": 1657 }, { "epoch": 2.0211210398050365, "grad_norm": 0.44952433573058875, "learning_rate": 2.9029972123809425e-06, "loss": 0.5055, "step": 1658 }, { "epoch": 2.02233956133225, "grad_norm": 0.4250306396302766, "learning_rate": 2.8965586259208295e-06, "loss": 0.521, "step": 1659 }, { "epoch": 2.0235580828594637, "grad_norm": 0.4183203719527761, "learning_rate": 2.890124274669764e-06, "loss": 0.4974, "step": 1660 }, { "epoch": 2.0247766043866773, "grad_norm": 0.4411815617611041, "learning_rate": 2.8836941715830943e-06, "loss": 0.5129, "step": 1661 }, { "epoch": 2.0259951259138913, "grad_norm": 0.44795060381325624, "learning_rate": 2.8772683296076197e-06, "loss": 0.5142, "step": 1662 }, { "epoch": 2.027213647441105, "grad_norm": 0.41579452280547835, "learning_rate": 2.8708467616815606e-06, "loss": 0.4951, "step": 1663 }, { "epoch": 2.0284321689683185, "grad_norm": 0.4012708682220002, "learning_rate": 2.864429480734529e-06, "loss": 0.512, "step": 1664 }, { "epoch": 2.029650690495532, "grad_norm": 0.4273458204316277, "learning_rate": 2.858016499687503e-06, "loss": 0.5401, "step": 1665 }, { "epoch": 2.0308692120227456, "grad_norm": 0.45185500116453636, "learning_rate": 2.8516078314528082e-06, "loss": 0.4782, "step": 1666 }, { "epoch": 2.032087733549959, "grad_norm": 0.49808257517442245, "learning_rate": 2.8452034889340874e-06, "loss": 0.5078, "step": 1667 }, { "epoch": 2.033306255077173, "grad_norm": 0.4147857957964427, "learning_rate": 2.838803485026265e-06, "loss": 0.5092, "step": 1668 }, { "epoch": 2.034524776604387, "grad_norm": 0.4270575824323386, "learning_rate": 2.8324078326155403e-06, "loss": 0.5239, "step": 1669 }, { "epoch": 2.0357432981316004, "grad_norm": 0.4396795083275381, "learning_rate": 2.8260165445793417e-06, "loss": 0.5106, "step": 1670 }, { "epoch": 2.036961819658814, "grad_norm": 0.42042014253641513, "learning_rate": 2.819629633786319e-06, "loss": 0.4699, "step": 1671 }, { "epoch": 2.0381803411860275, "grad_norm": 0.4430726691393502, "learning_rate": 2.8132471130962997e-06, "loss": 0.4899, "step": 1672 }, { "epoch": 2.039398862713241, "grad_norm": 0.40352990345385337, "learning_rate": 2.806868995360278e-06, "loss": 0.5271, "step": 1673 }, { "epoch": 2.0406173842404547, "grad_norm": 0.4264717440525784, "learning_rate": 2.800495293420384e-06, "loss": 0.5358, "step": 1674 }, { "epoch": 2.0418359057676687, "grad_norm": 0.406701314146554, "learning_rate": 2.7941260201098513e-06, "loss": 0.5347, "step": 1675 }, { "epoch": 2.0430544272948823, "grad_norm": 0.3789120115073334, "learning_rate": 2.7877611882529978e-06, "loss": 0.5291, "step": 1676 }, { "epoch": 2.044272948822096, "grad_norm": 0.376105306550124, "learning_rate": 2.781400810665201e-06, "loss": 0.4798, "step": 1677 }, { "epoch": 2.0454914703493094, "grad_norm": 0.41689803288099314, "learning_rate": 2.775044900152873e-06, "loss": 0.5603, "step": 1678 }, { "epoch": 2.046709991876523, "grad_norm": 0.39084573445190973, "learning_rate": 2.7686934695134237e-06, "loss": 0.5172, "step": 1679 }, { "epoch": 2.0479285134037366, "grad_norm": 0.3983625559018197, "learning_rate": 2.762346531535246e-06, "loss": 0.5169, "step": 1680 }, { "epoch": 2.0491470349309506, "grad_norm": 0.4214853676308127, "learning_rate": 2.7560040989976894e-06, "loss": 0.4956, "step": 1681 }, { "epoch": 2.050365556458164, "grad_norm": 0.3999213184120299, "learning_rate": 2.749666184671032e-06, "loss": 0.4772, "step": 1682 }, { "epoch": 2.051584077985378, "grad_norm": 0.47662956549783836, "learning_rate": 2.7433328013164493e-06, "loss": 0.5384, "step": 1683 }, { "epoch": 2.0528025995125914, "grad_norm": 0.41183336935775333, "learning_rate": 2.737003961686e-06, "loss": 0.5383, "step": 1684 }, { "epoch": 2.054021121039805, "grad_norm": 0.4162288645157988, "learning_rate": 2.730679678522592e-06, "loss": 0.4879, "step": 1685 }, { "epoch": 2.0552396425670185, "grad_norm": 0.4208621537377079, "learning_rate": 2.724359964559958e-06, "loss": 0.5302, "step": 1686 }, { "epoch": 2.0564581640942325, "grad_norm": 0.46940669999316137, "learning_rate": 2.7180448325226283e-06, "loss": 0.5038, "step": 1687 }, { "epoch": 2.057676685621446, "grad_norm": 0.42207329602275184, "learning_rate": 2.711734295125913e-06, "loss": 0.5136, "step": 1688 }, { "epoch": 2.0588952071486597, "grad_norm": 0.42923148362073005, "learning_rate": 2.705428365075868e-06, "loss": 0.4974, "step": 1689 }, { "epoch": 2.0601137286758733, "grad_norm": 0.4486303504796547, "learning_rate": 2.6991270550692794e-06, "loss": 0.4896, "step": 1690 }, { "epoch": 2.061332250203087, "grad_norm": 0.4450084773476215, "learning_rate": 2.692830377793614e-06, "loss": 0.5368, "step": 1691 }, { "epoch": 2.0625507717303004, "grad_norm": 0.41459790491115805, "learning_rate": 2.686538345927027e-06, "loss": 0.5181, "step": 1692 }, { "epoch": 2.063769293257514, "grad_norm": 0.39428684445951, "learning_rate": 2.680250972138314e-06, "loss": 0.5002, "step": 1693 }, { "epoch": 2.064987814784728, "grad_norm": 0.46824872444922333, "learning_rate": 2.6739682690868947e-06, "loss": 0.5303, "step": 1694 }, { "epoch": 2.0662063363119416, "grad_norm": 0.4328466707209096, "learning_rate": 2.6676902494227795e-06, "loss": 0.5603, "step": 1695 }, { "epoch": 2.067424857839155, "grad_norm": 0.3839348433674553, "learning_rate": 2.6614169257865513e-06, "loss": 0.4682, "step": 1696 }, { "epoch": 2.0686433793663688, "grad_norm": 0.4527195063930813, "learning_rate": 2.6551483108093378e-06, "loss": 0.5468, "step": 1697 }, { "epoch": 2.0698619008935824, "grad_norm": 0.3864719481645061, "learning_rate": 2.6488844171127903e-06, "loss": 0.4596, "step": 1698 }, { "epoch": 2.071080422420796, "grad_norm": 0.46426343352179134, "learning_rate": 2.6426252573090437e-06, "loss": 0.56, "step": 1699 }, { "epoch": 2.07229894394801, "grad_norm": 0.4288286079073576, "learning_rate": 2.6363708440007136e-06, "loss": 0.5161, "step": 1700 }, { "epoch": 2.0735174654752235, "grad_norm": 0.40637110346427036, "learning_rate": 2.6301211897808463e-06, "loss": 0.5389, "step": 1701 }, { "epoch": 2.074735987002437, "grad_norm": 0.3951734512163483, "learning_rate": 2.623876307232919e-06, "loss": 0.526, "step": 1702 }, { "epoch": 2.0759545085296507, "grad_norm": 0.3885989062888163, "learning_rate": 2.6176362089307873e-06, "loss": 0.4725, "step": 1703 }, { "epoch": 2.0771730300568643, "grad_norm": 0.43823376795203267, "learning_rate": 2.611400907438685e-06, "loss": 0.5124, "step": 1704 }, { "epoch": 2.078391551584078, "grad_norm": 0.39530623859105424, "learning_rate": 2.6051704153111847e-06, "loss": 0.4934, "step": 1705 }, { "epoch": 2.0796100731112914, "grad_norm": 0.3703827995949618, "learning_rate": 2.598944745093174e-06, "loss": 0.477, "step": 1706 }, { "epoch": 2.0808285946385054, "grad_norm": 0.399727045865617, "learning_rate": 2.5927239093198273e-06, "loss": 0.5887, "step": 1707 }, { "epoch": 2.082047116165719, "grad_norm": 0.366776126773729, "learning_rate": 2.5865079205165953e-06, "loss": 0.4682, "step": 1708 }, { "epoch": 2.0832656376929326, "grad_norm": 0.4208917158039958, "learning_rate": 2.5802967911991637e-06, "loss": 0.5203, "step": 1709 }, { "epoch": 2.084484159220146, "grad_norm": 0.4493230756017366, "learning_rate": 2.574090533873431e-06, "loss": 0.5273, "step": 1710 }, { "epoch": 2.0857026807473598, "grad_norm": 0.4464314211780269, "learning_rate": 2.567889161035494e-06, "loss": 0.589, "step": 1711 }, { "epoch": 2.0869212022745733, "grad_norm": 0.37995030061127644, "learning_rate": 2.5616926851716055e-06, "loss": 0.4443, "step": 1712 }, { "epoch": 2.0881397238017874, "grad_norm": 0.43366495148118606, "learning_rate": 2.555501118758167e-06, "loss": 0.5068, "step": 1713 }, { "epoch": 2.089358245329001, "grad_norm": 0.43859783988060524, "learning_rate": 2.549314474261686e-06, "loss": 0.5061, "step": 1714 }, { "epoch": 2.0905767668562145, "grad_norm": 0.41699646136345586, "learning_rate": 2.5431327641387682e-06, "loss": 0.5149, "step": 1715 }, { "epoch": 2.091795288383428, "grad_norm": 0.4456011191053756, "learning_rate": 2.5369560008360826e-06, "loss": 0.521, "step": 1716 }, { "epoch": 2.0930138099106417, "grad_norm": 0.3959554858313093, "learning_rate": 2.5307841967903337e-06, "loss": 0.5048, "step": 1717 }, { "epoch": 2.0942323314378553, "grad_norm": 0.41396649662012225, "learning_rate": 2.52461736442824e-06, "loss": 0.5162, "step": 1718 }, { "epoch": 2.095450852965069, "grad_norm": 0.42258000014128283, "learning_rate": 2.518455516166517e-06, "loss": 0.5517, "step": 1719 }, { "epoch": 2.096669374492283, "grad_norm": 0.39450373632220187, "learning_rate": 2.512298664411841e-06, "loss": 0.4964, "step": 1720 }, { "epoch": 2.0978878960194964, "grad_norm": 0.3723209543555369, "learning_rate": 2.5061468215608243e-06, "loss": 0.5218, "step": 1721 }, { "epoch": 2.09910641754671, "grad_norm": 0.41901342453157836, "learning_rate": 2.5000000000000015e-06, "loss": 0.5245, "step": 1722 }, { "epoch": 2.1003249390739236, "grad_norm": 0.40321870380750857, "learning_rate": 2.493858212105788e-06, "loss": 0.5008, "step": 1723 }, { "epoch": 2.101543460601137, "grad_norm": 0.41094871301137714, "learning_rate": 2.487721470244473e-06, "loss": 0.5255, "step": 1724 }, { "epoch": 2.1027619821283507, "grad_norm": 0.38906496669749396, "learning_rate": 2.481589786772178e-06, "loss": 0.5077, "step": 1725 }, { "epoch": 2.1039805036555648, "grad_norm": 0.4041745743019385, "learning_rate": 2.4754631740348455e-06, "loss": 0.5387, "step": 1726 }, { "epoch": 2.1051990251827783, "grad_norm": 0.393377125185753, "learning_rate": 2.4693416443682074e-06, "loss": 0.5206, "step": 1727 }, { "epoch": 2.106417546709992, "grad_norm": 0.43228252852381843, "learning_rate": 2.4632252100977567e-06, "loss": 0.5457, "step": 1728 }, { "epoch": 2.1076360682372055, "grad_norm": 0.3665069578741783, "learning_rate": 2.4571138835387293e-06, "loss": 0.4513, "step": 1729 }, { "epoch": 2.108854589764419, "grad_norm": 0.40575471379817674, "learning_rate": 2.4510076769960784e-06, "loss": 0.486, "step": 1730 }, { "epoch": 2.1100731112916327, "grad_norm": 0.43487942611154445, "learning_rate": 2.4449066027644473e-06, "loss": 0.542, "step": 1731 }, { "epoch": 2.1112916328188467, "grad_norm": 0.41618547734069145, "learning_rate": 2.4388106731281496e-06, "loss": 0.5405, "step": 1732 }, { "epoch": 2.1125101543460603, "grad_norm": 0.37250571753343925, "learning_rate": 2.4327199003611285e-06, "loss": 0.5298, "step": 1733 }, { "epoch": 2.113728675873274, "grad_norm": 0.38311544589645186, "learning_rate": 2.426634296726955e-06, "loss": 0.4806, "step": 1734 }, { "epoch": 2.1149471974004874, "grad_norm": 0.41137106387325834, "learning_rate": 2.4205538744787904e-06, "loss": 0.5201, "step": 1735 }, { "epoch": 2.116165718927701, "grad_norm": 0.39341688405639397, "learning_rate": 2.4144786458593635e-06, "loss": 0.4973, "step": 1736 }, { "epoch": 2.1173842404549146, "grad_norm": 0.4261520463046671, "learning_rate": 2.40840862310094e-06, "loss": 0.5574, "step": 1737 }, { "epoch": 2.118602761982128, "grad_norm": 0.4270827914830235, "learning_rate": 2.4023438184253115e-06, "loss": 0.5011, "step": 1738 }, { "epoch": 2.119821283509342, "grad_norm": 0.37282235236530675, "learning_rate": 2.3962842440437584e-06, "loss": 0.4675, "step": 1739 }, { "epoch": 2.1210398050365558, "grad_norm": 0.44674425439539434, "learning_rate": 2.3902299121570332e-06, "loss": 0.5741, "step": 1740 }, { "epoch": 2.1222583265637693, "grad_norm": 0.41462268384604345, "learning_rate": 2.384180834955329e-06, "loss": 0.4876, "step": 1741 }, { "epoch": 2.123476848090983, "grad_norm": 0.4534841107739354, "learning_rate": 2.378137024618262e-06, "loss": 0.5135, "step": 1742 }, { "epoch": 2.1246953696181965, "grad_norm": 0.3978485819501479, "learning_rate": 2.3720984933148443e-06, "loss": 0.5208, "step": 1743 }, { "epoch": 2.12591389114541, "grad_norm": 0.37184067874428883, "learning_rate": 2.366065253203456e-06, "loss": 0.5007, "step": 1744 }, { "epoch": 2.1271324126726237, "grad_norm": 0.4276632980042562, "learning_rate": 2.360037316431823e-06, "loss": 0.5317, "step": 1745 }, { "epoch": 2.1283509341998377, "grad_norm": 0.4617864367024641, "learning_rate": 2.354014695136997e-06, "loss": 0.5064, "step": 1746 }, { "epoch": 2.1295694557270513, "grad_norm": 0.3859544135248513, "learning_rate": 2.3479974014453255e-06, "loss": 0.4865, "step": 1747 }, { "epoch": 2.130787977254265, "grad_norm": 0.39298854898729213, "learning_rate": 2.3419854474724284e-06, "loss": 0.5399, "step": 1748 }, { "epoch": 2.1320064987814784, "grad_norm": 0.4537583444692293, "learning_rate": 2.3359788453231723e-06, "loss": 0.5134, "step": 1749 }, { "epoch": 2.133225020308692, "grad_norm": 0.4143013070479495, "learning_rate": 2.329977607091652e-06, "loss": 0.5128, "step": 1750 }, { "epoch": 2.1344435418359056, "grad_norm": 0.37360321916937234, "learning_rate": 2.323981744861162e-06, "loss": 0.5181, "step": 1751 }, { "epoch": 2.1356620633631196, "grad_norm": 0.4044550908338376, "learning_rate": 2.317991270704167e-06, "loss": 0.5197, "step": 1752 }, { "epoch": 2.136880584890333, "grad_norm": 0.4067716384869161, "learning_rate": 2.3120061966822915e-06, "loss": 0.4899, "step": 1753 }, { "epoch": 2.1380991064175467, "grad_norm": 0.4141293721178242, "learning_rate": 2.3060265348462777e-06, "loss": 0.5499, "step": 1754 }, { "epoch": 2.1393176279447603, "grad_norm": 0.4005349519648887, "learning_rate": 2.3000522972359803e-06, "loss": 0.5395, "step": 1755 }, { "epoch": 2.140536149471974, "grad_norm": 0.39248292555402464, "learning_rate": 2.2940834958803228e-06, "loss": 0.4931, "step": 1756 }, { "epoch": 2.1417546709991875, "grad_norm": 0.38822332167948, "learning_rate": 2.2881201427972894e-06, "loss": 0.4722, "step": 1757 }, { "epoch": 2.1429731925264015, "grad_norm": 0.38077200885472606, "learning_rate": 2.282162249993895e-06, "loss": 0.5326, "step": 1758 }, { "epoch": 2.144191714053615, "grad_norm": 0.38524212575031547, "learning_rate": 2.2762098294661556e-06, "loss": 0.5109, "step": 1759 }, { "epoch": 2.1454102355808287, "grad_norm": 0.40347299946589055, "learning_rate": 2.27026289319907e-06, "loss": 0.5579, "step": 1760 }, { "epoch": 2.1466287571080422, "grad_norm": 0.3924233257276901, "learning_rate": 2.264321453166598e-06, "loss": 0.5165, "step": 1761 }, { "epoch": 2.147847278635256, "grad_norm": 0.37575656828172116, "learning_rate": 2.2583855213316326e-06, "loss": 0.4895, "step": 1762 }, { "epoch": 2.1490658001624694, "grad_norm": 0.3937169968667044, "learning_rate": 2.2524551096459703e-06, "loss": 0.53, "step": 1763 }, { "epoch": 2.150284321689683, "grad_norm": 0.39457712101518455, "learning_rate": 2.2465302300503012e-06, "loss": 0.4689, "step": 1764 }, { "epoch": 2.151502843216897, "grad_norm": 0.4393918889033505, "learning_rate": 2.2406108944741696e-06, "loss": 0.5178, "step": 1765 }, { "epoch": 2.1527213647441106, "grad_norm": 0.4147988916944454, "learning_rate": 2.234697114835963e-06, "loss": 0.5385, "step": 1766 }, { "epoch": 2.153939886271324, "grad_norm": 0.40920290551320604, "learning_rate": 2.228788903042877e-06, "loss": 0.5229, "step": 1767 }, { "epoch": 2.1551584077985377, "grad_norm": 0.3841912028808192, "learning_rate": 2.2228862709909e-06, "loss": 0.4859, "step": 1768 }, { "epoch": 2.1563769293257513, "grad_norm": 0.4017528120034774, "learning_rate": 2.2169892305647865e-06, "loss": 0.5067, "step": 1769 }, { "epoch": 2.157595450852965, "grad_norm": 0.4106356542533839, "learning_rate": 2.211097793638029e-06, "loss": 0.511, "step": 1770 }, { "epoch": 2.158813972380179, "grad_norm": 0.3922497877364856, "learning_rate": 2.2052119720728375e-06, "loss": 0.5213, "step": 1771 }, { "epoch": 2.1600324939073925, "grad_norm": 0.41142355231120237, "learning_rate": 2.1993317777201197e-06, "loss": 0.55, "step": 1772 }, { "epoch": 2.161251015434606, "grad_norm": 0.3793501040249683, "learning_rate": 2.19345722241945e-06, "loss": 0.4942, "step": 1773 }, { "epoch": 2.1624695369618196, "grad_norm": 0.4181558266950597, "learning_rate": 2.1875883179990515e-06, "loss": 0.5179, "step": 1774 }, { "epoch": 2.1636880584890332, "grad_norm": 0.4101546221225696, "learning_rate": 2.1817250762757657e-06, "loss": 0.4854, "step": 1775 }, { "epoch": 2.164906580016247, "grad_norm": 0.40370317236247594, "learning_rate": 2.175867509055033e-06, "loss": 0.5675, "step": 1776 }, { "epoch": 2.166125101543461, "grad_norm": 0.34161977537337374, "learning_rate": 2.170015628130871e-06, "loss": 0.4693, "step": 1777 }, { "epoch": 2.1673436230706744, "grad_norm": 0.3929226138427561, "learning_rate": 2.1641694452858486e-06, "loss": 0.4932, "step": 1778 }, { "epoch": 2.168562144597888, "grad_norm": 0.414139184233712, "learning_rate": 2.158328972291056e-06, "loss": 0.5428, "step": 1779 }, { "epoch": 2.1697806661251016, "grad_norm": 0.4021702827253732, "learning_rate": 2.1524942209060944e-06, "loss": 0.553, "step": 1780 }, { "epoch": 2.170999187652315, "grad_norm": 0.3914173100634304, "learning_rate": 2.1466652028790384e-06, "loss": 0.4846, "step": 1781 }, { "epoch": 2.1722177091795287, "grad_norm": 0.4155952702393289, "learning_rate": 2.1408419299464245e-06, "loss": 0.5062, "step": 1782 }, { "epoch": 2.1734362307067423, "grad_norm": 0.4029405381679447, "learning_rate": 2.1350244138332143e-06, "loss": 0.5543, "step": 1783 }, { "epoch": 2.1746547522339563, "grad_norm": 0.3847467238608974, "learning_rate": 2.1292126662527846e-06, "loss": 0.4783, "step": 1784 }, { "epoch": 2.17587327376117, "grad_norm": 0.3774561138941112, "learning_rate": 2.1234066989068972e-06, "loss": 0.5736, "step": 1785 }, { "epoch": 2.1770917952883835, "grad_norm": 0.3791792062977483, "learning_rate": 2.1176065234856725e-06, "loss": 0.4782, "step": 1786 }, { "epoch": 2.178310316815597, "grad_norm": 0.40849575227250023, "learning_rate": 2.111812151667567e-06, "loss": 0.498, "step": 1787 }, { "epoch": 2.1795288383428106, "grad_norm": 0.36289578272484346, "learning_rate": 2.106023595119358e-06, "loss": 0.4866, "step": 1788 }, { "epoch": 2.180747359870024, "grad_norm": 0.3869017558290611, "learning_rate": 2.1002408654961124e-06, "loss": 0.4643, "step": 1789 }, { "epoch": 2.181965881397238, "grad_norm": 0.4954162333959639, "learning_rate": 2.0944639744411627e-06, "loss": 0.5415, "step": 1790 }, { "epoch": 2.183184402924452, "grad_norm": 0.42167227063607843, "learning_rate": 2.088692933586083e-06, "loss": 0.5359, "step": 1791 }, { "epoch": 2.1844029244516654, "grad_norm": 0.38236452945033045, "learning_rate": 2.0829277545506736e-06, "loss": 0.4971, "step": 1792 }, { "epoch": 2.185621445978879, "grad_norm": 0.4285834277650781, "learning_rate": 2.077168448942933e-06, "loss": 0.5475, "step": 1793 }, { "epoch": 2.1868399675060926, "grad_norm": 0.4023920796181869, "learning_rate": 2.071415028359026e-06, "loss": 0.4797, "step": 1794 }, { "epoch": 2.188058489033306, "grad_norm": 0.4074540063011702, "learning_rate": 2.065667504383276e-06, "loss": 0.5254, "step": 1795 }, { "epoch": 2.1892770105605197, "grad_norm": 0.39510353720495955, "learning_rate": 2.0599258885881317e-06, "loss": 0.4899, "step": 1796 }, { "epoch": 2.1904955320877337, "grad_norm": 0.5548900318530214, "learning_rate": 2.0541901925341446e-06, "loss": 0.5198, "step": 1797 }, { "epoch": 2.1917140536149473, "grad_norm": 0.3825302397550243, "learning_rate": 2.0484604277699437e-06, "loss": 0.5098, "step": 1798 }, { "epoch": 2.192932575142161, "grad_norm": 0.40564085390755233, "learning_rate": 2.042736605832222e-06, "loss": 0.5323, "step": 1799 }, { "epoch": 2.1941510966693745, "grad_norm": 0.39704056600422283, "learning_rate": 2.037018738245707e-06, "loss": 0.5108, "step": 1800 }, { "epoch": 2.195369618196588, "grad_norm": 0.41600542688505693, "learning_rate": 2.0313068365231303e-06, "loss": 0.4978, "step": 1801 }, { "epoch": 2.1965881397238016, "grad_norm": 0.39387757095797193, "learning_rate": 2.0256009121652147e-06, "loss": 0.5273, "step": 1802 }, { "epoch": 2.1978066612510156, "grad_norm": 0.36242925936592724, "learning_rate": 2.019900976660651e-06, "loss": 0.4982, "step": 1803 }, { "epoch": 2.1990251827782292, "grad_norm": 0.35509245865119315, "learning_rate": 2.0142070414860704e-06, "loss": 0.4878, "step": 1804 }, { "epoch": 2.200243704305443, "grad_norm": 0.38952269804673, "learning_rate": 2.0085191181060176e-06, "loss": 0.5369, "step": 1805 }, { "epoch": 2.2014622258326564, "grad_norm": 0.3866235545814812, "learning_rate": 2.0028372179729405e-06, "loss": 0.4802, "step": 1806 }, { "epoch": 2.20268074735987, "grad_norm": 0.3922622961361272, "learning_rate": 1.9971613525271523e-06, "loss": 0.5284, "step": 1807 }, { "epoch": 2.2038992688870835, "grad_norm": 0.38718886845940065, "learning_rate": 1.9914915331968217e-06, "loss": 0.4846, "step": 1808 }, { "epoch": 2.205117790414297, "grad_norm": 0.39994907283167946, "learning_rate": 1.985827771397938e-06, "loss": 0.5433, "step": 1809 }, { "epoch": 2.206336311941511, "grad_norm": 0.37905307147188894, "learning_rate": 1.980170078534297e-06, "loss": 0.5145, "step": 1810 }, { "epoch": 2.2075548334687247, "grad_norm": 0.4192247244192786, "learning_rate": 1.9745184659974764e-06, "loss": 0.5118, "step": 1811 }, { "epoch": 2.2087733549959383, "grad_norm": 0.3522566177407128, "learning_rate": 1.9688729451668116e-06, "loss": 0.4751, "step": 1812 }, { "epoch": 2.209991876523152, "grad_norm": 0.3772830661699162, "learning_rate": 1.9632335274093645e-06, "loss": 0.4859, "step": 1813 }, { "epoch": 2.2112103980503655, "grad_norm": 0.41564321254010056, "learning_rate": 1.957600224079917e-06, "loss": 0.5474, "step": 1814 }, { "epoch": 2.212428919577579, "grad_norm": 0.40463784380961515, "learning_rate": 1.9519730465209384e-06, "loss": 0.5135, "step": 1815 }, { "epoch": 2.213647441104793, "grad_norm": 0.42048597280314337, "learning_rate": 1.9463520060625647e-06, "loss": 0.51, "step": 1816 }, { "epoch": 2.2148659626320066, "grad_norm": 0.4057972852875916, "learning_rate": 1.940737114022572e-06, "loss": 0.5291, "step": 1817 }, { "epoch": 2.21608448415922, "grad_norm": 0.3860966909234876, "learning_rate": 1.935128381706355e-06, "loss": 0.4638, "step": 1818 }, { "epoch": 2.217303005686434, "grad_norm": 0.3992185398314597, "learning_rate": 1.9295258204069116e-06, "loss": 0.4846, "step": 1819 }, { "epoch": 2.2185215272136474, "grad_norm": 0.4414225404994573, "learning_rate": 1.9239294414048143e-06, "loss": 0.5729, "step": 1820 }, { "epoch": 2.219740048740861, "grad_norm": 0.38820997327095225, "learning_rate": 1.9183392559681812e-06, "loss": 0.4883, "step": 1821 }, { "epoch": 2.2209585702680745, "grad_norm": 0.3830961932249294, "learning_rate": 1.9127552753526683e-06, "loss": 0.4959, "step": 1822 }, { "epoch": 2.2221770917952886, "grad_norm": 0.40371127197935186, "learning_rate": 1.907177510801431e-06, "loss": 0.5322, "step": 1823 }, { "epoch": 2.223395613322502, "grad_norm": 0.4422584980389884, "learning_rate": 1.901605973545116e-06, "loss": 0.544, "step": 1824 }, { "epoch": 2.2246141348497157, "grad_norm": 0.3671460120788879, "learning_rate": 1.8960406748018229e-06, "loss": 0.447, "step": 1825 }, { "epoch": 2.2258326563769293, "grad_norm": 0.41787679838261416, "learning_rate": 1.8904816257770976e-06, "loss": 0.4837, "step": 1826 }, { "epoch": 2.227051177904143, "grad_norm": 0.40739753945636065, "learning_rate": 1.884928837663902e-06, "loss": 0.5215, "step": 1827 }, { "epoch": 2.2282696994313564, "grad_norm": 0.418750898184457, "learning_rate": 1.8793823216425872e-06, "loss": 0.5042, "step": 1828 }, { "epoch": 2.2294882209585705, "grad_norm": 0.42352381285835144, "learning_rate": 1.8738420888808767e-06, "loss": 0.5266, "step": 1829 }, { "epoch": 2.230706742485784, "grad_norm": 0.3766712142861196, "learning_rate": 1.8683081505338468e-06, "loss": 0.4898, "step": 1830 }, { "epoch": 2.2319252640129976, "grad_norm": 0.3646915065525184, "learning_rate": 1.8627805177438984e-06, "loss": 0.5102, "step": 1831 }, { "epoch": 2.233143785540211, "grad_norm": 0.3728256406154505, "learning_rate": 1.8572592016407337e-06, "loss": 0.5124, "step": 1832 }, { "epoch": 2.234362307067425, "grad_norm": 0.4171588204608096, "learning_rate": 1.8517442133413405e-06, "loss": 0.543, "step": 1833 }, { "epoch": 2.2355808285946384, "grad_norm": 0.38601375413922817, "learning_rate": 1.8462355639499614e-06, "loss": 0.4802, "step": 1834 }, { "epoch": 2.236799350121852, "grad_norm": 0.41191751363191975, "learning_rate": 1.8407332645580805e-06, "loss": 0.498, "step": 1835 }, { "epoch": 2.238017871649066, "grad_norm": 0.36858808021561745, "learning_rate": 1.8352373262443918e-06, "loss": 0.5308, "step": 1836 }, { "epoch": 2.2392363931762795, "grad_norm": 0.39319359572416623, "learning_rate": 1.8297477600747854e-06, "loss": 0.5147, "step": 1837 }, { "epoch": 2.240454914703493, "grad_norm": 0.4146287760930502, "learning_rate": 1.8242645771023205e-06, "loss": 0.4951, "step": 1838 }, { "epoch": 2.2416734362307067, "grad_norm": 0.42074375031523503, "learning_rate": 1.8187877883672024e-06, "loss": 0.5238, "step": 1839 }, { "epoch": 2.2428919577579203, "grad_norm": 0.3933564686288827, "learning_rate": 1.81331740489676e-06, "loss": 0.5319, "step": 1840 }, { "epoch": 2.244110479285134, "grad_norm": 0.39931640514560685, "learning_rate": 1.8078534377054303e-06, "loss": 0.4921, "step": 1841 }, { "epoch": 2.245329000812348, "grad_norm": 0.43198490048399485, "learning_rate": 1.8023958977947303e-06, "loss": 0.55, "step": 1842 }, { "epoch": 2.2465475223395615, "grad_norm": 0.4043002892149948, "learning_rate": 1.7969447961532333e-06, "loss": 0.4992, "step": 1843 }, { "epoch": 2.247766043866775, "grad_norm": 0.4118718760088592, "learning_rate": 1.7915001437565481e-06, "loss": 0.4981, "step": 1844 }, { "epoch": 2.2489845653939886, "grad_norm": 0.4099852822242898, "learning_rate": 1.7860619515673034e-06, "loss": 0.5036, "step": 1845 }, { "epoch": 2.250203086921202, "grad_norm": 0.4061790060023943, "learning_rate": 1.7806302305351191e-06, "loss": 0.518, "step": 1846 }, { "epoch": 2.2514216084484158, "grad_norm": 0.38920893319388733, "learning_rate": 1.7752049915965807e-06, "loss": 0.5347, "step": 1847 }, { "epoch": 2.25264012997563, "grad_norm": 0.378445552638885, "learning_rate": 1.7697862456752273e-06, "loss": 0.4489, "step": 1848 }, { "epoch": 2.2538586515028434, "grad_norm": 0.43624250871726583, "learning_rate": 1.764374003681526e-06, "loss": 0.5076, "step": 1849 }, { "epoch": 2.255077173030057, "grad_norm": 0.4301972293121319, "learning_rate": 1.7589682765128424e-06, "loss": 0.5106, "step": 1850 }, { "epoch": 2.2562956945572705, "grad_norm": 0.4206525298070121, "learning_rate": 1.7535690750534268e-06, "loss": 0.5224, "step": 1851 }, { "epoch": 2.257514216084484, "grad_norm": 0.3846425486629517, "learning_rate": 1.7481764101743925e-06, "loss": 0.4962, "step": 1852 }, { "epoch": 2.2587327376116977, "grad_norm": 0.3994656170665921, "learning_rate": 1.7427902927336932e-06, "loss": 0.5142, "step": 1853 }, { "epoch": 2.2599512591389113, "grad_norm": 0.4156779226062884, "learning_rate": 1.7374107335760937e-06, "loss": 0.5224, "step": 1854 }, { "epoch": 2.2611697806661253, "grad_norm": 0.44546616602212363, "learning_rate": 1.732037743533156e-06, "loss": 0.49, "step": 1855 }, { "epoch": 2.262388302193339, "grad_norm": 0.41287367815029863, "learning_rate": 1.7266713334232177e-06, "loss": 0.5125, "step": 1856 }, { "epoch": 2.2636068237205524, "grad_norm": 0.4171809094817276, "learning_rate": 1.7213115140513687e-06, "loss": 0.4866, "step": 1857 }, { "epoch": 2.264825345247766, "grad_norm": 0.4055921589949916, "learning_rate": 1.7159582962094224e-06, "loss": 0.5221, "step": 1858 }, { "epoch": 2.2660438667749796, "grad_norm": 0.37574794466013317, "learning_rate": 1.710611690675908e-06, "loss": 0.5475, "step": 1859 }, { "epoch": 2.267262388302193, "grad_norm": 0.3923703686975919, "learning_rate": 1.7052717082160348e-06, "loss": 0.502, "step": 1860 }, { "epoch": 2.2684809098294068, "grad_norm": 0.4104360688359129, "learning_rate": 1.6999383595816816e-06, "loss": 0.4915, "step": 1861 }, { "epoch": 2.2696994313566208, "grad_norm": 0.42448276721497596, "learning_rate": 1.694611655511365e-06, "loss": 0.5187, "step": 1862 }, { "epoch": 2.2709179528838344, "grad_norm": 0.4182208678556554, "learning_rate": 1.6892916067302279e-06, "loss": 0.5431, "step": 1863 }, { "epoch": 2.272136474411048, "grad_norm": 0.3809524496490651, "learning_rate": 1.6839782239500114e-06, "loss": 0.4962, "step": 1864 }, { "epoch": 2.2733549959382615, "grad_norm": 0.4044653821113984, "learning_rate": 1.6786715178690372e-06, "loss": 0.5455, "step": 1865 }, { "epoch": 2.274573517465475, "grad_norm": 0.4017956989739632, "learning_rate": 1.6733714991721738e-06, "loss": 0.5124, "step": 1866 }, { "epoch": 2.275792038992689, "grad_norm": 0.3948697674235281, "learning_rate": 1.668078178530837e-06, "loss": 0.5121, "step": 1867 }, { "epoch": 2.2770105605199027, "grad_norm": 0.406458521824727, "learning_rate": 1.6627915666029503e-06, "loss": 0.5111, "step": 1868 }, { "epoch": 2.2782290820471163, "grad_norm": 0.3778951735019038, "learning_rate": 1.6575116740329316e-06, "loss": 0.4983, "step": 1869 }, { "epoch": 2.27944760357433, "grad_norm": 0.3577602501518659, "learning_rate": 1.6522385114516681e-06, "loss": 0.4748, "step": 1870 }, { "epoch": 2.2806661251015434, "grad_norm": 0.39147597379268634, "learning_rate": 1.6469720894764945e-06, "loss": 0.5167, "step": 1871 }, { "epoch": 2.281884646628757, "grad_norm": 0.41526652630800426, "learning_rate": 1.6417124187111778e-06, "loss": 0.4856, "step": 1872 }, { "epoch": 2.2831031681559706, "grad_norm": 0.46476506582341715, "learning_rate": 1.6364595097458901e-06, "loss": 0.5541, "step": 1873 }, { "epoch": 2.2843216896831846, "grad_norm": 0.4413380147809054, "learning_rate": 1.6312133731571867e-06, "loss": 0.5681, "step": 1874 }, { "epoch": 2.285540211210398, "grad_norm": 0.41316580395580427, "learning_rate": 1.6259740195079903e-06, "loss": 0.4902, "step": 1875 }, { "epoch": 2.2867587327376118, "grad_norm": 0.375223833084769, "learning_rate": 1.6207414593475634e-06, "loss": 0.5059, "step": 1876 }, { "epoch": 2.2879772542648253, "grad_norm": 0.4191217491734361, "learning_rate": 1.6155157032114926e-06, "loss": 0.4903, "step": 1877 }, { "epoch": 2.289195775792039, "grad_norm": 0.4104399365050538, "learning_rate": 1.610296761621662e-06, "loss": 0.4978, "step": 1878 }, { "epoch": 2.2904142973192525, "grad_norm": 0.452132557586679, "learning_rate": 1.6050846450862368e-06, "loss": 0.5529, "step": 1879 }, { "epoch": 2.291632818846466, "grad_norm": 0.38189023256593707, "learning_rate": 1.5998793640996418e-06, "loss": 0.4534, "step": 1880 }, { "epoch": 2.29285134037368, "grad_norm": 0.4105896104811722, "learning_rate": 1.5946809291425352e-06, "loss": 0.5157, "step": 1881 }, { "epoch": 2.2940698619008937, "grad_norm": 0.39415858749113886, "learning_rate": 1.589489350681791e-06, "loss": 0.504, "step": 1882 }, { "epoch": 2.2952883834281073, "grad_norm": 0.35980183287414685, "learning_rate": 1.5843046391704802e-06, "loss": 0.5077, "step": 1883 }, { "epoch": 2.296506904955321, "grad_norm": 0.38396469649464077, "learning_rate": 1.5791268050478487e-06, "loss": 0.5051, "step": 1884 }, { "epoch": 2.2977254264825344, "grad_norm": 0.3821275005707828, "learning_rate": 1.573955858739289e-06, "loss": 0.5345, "step": 1885 }, { "epoch": 2.298943948009748, "grad_norm": 0.39018524126022086, "learning_rate": 1.5687918106563326e-06, "loss": 0.4713, "step": 1886 }, { "epoch": 2.3001624695369616, "grad_norm": 0.4269492782915114, "learning_rate": 1.5636346711966154e-06, "loss": 0.5396, "step": 1887 }, { "epoch": 2.3013809910641756, "grad_norm": 0.40024049549443624, "learning_rate": 1.5584844507438678e-06, "loss": 0.5119, "step": 1888 }, { "epoch": 2.302599512591389, "grad_norm": 0.3769373466661974, "learning_rate": 1.5533411596678843e-06, "loss": 0.4858, "step": 1889 }, { "epoch": 2.3038180341186028, "grad_norm": 0.40957300518215145, "learning_rate": 1.5482048083245116e-06, "loss": 0.5299, "step": 1890 }, { "epoch": 2.3050365556458163, "grad_norm": 0.3887283184505036, "learning_rate": 1.543075407055623e-06, "loss": 0.5276, "step": 1891 }, { "epoch": 2.30625507717303, "grad_norm": 0.3944358213197462, "learning_rate": 1.5379529661890956e-06, "loss": 0.512, "step": 1892 }, { "epoch": 2.307473598700244, "grad_norm": 0.38335955851758596, "learning_rate": 1.532837496038792e-06, "loss": 0.4802, "step": 1893 }, { "epoch": 2.3086921202274575, "grad_norm": 0.40294432984687933, "learning_rate": 1.5277290069045414e-06, "loss": 0.5171, "step": 1894 }, { "epoch": 2.309910641754671, "grad_norm": 0.39774386298303005, "learning_rate": 1.5226275090721183e-06, "loss": 0.4993, "step": 1895 }, { "epoch": 2.3111291632818847, "grad_norm": 0.4200593782198228, "learning_rate": 1.517533012813217e-06, "loss": 0.5606, "step": 1896 }, { "epoch": 2.3123476848090982, "grad_norm": 0.4204431416124692, "learning_rate": 1.512445528385434e-06, "loss": 0.5538, "step": 1897 }, { "epoch": 2.313566206336312, "grad_norm": 0.33671871586550156, "learning_rate": 1.5073650660322509e-06, "loss": 0.4575, "step": 1898 }, { "epoch": 2.3147847278635254, "grad_norm": 0.42276990891040056, "learning_rate": 1.5022916359830114e-06, "loss": 0.5744, "step": 1899 }, { "epoch": 2.3160032493907394, "grad_norm": 0.35916851603197664, "learning_rate": 1.4972252484528938e-06, "loss": 0.4721, "step": 1900 }, { "epoch": 2.317221770917953, "grad_norm": 0.40308654798116644, "learning_rate": 1.4921659136429022e-06, "loss": 0.5283, "step": 1901 }, { "epoch": 2.3184402924451666, "grad_norm": 0.36536184604215355, "learning_rate": 1.4871136417398407e-06, "loss": 0.4748, "step": 1902 }, { "epoch": 2.31965881397238, "grad_norm": 0.42296472427505094, "learning_rate": 1.4820684429162879e-06, "loss": 0.6, "step": 1903 }, { "epoch": 2.3208773354995937, "grad_norm": 0.3598564220383708, "learning_rate": 1.477030327330582e-06, "loss": 0.4422, "step": 1904 }, { "epoch": 2.3220958570268073, "grad_norm": 0.3948776090594206, "learning_rate": 1.4719993051268023e-06, "loss": 0.5343, "step": 1905 }, { "epoch": 2.323314378554021, "grad_norm": 0.4028197151468667, "learning_rate": 1.466975386434744e-06, "loss": 0.5355, "step": 1906 }, { "epoch": 2.324532900081235, "grad_norm": 0.3958394776231179, "learning_rate": 1.4619585813699032e-06, "loss": 0.5119, "step": 1907 }, { "epoch": 2.3257514216084485, "grad_norm": 0.3640898960169131, "learning_rate": 1.4569489000334435e-06, "loss": 0.4749, "step": 1908 }, { "epoch": 2.326969943135662, "grad_norm": 0.42215058629769103, "learning_rate": 1.4519463525121934e-06, "loss": 0.5157, "step": 1909 }, { "epoch": 2.3281884646628757, "grad_norm": 0.38263832424003186, "learning_rate": 1.4469509488786165e-06, "loss": 0.509, "step": 1910 }, { "epoch": 2.3294069861900892, "grad_norm": 0.41466783476778923, "learning_rate": 1.4419626991907925e-06, "loss": 0.5222, "step": 1911 }, { "epoch": 2.330625507717303, "grad_norm": 0.3818440846598212, "learning_rate": 1.436981613492394e-06, "loss": 0.5153, "step": 1912 }, { "epoch": 2.331844029244517, "grad_norm": 0.3744448281484142, "learning_rate": 1.4320077018126704e-06, "loss": 0.4932, "step": 1913 }, { "epoch": 2.3330625507717304, "grad_norm": 0.35650000061694337, "learning_rate": 1.427040974166427e-06, "loss": 0.4711, "step": 1914 }, { "epoch": 2.334281072298944, "grad_norm": 0.44375949024201433, "learning_rate": 1.4220814405540067e-06, "loss": 0.6081, "step": 1915 }, { "epoch": 2.3354995938261576, "grad_norm": 0.3632417016214969, "learning_rate": 1.4171291109612618e-06, "loss": 0.4439, "step": 1916 }, { "epoch": 2.336718115353371, "grad_norm": 0.391995578036673, "learning_rate": 1.412183995359544e-06, "loss": 0.5208, "step": 1917 }, { "epoch": 2.3379366368805847, "grad_norm": 0.4106831300879619, "learning_rate": 1.4072461037056806e-06, "loss": 0.5185, "step": 1918 }, { "epoch": 2.3391551584077988, "grad_norm": 0.37156715583934985, "learning_rate": 1.4023154459419497e-06, "loss": 0.492, "step": 1919 }, { "epoch": 2.3403736799350123, "grad_norm": 0.4194748560314305, "learning_rate": 1.3973920319960654e-06, "loss": 0.5387, "step": 1920 }, { "epoch": 2.341592201462226, "grad_norm": 0.4089237007134858, "learning_rate": 1.3924758717811582e-06, "loss": 0.5258, "step": 1921 }, { "epoch": 2.3428107229894395, "grad_norm": 0.3694012500954815, "learning_rate": 1.3875669751957548e-06, "loss": 0.4604, "step": 1922 }, { "epoch": 2.344029244516653, "grad_norm": 0.4054490233784107, "learning_rate": 1.3826653521237526e-06, "loss": 0.5113, "step": 1923 }, { "epoch": 2.3452477660438666, "grad_norm": 0.39405086162290015, "learning_rate": 1.3777710124344058e-06, "loss": 0.5753, "step": 1924 }, { "epoch": 2.3464662875710802, "grad_norm": 0.3708763755034275, "learning_rate": 1.3728839659823045e-06, "loss": 0.5154, "step": 1925 }, { "epoch": 2.3476848090982942, "grad_norm": 0.3772580609058518, "learning_rate": 1.3680042226073554e-06, "loss": 0.4871, "step": 1926 }, { "epoch": 2.348903330625508, "grad_norm": 0.39403743871876235, "learning_rate": 1.3631317921347564e-06, "loss": 0.5306, "step": 1927 }, { "epoch": 2.3501218521527214, "grad_norm": 0.3873906953232042, "learning_rate": 1.358266684374987e-06, "loss": 0.5123, "step": 1928 }, { "epoch": 2.351340373679935, "grad_norm": 0.37942846145056086, "learning_rate": 1.3534089091237757e-06, "loss": 0.5054, "step": 1929 }, { "epoch": 2.3525588952071486, "grad_norm": 0.3607773079027305, "learning_rate": 1.348558476162094e-06, "loss": 0.481, "step": 1930 }, { "epoch": 2.353777416734362, "grad_norm": 0.4005225791492384, "learning_rate": 1.343715395256124e-06, "loss": 0.5331, "step": 1931 }, { "epoch": 2.3549959382615757, "grad_norm": 0.36782048392498773, "learning_rate": 1.3388796761572493e-06, "loss": 0.4872, "step": 1932 }, { "epoch": 2.3562144597887897, "grad_norm": 0.38694592195175675, "learning_rate": 1.3340513286020307e-06, "loss": 0.5245, "step": 1933 }, { "epoch": 2.3574329813160033, "grad_norm": 0.3951746347940695, "learning_rate": 1.3292303623121828e-06, "loss": 0.5296, "step": 1934 }, { "epoch": 2.358651502843217, "grad_norm": 0.4201044387513612, "learning_rate": 1.324416786994559e-06, "loss": 0.5284, "step": 1935 }, { "epoch": 2.3598700243704305, "grad_norm": 0.4088086029813002, "learning_rate": 1.3196106123411345e-06, "loss": 0.5212, "step": 1936 }, { "epoch": 2.361088545897644, "grad_norm": 0.38621048916855116, "learning_rate": 1.3148118480289834e-06, "loss": 0.5078, "step": 1937 }, { "epoch": 2.362307067424858, "grad_norm": 0.38423444639816645, "learning_rate": 1.310020503720254e-06, "loss": 0.5363, "step": 1938 }, { "epoch": 2.3635255889520717, "grad_norm": 0.3807864286378039, "learning_rate": 1.3052365890621615e-06, "loss": 0.5349, "step": 1939 }, { "epoch": 2.3647441104792852, "grad_norm": 0.3969153122233334, "learning_rate": 1.3004601136869555e-06, "loss": 0.5245, "step": 1940 }, { "epoch": 2.365962632006499, "grad_norm": 0.3706070359987908, "learning_rate": 1.295691087211912e-06, "loss": 0.4639, "step": 1941 }, { "epoch": 2.3671811535337124, "grad_norm": 0.4076566869065765, "learning_rate": 1.2909295192393057e-06, "loss": 0.5623, "step": 1942 }, { "epoch": 2.368399675060926, "grad_norm": 0.3609321931094141, "learning_rate": 1.2861754193563948e-06, "loss": 0.4532, "step": 1943 }, { "epoch": 2.3696181965881395, "grad_norm": 0.3775351942907534, "learning_rate": 1.2814287971354023e-06, "loss": 0.5515, "step": 1944 }, { "epoch": 2.3708367181153536, "grad_norm": 0.37390993961577534, "learning_rate": 1.2766896621334928e-06, "loss": 0.5097, "step": 1945 }, { "epoch": 2.372055239642567, "grad_norm": 0.37384134850125694, "learning_rate": 1.2719580238927553e-06, "loss": 0.5557, "step": 1946 }, { "epoch": 2.3732737611697807, "grad_norm": 0.3775237757952146, "learning_rate": 1.2672338919401866e-06, "loss": 0.5197, "step": 1947 }, { "epoch": 2.3744922826969943, "grad_norm": 0.3845839355688612, "learning_rate": 1.2625172757876691e-06, "loss": 0.5175, "step": 1948 }, { "epoch": 2.375710804224208, "grad_norm": 0.3930000499088082, "learning_rate": 1.2578081849319547e-06, "loss": 0.4908, "step": 1949 }, { "epoch": 2.3769293257514215, "grad_norm": 0.3679733160971826, "learning_rate": 1.253106628854635e-06, "loss": 0.4807, "step": 1950 }, { "epoch": 2.378147847278635, "grad_norm": 0.41138237630333274, "learning_rate": 1.2484126170221388e-06, "loss": 0.5494, "step": 1951 }, { "epoch": 2.379366368805849, "grad_norm": 0.3644013554869518, "learning_rate": 1.2437261588857037e-06, "loss": 0.4715, "step": 1952 }, { "epoch": 2.3805848903330626, "grad_norm": 0.3754357671998505, "learning_rate": 1.2390472638813572e-06, "loss": 0.5106, "step": 1953 }, { "epoch": 2.381803411860276, "grad_norm": 0.4210050485232648, "learning_rate": 1.2343759414298955e-06, "loss": 0.5755, "step": 1954 }, { "epoch": 2.38302193338749, "grad_norm": 0.3648248351254956, "learning_rate": 1.229712200936874e-06, "loss": 0.4928, "step": 1955 }, { "epoch": 2.3842404549147034, "grad_norm": 0.34960648428885643, "learning_rate": 1.2250560517925747e-06, "loss": 0.4643, "step": 1956 }, { "epoch": 2.385458976441917, "grad_norm": 0.3790545819611439, "learning_rate": 1.2204075033720025e-06, "loss": 0.4949, "step": 1957 }, { "epoch": 2.386677497969131, "grad_norm": 0.3711764526859297, "learning_rate": 1.2157665650348516e-06, "loss": 0.4838, "step": 1958 }, { "epoch": 2.3878960194963446, "grad_norm": 0.4040275413347584, "learning_rate": 1.211133246125497e-06, "loss": 0.5255, "step": 1959 }, { "epoch": 2.389114541023558, "grad_norm": 0.40676437477424116, "learning_rate": 1.2065075559729749e-06, "loss": 0.5417, "step": 1960 }, { "epoch": 2.3903330625507717, "grad_norm": 0.3828497235073974, "learning_rate": 1.201889503890955e-06, "loss": 0.5003, "step": 1961 }, { "epoch": 2.3915515840779853, "grad_norm": 0.40024563629448995, "learning_rate": 1.197279099177731e-06, "loss": 0.5627, "step": 1962 }, { "epoch": 2.392770105605199, "grad_norm": 0.3508330713937016, "learning_rate": 1.1926763511161993e-06, "loss": 0.4607, "step": 1963 }, { "epoch": 2.393988627132413, "grad_norm": 0.4280688001820654, "learning_rate": 1.188081268973842e-06, "loss": 0.5389, "step": 1964 }, { "epoch": 2.3952071486596265, "grad_norm": 0.3853051982812674, "learning_rate": 1.183493862002702e-06, "loss": 0.4576, "step": 1965 }, { "epoch": 2.39642567018684, "grad_norm": 0.4045347572603379, "learning_rate": 1.1789141394393683e-06, "loss": 0.5698, "step": 1966 }, { "epoch": 2.3976441917140536, "grad_norm": 0.3983176516664499, "learning_rate": 1.1743421105049612e-06, "loss": 0.4725, "step": 1967 }, { "epoch": 2.398862713241267, "grad_norm": 0.39056153824479495, "learning_rate": 1.1697777844051105e-06, "loss": 0.5365, "step": 1968 }, { "epoch": 2.400081234768481, "grad_norm": 0.38372959754674263, "learning_rate": 1.165221170329931e-06, "loss": 0.5051, "step": 1969 }, { "epoch": 2.4012997562956944, "grad_norm": 0.3938860376877866, "learning_rate": 1.1606722774540146e-06, "loss": 0.4948, "step": 1970 }, { "epoch": 2.4025182778229084, "grad_norm": 0.38806865450446354, "learning_rate": 1.1561311149364075e-06, "loss": 0.5132, "step": 1971 }, { "epoch": 2.403736799350122, "grad_norm": 0.41405852900105905, "learning_rate": 1.1515976919205869e-06, "loss": 0.5287, "step": 1972 }, { "epoch": 2.4049553208773355, "grad_norm": 0.41486030319490325, "learning_rate": 1.1470720175344473e-06, "loss": 0.4826, "step": 1973 }, { "epoch": 2.406173842404549, "grad_norm": 0.4073479693226464, "learning_rate": 1.1425541008902852e-06, "loss": 0.5061, "step": 1974 }, { "epoch": 2.4073923639317627, "grad_norm": 0.3698628863486537, "learning_rate": 1.1380439510847757e-06, "loss": 0.4822, "step": 1975 }, { "epoch": 2.4086108854589763, "grad_norm": 0.40735700660846696, "learning_rate": 1.1335415771989538e-06, "loss": 0.5198, "step": 1976 }, { "epoch": 2.40982940698619, "grad_norm": 0.39248680656683244, "learning_rate": 1.1290469882981987e-06, "loss": 0.5513, "step": 1977 }, { "epoch": 2.411047928513404, "grad_norm": 0.36602544238604245, "learning_rate": 1.1245601934322148e-06, "loss": 0.5042, "step": 1978 }, { "epoch": 2.4122664500406175, "grad_norm": 0.3765733599147946, "learning_rate": 1.1200812016350172e-06, "loss": 0.5031, "step": 1979 }, { "epoch": 2.413484971567831, "grad_norm": 0.34319770025526913, "learning_rate": 1.1156100219249022e-06, "loss": 0.5049, "step": 1980 }, { "epoch": 2.4147034930950446, "grad_norm": 0.4344345273336274, "learning_rate": 1.1111466633044448e-06, "loss": 0.6097, "step": 1981 }, { "epoch": 2.415922014622258, "grad_norm": 0.4011208632543373, "learning_rate": 1.1066911347604653e-06, "loss": 0.4408, "step": 1982 }, { "epoch": 2.417140536149472, "grad_norm": 0.3602299607841865, "learning_rate": 1.1022434452640252e-06, "loss": 0.4878, "step": 1983 }, { "epoch": 2.418359057676686, "grad_norm": 0.3938174382324399, "learning_rate": 1.0978036037703955e-06, "loss": 0.5246, "step": 1984 }, { "epoch": 2.4195775792038994, "grad_norm": 0.40627392150384173, "learning_rate": 1.0933716192190502e-06, "loss": 0.5191, "step": 1985 }, { "epoch": 2.420796100731113, "grad_norm": 0.3960581508676551, "learning_rate": 1.0889475005336447e-06, "loss": 0.4755, "step": 1986 }, { "epoch": 2.4220146222583265, "grad_norm": 0.3909127795418108, "learning_rate": 1.0845312566219924e-06, "loss": 0.5128, "step": 1987 }, { "epoch": 2.42323314378554, "grad_norm": 0.39827799729251556, "learning_rate": 1.0801228963760518e-06, "loss": 0.5425, "step": 1988 }, { "epoch": 2.4244516653127537, "grad_norm": 0.37511159704629343, "learning_rate": 1.075722428671911e-06, "loss": 0.4761, "step": 1989 }, { "epoch": 2.4256701868399677, "grad_norm": 0.3982398641015277, "learning_rate": 1.0713298623697654e-06, "loss": 0.5386, "step": 1990 }, { "epoch": 2.4268887083671813, "grad_norm": 0.3947556572315106, "learning_rate": 1.0669452063138992e-06, "loss": 0.4842, "step": 1991 }, { "epoch": 2.428107229894395, "grad_norm": 0.40576214681574757, "learning_rate": 1.0625684693326727e-06, "loss": 0.5423, "step": 1992 }, { "epoch": 2.4293257514216084, "grad_norm": 0.40693906455637163, "learning_rate": 1.0581996602384975e-06, "loss": 0.5159, "step": 1993 }, { "epoch": 2.430544272948822, "grad_norm": 0.3488770060857356, "learning_rate": 1.0538387878278283e-06, "loss": 0.5187, "step": 1994 }, { "epoch": 2.4317627944760356, "grad_norm": 0.4158444319209436, "learning_rate": 1.0494858608811326e-06, "loss": 0.5313, "step": 1995 }, { "epoch": 2.432981316003249, "grad_norm": 0.4169970056929273, "learning_rate": 1.0451408881628855e-06, "loss": 0.4866, "step": 1996 }, { "epoch": 2.434199837530463, "grad_norm": 0.3823144671122315, "learning_rate": 1.0408038784215462e-06, "loss": 0.4871, "step": 1997 }, { "epoch": 2.435418359057677, "grad_norm": 0.38435379244248535, "learning_rate": 1.0364748403895368e-06, "loss": 0.5276, "step": 1998 }, { "epoch": 2.4366368805848904, "grad_norm": 0.39978814004238356, "learning_rate": 1.0321537827832311e-06, "loss": 0.5374, "step": 1999 }, { "epoch": 2.437855402112104, "grad_norm": 0.3858037469760484, "learning_rate": 1.0278407143029346e-06, "loss": 0.4967, "step": 2000 }, { "epoch": 2.4390739236393175, "grad_norm": 0.36427416648269983, "learning_rate": 1.0235356436328675e-06, "loss": 0.5147, "step": 2001 }, { "epoch": 2.440292445166531, "grad_norm": 0.42530639031865014, "learning_rate": 1.019238579441148e-06, "loss": 0.4949, "step": 2002 }, { "epoch": 2.4415109666937447, "grad_norm": 0.4047119130435624, "learning_rate": 1.014949530379767e-06, "loss": 0.491, "step": 2003 }, { "epoch": 2.4427294882209587, "grad_norm": 0.38798880943669517, "learning_rate": 1.0106685050845838e-06, "loss": 0.5433, "step": 2004 }, { "epoch": 2.4439480097481723, "grad_norm": 0.4009909590310544, "learning_rate": 1.0063955121752999e-06, "loss": 0.5113, "step": 2005 }, { "epoch": 2.445166531275386, "grad_norm": 0.35796810794196016, "learning_rate": 1.0021305602554459e-06, "loss": 0.5113, "step": 2006 }, { "epoch": 2.4463850528025994, "grad_norm": 0.3755800564081513, "learning_rate": 9.978736579123577e-07, "loss": 0.5004, "step": 2007 }, { "epoch": 2.447603574329813, "grad_norm": 0.3683007765287416, "learning_rate": 9.936248137171684e-07, "loss": 0.4974, "step": 2008 }, { "epoch": 2.448822095857027, "grad_norm": 0.3952937239349372, "learning_rate": 9.893840362247809e-07, "loss": 0.4971, "step": 2009 }, { "epoch": 2.4500406173842406, "grad_norm": 0.4284741934979282, "learning_rate": 9.851513339738627e-07, "loss": 0.561, "step": 2010 }, { "epoch": 2.451259138911454, "grad_norm": 0.4056720067131079, "learning_rate": 9.809267154868163e-07, "loss": 0.5179, "step": 2011 }, { "epoch": 2.4524776604386678, "grad_norm": 0.35255993479863745, "learning_rate": 9.7671018926977e-07, "loss": 0.4424, "step": 2012 }, { "epoch": 2.4536961819658814, "grad_norm": 0.40547305130915784, "learning_rate": 9.725017638125612e-07, "loss": 0.5524, "step": 2013 }, { "epoch": 2.454914703493095, "grad_norm": 0.35656299866798635, "learning_rate": 9.683014475887126e-07, "loss": 0.4676, "step": 2014 }, { "epoch": 2.4561332250203085, "grad_norm": 0.37086670515583137, "learning_rate": 9.641092490554195e-07, "loss": 0.5398, "step": 2015 }, { "epoch": 2.4573517465475225, "grad_norm": 0.3791246421084454, "learning_rate": 9.599251766535344e-07, "loss": 0.4933, "step": 2016 }, { "epoch": 2.458570268074736, "grad_norm": 0.40912338906986023, "learning_rate": 9.5574923880755e-07, "loss": 0.562, "step": 2017 }, { "epoch": 2.4597887896019497, "grad_norm": 0.41181426146954847, "learning_rate": 9.51581443925576e-07, "loss": 0.4892, "step": 2018 }, { "epoch": 2.4610073111291633, "grad_norm": 0.4026513287049664, "learning_rate": 9.474218003993275e-07, "loss": 0.5278, "step": 2019 }, { "epoch": 2.462225832656377, "grad_norm": 0.38613963899490084, "learning_rate": 9.432703166041085e-07, "loss": 0.4996, "step": 2020 }, { "epoch": 2.4634443541835904, "grad_norm": 0.38482050851065364, "learning_rate": 9.391270008987946e-07, "loss": 0.5189, "step": 2021 }, { "epoch": 2.464662875710804, "grad_norm": 0.39758696168412205, "learning_rate": 9.349918616258113e-07, "loss": 0.5078, "step": 2022 }, { "epoch": 2.465881397238018, "grad_norm": 0.38614047610686164, "learning_rate": 9.308649071111259e-07, "loss": 0.4729, "step": 2023 }, { "epoch": 2.4670999187652316, "grad_norm": 0.37024789300038397, "learning_rate": 9.267461456642235e-07, "loss": 0.5187, "step": 2024 }, { "epoch": 2.468318440292445, "grad_norm": 0.393874443470908, "learning_rate": 9.226355855780922e-07, "loss": 0.5266, "step": 2025 }, { "epoch": 2.4695369618196588, "grad_norm": 0.39515255892810097, "learning_rate": 9.185332351292059e-07, "loss": 0.4979, "step": 2026 }, { "epoch": 2.4707554833468723, "grad_norm": 0.36209573390534977, "learning_rate": 9.144391025775123e-07, "loss": 0.4685, "step": 2027 }, { "epoch": 2.471974004874086, "grad_norm": 0.3690675939364142, "learning_rate": 9.10353196166412e-07, "loss": 0.5109, "step": 2028 }, { "epoch": 2.4731925264013, "grad_norm": 0.4126364096164172, "learning_rate": 9.0627552412274e-07, "loss": 0.551, "step": 2029 }, { "epoch": 2.4744110479285135, "grad_norm": 0.39808305056022897, "learning_rate": 9.022060946567512e-07, "loss": 0.4829, "step": 2030 }, { "epoch": 2.475629569455727, "grad_norm": 0.3791592284080857, "learning_rate": 8.981449159621075e-07, "loss": 0.4993, "step": 2031 }, { "epoch": 2.4768480909829407, "grad_norm": 0.3890390516980624, "learning_rate": 8.940919962158584e-07, "loss": 0.5213, "step": 2032 }, { "epoch": 2.4780666125101543, "grad_norm": 0.42524657999992466, "learning_rate": 8.900473435784196e-07, "loss": 0.5666, "step": 2033 }, { "epoch": 2.479285134037368, "grad_norm": 0.3815964696084361, "learning_rate": 8.860109661935673e-07, "loss": 0.4625, "step": 2034 }, { "epoch": 2.480503655564582, "grad_norm": 0.42469861666467223, "learning_rate": 8.819828721884094e-07, "loss": 0.5373, "step": 2035 }, { "epoch": 2.4817221770917954, "grad_norm": 0.38320361649924684, "learning_rate": 8.779630696733821e-07, "loss": 0.5375, "step": 2036 }, { "epoch": 2.482940698619009, "grad_norm": 0.3687214848508832, "learning_rate": 8.739515667422211e-07, "loss": 0.4435, "step": 2037 }, { "epoch": 2.4841592201462226, "grad_norm": 0.40061711007827416, "learning_rate": 8.699483714719547e-07, "loss": 0.5467, "step": 2038 }, { "epoch": 2.485377741673436, "grad_norm": 0.40521522379814523, "learning_rate": 8.659534919228845e-07, "loss": 0.536, "step": 2039 }, { "epoch": 2.4865962632006497, "grad_norm": 0.3672113864753048, "learning_rate": 8.619669361385663e-07, "loss": 0.4978, "step": 2040 }, { "epoch": 2.4878147847278633, "grad_norm": 0.3620893091593676, "learning_rate": 8.579887121457952e-07, "loss": 0.5038, "step": 2041 }, { "epoch": 2.4890333062550773, "grad_norm": 0.3663778220669876, "learning_rate": 8.540188279545942e-07, "loss": 0.4862, "step": 2042 }, { "epoch": 2.490251827782291, "grad_norm": 0.38043441191253624, "learning_rate": 8.500572915581923e-07, "loss": 0.5152, "step": 2043 }, { "epoch": 2.4914703493095045, "grad_norm": 0.3942635437659399, "learning_rate": 8.461041109330132e-07, "loss": 0.5055, "step": 2044 }, { "epoch": 2.492688870836718, "grad_norm": 0.3729970950643679, "learning_rate": 8.421592940386514e-07, "loss": 0.5022, "step": 2045 }, { "epoch": 2.4939073923639317, "grad_norm": 0.40364708615741257, "learning_rate": 8.382228488178639e-07, "loss": 0.5297, "step": 2046 }, { "epoch": 2.4951259138911452, "grad_norm": 0.3841836875471797, "learning_rate": 8.342947831965537e-07, "loss": 0.4594, "step": 2047 }, { "epoch": 2.496344435418359, "grad_norm": 0.39167222446559674, "learning_rate": 8.3037510508375e-07, "loss": 0.538, "step": 2048 }, { "epoch": 2.497562956945573, "grad_norm": 0.36017475560838597, "learning_rate": 8.264638223715916e-07, "loss": 0.4904, "step": 2049 }, { "epoch": 2.4987814784727864, "grad_norm": 0.38494521342543364, "learning_rate": 8.225609429353187e-07, "loss": 0.5098, "step": 2050 }, { "epoch": 2.5, "grad_norm": 0.3915568133305174, "learning_rate": 8.186664746332457e-07, "loss": 0.5479, "step": 2051 }, { "epoch": 2.5012185215272136, "grad_norm": 0.3653001722783512, "learning_rate": 8.147804253067581e-07, "loss": 0.5505, "step": 2052 }, { "epoch": 2.502437043054427, "grad_norm": 0.38529539896383097, "learning_rate": 8.109028027802834e-07, "loss": 0.5075, "step": 2053 }, { "epoch": 2.503655564581641, "grad_norm": 0.32985269566739706, "learning_rate": 8.070336148612873e-07, "loss": 0.4737, "step": 2054 }, { "epoch": 2.5048740861088543, "grad_norm": 0.3688596078684635, "learning_rate": 8.031728693402502e-07, "loss": 0.4933, "step": 2055 }, { "epoch": 2.5060926076360683, "grad_norm": 0.3574147764462336, "learning_rate": 7.993205739906551e-07, "loss": 0.5036, "step": 2056 }, { "epoch": 2.507311129163282, "grad_norm": 0.3933673997370336, "learning_rate": 7.954767365689675e-07, "loss": 0.5284, "step": 2057 }, { "epoch": 2.5085296506904955, "grad_norm": 0.3804892757598497, "learning_rate": 7.916413648146282e-07, "loss": 0.5314, "step": 2058 }, { "epoch": 2.509748172217709, "grad_norm": 0.3972280482401795, "learning_rate": 7.878144664500304e-07, "loss": 0.5042, "step": 2059 }, { "epoch": 2.5109666937449227, "grad_norm": 0.4139170649517036, "learning_rate": 7.839960491805048e-07, "loss": 0.513, "step": 2060 }, { "epoch": 2.5121852152721367, "grad_norm": 0.3682306221980358, "learning_rate": 7.80186120694309e-07, "loss": 0.5082, "step": 2061 }, { "epoch": 2.5134037367993503, "grad_norm": 0.40743403282060575, "learning_rate": 7.763846886626048e-07, "loss": 0.4982, "step": 2062 }, { "epoch": 2.514622258326564, "grad_norm": 0.3807959558438016, "learning_rate": 7.725917607394512e-07, "loss": 0.4893, "step": 2063 }, { "epoch": 2.5158407798537774, "grad_norm": 0.3774151979891591, "learning_rate": 7.6880734456178e-07, "loss": 0.5308, "step": 2064 }, { "epoch": 2.517059301380991, "grad_norm": 0.39200277210093626, "learning_rate": 7.650314477493875e-07, "loss": 0.5221, "step": 2065 }, { "epoch": 2.5182778229082046, "grad_norm": 0.3987158423288902, "learning_rate": 7.612640779049174e-07, "loss": 0.5387, "step": 2066 }, { "epoch": 2.519496344435418, "grad_norm": 0.3432299316334845, "learning_rate": 7.575052426138424e-07, "loss": 0.448, "step": 2067 }, { "epoch": 2.520714865962632, "grad_norm": 0.40306877146829656, "learning_rate": 7.537549494444502e-07, "loss": 0.5319, "step": 2068 }, { "epoch": 2.5219333874898457, "grad_norm": 0.3624054180666312, "learning_rate": 7.500132059478327e-07, "loss": 0.4755, "step": 2069 }, { "epoch": 2.5231519090170593, "grad_norm": 0.3943720013643357, "learning_rate": 7.462800196578662e-07, "loss": 0.5517, "step": 2070 }, { "epoch": 2.524370430544273, "grad_norm": 0.3760692184644974, "learning_rate": 7.425553980911959e-07, "loss": 0.5198, "step": 2071 }, { "epoch": 2.5255889520714865, "grad_norm": 0.36875663183404517, "learning_rate": 7.388393487472223e-07, "loss": 0.5099, "step": 2072 }, { "epoch": 2.5268074735987005, "grad_norm": 0.3765385801804941, "learning_rate": 7.351318791080881e-07, "loss": 0.4877, "step": 2073 }, { "epoch": 2.5280259951259136, "grad_norm": 0.3880867322532877, "learning_rate": 7.314329966386596e-07, "loss": 0.5191, "step": 2074 }, { "epoch": 2.5292445166531277, "grad_norm": 0.38480762192630874, "learning_rate": 7.277427087865124e-07, "loss": 0.5367, "step": 2075 }, { "epoch": 2.5304630381803412, "grad_norm": 0.37367690807549686, "learning_rate": 7.240610229819195e-07, "loss": 0.4796, "step": 2076 }, { "epoch": 2.531681559707555, "grad_norm": 0.356459470205227, "learning_rate": 7.203879466378311e-07, "loss": 0.4846, "step": 2077 }, { "epoch": 2.5329000812347684, "grad_norm": 0.368312803237026, "learning_rate": 7.167234871498646e-07, "loss": 0.512, "step": 2078 }, { "epoch": 2.534118602761982, "grad_norm": 0.42790949260764394, "learning_rate": 7.130676518962859e-07, "loss": 0.5199, "step": 2079 }, { "epoch": 2.535337124289196, "grad_norm": 0.3760245356587111, "learning_rate": 7.094204482379985e-07, "loss": 0.5206, "step": 2080 }, { "epoch": 2.5365556458164096, "grad_norm": 0.36529563925832975, "learning_rate": 7.057818835185243e-07, "loss": 0.5169, "step": 2081 }, { "epoch": 2.537774167343623, "grad_norm": 0.37415123963436103, "learning_rate": 7.021519650639952e-07, "loss": 0.4682, "step": 2082 }, { "epoch": 2.5389926888708367, "grad_norm": 0.3599256024573686, "learning_rate": 6.985307001831266e-07, "loss": 0.5237, "step": 2083 }, { "epoch": 2.5402112103980503, "grad_norm": 0.37172969261280475, "learning_rate": 6.949180961672159e-07, "loss": 0.5229, "step": 2084 }, { "epoch": 2.541429731925264, "grad_norm": 0.3692464609849223, "learning_rate": 6.913141602901213e-07, "loss": 0.4967, "step": 2085 }, { "epoch": 2.5426482534524775, "grad_norm": 0.41481021551912467, "learning_rate": 6.877188998082484e-07, "loss": 0.5364, "step": 2086 }, { "epoch": 2.5438667749796915, "grad_norm": 0.3587567944310898, "learning_rate": 6.841323219605333e-07, "loss": 0.477, "step": 2087 }, { "epoch": 2.545085296506905, "grad_norm": 0.36227017983644627, "learning_rate": 6.805544339684295e-07, "loss": 0.5186, "step": 2088 }, { "epoch": 2.5463038180341186, "grad_norm": 0.3848961894752312, "learning_rate": 6.769852430358969e-07, "loss": 0.494, "step": 2089 }, { "epoch": 2.5475223395613322, "grad_norm": 0.400827672871941, "learning_rate": 6.734247563493829e-07, "loss": 0.5104, "step": 2090 }, { "epoch": 2.548740861088546, "grad_norm": 0.3858206572812583, "learning_rate": 6.698729810778065e-07, "loss": 0.5203, "step": 2091 }, { "epoch": 2.5499593826157594, "grad_norm": 0.39420570104347397, "learning_rate": 6.663299243725512e-07, "loss": 0.514, "step": 2092 }, { "epoch": 2.551177904142973, "grad_norm": 0.37623344903141814, "learning_rate": 6.627955933674412e-07, "loss": 0.4675, "step": 2093 }, { "epoch": 2.552396425670187, "grad_norm": 0.37984856280561025, "learning_rate": 6.592699951787362e-07, "loss": 0.5349, "step": 2094 }, { "epoch": 2.5536149471974006, "grad_norm": 0.38942296808421134, "learning_rate": 6.55753136905109e-07, "loss": 0.5222, "step": 2095 }, { "epoch": 2.554833468724614, "grad_norm": 0.38744941426091656, "learning_rate": 6.522450256276363e-07, "loss": 0.4997, "step": 2096 }, { "epoch": 2.5560519902518277, "grad_norm": 0.40862429991424404, "learning_rate": 6.487456684097848e-07, "loss": 0.5409, "step": 2097 }, { "epoch": 2.5572705117790413, "grad_norm": 0.37635062650001033, "learning_rate": 6.452550722973927e-07, "loss": 0.4627, "step": 2098 }, { "epoch": 2.5584890333062553, "grad_norm": 0.4221777822228316, "learning_rate": 6.417732443186575e-07, "loss": 0.5358, "step": 2099 }, { "epoch": 2.5597075548334685, "grad_norm": 0.39847174733267055, "learning_rate": 6.383001914841252e-07, "loss": 0.5012, "step": 2100 }, { "epoch": 2.5609260763606825, "grad_norm": 0.3748715416676312, "learning_rate": 6.348359207866722e-07, "loss": 0.4956, "step": 2101 }, { "epoch": 2.562144597887896, "grad_norm": 0.37750025006496746, "learning_rate": 6.313804392014905e-07, "loss": 0.4854, "step": 2102 }, { "epoch": 2.5633631194151096, "grad_norm": 0.3998375296968308, "learning_rate": 6.279337536860786e-07, "loss": 0.5143, "step": 2103 }, { "epoch": 2.564581640942323, "grad_norm": 0.3710721048856582, "learning_rate": 6.244958711802213e-07, "loss": 0.5591, "step": 2104 }, { "epoch": 2.565800162469537, "grad_norm": 0.34868738151134687, "learning_rate": 6.210667986059821e-07, "loss": 0.4551, "step": 2105 }, { "epoch": 2.567018683996751, "grad_norm": 0.35595641503961983, "learning_rate": 6.17646542867682e-07, "loss": 0.5152, "step": 2106 }, { "epoch": 2.5682372055239644, "grad_norm": 0.36663979047928985, "learning_rate": 6.142351108518929e-07, "loss": 0.503, "step": 2107 }, { "epoch": 2.569455727051178, "grad_norm": 0.34787252687208675, "learning_rate": 6.108325094274209e-07, "loss": 0.5031, "step": 2108 }, { "epoch": 2.5706742485783916, "grad_norm": 0.39033263561688103, "learning_rate": 6.074387454452891e-07, "loss": 0.5214, "step": 2109 }, { "epoch": 2.571892770105605, "grad_norm": 0.38512927731883373, "learning_rate": 6.040538257387268e-07, "loss": 0.5198, "step": 2110 }, { "epoch": 2.5731112916328187, "grad_norm": 0.3590301126097114, "learning_rate": 6.006777571231587e-07, "loss": 0.5027, "step": 2111 }, { "epoch": 2.5743298131600323, "grad_norm": 0.3732504638805604, "learning_rate": 5.973105463961864e-07, "loss": 0.5066, "step": 2112 }, { "epoch": 2.5755483346872463, "grad_norm": 0.3729739011338398, "learning_rate": 5.939522003375753e-07, "loss": 0.4958, "step": 2113 }, { "epoch": 2.57676685621446, "grad_norm": 0.37186730911837346, "learning_rate": 5.906027257092444e-07, "loss": 0.4761, "step": 2114 }, { "epoch": 2.5779853777416735, "grad_norm": 0.3661760756265481, "learning_rate": 5.872621292552477e-07, "loss": 0.5327, "step": 2115 }, { "epoch": 2.579203899268887, "grad_norm": 0.40542839626324956, "learning_rate": 5.839304177017663e-07, "loss": 0.5512, "step": 2116 }, { "epoch": 2.5804224207961006, "grad_norm": 0.3840467276263846, "learning_rate": 5.806075977570886e-07, "loss": 0.4793, "step": 2117 }, { "epoch": 2.5816409423233146, "grad_norm": 0.37820337565321277, "learning_rate": 5.772936761116027e-07, "loss": 0.506, "step": 2118 }, { "epoch": 2.582859463850528, "grad_norm": 0.3797306170789339, "learning_rate": 5.739886594377803e-07, "loss": 0.508, "step": 2119 }, { "epoch": 2.584077985377742, "grad_norm": 0.3828935693851265, "learning_rate": 5.706925543901609e-07, "loss": 0.5097, "step": 2120 }, { "epoch": 2.5852965069049554, "grad_norm": 0.3900080504691436, "learning_rate": 5.674053676053415e-07, "loss": 0.5168, "step": 2121 }, { "epoch": 2.586515028432169, "grad_norm": 0.3587725291460617, "learning_rate": 5.641271057019637e-07, "loss": 0.4565, "step": 2122 }, { "epoch": 2.5877335499593825, "grad_norm": 0.3939424632788925, "learning_rate": 5.608577752806987e-07, "loss": 0.5494, "step": 2123 }, { "epoch": 2.588952071486596, "grad_norm": 0.3725432276278501, "learning_rate": 5.575973829242365e-07, "loss": 0.4588, "step": 2124 }, { "epoch": 2.59017059301381, "grad_norm": 0.38604468058456287, "learning_rate": 5.543459351972635e-07, "loss": 0.529, "step": 2125 }, { "epoch": 2.5913891145410237, "grad_norm": 0.36341318860508387, "learning_rate": 5.511034386464642e-07, "loss": 0.494, "step": 2126 }, { "epoch": 2.5926076360682373, "grad_norm": 0.35625493095798805, "learning_rate": 5.478698998004967e-07, "loss": 0.5456, "step": 2127 }, { "epoch": 2.593826157595451, "grad_norm": 0.36227564286221264, "learning_rate": 5.446453251699851e-07, "loss": 0.514, "step": 2128 }, { "epoch": 2.5950446791226645, "grad_norm": 0.3662431166869742, "learning_rate": 5.414297212475012e-07, "loss": 0.5157, "step": 2129 }, { "epoch": 2.596263200649878, "grad_norm": 0.3558072452798451, "learning_rate": 5.382230945075556e-07, "loss": 0.4961, "step": 2130 }, { "epoch": 2.5974817221770916, "grad_norm": 0.3795263836967965, "learning_rate": 5.350254514065856e-07, "loss": 0.5127, "step": 2131 }, { "epoch": 2.5987002437043056, "grad_norm": 0.3690040036136185, "learning_rate": 5.318367983829393e-07, "loss": 0.4908, "step": 2132 }, { "epoch": 2.599918765231519, "grad_norm": 0.3608821461773019, "learning_rate": 5.286571418568615e-07, "loss": 0.5289, "step": 2133 }, { "epoch": 2.601137286758733, "grad_norm": 0.4006495491671045, "learning_rate": 5.254864882304855e-07, "loss": 0.5254, "step": 2134 }, { "epoch": 2.6023558082859464, "grad_norm": 0.38150929128537214, "learning_rate": 5.223248438878176e-07, "loss": 0.4622, "step": 2135 }, { "epoch": 2.60357432981316, "grad_norm": 0.400783680185111, "learning_rate": 5.191722151947227e-07, "loss": 0.5474, "step": 2136 }, { "epoch": 2.6047928513403735, "grad_norm": 0.3662412318337768, "learning_rate": 5.160286084989119e-07, "loss": 0.536, "step": 2137 }, { "epoch": 2.606011372867587, "grad_norm": 0.37308572148487257, "learning_rate": 5.128940301299334e-07, "loss": 0.4731, "step": 2138 }, { "epoch": 2.607229894394801, "grad_norm": 0.39187078975715245, "learning_rate": 5.097684863991575e-07, "loss": 0.5249, "step": 2139 }, { "epoch": 2.6084484159220147, "grad_norm": 0.3885064721528569, "learning_rate": 5.066519835997613e-07, "loss": 0.5225, "step": 2140 }, { "epoch": 2.6096669374492283, "grad_norm": 0.41543896829402627, "learning_rate": 5.03544528006718e-07, "loss": 0.5476, "step": 2141 }, { "epoch": 2.610885458976442, "grad_norm": 0.33915812403705176, "learning_rate": 5.004461258767873e-07, "loss": 0.4825, "step": 2142 }, { "epoch": 2.6121039805036554, "grad_norm": 0.39963867108256157, "learning_rate": 4.973567834484988e-07, "loss": 0.4868, "step": 2143 }, { "epoch": 2.6133225020308695, "grad_norm": 0.4052069661227251, "learning_rate": 4.942765069421384e-07, "loss": 0.5707, "step": 2144 }, { "epoch": 2.6145410235580826, "grad_norm": 0.3744715850855104, "learning_rate": 4.91205302559743e-07, "loss": 0.4698, "step": 2145 }, { "epoch": 2.6157595450852966, "grad_norm": 0.39172802789195654, "learning_rate": 4.881431764850775e-07, "loss": 0.5429, "step": 2146 }, { "epoch": 2.61697806661251, "grad_norm": 0.3617617734796279, "learning_rate": 4.850901348836328e-07, "loss": 0.5195, "step": 2147 }, { "epoch": 2.618196588139724, "grad_norm": 0.3582182319101665, "learning_rate": 4.820461839026047e-07, "loss": 0.5237, "step": 2148 }, { "epoch": 2.6194151096669374, "grad_norm": 0.382565389265081, "learning_rate": 4.79011329670887e-07, "loss": 0.508, "step": 2149 }, { "epoch": 2.620633631194151, "grad_norm": 0.36371999280375944, "learning_rate": 4.7598557829905913e-07, "loss": 0.5138, "step": 2150 }, { "epoch": 2.621852152721365, "grad_norm": 0.36372813807546805, "learning_rate": 4.729689358793693e-07, "loss": 0.4863, "step": 2151 }, { "epoch": 2.6230706742485785, "grad_norm": 0.4358272328748702, "learning_rate": 4.699614084857257e-07, "loss": 0.5501, "step": 2152 }, { "epoch": 2.624289195775792, "grad_norm": 0.40082789201202496, "learning_rate": 4.669630021736854e-07, "loss": 0.4957, "step": 2153 }, { "epoch": 2.6255077173030057, "grad_norm": 0.38531826765138316, "learning_rate": 4.639737229804403e-07, "loss": 0.5189, "step": 2154 }, { "epoch": 2.6267262388302193, "grad_norm": 0.3510117904392168, "learning_rate": 4.609935769248025e-07, "loss": 0.4438, "step": 2155 }, { "epoch": 2.627944760357433, "grad_norm": 0.3854632098940677, "learning_rate": 4.5802257000719885e-07, "loss": 0.5672, "step": 2156 }, { "epoch": 2.6291632818846464, "grad_norm": 0.356713590588076, "learning_rate": 4.5506070820964973e-07, "loss": 0.4941, "step": 2157 }, { "epoch": 2.6303818034118605, "grad_norm": 0.37107534196018116, "learning_rate": 4.5210799749576815e-07, "loss": 0.537, "step": 2158 }, { "epoch": 2.631600324939074, "grad_norm": 0.36951174703750844, "learning_rate": 4.4916444381073674e-07, "loss": 0.487, "step": 2159 }, { "epoch": 2.6328188464662876, "grad_norm": 0.3737744583819628, "learning_rate": 4.4623005308130243e-07, "loss": 0.5047, "step": 2160 }, { "epoch": 2.634037367993501, "grad_norm": 0.41814109045623277, "learning_rate": 4.433048312157651e-07, "loss": 0.4921, "step": 2161 }, { "epoch": 2.6352558895207148, "grad_norm": 0.38314084064991044, "learning_rate": 4.4038878410396003e-07, "loss": 0.545, "step": 2162 }, { "epoch": 2.636474411047929, "grad_norm": 0.34232486717400545, "learning_rate": 4.374819176172501e-07, "loss": 0.451, "step": 2163 }, { "epoch": 2.637692932575142, "grad_norm": 0.4161225048009829, "learning_rate": 4.3458423760851523e-07, "loss": 0.5468, "step": 2164 }, { "epoch": 2.638911454102356, "grad_norm": 0.3670515864821956, "learning_rate": 4.316957499121377e-07, "loss": 0.5067, "step": 2165 }, { "epoch": 2.6401299756295695, "grad_norm": 0.3624129319596192, "learning_rate": 4.2881646034398926e-07, "loss": 0.4816, "step": 2166 }, { "epoch": 2.641348497156783, "grad_norm": 0.3972920967019095, "learning_rate": 4.2594637470142587e-07, "loss": 0.5452, "step": 2167 }, { "epoch": 2.6425670186839967, "grad_norm": 0.36647997443354524, "learning_rate": 4.230854987632671e-07, "loss": 0.4962, "step": 2168 }, { "epoch": 2.6437855402112103, "grad_norm": 0.38616087967711843, "learning_rate": 4.2023383828979305e-07, "loss": 0.5471, "step": 2169 }, { "epoch": 2.6450040617384243, "grad_norm": 0.35103710867257426, "learning_rate": 4.173913990227252e-07, "loss": 0.4679, "step": 2170 }, { "epoch": 2.6462225832656374, "grad_norm": 0.39309483512948734, "learning_rate": 4.145581866852211e-07, "loss": 0.5224, "step": 2171 }, { "epoch": 2.6474411047928514, "grad_norm": 0.38439655446848475, "learning_rate": 4.1173420698186027e-07, "loss": 0.504, "step": 2172 }, { "epoch": 2.648659626320065, "grad_norm": 0.3577220323312346, "learning_rate": 4.089194655986306e-07, "loss": 0.5131, "step": 2173 }, { "epoch": 2.6498781478472786, "grad_norm": 0.36326136971896916, "learning_rate": 4.0611396820291915e-07, "loss": 0.5451, "step": 2174 }, { "epoch": 2.651096669374492, "grad_norm": 0.36056618051796013, "learning_rate": 4.0331772044350235e-07, "loss": 0.5175, "step": 2175 }, { "epoch": 2.6523151909017058, "grad_norm": 0.35979010393998906, "learning_rate": 4.0053072795053163e-07, "loss": 0.5057, "step": 2176 }, { "epoch": 2.6535337124289198, "grad_norm": 0.38026873249239673, "learning_rate": 3.9775299633552535e-07, "loss": 0.5173, "step": 2177 }, { "epoch": 2.6547522339561334, "grad_norm": 0.34812609416605766, "learning_rate": 3.9498453119134917e-07, "loss": 0.4774, "step": 2178 }, { "epoch": 2.655970755483347, "grad_norm": 0.37873267778767, "learning_rate": 3.9222533809221864e-07, "loss": 0.5171, "step": 2179 }, { "epoch": 2.6571892770105605, "grad_norm": 0.3876778226332137, "learning_rate": 3.894754225936753e-07, "loss": 0.5367, "step": 2180 }, { "epoch": 2.658407798537774, "grad_norm": 0.37800746580583733, "learning_rate": 3.8673479023258464e-07, "loss": 0.5366, "step": 2181 }, { "epoch": 2.6596263200649877, "grad_norm": 0.3592372247497727, "learning_rate": 3.840034465271164e-07, "loss": 0.4612, "step": 2182 }, { "epoch": 2.6608448415922012, "grad_norm": 0.37676898244480056, "learning_rate": 3.812813969767398e-07, "loss": 0.5335, "step": 2183 }, { "epoch": 2.6620633631194153, "grad_norm": 0.3889687505966972, "learning_rate": 3.7856864706221187e-07, "loss": 0.5379, "step": 2184 }, { "epoch": 2.663281884646629, "grad_norm": 0.3429621452435135, "learning_rate": 3.7586520224556444e-07, "loss": 0.4249, "step": 2185 }, { "epoch": 2.6645004061738424, "grad_norm": 0.4100593265019823, "learning_rate": 3.731710679700923e-07, "loss": 0.5571, "step": 2186 }, { "epoch": 2.665718927701056, "grad_norm": 0.3754827320099358, "learning_rate": 3.7048624966034506e-07, "loss": 0.4772, "step": 2187 }, { "epoch": 2.6669374492282696, "grad_norm": 0.506362418039737, "learning_rate": 3.6781075272211643e-07, "loss": 0.4898, "step": 2188 }, { "epoch": 2.6681559707554836, "grad_norm": 0.39960584269392463, "learning_rate": 3.6514458254242936e-07, "loss": 0.5355, "step": 2189 }, { "epoch": 2.6693744922826967, "grad_norm": 0.38884516821746157, "learning_rate": 3.6248774448952695e-07, "loss": 0.4607, "step": 2190 }, { "epoch": 2.6705930138099108, "grad_norm": 0.38681697956869593, "learning_rate": 3.598402439128656e-07, "loss": 0.5662, "step": 2191 }, { "epoch": 2.6718115353371243, "grad_norm": 0.3756082857300239, "learning_rate": 3.572020861430997e-07, "loss": 0.5143, "step": 2192 }, { "epoch": 2.673030056864338, "grad_norm": 0.40002189934283794, "learning_rate": 3.545732764920717e-07, "loss": 0.5061, "step": 2193 }, { "epoch": 2.6742485783915515, "grad_norm": 0.36384776166641386, "learning_rate": 3.519538202528011e-07, "loss": 0.504, "step": 2194 }, { "epoch": 2.675467099918765, "grad_norm": 0.3788979703828696, "learning_rate": 3.4934372269947613e-07, "loss": 0.4801, "step": 2195 }, { "epoch": 2.676685621445979, "grad_norm": 0.3818268978478761, "learning_rate": 3.467429890874424e-07, "loss": 0.5279, "step": 2196 }, { "epoch": 2.6779041429731927, "grad_norm": 0.35141288719000796, "learning_rate": 3.4415162465318843e-07, "loss": 0.4803, "step": 2197 }, { "epoch": 2.6791226645004063, "grad_norm": 0.39220258510601774, "learning_rate": 3.4156963461434156e-07, "loss": 0.5009, "step": 2198 }, { "epoch": 2.68034118602762, "grad_norm": 0.4103479084725928, "learning_rate": 3.3899702416965166e-07, "loss": 0.6119, "step": 2199 }, { "epoch": 2.6815597075548334, "grad_norm": 0.3797647584117213, "learning_rate": 3.364337984989846e-07, "loss": 0.4665, "step": 2200 }, { "epoch": 2.682778229082047, "grad_norm": 0.3540938338082574, "learning_rate": 3.3387996276330934e-07, "loss": 0.4382, "step": 2201 }, { "epoch": 2.6839967506092606, "grad_norm": 0.3743322734466896, "learning_rate": 3.313355221046888e-07, "loss": 0.5334, "step": 2202 }, { "epoch": 2.6852152721364746, "grad_norm": 0.38933035048539233, "learning_rate": 3.2880048164627087e-07, "loss": 0.5351, "step": 2203 }, { "epoch": 2.686433793663688, "grad_norm": 0.37060820135278527, "learning_rate": 3.262748464922738e-07, "loss": 0.5097, "step": 2204 }, { "epoch": 2.6876523151909018, "grad_norm": 0.38495794293474345, "learning_rate": 3.2375862172797866e-07, "loss": 0.5678, "step": 2205 }, { "epoch": 2.6888708367181153, "grad_norm": 0.36198556723986514, "learning_rate": 3.212518124197217e-07, "loss": 0.4704, "step": 2206 }, { "epoch": 2.690089358245329, "grad_norm": 0.36538988897114305, "learning_rate": 3.1875442361487987e-07, "loss": 0.5394, "step": 2207 }, { "epoch": 2.6913078797725425, "grad_norm": 0.3529695149923601, "learning_rate": 3.1626646034186084e-07, "loss": 0.4924, "step": 2208 }, { "epoch": 2.692526401299756, "grad_norm": 0.3589469252207183, "learning_rate": 3.1378792761009745e-07, "loss": 0.5141, "step": 2209 }, { "epoch": 2.69374492282697, "grad_norm": 0.3716457330306638, "learning_rate": 3.1131883041003065e-07, "loss": 0.5162, "step": 2210 }, { "epoch": 2.6949634443541837, "grad_norm": 0.39002214118541273, "learning_rate": 3.0885917371310745e-07, "loss": 0.5371, "step": 2211 }, { "epoch": 2.6961819658813972, "grad_norm": 0.38634067585511356, "learning_rate": 3.0640896247176257e-07, "loss": 0.5303, "step": 2212 }, { "epoch": 2.697400487408611, "grad_norm": 0.3679719670190458, "learning_rate": 3.039682016194162e-07, "loss": 0.4844, "step": 2213 }, { "epoch": 2.6986190089358244, "grad_norm": 0.3650561363535968, "learning_rate": 3.015368960704584e-07, "loss": 0.5491, "step": 2214 }, { "epoch": 2.6998375304630384, "grad_norm": 0.35589244778023654, "learning_rate": 2.9911505072024173e-07, "loss": 0.4435, "step": 2215 }, { "epoch": 2.7010560519902516, "grad_norm": 0.3971577742524068, "learning_rate": 2.967026704450704e-07, "loss": 0.5417, "step": 2216 }, { "epoch": 2.7022745735174656, "grad_norm": 0.36768123246070666, "learning_rate": 2.942997601021924e-07, "loss": 0.4946, "step": 2217 }, { "epoch": 2.703493095044679, "grad_norm": 0.3868406198179559, "learning_rate": 2.9190632452978706e-07, "loss": 0.5273, "step": 2218 }, { "epoch": 2.7047116165718927, "grad_norm": 0.3708200192787325, "learning_rate": 2.895223685469578e-07, "loss": 0.5005, "step": 2219 }, { "epoch": 2.7059301380991063, "grad_norm": 0.37953093384871284, "learning_rate": 2.871478969537206e-07, "loss": 0.5435, "step": 2220 }, { "epoch": 2.70714865962632, "grad_norm": 0.3614383708651899, "learning_rate": 2.847829145309933e-07, "loss": 0.4749, "step": 2221 }, { "epoch": 2.708367181153534, "grad_norm": 0.3737932290872502, "learning_rate": 2.824274260405896e-07, "loss": 0.5178, "step": 2222 }, { "epoch": 2.7095857026807475, "grad_norm": 0.3678542573451642, "learning_rate": 2.800814362252091e-07, "loss": 0.5328, "step": 2223 }, { "epoch": 2.710804224207961, "grad_norm": 0.3590587724355208, "learning_rate": 2.7774494980842117e-07, "loss": 0.488, "step": 2224 }, { "epoch": 2.7120227457351747, "grad_norm": 0.37585088629389257, "learning_rate": 2.754179714946653e-07, "loss": 0.4925, "step": 2225 }, { "epoch": 2.7132412672623882, "grad_norm": 0.3719950620904676, "learning_rate": 2.7310050596923323e-07, "loss": 0.4999, "step": 2226 }, { "epoch": 2.714459788789602, "grad_norm": 0.3568715514712545, "learning_rate": 2.7079255789826565e-07, "loss": 0.4807, "step": 2227 }, { "epoch": 2.7156783103168154, "grad_norm": 0.35482907121703094, "learning_rate": 2.6849413192873816e-07, "loss": 0.4793, "step": 2228 }, { "epoch": 2.7168968318440294, "grad_norm": 0.395651827257008, "learning_rate": 2.662052326884551e-07, "loss": 0.544, "step": 2229 }, { "epoch": 2.718115353371243, "grad_norm": 0.3935098863850654, "learning_rate": 2.639258647860399e-07, "loss": 0.5635, "step": 2230 }, { "epoch": 2.7193338748984566, "grad_norm": 0.40165173172581203, "learning_rate": 2.616560328109219e-07, "loss": 0.4864, "step": 2231 }, { "epoch": 2.72055239642567, "grad_norm": 0.37629992760618575, "learning_rate": 2.593957413333331e-07, "loss": 0.4642, "step": 2232 }, { "epoch": 2.7217709179528837, "grad_norm": 0.39061948229389115, "learning_rate": 2.571449949042942e-07, "loss": 0.4931, "step": 2233 }, { "epoch": 2.7229894394800978, "grad_norm": 0.37406505289417313, "learning_rate": 2.549037980556096e-07, "loss": 0.5149, "step": 2234 }, { "epoch": 2.724207961007311, "grad_norm": 0.3865469301953249, "learning_rate": 2.5267215529985346e-07, "loss": 0.5662, "step": 2235 }, { "epoch": 2.725426482534525, "grad_norm": 0.4176157521247648, "learning_rate": 2.5045007113036315e-07, "loss": 0.4846, "step": 2236 }, { "epoch": 2.7266450040617385, "grad_norm": 0.3593515844357923, "learning_rate": 2.4823755002123253e-07, "loss": 0.5028, "step": 2237 }, { "epoch": 2.727863525588952, "grad_norm": 0.3763091273664034, "learning_rate": 2.4603459642729867e-07, "loss": 0.4883, "step": 2238 }, { "epoch": 2.7290820471161656, "grad_norm": 0.3254080082849582, "learning_rate": 2.4384121478413403e-07, "loss": 0.4552, "step": 2239 }, { "epoch": 2.7303005686433792, "grad_norm": 0.3704714874750766, "learning_rate": 2.416574095080404e-07, "loss": 0.5491, "step": 2240 }, { "epoch": 2.7315190901705932, "grad_norm": 0.36236149508058013, "learning_rate": 2.394831849960377e-07, "loss": 0.5425, "step": 2241 }, { "epoch": 2.732737611697807, "grad_norm": 0.3749697833768894, "learning_rate": 2.373185456258531e-07, "loss": 0.5278, "step": 2242 }, { "epoch": 2.7339561332250204, "grad_norm": 0.34972552754271036, "learning_rate": 2.3516349575591568e-07, "loss": 0.4618, "step": 2243 }, { "epoch": 2.735174654752234, "grad_norm": 0.375574385039016, "learning_rate": 2.330180397253473e-07, "loss": 0.5175, "step": 2244 }, { "epoch": 2.7363931762794476, "grad_norm": 0.37141386139233595, "learning_rate": 2.3088218185395195e-07, "loss": 0.5511, "step": 2245 }, { "epoch": 2.737611697806661, "grad_norm": 0.3813893728380543, "learning_rate": 2.2875592644220846e-07, "loss": 0.4508, "step": 2246 }, { "epoch": 2.7388302193338747, "grad_norm": 0.3775198226604257, "learning_rate": 2.266392777712595e-07, "loss": 0.4983, "step": 2247 }, { "epoch": 2.7400487408610887, "grad_norm": 0.3750870423354922, "learning_rate": 2.245322401029082e-07, "loss": 0.5044, "step": 2248 }, { "epoch": 2.7412672623883023, "grad_norm": 0.40047002764422285, "learning_rate": 2.2243481767960483e-07, "loss": 0.5827, "step": 2249 }, { "epoch": 2.742485783915516, "grad_norm": 0.36949294277977374, "learning_rate": 2.2034701472443854e-07, "loss": 0.4752, "step": 2250 }, { "epoch": 2.7437043054427295, "grad_norm": 0.3824346198409812, "learning_rate": 2.1826883544113165e-07, "loss": 0.5286, "step": 2251 }, { "epoch": 2.744922826969943, "grad_norm": 0.3417914549036838, "learning_rate": 2.1620028401402815e-07, "loss": 0.4697, "step": 2252 }, { "epoch": 2.7461413484971566, "grad_norm": 0.41461070303616865, "learning_rate": 2.141413646080881e-07, "loss": 0.5349, "step": 2253 }, { "epoch": 2.74735987002437, "grad_norm": 0.37741611628599325, "learning_rate": 2.1209208136887593e-07, "loss": 0.5375, "step": 2254 }, { "epoch": 2.7485783915515842, "grad_norm": 0.39321969452161015, "learning_rate": 2.1005243842255552e-07, "loss": 0.5025, "step": 2255 }, { "epoch": 2.749796913078798, "grad_norm": 0.36043353231485903, "learning_rate": 2.0802243987588068e-07, "loss": 0.479, "step": 2256 }, { "epoch": 2.7510154346060114, "grad_norm": 0.3771749654256085, "learning_rate": 2.060020898161863e-07, "loss": 0.5296, "step": 2257 }, { "epoch": 2.752233956133225, "grad_norm": 0.36344961189872743, "learning_rate": 2.0399139231137731e-07, "loss": 0.513, "step": 2258 }, { "epoch": 2.7534524776604385, "grad_norm": 0.37387207502104647, "learning_rate": 2.019903514099275e-07, "loss": 0.4837, "step": 2259 }, { "epoch": 2.7546709991876526, "grad_norm": 0.4074258198058105, "learning_rate": 1.999989711408662e-07, "loss": 0.5165, "step": 2260 }, { "epoch": 2.7558895207148657, "grad_norm": 0.40163838399796564, "learning_rate": 1.9801725551377217e-07, "loss": 0.484, "step": 2261 }, { "epoch": 2.7571080422420797, "grad_norm": 0.39748068787202046, "learning_rate": 1.9604520851876196e-07, "loss": 0.5346, "step": 2262 }, { "epoch": 2.7583265637692933, "grad_norm": 0.3837696844219795, "learning_rate": 1.940828341264861e-07, "loss": 0.5195, "step": 2263 }, { "epoch": 2.759545085296507, "grad_norm": 0.372851636737616, "learning_rate": 1.9213013628812173e-07, "loss": 0.5025, "step": 2264 }, { "epoch": 2.7607636068237205, "grad_norm": 0.3824295932103617, "learning_rate": 1.9018711893535991e-07, "loss": 0.4982, "step": 2265 }, { "epoch": 2.761982128350934, "grad_norm": 0.3941575511286188, "learning_rate": 1.8825378598040067e-07, "loss": 0.4943, "step": 2266 }, { "epoch": 2.763200649878148, "grad_norm": 0.3942171147413845, "learning_rate": 1.863301413159474e-07, "loss": 0.5597, "step": 2267 }, { "epoch": 2.7644191714053616, "grad_norm": 0.3934183991195872, "learning_rate": 1.8441618881519186e-07, "loss": 0.483, "step": 2268 }, { "epoch": 2.765637692932575, "grad_norm": 0.37982935450069527, "learning_rate": 1.825119323318153e-07, "loss": 0.4977, "step": 2269 }, { "epoch": 2.766856214459789, "grad_norm": 0.35859063843934896, "learning_rate": 1.8061737569997407e-07, "loss": 0.5082, "step": 2270 }, { "epoch": 2.7680747359870024, "grad_norm": 0.3882067848746523, "learning_rate": 1.787325227342951e-07, "loss": 0.5204, "step": 2271 }, { "epoch": 2.769293257514216, "grad_norm": 0.3679939619950258, "learning_rate": 1.768573772298665e-07, "loss": 0.5395, "step": 2272 }, { "epoch": 2.7705117790414295, "grad_norm": 0.3720953279482073, "learning_rate": 1.7499194296223209e-07, "loss": 0.5176, "step": 2273 }, { "epoch": 2.7717303005686436, "grad_norm": 0.3858227060687811, "learning_rate": 1.7313622368738014e-07, "loss": 0.5067, "step": 2274 }, { "epoch": 2.772948822095857, "grad_norm": 0.3747485160463101, "learning_rate": 1.7129022314174015e-07, "loss": 0.4811, "step": 2275 }, { "epoch": 2.7741673436230707, "grad_norm": 0.38326441088479096, "learning_rate": 1.694539450421734e-07, "loss": 0.4991, "step": 2276 }, { "epoch": 2.7753858651502843, "grad_norm": 0.34922626985376715, "learning_rate": 1.6762739308596343e-07, "loss": 0.5068, "step": 2277 }, { "epoch": 2.776604386677498, "grad_norm": 0.37804059096779785, "learning_rate": 1.6581057095081288e-07, "loss": 0.4969, "step": 2278 }, { "epoch": 2.777822908204712, "grad_norm": 0.39482724733491026, "learning_rate": 1.640034822948311e-07, "loss": 0.5356, "step": 2279 }, { "epoch": 2.779041429731925, "grad_norm": 0.3564142364029908, "learning_rate": 1.6220613075653201e-07, "loss": 0.5082, "step": 2280 }, { "epoch": 2.780259951259139, "grad_norm": 0.3808785336291618, "learning_rate": 1.604185199548225e-07, "loss": 0.5012, "step": 2281 }, { "epoch": 2.7814784727863526, "grad_norm": 0.3466837847337903, "learning_rate": 1.586406534889967e-07, "loss": 0.5215, "step": 2282 }, { "epoch": 2.782696994313566, "grad_norm": 0.35748226035408104, "learning_rate": 1.5687253493873068e-07, "loss": 0.4975, "step": 2283 }, { "epoch": 2.78391551584078, "grad_norm": 0.38342498504946887, "learning_rate": 1.5511416786407164e-07, "loss": 0.499, "step": 2284 }, { "epoch": 2.7851340373679934, "grad_norm": 0.38368966272625266, "learning_rate": 1.5336555580543256e-07, "loss": 0.5289, "step": 2285 }, { "epoch": 2.7863525588952074, "grad_norm": 0.3761350556362916, "learning_rate": 1.51626702283586e-07, "loss": 0.5334, "step": 2286 }, { "epoch": 2.7875710804224205, "grad_norm": 0.34488391672914276, "learning_rate": 1.4989761079965583e-07, "loss": 0.4731, "step": 2287 }, { "epoch": 2.7887896019496345, "grad_norm": 0.3711824659401226, "learning_rate": 1.4817828483510933e-07, "loss": 0.5647, "step": 2288 }, { "epoch": 2.790008123476848, "grad_norm": 0.36364946680537624, "learning_rate": 1.4646872785175182e-07, "loss": 0.5068, "step": 2289 }, { "epoch": 2.7912266450040617, "grad_norm": 0.3640609104391418, "learning_rate": 1.4476894329172042e-07, "loss": 0.5129, "step": 2290 }, { "epoch": 2.7924451665312753, "grad_norm": 0.36422428829864073, "learning_rate": 1.4307893457747358e-07, "loss": 0.5234, "step": 2291 }, { "epoch": 2.793663688058489, "grad_norm": 0.3541446608840156, "learning_rate": 1.4139870511178767e-07, "loss": 0.5035, "step": 2292 }, { "epoch": 2.794882209585703, "grad_norm": 0.3772886381428785, "learning_rate": 1.3972825827774928e-07, "loss": 0.5069, "step": 2293 }, { "epoch": 2.7961007311129165, "grad_norm": 0.39173775427502877, "learning_rate": 1.3806759743874688e-07, "loss": 0.5421, "step": 2294 }, { "epoch": 2.79731925264013, "grad_norm": 0.3652175638097129, "learning_rate": 1.3641672593846632e-07, "loss": 0.5213, "step": 2295 }, { "epoch": 2.7985377741673436, "grad_norm": 0.36544225086563453, "learning_rate": 1.3477564710088097e-07, "loss": 0.4687, "step": 2296 }, { "epoch": 2.799756295694557, "grad_norm": 0.3740249931778571, "learning_rate": 1.3314436423024935e-07, "loss": 0.518, "step": 2297 }, { "epoch": 2.8009748172217708, "grad_norm": 0.37454883087183666, "learning_rate": 1.3152288061110518e-07, "loss": 0.4902, "step": 2298 }, { "epoch": 2.8021933387489844, "grad_norm": 0.3629763422238737, "learning_rate": 1.2991119950825138e-07, "loss": 0.5329, "step": 2299 }, { "epoch": 2.8034118602761984, "grad_norm": 0.3825610104170137, "learning_rate": 1.2830932416675323e-07, "loss": 0.5217, "step": 2300 }, { "epoch": 2.804630381803412, "grad_norm": 0.35458779804689705, "learning_rate": 1.2671725781193467e-07, "loss": 0.482, "step": 2301 }, { "epoch": 2.8058489033306255, "grad_norm": 0.4060620936449498, "learning_rate": 1.251350036493676e-07, "loss": 0.5396, "step": 2302 }, { "epoch": 2.807067424857839, "grad_norm": 0.37363725202431575, "learning_rate": 1.2356256486486806e-07, "loss": 0.4898, "step": 2303 }, { "epoch": 2.8082859463850527, "grad_norm": 0.35835154484517495, "learning_rate": 1.2199994462448906e-07, "loss": 0.493, "step": 2304 }, { "epoch": 2.8095044679122667, "grad_norm": 0.42120102584212404, "learning_rate": 1.2044714607451436e-07, "loss": 0.5257, "step": 2305 }, { "epoch": 2.81072298943948, "grad_norm": 0.3556943551485984, "learning_rate": 1.1890417234145246e-07, "loss": 0.5095, "step": 2306 }, { "epoch": 2.811941510966694, "grad_norm": 0.38386985507916965, "learning_rate": 1.1737102653202825e-07, "loss": 0.5279, "step": 2307 }, { "epoch": 2.8131600324939074, "grad_norm": 0.36317152822127746, "learning_rate": 1.1584771173318076e-07, "loss": 0.4927, "step": 2308 }, { "epoch": 2.814378554021121, "grad_norm": 0.4011884968558589, "learning_rate": 1.1433423101205321e-07, "loss": 0.5282, "step": 2309 }, { "epoch": 2.8155970755483346, "grad_norm": 0.3642862846860186, "learning_rate": 1.1283058741598962e-07, "loss": 0.4734, "step": 2310 }, { "epoch": 2.816815597075548, "grad_norm": 0.38393727230766617, "learning_rate": 1.1133678397252434e-07, "loss": 0.5357, "step": 2311 }, { "epoch": 2.818034118602762, "grad_norm": 0.3837474969152823, "learning_rate": 1.0985282368938199e-07, "loss": 0.5024, "step": 2312 }, { "epoch": 2.819252640129976, "grad_norm": 0.4030179784599761, "learning_rate": 1.0837870955446639e-07, "loss": 0.5339, "step": 2313 }, { "epoch": 2.8204711616571894, "grad_norm": 0.3539980646211641, "learning_rate": 1.0691444453585775e-07, "loss": 0.4979, "step": 2314 }, { "epoch": 2.821689683184403, "grad_norm": 0.3490699508092425, "learning_rate": 1.0546003158180496e-07, "loss": 0.4861, "step": 2315 }, { "epoch": 2.8229082047116165, "grad_norm": 0.3504210969211817, "learning_rate": 1.0401547362071939e-07, "loss": 0.4995, "step": 2316 }, { "epoch": 2.82412672623883, "grad_norm": 0.3626756496837975, "learning_rate": 1.0258077356117057e-07, "loss": 0.5019, "step": 2317 }, { "epoch": 2.8253452477660437, "grad_norm": 0.39120905301617104, "learning_rate": 1.0115593429187942e-07, "loss": 0.5056, "step": 2318 }, { "epoch": 2.8265637692932577, "grad_norm": 0.38360724438244953, "learning_rate": 9.974095868171164e-08, "loss": 0.4574, "step": 2319 }, { "epoch": 2.8277822908204713, "grad_norm": 0.4154087623735947, "learning_rate": 9.833584957967491e-08, "loss": 0.5459, "step": 2320 }, { "epoch": 2.829000812347685, "grad_norm": 0.3684945590836955, "learning_rate": 9.694060981490783e-08, "loss": 0.5044, "step": 2321 }, { "epoch": 2.8302193338748984, "grad_norm": 0.3759625212141255, "learning_rate": 9.555524219667989e-08, "loss": 0.5045, "step": 2322 }, { "epoch": 2.831437855402112, "grad_norm": 0.3711414030240098, "learning_rate": 9.417974951438203e-08, "loss": 0.4909, "step": 2323 }, { "epoch": 2.8326563769293256, "grad_norm": 0.40793520425182356, "learning_rate": 9.281413453752386e-08, "loss": 0.5911, "step": 2324 }, { "epoch": 2.833874898456539, "grad_norm": 0.35038615442889337, "learning_rate": 9.145840001572537e-08, "loss": 0.5061, "step": 2325 }, { "epoch": 2.835093419983753, "grad_norm": 0.33298280777144973, "learning_rate": 9.011254867871244e-08, "loss": 0.4843, "step": 2326 }, { "epoch": 2.8363119415109668, "grad_norm": 0.37732042868872595, "learning_rate": 8.877658323631188e-08, "loss": 0.5434, "step": 2327 }, { "epoch": 2.8375304630381804, "grad_norm": 0.3989852512203333, "learning_rate": 8.745050637844532e-08, "loss": 0.5179, "step": 2328 }, { "epoch": 2.838748984565394, "grad_norm": 0.38524826151374814, "learning_rate": 8.613432077512474e-08, "loss": 0.5135, "step": 2329 }, { "epoch": 2.8399675060926075, "grad_norm": 0.3561681050422304, "learning_rate": 8.482802907644528e-08, "loss": 0.5332, "step": 2330 }, { "epoch": 2.8411860276198215, "grad_norm": 0.35905357929351883, "learning_rate": 8.353163391258302e-08, "loss": 0.4736, "step": 2331 }, { "epoch": 2.8424045491470347, "grad_norm": 0.3771943557678179, "learning_rate": 8.224513789378497e-08, "loss": 0.4974, "step": 2332 }, { "epoch": 2.8436230706742487, "grad_norm": 0.3689008459698471, "learning_rate": 8.09685436103691e-08, "loss": 0.5007, "step": 2333 }, { "epoch": 2.8448415922014623, "grad_norm": 0.3946551692601427, "learning_rate": 7.970185363271432e-08, "loss": 0.5555, "step": 2334 }, { "epoch": 2.846060113728676, "grad_norm": 0.3814135760411076, "learning_rate": 7.844507051125937e-08, "loss": 0.4953, "step": 2335 }, { "epoch": 2.8472786352558894, "grad_norm": 0.3877741958314108, "learning_rate": 7.71981967764951e-08, "loss": 0.5059, "step": 2336 }, { "epoch": 2.848497156783103, "grad_norm": 0.364018962944753, "learning_rate": 7.59612349389599e-08, "loss": 0.4948, "step": 2337 }, { "epoch": 2.849715678310317, "grad_norm": 0.4075558874034061, "learning_rate": 7.473418748923545e-08, "loss": 0.5948, "step": 2338 }, { "epoch": 2.8509341998375306, "grad_norm": 0.37855730911487784, "learning_rate": 7.351705689794042e-08, "loss": 0.424, "step": 2339 }, { "epoch": 2.852152721364744, "grad_norm": 0.3738096901546095, "learning_rate": 7.230984561572729e-08, "loss": 0.5434, "step": 2340 }, { "epoch": 2.8533712428919578, "grad_norm": 0.37627980072639083, "learning_rate": 7.11125560732756e-08, "loss": 0.4934, "step": 2341 }, { "epoch": 2.8545897644191713, "grad_norm": 0.39211088076356204, "learning_rate": 6.992519068128701e-08, "loss": 0.4979, "step": 2342 }, { "epoch": 2.855808285946385, "grad_norm": 0.36180712823693784, "learning_rate": 6.8747751830483e-08, "loss": 0.54, "step": 2343 }, { "epoch": 2.8570268074735985, "grad_norm": 0.3422753121701595, "learning_rate": 6.758024189159718e-08, "loss": 0.4674, "step": 2344 }, { "epoch": 2.8582453290008125, "grad_norm": 0.3670191252106066, "learning_rate": 6.64226632153725e-08, "loss": 0.5208, "step": 2345 }, { "epoch": 2.859463850528026, "grad_norm": 0.3796974952937155, "learning_rate": 6.527501813255344e-08, "loss": 0.5399, "step": 2346 }, { "epoch": 2.8606823720552397, "grad_norm": 0.36481445761250786, "learning_rate": 6.413730895388714e-08, "loss": 0.5072, "step": 2347 }, { "epoch": 2.8619008935824533, "grad_norm": 0.3636784036139616, "learning_rate": 6.300953797011178e-08, "loss": 0.5291, "step": 2348 }, { "epoch": 2.863119415109667, "grad_norm": 0.35701639845422045, "learning_rate": 6.18917074519565e-08, "loss": 0.503, "step": 2349 }, { "epoch": 2.864337936636881, "grad_norm": 0.3780638398451471, "learning_rate": 6.078381965013646e-08, "loss": 0.526, "step": 2350 }, { "epoch": 2.865556458164094, "grad_norm": 0.3863922669183168, "learning_rate": 5.968587679534621e-08, "loss": 0.4887, "step": 2351 }, { "epoch": 2.866774979691308, "grad_norm": 0.36868437000440496, "learning_rate": 5.8597881098257924e-08, "loss": 0.535, "step": 2352 }, { "epoch": 2.8679935012185216, "grad_norm": 0.3667806587451281, "learning_rate": 5.751983474951317e-08, "loss": 0.5357, "step": 2353 }, { "epoch": 2.869212022745735, "grad_norm": 0.35160456413985003, "learning_rate": 5.6451739919723417e-08, "loss": 0.4966, "step": 2354 }, { "epoch": 2.8704305442729487, "grad_norm": 0.3702286255153397, "learning_rate": 5.539359875946171e-08, "loss": 0.5364, "step": 2355 }, { "epoch": 2.8716490658001623, "grad_norm": 0.3545425220241147, "learning_rate": 5.434541339926047e-08, "loss": 0.4989, "step": 2356 }, { "epoch": 2.8728675873273763, "grad_norm": 0.37190280510667345, "learning_rate": 5.3307185949605935e-08, "loss": 0.5177, "step": 2357 }, { "epoch": 2.87408610885459, "grad_norm": 0.39562598350439043, "learning_rate": 5.227891850093314e-08, "loss": 0.5489, "step": 2358 }, { "epoch": 2.8753046303818035, "grad_norm": 0.3653775053445267, "learning_rate": 5.12606131236254e-08, "loss": 0.485, "step": 2359 }, { "epoch": 2.876523151909017, "grad_norm": 0.3641584823261951, "learning_rate": 5.025227186800652e-08, "loss": 0.5217, "step": 2360 }, { "epoch": 2.8777416734362307, "grad_norm": 0.38104906167780134, "learning_rate": 4.925389676433745e-08, "loss": 0.485, "step": 2361 }, { "epoch": 2.8789601949634442, "grad_norm": 0.39160093264511175, "learning_rate": 4.8265489822814094e-08, "loss": 0.515, "step": 2362 }, { "epoch": 2.880178716490658, "grad_norm": 0.37954553391255796, "learning_rate": 4.728705303356007e-08, "loss": 0.4743, "step": 2363 }, { "epoch": 2.881397238017872, "grad_norm": 0.3786501167734631, "learning_rate": 4.631858836662562e-08, "loss": 0.5282, "step": 2364 }, { "epoch": 2.8826157595450854, "grad_norm": 0.3847521896579275, "learning_rate": 4.536009777198203e-08, "loss": 0.4954, "step": 2365 }, { "epoch": 2.883834281072299, "grad_norm": 0.3686885036028533, "learning_rate": 4.441158317951777e-08, "loss": 0.5, "step": 2366 }, { "epoch": 2.8850528025995126, "grad_norm": 0.3476414945698473, "learning_rate": 4.347304649903572e-08, "loss": 0.5112, "step": 2367 }, { "epoch": 2.886271324126726, "grad_norm": 0.3501811924875589, "learning_rate": 4.2544489620248155e-08, "loss": 0.5212, "step": 2368 }, { "epoch": 2.8874898456539397, "grad_norm": 0.3522575951304983, "learning_rate": 4.162591441277341e-08, "loss": 0.5216, "step": 2369 }, { "epoch": 2.8887083671811533, "grad_norm": 0.34261605478893353, "learning_rate": 4.071732272613149e-08, "loss": 0.4688, "step": 2370 }, { "epoch": 2.8899268887083673, "grad_norm": 0.3647353980991927, "learning_rate": 3.981871638974177e-08, "loss": 0.5131, "step": 2371 }, { "epoch": 2.891145410235581, "grad_norm": 0.37709939404151627, "learning_rate": 3.8930097212918625e-08, "loss": 0.5103, "step": 2372 }, { "epoch": 2.8923639317627945, "grad_norm": 0.35808464221731223, "learning_rate": 3.805146698486695e-08, "loss": 0.4684, "step": 2373 }, { "epoch": 2.893582453290008, "grad_norm": 0.3812340713874895, "learning_rate": 3.7182827474678273e-08, "loss": 0.5575, "step": 2374 }, { "epoch": 2.8948009748172217, "grad_norm": 0.36259528565916516, "learning_rate": 3.632418043133079e-08, "loss": 0.513, "step": 2375 }, { "epoch": 2.8960194963444357, "grad_norm": 0.3422911070110535, "learning_rate": 3.5475527583681005e-08, "loss": 0.4727, "step": 2376 }, { "epoch": 2.897238017871649, "grad_norm": 0.3609331496351682, "learning_rate": 3.463687064046317e-08, "loss": 0.529, "step": 2377 }, { "epoch": 2.898456539398863, "grad_norm": 0.3819585041721209, "learning_rate": 3.3808211290284886e-08, "loss": 0.481, "step": 2378 }, { "epoch": 2.8996750609260764, "grad_norm": 0.3731434598411168, "learning_rate": 3.2989551201624836e-08, "loss": 0.5226, "step": 2379 }, { "epoch": 2.90089358245329, "grad_norm": 0.3885400700621752, "learning_rate": 3.2180892022826705e-08, "loss": 0.5329, "step": 2380 }, { "epoch": 2.9021121039805036, "grad_norm": 0.3572845398778449, "learning_rate": 3.138223538209973e-08, "loss": 0.4753, "step": 2381 }, { "epoch": 2.903330625507717, "grad_norm": 0.3614383781834413, "learning_rate": 3.059358288751202e-08, "loss": 0.5409, "step": 2382 }, { "epoch": 2.904549147034931, "grad_norm": 0.37189556032401827, "learning_rate": 2.981493612698838e-08, "loss": 0.5195, "step": 2383 }, { "epoch": 2.9057676685621447, "grad_norm": 0.3789215584826568, "learning_rate": 2.9046296668309716e-08, "loss": 0.5074, "step": 2384 }, { "epoch": 2.9069861900893583, "grad_norm": 0.370512893367381, "learning_rate": 2.8287666059104713e-08, "loss": 0.5191, "step": 2385 }, { "epoch": 2.908204711616572, "grad_norm": 0.38297199689289846, "learning_rate": 2.753904582685096e-08, "loss": 0.4737, "step": 2386 }, { "epoch": 2.9094232331437855, "grad_norm": 0.3959947838736986, "learning_rate": 2.6800437478870512e-08, "loss": 0.5115, "step": 2387 }, { "epoch": 2.910641754670999, "grad_norm": 0.3957470998942836, "learning_rate": 2.6071842502326526e-08, "loss": 0.5063, "step": 2388 }, { "epoch": 2.9118602761982126, "grad_norm": 0.3723580924084453, "learning_rate": 2.535326236422053e-08, "loss": 0.4892, "step": 2389 }, { "epoch": 2.9130787977254267, "grad_norm": 0.37260731194295516, "learning_rate": 2.464469851139073e-08, "loss": 0.5542, "step": 2390 }, { "epoch": 2.9142973192526402, "grad_norm": 0.35868024153034395, "learning_rate": 2.394615237050535e-08, "loss": 0.523, "step": 2391 }, { "epoch": 2.915515840779854, "grad_norm": 0.36371587526669685, "learning_rate": 2.3257625348064306e-08, "loss": 0.4825, "step": 2392 }, { "epoch": 2.9167343623070674, "grad_norm": 0.38007578965240923, "learning_rate": 2.2579118830393654e-08, "loss": 0.5096, "step": 2393 }, { "epoch": 2.917952883834281, "grad_norm": 0.3894212168804138, "learning_rate": 2.1910634183644475e-08, "loss": 0.4839, "step": 2394 }, { "epoch": 2.9191714053614946, "grad_norm": 0.40052317739398724, "learning_rate": 2.1252172753787324e-08, "loss": 0.5651, "step": 2395 }, { "epoch": 2.920389926888708, "grad_norm": 0.35188112107988123, "learning_rate": 2.060373586661224e-08, "loss": 0.4977, "step": 2396 }, { "epoch": 2.921608448415922, "grad_norm": 0.3499426602627878, "learning_rate": 1.996532482772595e-08, "loss": 0.4519, "step": 2397 }, { "epoch": 2.9228269699431357, "grad_norm": 0.3812985888964239, "learning_rate": 1.933694092254801e-08, "loss": 0.5197, "step": 2398 }, { "epoch": 2.9240454914703493, "grad_norm": 0.37294376101878496, "learning_rate": 1.8718585416307443e-08, "loss": 0.5252, "step": 2399 }, { "epoch": 2.925264012997563, "grad_norm": 0.390284947297633, "learning_rate": 1.811025955404333e-08, "loss": 0.4939, "step": 2400 }, { "epoch": 2.9264825345247765, "grad_norm": 0.38299656914528474, "learning_rate": 1.751196456059867e-08, "loss": 0.5282, "step": 2401 }, { "epoch": 2.9277010560519905, "grad_norm": 0.4033256657794055, "learning_rate": 1.6923701640621514e-08, "loss": 0.5516, "step": 2402 }, { "epoch": 2.9289195775792036, "grad_norm": 0.3564131129340729, "learning_rate": 1.6345471978558847e-08, "loss": 0.4492, "step": 2403 }, { "epoch": 2.9301380991064176, "grad_norm": 0.3696818550866779, "learning_rate": 1.577727673865659e-08, "loss": 0.5282, "step": 2404 }, { "epoch": 2.9313566206336312, "grad_norm": 0.3511572891694367, "learning_rate": 1.5219117064957934e-08, "loss": 0.5573, "step": 2405 }, { "epoch": 2.932575142160845, "grad_norm": 0.3777779794080296, "learning_rate": 1.4670994081297796e-08, "loss": 0.4964, "step": 2406 }, { "epoch": 2.9337936636880584, "grad_norm": 0.3408974732458375, "learning_rate": 1.413290889130392e-08, "loss": 0.5008, "step": 2407 }, { "epoch": 2.935012185215272, "grad_norm": 0.35326628190060844, "learning_rate": 1.3604862578392996e-08, "loss": 0.4734, "step": 2408 }, { "epoch": 2.936230706742486, "grad_norm": 0.3885332399006649, "learning_rate": 1.3086856205768439e-08, "loss": 0.5695, "step": 2409 }, { "epoch": 2.9374492282696996, "grad_norm": 0.36720005893901214, "learning_rate": 1.257889081641872e-08, "loss": 0.4626, "step": 2410 }, { "epoch": 2.938667749796913, "grad_norm": 0.38654367810832296, "learning_rate": 1.208096743311571e-08, "loss": 0.5201, "step": 2411 }, { "epoch": 2.9398862713241267, "grad_norm": 0.35989398437475195, "learning_rate": 1.159308705841078e-08, "loss": 0.524, "step": 2412 }, { "epoch": 2.9411047928513403, "grad_norm": 0.36447060978418805, "learning_rate": 1.111525067463537e-08, "loss": 0.4977, "step": 2413 }, { "epoch": 2.942323314378554, "grad_norm": 0.3941566187065989, "learning_rate": 1.0647459243897095e-08, "loss": 0.5241, "step": 2414 }, { "epoch": 2.9435418359057675, "grad_norm": 0.3971195756945423, "learning_rate": 1.0189713708078086e-08, "loss": 0.5083, "step": 2415 }, { "epoch": 2.9447603574329815, "grad_norm": 0.3617746907444414, "learning_rate": 9.74201498883387e-09, "loss": 0.4824, "step": 2416 }, { "epoch": 2.945978878960195, "grad_norm": 0.38983833247818245, "learning_rate": 9.304363987591158e-09, "loss": 0.5426, "step": 2417 }, { "epoch": 2.9471974004874086, "grad_norm": 0.3739423997155145, "learning_rate": 8.87676158554507e-09, "loss": 0.4452, "step": 2418 }, { "epoch": 2.948415922014622, "grad_norm": 0.37952476055522766, "learning_rate": 8.459208643659122e-09, "loss": 0.5432, "step": 2419 }, { "epoch": 2.949634443541836, "grad_norm": 0.3634354801661458, "learning_rate": 8.051706002661919e-09, "loss": 0.5223, "step": 2420 }, { "epoch": 2.95085296506905, "grad_norm": 0.3655917645054435, "learning_rate": 7.65425448304713e-09, "loss": 0.4784, "step": 2421 }, { "epoch": 2.952071486596263, "grad_norm": 0.3878857225849821, "learning_rate": 7.266854885069619e-09, "loss": 0.536, "step": 2422 }, { "epoch": 2.953290008123477, "grad_norm": 0.3860010124815134, "learning_rate": 6.889507988745436e-09, "loss": 0.5343, "step": 2423 }, { "epoch": 2.9545085296506906, "grad_norm": 0.37137826994221546, "learning_rate": 6.5222145538501595e-09, "loss": 0.4683, "step": 2424 }, { "epoch": 2.955727051177904, "grad_norm": 0.40358716261973293, "learning_rate": 6.164975319917221e-09, "loss": 0.5118, "step": 2425 }, { "epoch": 2.9569455727051177, "grad_norm": 0.3805519912768825, "learning_rate": 5.817791006235141e-09, "loss": 0.5446, "step": 2426 }, { "epoch": 2.9581640942323313, "grad_norm": 0.3645099929908209, "learning_rate": 5.480662311848628e-09, "loss": 0.4789, "step": 2427 }, { "epoch": 2.9593826157595453, "grad_norm": 0.36366977745298745, "learning_rate": 5.153589915554702e-09, "loss": 0.5268, "step": 2428 }, { "epoch": 2.960601137286759, "grad_norm": 0.35084949812297817, "learning_rate": 4.836574475903244e-09, "loss": 0.4545, "step": 2429 }, { "epoch": 2.9618196588139725, "grad_norm": 0.3688342331523407, "learning_rate": 4.5296166311931125e-09, "loss": 0.5512, "step": 2430 }, { "epoch": 2.963038180341186, "grad_norm": 0.3521398793576215, "learning_rate": 4.232716999474917e-09, "loss": 0.5379, "step": 2431 }, { "epoch": 2.9642567018683996, "grad_norm": 0.3706937567678196, "learning_rate": 3.9458761785460266e-09, "loss": 0.5445, "step": 2432 }, { "epoch": 2.965475223395613, "grad_norm": 0.35060638658108007, "learning_rate": 3.669094745950008e-09, "loss": 0.5027, "step": 2433 }, { "epoch": 2.966693744922827, "grad_norm": 0.33178253347322945, "learning_rate": 3.4023732589777426e-09, "loss": 0.4681, "step": 2434 }, { "epoch": 2.967912266450041, "grad_norm": 0.35617115102694386, "learning_rate": 3.1457122546635353e-09, "loss": 0.5019, "step": 2435 }, { "epoch": 2.9691307879772544, "grad_norm": 0.3548326105732938, "learning_rate": 2.899112249786229e-09, "loss": 0.5276, "step": 2436 }, { "epoch": 2.970349309504468, "grad_norm": 0.3984987781160192, "learning_rate": 2.6625737408669804e-09, "loss": 0.5172, "step": 2437 }, { "epoch": 2.9715678310316815, "grad_norm": 0.362754815960633, "learning_rate": 2.436097204167043e-09, "loss": 0.5206, "step": 2438 }, { "epoch": 2.972786352558895, "grad_norm": 0.38356526868895263, "learning_rate": 2.2196830956905392e-09, "loss": 0.4762, "step": 2439 }, { "epoch": 2.9740048740861087, "grad_norm": 0.37390570550389457, "learning_rate": 2.0133318511800227e-09, "loss": 0.5343, "step": 2440 }, { "epoch": 2.9752233956133223, "grad_norm": 0.35679383760064753, "learning_rate": 1.8170438861159212e-09, "loss": 0.4894, "step": 2441 }, { "epoch": 2.9764419171405363, "grad_norm": 0.3592057260000064, "learning_rate": 1.6308195957182028e-09, "loss": 0.5362, "step": 2442 }, { "epoch": 2.97766043866775, "grad_norm": 0.3259038266407775, "learning_rate": 1.4546593549424892e-09, "loss": 0.4727, "step": 2443 }, { "epoch": 2.9788789601949635, "grad_norm": 0.37363652956982973, "learning_rate": 1.2885635184828326e-09, "loss": 0.5277, "step": 2444 }, { "epoch": 2.980097481722177, "grad_norm": 0.35379174534673236, "learning_rate": 1.1325324207667187e-09, "loss": 0.4837, "step": 2445 }, { "epoch": 2.9813160032493906, "grad_norm": 0.3675995235345453, "learning_rate": 9.865663759578426e-10, "loss": 0.5461, "step": 2446 }, { "epoch": 2.9825345247766046, "grad_norm": 0.37636771913858935, "learning_rate": 8.50665677953888e-10, "loss": 0.4862, "step": 2447 }, { "epoch": 2.9837530463038178, "grad_norm": 0.3681022019205161, "learning_rate": 7.24830600386528e-10, "loss": 0.4963, "step": 2448 }, { "epoch": 2.984971567831032, "grad_norm": 0.3746370138837488, "learning_rate": 6.09061396620314e-10, "loss": 0.534, "step": 2449 }, { "epoch": 2.9861900893582454, "grad_norm": 0.36486933688431966, "learning_rate": 5.033582997526765e-10, "loss": 0.577, "step": 2450 }, { "epoch": 2.987408610885459, "grad_norm": 0.3650821349016925, "learning_rate": 4.0772152261336906e-10, "loss": 0.4508, "step": 2451 }, { "epoch": 2.9886271324126725, "grad_norm": 0.380070504025212, "learning_rate": 3.221512577639141e-10, "loss": 0.5051, "step": 2452 }, { "epoch": 2.989845653939886, "grad_norm": 0.3786185974021647, "learning_rate": 2.466476774970472e-10, "loss": 0.4845, "step": 2453 }, { "epoch": 2.9910641754671, "grad_norm": 0.360482935976555, "learning_rate": 1.812109338367174e-10, "loss": 0.5414, "step": 2454 }, { "epoch": 2.9922826969943137, "grad_norm": 0.372228898631232, "learning_rate": 1.2584115853808697e-10, "loss": 0.5194, "step": 2455 }, { "epoch": 2.9935012185215273, "grad_norm": 0.38381176412541346, "learning_rate": 8.053846308531122e-11, "loss": 0.4912, "step": 2456 }, { "epoch": 2.994719740048741, "grad_norm": 0.3934048198873252, "learning_rate": 4.53029386948689e-11, "loss": 0.5137, "step": 2457 }, { "epoch": 2.9959382615759544, "grad_norm": 0.38117458951656574, "learning_rate": 2.0134656311676658e-11, "loss": 0.5353, "step": 2458 }, { "epoch": 2.997156783103168, "grad_norm": 0.38135142008825745, "learning_rate": 5.033666611864441e-12, "loss": 0.4967, "step": 2459 }, { "epoch": 2.9983753046303816, "grad_norm": 0.36779717909800114, "learning_rate": 0.0, "loss": 0.5127, "step": 2460 }, { "epoch": 2.9983753046303816, "step": 2460, "total_flos": 2708356203970560.0, "train_loss": 0.5759637464110444, "train_runtime": 38974.7056, "train_samples_per_second": 6.062, "train_steps_per_second": 0.063 } ], "logging_steps": 1, "max_steps": 2460, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2708356203970560.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }